diff options
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r-- | ext/pcre/php_pcre.c | 146 |
1 files changed, 73 insertions, 73 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 7cc16ca6e6..cc7cd014d0 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -136,7 +136,7 @@ static PHP_MINFO_FUNCTION(pcre) static PHP_MINIT_FUNCTION(pcre) { REGISTER_INI_ENTRIES(); - + REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT); @@ -296,18 +296,18 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le #endif } } - + p = regex; - + /* Parse through the leading whitespace, and display a warning if we get to the end without encountering a delimiter. */ while (isspace((int)*(unsigned char *)p)) p++; if (*p == 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, + php_error_docref(NULL TSRMLS_CC, E_WARNING, p < regex + regex_len ? "Null byte in regex" : "Empty regular expression"); return NULL; } - + /* Get the delimiter and display a warning if it is alphanumeric or a backslash. */ delimiter = *p++; @@ -360,7 +360,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le } return NULL; } - + /* Make a copy of the actual pattern. */ pattern = estrndup(p, pp-p); @@ -368,7 +368,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le pp++; /* Parse through the options, setting appropriate flags. Display - a warning if we encounter an unknown modifier. */ + a warning if we encounter an unknown modifier. */ while (pp < regex + regex_len) { switch (*pp++) { /* Perl compatible options */ @@ -376,7 +376,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le case 'm': coptions |= PCRE_MULTILINE; break; case 's': coptions |= PCRE_DOTALL; break; case 'x': coptions |= PCRE_EXTENDED; break; - + /* PCRE specific options */ case 'A': coptions |= PCRE_ANCHORED; break; case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break; @@ -389,12 +389,12 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le the PCRE_UCP option. */ #ifdef PCRE_UCP coptions |= PCRE_UCP; -#endif +#endif break; /* Custom preg options */ case 'e': poptions |= PREG_REPLACE_EVAL; break; - + case ' ': case '\n': break; @@ -474,7 +474,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le * at end of request. However PCRE_G(pcre_cache) must be consistent * on the next request as well. So we disable usage of interned strings * as hash keys especually for this table. - * See bug #63180 + * See bug #63180 */ if (IS_INTERNED(regex)) { regex = tmp = estrndup(regex, regex_len); @@ -503,7 +503,7 @@ PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_ if (preg_options) { *preg_options = pce ? pce->preg_options : 0; } - + return pce ? pce->re : NULL; } /* }}} */ @@ -513,7 +513,7 @@ PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_ PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC) { pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC); - + if (extra) { *extra = pce ? pce->extra : NULL; } @@ -523,7 +523,7 @@ PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *pr if (compile_options) { *compile_options = pce ? pce->compile_options : 0; } - + return pce ? pce->re : NULL; } /* }}} */ @@ -540,7 +540,7 @@ static inline void add_offset_pair(zval *result, char *str, int len, int offset, /* Add (match, offset) to the return value */ add_next_index_stringl(match_pair, str, len, 1); add_next_index_long(match_pair, offset); - + if (name) { zval_add_ref(&match_pair); zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL); @@ -565,7 +565,7 @@ static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ * &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) { RETURN_FALSE; } - + /* Compile regex or get it from cache. */ if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { RETURN_FALSE; @@ -676,7 +676,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec matched = 0; PCRE_G(error_code) = PHP_PCRE_NO_ERROR; - + do { /* Execute the regular expression. */ count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, @@ -698,7 +698,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec /* If subpatterns array has been passed, fill it in with values. */ if (subpats != NULL) { /* Try to get the list of substrings and display a warning if failed. */ - if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) { + if ((offsets[1] - offsets[0] < 0) || pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) { efree(subpat_names); efree(offsets); if (match_sets) efree(match_sets); @@ -733,7 +733,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec ALLOC_ZVAL(result_set); array_init(result_set); INIT_PZVAL(result_set); - + /* Add all the subpatterns to it */ for (i = 0; i < count; i++) { if (offset_capture) { @@ -787,13 +787,13 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec pcre_handle_exec_error(count TSRMLS_CC); break; } - + /* If we have matched an empty string, mimic what Perl's /g options does. This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try the match again at the same point. If this fails (picked up above) we advance to the next character. */ g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; - + /* Advance to the position right after the last full match */ start_offset = offsets[1]; } while (global); @@ -810,7 +810,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } efree(match_sets); } - + efree(offsets); efree(subpat_names); @@ -860,7 +860,7 @@ static int preg_get_backref(char **str, int *backref) walk++; } else return 0; - + if (*walk && *walk >= '0' && *walk <= '9') { *backref = *backref * 10 + *walk - '0'; walk++; @@ -872,9 +872,9 @@ static int preg_get_backref(char **str, int *backref) else walk++; } - + *str = walk; - return 1; + return 1; } /* }}} */ @@ -884,7 +884,7 @@ static int preg_do_repl_func(zval *function, char *subject, int *offsets, char * { zval *retval_ptr; /* Function return value */ zval **args[1]; /* Argument to pass to function */ - zval *subpats; /* Captured subpatterns */ + zval *subpats; /* Captured subpatterns */ int result_len; /* Return value length */ int i; @@ -935,11 +935,11 @@ static int preg_do_eval(char *eval_str, int eval_str_len, char *subject, int backref; /* Current backref */ char *compiled_string_description; smart_str code = {0}; - + eval_str_end = eval_str + eval_str_len; walk = segment = eval_str; walk_last = 0; - + while (walk < eval_str_end) { /* If found a backreference.. */ if ('\\' == *walk || '$' == *walk) { @@ -992,15 +992,15 @@ static int preg_do_eval(char *eval_str, int eval_str_len, char *subject, } efree(compiled_string_description); convert_to_string(&retval); - + /* Save the return value and its length */ *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval)); result_len = Z_STRLEN(retval); - + /* Clean up */ zval_dtor(&retval); smart_str_free(&code); - + return result_len; } /* }}} */ @@ -1029,7 +1029,7 @@ PHPAPI char *php_pcre_replace(char *regex, int regex_len, /* }}} */ /* {{{ php_pcre_replace_impl() */ -PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val, +PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC) { pcre_extra *extra = pce->extra;/* Holds results of studying */ @@ -1105,7 +1105,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub } offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); - + alloc_len = 2 * subject_len + 1; result = safe_emalloc(alloc_len, sizeof(char), 0); @@ -1114,7 +1114,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub *result_len = 0; start_offset = 0; PCRE_G(error_code) = PHP_PCRE_NO_ERROR; - + while (1) { /* Execute the regular expression. */ count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, @@ -1131,7 +1131,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub piece = subject + start_offset; - if (count > 0 && (limit == -1 || limit > 0)) { + if (count > 0 && (offsets[1] - offsets[0] >= 0) && (limit == -1 || limit > 0)) { if (replace_count) { ++*replace_count; } @@ -1139,7 +1139,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub match = subject + offsets[0]; new_len = *result_len + offsets[0] - start_offset; /* part before the match */ - + /* If evaluating, do it and add the return string's length */ if (eval) { eval_result_len = preg_do_eval(replace, replace_len, subject, @@ -1184,7 +1184,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub /* copy replacement and backrefs */ walkbuf = result + *result_len; - + /* If evaluating or using custom function, copy result to the buffer * and clean up. */ if (eval || is_callable_replace) { @@ -1254,13 +1254,13 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub result = NULL; break; } - + /* If we have matched an empty string, mimic what Perl's /g options does. This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try the match again at the same point. If this fails (picked up above) we advance to the next character. */ g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; - + /* Advance to the next piece. */ start_offset = offsets[1]; } @@ -1284,18 +1284,18 @@ static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, *result; int subject_len; - /* Make sure we're dealing with strings. */ + /* Make sure we're dealing with strings. */ convert_to_string_ex(subject); /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */ ZVAL_STRINGL(&empty_replace, "", 0, 0); - + /* If regex is an array */ if (Z_TYPE_P(regex) == IS_ARRAY) { /* Duplicate subject string for repeated replacement */ subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject)); subject_len = Z_STRLEN_PP(subject); *result_len = subject_len; - + zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex)); replace_value = replace; @@ -1304,9 +1304,9 @@ static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, /* For each entry in the regex array, get the entry */ while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)®ex_entry) == SUCCESS) { - /* Make sure we're dealing with strings. */ + /* Make sure we're dealing with strings. */ convert_to_string_ex(regex_entry); - + /* If replace is an array and not a callable construct */ if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) { /* Get current entry */ @@ -1321,7 +1321,7 @@ static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, replace_value = &empty_replace; } } - + /* Do the actual replacement and put the result back into subject_value for further replacements. */ if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry), @@ -1377,12 +1377,12 @@ static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_repl ulong num_key; char *callback_name; int replace_count=0, old_replace_count; - + /* Get function parameters and do error-checking. */ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZZ|lZ", ®ex, &replace, &subject, &limit, &zcount) == FAILURE) { return; } - + if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array"); RETURN_FALSE; @@ -1408,10 +1408,10 @@ static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_repl if (ZEND_NUM_ARGS() > 3) { limit_val = limit; } - + if (Z_TYPE_PP(regex) != IS_ARRAY) convert_to_string_ex(regex); - + /* if subject is an array */ if (Z_TYPE_PP(subject) == IS_ARRAY) { array_init(return_value); @@ -1439,7 +1439,7 @@ static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_repl efree(result); } } - + zend_hash_move_forward(Z_ARRVAL_PP(subject)); } } else { /* if subject is not an array */ @@ -1456,7 +1456,7 @@ static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_repl zval_dtor(*zcount); ZVAL_LONG(*zcount, replace_count); } - + } /* }}} */ @@ -1484,7 +1484,7 @@ static PHP_FUNCTION(preg_filter) } /* }}} */ -/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]]) +/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]]) Split string into an array using a perl-style regular expression as a delimiter */ static PHP_FUNCTION(preg_split) { @@ -1496,12 +1496,12 @@ static PHP_FUNCTION(preg_split) long flags = 0; /* Match control flags */ pcre_cache_entry *pce; /* Compiled regular expression */ - /* Get function parameters and do error checking */ + /* Get function parameters and do error checking */ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", ®ex, ®ex_len, &subject, &subject_len, &limit_val, &flags) == FAILURE) { RETURN_FALSE; } - + /* Compile regex or get it from cache. */ if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { RETURN_FALSE; @@ -1538,7 +1538,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec no_empty = flags & PREG_SPLIT_NO_EMPTY; delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE; offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE; - + if (limit_val == 0) { limit_val = -1; } @@ -1549,7 +1549,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec } extra->match_limit = PCRE_G(backtrack_limit); extra->match_limit_recursion = PCRE_G(recursion_limit); - + /* Initialize return value */ array_init(return_value); @@ -1561,13 +1561,13 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec } size_offsets = (size_offsets + 1) * 3; offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); - + /* Start at the beginning of the string */ start_offset = 0; next_offset = 0; last_match = subject; PCRE_G(error_code) = PHP_PCRE_NO_ERROR; - + /* Get next piece if no limit or limit not yet reached and something matched*/ while ((limit_val == -1 || limit_val > 1)) { count = pcre_exec(pce->re, extra, subject, @@ -1582,9 +1582,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings"); count = size_offsets/3; } - + /* If something matched */ - if (count > 0) { + if (count > 0 && (offsets[1] - offsets[0] >= 0)) { if (!no_empty || &subject[offsets[0]] != last_match) { if (offset_capture) { @@ -1600,7 +1600,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec if (limit_val != -1) limit_val--; } - + last_match = &subject[offsets[1]]; next_offset = offsets[1]; @@ -1657,7 +1657,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec the match again at the same point. If this fails (picked up above) we advance to the next character. */ g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; - + /* Advance to the position right after the last full match */ start_offset = offsets[1]; } @@ -1676,7 +1676,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec } } - + /* Clean up */ efree(offsets); } @@ -1697,13 +1697,13 @@ static PHP_FUNCTION(preg_quote) delim_char=0, /* Delimiter character to be quoted */ c; /* Current character */ zend_bool quote_delim = 0; /* Whether to quote additional delim char */ - + /* Get the arguments and check for errors */ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len, &delim, &delim_len) == FAILURE) { return; } - + in_str_end = in_str + in_str_len; /* Nothing to do if we got an empty string */ @@ -1715,11 +1715,11 @@ static PHP_FUNCTION(preg_quote) delim_char = delim[0]; quote_delim = 1; } - + /* Allocate enough memory so that even if each character is quoted, we won't run out of room */ out_str = safe_emalloc(4, in_str_len, 1); - + /* Go through the string and quote necessary characters */ for(p = in_str, q = out_str; p != in_str_end; p++) { c = *p; @@ -1763,7 +1763,7 @@ static PHP_FUNCTION(preg_quote) } } *q = '\0'; - + /* Reallocate string and return it */ RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0); } @@ -1784,7 +1784,7 @@ static PHP_FUNCTION(preg_grep) &input, &flags) == FAILURE) { return; } - + /* Compile regex or get it from cache. */ if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { RETURN_FALSE; @@ -1809,9 +1809,9 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return zend_bool invert; /* Whether to return non-matching entries */ int rc; - + invert = flags & PREG_GREP_INVERT ? 1 : 0; - + if (extra == NULL) { extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; extra = &extra_data; @@ -1827,7 +1827,7 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return } size_offsets = (size_offsets + 1) * 3; offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); - + /* Initialize return array */ array_init(return_value); @@ -1940,7 +1940,7 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2) ZEND_ARG_INFO(0, pattern) ZEND_ARG_INFO(0, subject) ZEND_ARG_INFO(0, limit) - ZEND_ARG_INFO(0, flags) + ZEND_ARG_INFO(0, flags) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1) |