diff options
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r-- | ext/pcre/php_pcre.c | 87 |
1 files changed, 67 insertions, 20 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 06e35e537a..6b258b9a20 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -2,7 +2,7 @@ +----------------------------------------------------------------------+ | PHP Version 5 | +----------------------------------------------------------------------+ - | Copyright (c) 1997-2015 The PHP Group | + | Copyright (c) 1997-2016 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | @@ -43,6 +43,11 @@ #define PCRE_CACHE_SIZE 4096 +/* not fully functional workaround for libpcre < 8.0, see bug #70232 */ +#ifndef PCRE_NOTEMPTY_ATSTART +# define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY +#endif + enum { PHP_PCRE_NO_ERROR = 0, PHP_PCRE_INTERNAL_ERROR, @@ -600,6 +605,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec int i, rc; int subpats_order; /* Order of subpattern matches */ int offset_capture; /* Capture match offsets: yes/no */ + unsigned char *mark = NULL; /* Target for MARK name */ + zval *marks = NULL; /* Array of marks for PREG_PATTERN_ORDER */ /* Overwrite the passed-in value for subpatterns with an empty array. */ if (subpats != NULL) { @@ -642,6 +649,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } extra->match_limit = PCRE_G(backtrack_limit); extra->match_limit_recursion = PCRE_G(recursion_limit); +#ifdef PCRE_EXTRA_MARK + extra->mark = &mark; + extra->flags |= PCRE_EXTRA_MARK; +#endif /* Calculate the size of the offsets array, and allocate memory for it. */ rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats); @@ -718,6 +729,14 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec offsets[(i<<1)+1] - offsets[i<<1], 1); } } + /* Add MARK, if available */ + if (mark) { + if (!marks) { + MAKE_STD_ZVAL(marks); + array_init(marks); + } + add_index_string(marks, matched - 1, (char *) mark, 1); + } /* * If the number of captured subpatterns on this run is * less than the total possible number, pad the result @@ -748,6 +767,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec offsets[(i<<1)+1] - offsets[i<<1], 1); } } + /* Add MARK, if available */ + if (mark) { + add_assoc_string(result_set, "MARK", (char *) mark, 1); + } /* And add it to the output array */ zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL); } @@ -767,12 +790,16 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec offsets[(i<<1)+1] - offsets[i<<1], 1); } } + /* Add MARK, if available */ + if (mark) { + add_assoc_string(subpats, "MARK", (char *) mark, 1); + } } pcre_free((void *) stringlist); } } else if (count == PCRE_ERROR_NOMATCH) { - /* If we previously set PCRE_NOTEMPTY after a null match, + /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match, this is not necessarily the end. We need to advance the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. */ @@ -789,11 +816,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } /* If we have matched an empty string, mimic what Perl's /g options does. - This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try + This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try the match again at the same point. If this fails (picked up above) we advance to the next character. */ - g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; - + g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0; + /* Advance to the position right after the last full match */ start_offset = offsets[1]; } while (global); @@ -809,6 +836,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL); } efree(match_sets); + + if (marks) { + add_assoc_zval(subpats, "MARK", marks); + } } efree(offsets); @@ -880,7 +911,7 @@ static int preg_get_backref(char **str, int *backref) /* {{{ preg_do_repl_func */ -static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC) +static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark, char **result TSRMLS_DC) { zval *retval_ptr; /* Function return value */ zval **args[1]; /* Argument to pass to function */ @@ -896,6 +927,9 @@ static int preg_do_repl_func(zval *function, char *subject, int *offsets, char * } add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1); } + if (mark) { + add_assoc_string(subpats, "MARK", (char *) mark, 1); + } args[0] = &subpats; if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) { @@ -1061,6 +1095,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub *eval_result, /* Result of eval or custom function */ walk_last; /* Last walked character */ int rc; + unsigned char *mark = NULL; /* Target for MARK name */ if (extra == NULL) { extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; @@ -1068,6 +1103,10 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub } extra->match_limit = PCRE_G(backtrack_limit); extra->match_limit_recursion = PCRE_G(recursion_limit); +#ifdef PCRE_EXTRA_MARK + extra->mark = &mark; + extra->flags |= PCRE_EXTRA_MARK; +#endif eval = pce->preg_options & PREG_REPLACE_EVAL; if (is_callable_replace) { @@ -1147,7 +1186,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub new_len += eval_result_len; } else if (is_callable_replace) { /* Use custom function to get replacement string and its length. */ - eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC); + eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark, &eval_result TSRMLS_CC); new_len += eval_result_len; } else { /* do regular substitution */ walk = replace; @@ -1222,7 +1261,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub limit--; } else if (count == PCRE_ERROR_NOMATCH || limit == 0) { - /* If we previously set PCRE_NOTEMPTY after a null match, + /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match, this is not necessarily the end. We need to advance the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. */ @@ -1256,11 +1295,11 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub } /* If we have matched an empty string, mimic what Perl's /g options does. - This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try + This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try the match again at the same point. If this fails (picked up above) we advance to the next character. */ - g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; - + g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0; + /* Advance to the next piece. */ start_offset = offsets[1]; } @@ -1374,6 +1413,7 @@ static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_repl int limit_val = -1; long limit = -1; char *string_key; + uint string_key_len; ulong num_key; char *callback_name; int replace_count=0, old_replace_count; @@ -1425,10 +1465,10 @@ static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_repl if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) { if (!is_filter || replace_count > old_replace_count) { /* Add to return array */ - switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0)) + switch(zend_hash_get_current_key_ex(Z_ARRVAL_PP(subject), &string_key, &string_key_len, &num_key, 0, NULL)) { case HASH_KEY_IS_STRING: - add_assoc_stringl(return_value, string_key, result, result_len, 0); + add_assoc_stringl_ex(return_value, string_key, string_key_len, result, result_len, 0); break; case HASH_KEY_IS_LONG: @@ -1549,7 +1589,10 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec } extra->match_limit = PCRE_G(backtrack_limit); extra->match_limit_recursion = PCRE_G(recursion_limit); - +#ifdef PCRE_EXTRA_MARK + extra->flags &= ~PCRE_EXTRA_MARK; +#endif + /* Initialize return value */ array_init(return_value); @@ -1621,7 +1664,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec } } } else if (count == PCRE_ERROR_NOMATCH) { - /* If we previously set PCRE_NOTEMPTY after a null match, + /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match, this is not necessarily the end. We need to advance the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. */ @@ -1653,11 +1696,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec } /* If we have matched an empty string, mimic what Perl's /g options does. - This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try + This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try the match again at the same point. If this fails (picked up above) we advance to the next character. */ - g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; - + g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0; + /* Advance to the position right after the last full match */ start_offset = offsets[1]; } @@ -1805,6 +1848,7 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return int size_offsets; /* Size of the offsets array */ int count = 0; /* Count of matched subpatterns */ char *string_key; + uint string_key_len; ulong num_key; zend_bool invert; /* Whether to return non-matching entries */ @@ -1818,6 +1862,9 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return } extra->match_limit = PCRE_G(backtrack_limit); extra->match_limit_recursion = PCRE_G(recursion_limit); +#ifdef PCRE_EXTRA_MARK + extra->flags &= ~PCRE_EXTRA_MARK; +#endif /* Calculate the size of the offsets array, and allocate memory for it. */ rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); @@ -1863,11 +1910,11 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return Z_ADDREF_PP(entry); /* Add to return array */ - switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0)) + switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(input), &string_key, &string_key_len, &num_key, 0, NULL)) { case HASH_KEY_IS_STRING: zend_hash_update(Z_ARRVAL_P(return_value), string_key, - strlen(string_key)+1, entry, sizeof(zval *), NULL); + string_key_len, entry, sizeof(zval *), NULL); break; case HASH_KEY_IS_LONG: |