diff options
author | Aaron Piotrowski <aaron@trowski.com> | 2016-06-10 22:02:23 -0500 |
---|---|---|
committer | Aaron Piotrowski <aaron@trowski.com> | 2016-06-10 22:02:23 -0500 |
commit | e3c681aa5cc71122a8d2fae42e6513fc413ccac8 (patch) | |
tree | 5f1df62f7b666028edb0ee1adf083a52d63df45a /ext/pcre/php_pcre.c | |
parent | fb4e3085cbaa76eb8f28eebf848a81d1c0190067 (diff) | |
parent | 792e89385ca6fc722a03590722eb7745a2374720 (diff) | |
download | php-git-e3c681aa5cc71122a8d2fae42e6513fc413ccac8.tar.gz |
Merge branch 'master' into throw-error-in-extensions
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r-- | ext/pcre/php_pcre.c | 147 |
1 files changed, 96 insertions, 51 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 8e40f84673..9225c0f976 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -2,7 +2,7 @@ +----------------------------------------------------------------------+ | PHP Version 7 | +----------------------------------------------------------------------+ - | Copyright (c) 1997-2015 The PHP Group | + | Copyright (c) 1997-2016 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | @@ -44,18 +44,29 @@ #define PCRE_CACHE_SIZE 4096 +/* not fully functional workaround for libpcre < 8.0, see bug #70232 */ +#ifndef PCRE_NOTEMPTY_ATSTART +# define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY +#endif + enum { PHP_PCRE_NO_ERROR = 0, PHP_PCRE_INTERNAL_ERROR, PHP_PCRE_BACKTRACK_LIMIT_ERROR, PHP_PCRE_RECURSION_LIMIT_ERROR, PHP_PCRE_BAD_UTF8_ERROR, - PHP_PCRE_BAD_UTF8_OFFSET_ERROR + PHP_PCRE_BAD_UTF8_OFFSET_ERROR, + PHP_PCRE_JIT_STACKLIMIT_ERROR }; PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre) +#ifdef PCRE_STUDY_JIT_COMPILE +#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024) +#define PCRE_JIT_STACK_MAX_SIZE (64 * 1024) +ZEND_TLS pcre_jit_stack *jit_stack = NULL; +#endif static void pcre_handle_exec_error(int pcre_code) /* {{{ */ { @@ -77,6 +88,12 @@ static void pcre_handle_exec_error(int pcre_code) /* {{{ */ case PCRE_ERROR_BADUTF8_OFFSET: preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR; break; + +#ifdef PCRE_STUDY_JIT_COMPILE + case PCRE_ERROR_JIT_STACKLIMIT: + preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR; + break; +#endif default: preg_code = PHP_PCRE_INTERNAL_ERROR; @@ -117,6 +134,16 @@ static PHP_GINIT_FUNCTION(pcre) /* {{{ */ static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */ { zend_hash_destroy(&pcre_globals->pcre_cache); + +#ifdef PCRE_STUDY_JIT_COMPILE + /* Stack may only be destroyed when no cached patterns + possibly associated with it do exist. */ + if (jit_stack) { + pcre_jit_stack_free(jit_stack); + jit_stack = NULL; + } +#endif + } /* }}} */ @@ -169,6 +196,7 @@ static PHP_MINIT_FUNCTION(pcre) REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT); REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT); return SUCCESS; @@ -184,6 +212,19 @@ static PHP_MSHUTDOWN_FUNCTION(pcre) } /* }}} */ +#ifdef PCRE_STUDY_JIT_COMPILE +/* {{{ PHP_RINIT_FUNCTION(pcre) */ +static PHP_RINIT_FUNCTION(pcre) +{ + if (PCRE_G(jit)) { + jit_stack = pcre_jit_stack_alloc(PCRE_JIT_STACK_MIN_SIZE,PCRE_JIT_STACK_MAX_SIZE); + } + + return SUCCESS; +} +/* }}} */ +#endif + /* {{{ static pcre_clean_cache */ static int pcre_clean_cache(zval *data, void *arg) { @@ -448,6 +489,11 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex) extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; extra->match_limit = (unsigned long)PCRE_G(backtrack_limit); extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit); +#ifdef PCRE_STUDY_JIT_COMPILE + if (PCRE_G(jit) && jit_stack) { + pcre_assign_jit_stack(extra, NULL, jit_stack); + } +#endif } if (error != NULL) { php_error_docref(NULL, E_WARNING, "Error while studying pattern"); @@ -600,6 +646,11 @@ static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ * ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE); #endif + if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) { + php_error_docref(NULL, E_WARNING, "Subject is too long"); + RETURN_FALSE; + } + /* Compile regex or get it from cache. */ if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) { RETURN_FALSE; @@ -740,7 +791,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec /* If subpatterns array has been passed, fill it in with values. */ if (subpats != NULL) { /* Try to get the list of substrings and display a warning if failed. */ - if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) { + if ((offsets[1] - offsets[0] < 0) || pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) { if (subpat_names) { efree(subpat_names); } @@ -868,7 +919,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec pcre_free((void *) stringlist); } } else if (count == PCRE_ERROR_NOMATCH) { - /* If we previously set PCRE_NOTEMPTY after a null match, + /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match, this is not necessarily the end. We need to advance the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. */ @@ -885,10 +936,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } /* If we have matched an empty string, mimic what Perl's /g options does. - This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try + This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try the match again at the same point. If this fails (picked up above) we advance to the next character. */ - g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; + g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0; /* Advance to the position right after the last full match */ start_offset = offsets[1]; @@ -979,7 +1030,7 @@ static int preg_get_backref(char **str, int *backref) } if (in_brace) { - if (*walk == 0 || *walk != '}') + if (*walk != '}') return 0; else walk++; @@ -1068,8 +1119,8 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su char **subpat_names; /* Array for named subpatterns */ int num_subpats; /* Number of captured subpatterns */ int size_offsets; /* Size of the offsets array */ - int new_len; /* Length of needed storage */ - int alloc_len; /* Actual allocated length */ + size_t new_len; /* Length of needed storage */ + size_t alloc_len; /* Actual allocated length */ int match_len; /* Length of the current match */ int backref; /* Backreference number */ int start_offset; /* Where the new search starts */ @@ -1159,7 +1210,7 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su piece = subject + start_offset; /* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */ - if (EXPECTED(count > 0 && limit)) { + if (EXPECTED(count > 0 && (offsets[1] - offsets[0] >= 0) && limit)) { if (UNEXPECTED(replace_count)) { ++*replace_count; } @@ -1263,7 +1314,7 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su limit--; } } else if (count == PCRE_ERROR_NOMATCH || UNEXPECTED(limit == 0)) { - /* If we previously set PCRE_NOTEMPTY after a null match, + /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match, this is not necessarily the end. We need to advance the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. */ @@ -1305,10 +1356,10 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su } /* If we have matched an empty string, mimic what Perl's /g options does. - This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try + This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try the match again at the same point. If this fails (picked up above) we advance to the next character. */ - g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; + g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0; /* Advance to the next piece. */ start_offset = offsets[1]; @@ -1332,7 +1383,6 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count) { zval *regex_entry, - *replace_entry = NULL, *replace_value, empty_replace; zend_string *result; @@ -1342,6 +1392,11 @@ static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *sub /* FIXME: This might need to be changed to ZSTR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */ ZVAL_EMPTY_STRING(&empty_replace); + if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str))) { + php_error_docref(NULL, E_WARNING, "Subject is too long"); + return NULL; + } + /* If regex is an array */ if (Z_TYPE_P(regex) == IS_ARRAY) { replace_value = replace; @@ -1349,25 +1404,26 @@ static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *sub /* For each entry in the regex array, get the entry */ ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) { + zval replace_str; /* Make sure we're dealing with strings. */ zend_string *regex_str = zval_get_string(regex_entry); + ZVAL_UNDEF(&replace_str); /* If replace is an array and not a callable construct */ if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) { /* Get current entry */ - replace_entry = NULL; while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) { - if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNUSED) { - replace_entry = &Z_ARRVAL_P(replace)->arData[replace_idx].val; + if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNDEF) { + ZVAL_COPY(&replace_str, &Z_ARRVAL_P(replace)->arData[replace_idx].val); break; } replace_idx++; } - if (replace_entry != NULL) { + if (!Z_ISUNDEF(replace_str)) { if (!is_callable_replace) { - convert_to_string_ex(replace_entry); + convert_to_string(&replace_str); } - replace_value = replace_entry; + replace_value = &replace_str; replace_idx++; } else { /* We've run out of replacement strings, so use an empty one */ @@ -1390,10 +1446,12 @@ static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *sub } else { zend_string_release(subject_str); zend_string_release(regex_str); + zval_dtor(&replace_str); return NULL; } zend_string_release(regex_str); + zval_dtor(&replace_str); } ZEND_HASH_FOREACH_END(); return subject_str; @@ -1423,12 +1481,10 @@ static int preg_replace_impl(zval *return_value, zval *regex, zval *replace, zva int replace_count = 0, old_replace_count; if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) { - SEPARATE_ZVAL(replace); convert_to_string_ex(replace); } if (Z_TYPE_P(regex) != IS_ARRAY) { - SEPARATE_ZVAL(regex); convert_to_string_ex(regex); } @@ -1686,6 +1742,11 @@ static PHP_FUNCTION(preg_split) ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE); #endif + if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) { + php_error_docref(NULL, E_WARNING, "Subject is too long"); + RETURN_FALSE; + } + /* Compile regex or get it from cache. */ if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) { RETURN_FALSE; @@ -1703,8 +1764,6 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec zend_long limit_val, zend_long flags) { pcre_extra *extra = pce->extra;/* Holds results of studying */ - pcre *re_bump = NULL; /* Regex instance for empty matches */ - pcre_extra *extra_bump = NULL; /* Almost dummy */ pcre_extra extra_data; /* Used locally for exec options */ int *offsets; /* Array of subpattern offsets */ int size_offsets; /* Size of the offsets array */ @@ -1771,7 +1830,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec } /* If something matched */ - if (count > 0) { + if (count > 0 && (offsets[1] - offsets[0] >= 0)) { if (!no_empty || &subject[offsets[0]] != last_match) { if (offset_capture) { @@ -1807,44 +1866,26 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec } } } else if (count == PCRE_ERROR_NOMATCH) { - /* If we previously set PCRE_NOTEMPTY after a null match, + /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match, this is not necessarily the end. We need to advance the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. */ if (g_notempty != 0 && start_offset < subject_len) { - if (pce->compile_options & PCRE_UTF8) { - if (re_bump == NULL) { - int dummy; - zend_string *regex = zend_string_init("/./us", sizeof("/./us")-1, 0); - re_bump = pcre_get_compiled_regex(regex, &extra_bump, &dummy); - zend_string_release(regex); - if (re_bump == NULL) { - RETURN_FALSE; - } - } - count = pcre_exec(re_bump, extra_bump, subject, - subject_len, start_offset, - exoptions, offsets, size_offsets); - if (count < 1) { - php_error_docref(NULL, E_WARNING, "Unknown error"); - RETURN_FALSE; - } - } else { - offsets[0] = start_offset; - offsets[1] = start_offset + 1; - } - } else + offsets[0] = start_offset; + offsets[1] = start_offset + calculate_unit_length(pce, subject + start_offset); + } else { break; + } } else { pcre_handle_exec_error(count); break; } /* If we have matched an empty string, mimic what Perl's /g options does. - This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try + This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try the match again at the same point. If this fails (picked up above) we advance to the next character. */ - g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; + g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0; /* Advance to the position right after the last full match */ start_offset = offsets[1]; @@ -2191,7 +2232,11 @@ zend_module_entry pcre_module_entry = { pcre_functions, PHP_MINIT(pcre), PHP_MSHUTDOWN(pcre), - NULL, +#ifdef PCRE_STUDY_JIT_COMPILE + PHP_RINIT(pcre), +#else + NULL +#endif NULL, PHP_MINFO(pcre), PHP_PCRE_VERSION, |