diff options
-rw-r--r-- | ext/fileinfo/libmagic/funcs.c | 8 | ||||
-rw-r--r-- | ext/pcre/php_pcre.c | 810 | ||||
-rw-r--r-- | ext/pcre/php_pcre.h | 6 | ||||
-rw-r--r-- | ext/spl/spl_iterators.c | 2 | ||||
-rw-r--r-- | win32/sendmail.c | 18 |
5 files changed, 582 insertions, 262 deletions
diff --git a/ext/fileinfo/libmagic/funcs.c b/ext/fileinfo/libmagic/funcs.c index 70b0a2f45c..db906d6018 100644 --- a/ext/fileinfo/libmagic/funcs.c +++ b/ext/fileinfo/libmagic/funcs.c @@ -471,7 +471,7 @@ file_replace(struct magic_set *ms, const char *pat, const char *rep) int opts = 0; pcre_cache_entry *pce; zend_string *res; - zval repl; + zend_string *repl; int rep_cnt = 0; (void)setlocale(LC_CTYPE, "C"); @@ -485,10 +485,10 @@ file_replace(struct magic_set *ms, const char *pat, const char *rep) } zval_ptr_dtor(&patt); - ZVAL_STRING(&repl, rep); - res = php_pcre_replace_impl(pce, NULL, ms->o.buf, strlen(ms->o.buf), &repl, 0, -1, &rep_cnt); + repl = zend_string_init(rep, strlen(rep), 0); + res = php_pcre_replace_impl(pce, NULL, ms->o.buf, strlen(ms->o.buf), repl, -1, &rep_cnt); - zval_ptr_dtor(&repl); + zend_string_release(repl); if (NULL == res) { rep_cnt = -1; goto out; diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index d21dd52380..615c83ae65 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -1032,8 +1032,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec if (mark) { add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark); } + break; } } + + /* Advance to the next piece. */ + start_offset = offsets[1]; + + /* If we have matched an empty string, mimic what Perl's /g options does. + This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try + the match again at the same point. If this fails (picked up above) we + advance to the next character. */ + g_notempty = (start_offset == offsets[0]) ? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0; + } else if (count == PCRE_ERROR_NOMATCH) { /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match, this is not necessarily the end. 
We need to advance @@ -1042,23 +1053,14 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec if (g_notempty != 0 && start_offset < subject_len) { int unit_len = calculate_unit_length(pce, subject + start_offset); - offsets[0] = (int)start_offset; - offsets[1] = (int)(start_offset + unit_len); + start_offset += unit_len; + g_notempty = 0; } else break; } else { pcre_handle_exec_error(count); break; } - - /* If we have matched an empty string, mimic what Perl's /g options does. - This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try - the match again at the same point. If this fails (picked up above) we - advance to the next character. */ - g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0; - - /* Advance to the position right after the last full match */ - start_offset = offsets[1]; } while (global); /* Add the match sets to the output array and clean up */ @@ -1159,31 +1161,36 @@ static int preg_get_backref(char **str, int *backref) /* {{{ preg_do_repl_func */ -static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark) +static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark) { zend_string *result_str; zval retval; /* Function return value */ - zval args[1]; /* Argument to pass to function */ + zval arg; /* Argument to pass to function */ int i; - array_init_size(&args[0], count + (mark ? 1 : 0)); + array_init_size(&arg, count + (mark ? 
1 : 0)); if (subpat_names) { for (i = 0; i < count; i++) { if (subpat_names[i]) { - add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]); + add_assoc_stringl(&arg, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]); } - add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]); + add_next_index_stringl(&arg, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]); } } else { for (i = 0; i < count; i++) { - add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]); + add_next_index_stringl(&arg, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]); } } if (mark) { - add_assoc_string(&args[0], "MARK", (char *) mark); + add_assoc_string(&arg, "MARK", (char *) mark); } - if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) { + fci->retval = &retval; + fci->param_count = 1; + fci->params = &arg; + fci->no_separation = 0; + + if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) { result_str = zval_get_string(&retval); zval_ptr_dtor(&retval); } else { @@ -1194,7 +1201,7 @@ static zend_string *preg_do_repl_func(zval *function, char *subject, int *offset result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0); } - zval_ptr_dtor(&args[0]); + zval_ptr_dtor(&arg); return result_str; } @@ -1205,7 +1212,7 @@ static zend_string *preg_do_repl_func(zval *function, char *subject, int *offset PHPAPI zend_string *php_pcre_replace(zend_string *regex, zend_string *subject_str, char *subject, int subject_len, - zval *replace_val, int is_callable_replace, + zend_string *replace_str, int limit, int *replace_count) { pcre_cache_entry *pce; /* Compiled regular expression */ @@ -1216,8 +1223,8 @@ PHPAPI zend_string *php_pcre_replace(zend_string *regex, return NULL; } pce->refcount++; - result = 
php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_val, - is_callable_replace, limit, replace_count); + result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str, + limit, replace_count); pce->refcount--; return result; @@ -1225,7 +1232,7 @@ PHPAPI zend_string *php_pcre_replace(zend_string *regex, /* }}} */ /* {{{ php_pcre_replace_impl() */ -PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int limit, int *replace_count) +PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zend_string *replace_str, int limit, int *replace_count) { pcre_extra *extra = pce->extra;/* Holds results of studying */ pcre_extra extra_data; /* Used locally for exec options */ @@ -1241,17 +1248,14 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su int backref; /* Backreference number */ int start_offset; /* Where the new search starts */ int g_notempty=0; /* If the match should not be empty */ - char *replace=NULL, /* Replacement string */ - *walkbuf, /* Location of current replacement in the result */ + char *walkbuf, /* Location of current replacement in the result */ *walk, /* Used to walk the replacement string */ *match, /* The current match */ *piece, /* The current piece of subject */ - *replace_end=NULL, /* End of replacement string */ + *replace_end, /* End of replacement string */ walk_last; /* Last walked character */ size_t result_len; /* Length of result */ - unsigned char *mark = NULL; /* Target for MARK name */ zend_string *result; /* Result of replacement */ - zend_string *eval_result=NULL; /* Result of custom function */ ALLOCA_FLAG(use_heap); @@ -1268,11 +1272,6 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su return NULL; } - if (!is_callable_replace) { - replace = 
Z_STRVAL_P(replace_val); - replace_end = replace + Z_STRLEN_P(replace_val); - } - /* Calculate the size of the offsets array, and allocate memory for it. */ num_subpats = pce->capture_count + 1; size_offsets = num_subpats * 3; @@ -1315,12 +1314,7 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su #endif #ifdef PCRE_EXTRA_MARK - if (EXPECTED(replace)) { - extra->flags &= ~PCRE_EXTRA_MARK; - } else { - extra->mark = &mark; - extra->flags |= PCRE_EXTRA_MARK; - } + extra->flags &= ~PCRE_EXTRA_MARK; #endif while (1) { @@ -1348,6 +1342,8 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su /* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */ if (count > 0 && (offsets[1] - offsets[0] >= 0) && limit) { + zend_bool simple_string = 1; + if (replace_count) { ++*replace_count; } @@ -1357,47 +1353,53 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su new_len = result_len + offsets[0] - start_offset; /* part before the match */ - /* if (!is_callable_replace) */ - if (EXPECTED(replace)) { - /* do regular substitution */ - walk = replace; - walk_last = 0; - - while (walk < replace_end) { - if ('\\' == *walk || '$' == *walk) { - if (walk_last == '\\') { - walk++; - walk_last = 0; - continue; - } - if (preg_get_backref(&walk, &backref)) { - if (backref < count) - new_len += offsets[(backref<<1)+1] - offsets[backref<<1]; - continue; - } + walk = ZSTR_VAL(replace_str); + replace_end = walk + ZSTR_LEN(replace_str); + walk_last = 0; + + while (walk < replace_end) { + if ('\\' == *walk || '$' == *walk) { + simple_string = 0; + if (walk_last == '\\') { + walk++; + walk_last = 0; + continue; + } + if (preg_get_backref(&walk, &backref)) { + if (backref < count) + new_len += offsets[(backref<<1)+1] - offsets[backref<<1]; + continue; } - new_len++; - walk++; - walk_last = walk[-1]; } + new_len++; + walk++; + walk_last = walk[-1]; + } - if (new_len >= alloc_len) { - alloc_len = 
zend_safe_address_guarded(2, new_len, alloc_len); - if (result == NULL) { - result = zend_string_alloc(alloc_len, 0); - } else { - result = zend_string_extend(result, alloc_len, 0); - } + if (new_len >= alloc_len) { + alloc_len = zend_safe_address_guarded(2, new_len, alloc_len); + if (result == NULL) { + result = zend_string_alloc(alloc_len, 0); + } else { + result = zend_string_extend(result, alloc_len, 0); } + } + if (match-piece > 0) { /* copy the part of the string before the match */ memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece); result_len += (match-piece); + } + if (simple_string) { + /* copy replacement */ + memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1); + result_len += ZSTR_LEN(replace_str); + } else { /* copy replacement and backrefs */ walkbuf = ZSTR_VAL(result) + result_len; - walk = replace; + walk = ZSTR_VAL(replace_str); walk_last = 0; while (walk < replace_end) { if ('\\' == *walk || '$' == *walk) { @@ -1421,35 +1423,231 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su *walkbuf = '\0'; /* increment the result length by how much we've added to the string */ result_len += (walkbuf - (ZSTR_VAL(result) + result_len)); + } + + if (limit) { + limit--; + } + + /* Advance to the next piece. */ + start_offset = offsets[1]; + + /* If we have matched an empty string, mimic what Perl's /g options does. + This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try + the match again at the same point. If this fails (picked up above) we + advance to the next character. */ + g_notempty = (start_offset == offsets[0]) ? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0; + + } else if (count == PCRE_ERROR_NOMATCH || limit == 0) { + /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match, + this is not necessarily the end. We need to advance + the start offset, and continue. Fudge the offset values + to achieve this, unless we're already at the end of the string. 
*/ + if (g_notempty != 0 && start_offset < subject_len) { + int unit_len = calculate_unit_length(pce, piece); + + start_offset += unit_len; + memcpy(ZSTR_VAL(result) + result_len, piece, unit_len); + result_len += unit_len; + g_notempty = 0; } else { - /* Use custom function to get replacement string and its length. */ - eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark); - ZEND_ASSERT(eval_result); - new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result), new_len); + if (!result && subject_str) { + result = zend_string_copy(subject_str); + break; + } + new_len = result_len + subject_len - start_offset; if (new_len >= alloc_len) { - alloc_len = zend_safe_address_guarded(2, new_len, alloc_len); - if (result == NULL) { - result = zend_string_alloc(alloc_len, 0); + alloc_len = new_len; /* now we know exactly how long it is */ + if (NULL != result) { + result = zend_string_realloc(result, alloc_len, 0); } else { - result = zend_string_extend(result, alloc_len, 0); + result = zend_string_alloc(alloc_len, 0); } } + /* stick that last bit of string on our output */ + memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset); + result_len += subject_len - start_offset; + ZSTR_VAL(result)[result_len] = '\0'; + ZSTR_LEN(result) = result_len; + break; + } + } else { + pcre_handle_exec_error(count); + if (result) { + zend_string_release(result); + result = NULL; + } + break; + } + } + + if (size_offsets <= 32) { + free_alloca(offsets, use_heap); + } else { + efree(offsets); + } + if (UNEXPECTED(subpat_names)) { + efree(subpat_names); + } + + return result; +} +/* }}} */ + +/* {{{ php_pcre_replace_func_impl() */ +static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, int limit, int *replace_count) +{ + pcre_extra *extra = pce->extra;/* Holds results of studying */ + pcre_extra extra_data; /* Used 
locally for exec options */ + int no_utf_check = 0; /* Execution options */ + int count = 0; /* Count of matched subpatterns */ + int *offsets; /* Array of subpattern offsets */ + char **subpat_names; /* Array for named subpatterns */ + int num_subpats; /* Number of captured subpatterns */ + int size_offsets; /* Size of the offsets array */ + size_t new_len; /* Length of needed storage */ + size_t alloc_len; /* Actual allocated length */ + int start_offset; /* Where the new search starts */ + int g_notempty=0; /* If the match should not be empty */ + char *match, /* The current match */ + *piece; /* The current piece of subject */ + size_t result_len; /* Length of result */ + unsigned char *mark = NULL; /* Target for MARK name */ + zend_string *result; /* Result of replacement */ + zend_string *eval_result=NULL; /* Result of custom function */ + + ALLOCA_FLAG(use_heap); + + if (extra == NULL) { + extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; + extra = &extra_data; + } + + extra->match_limit = (unsigned long)PCRE_G(backtrack_limit); + extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit); + + if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) { + php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead"); + return NULL; + } + + /* Calculate the size of the offsets array, and allocate memory for it. */ + num_subpats = pce->capture_count + 1; + size_offsets = num_subpats * 3; + if (size_offsets <= 32) { + offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap); + } else { + offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); + } + + /* + * Build a mapping from subpattern numbers to their names. We will + * allocate the table only if there are any named subpatterns. 
+ */ + subpat_names = NULL; + if (UNEXPECTED(pce->name_count > 0)) { + subpat_names = make_subpats_table(num_subpats, pce); + if (!subpat_names) { + if (size_offsets <= 32) { + free_alloca(offsets, use_heap); + } else { + efree(offsets); + } + return NULL; + } + } + + alloc_len = 0; + result = NULL; + + /* Initialize */ + match = NULL; + start_offset = 0; + result_len = 0; + PCRE_G(error_code) = PHP_PCRE_NO_ERROR; + +#ifdef HAVE_PCRE_JIT_SUPPORT + if (!(pce->compile_options & PCRE_UTF8)) { + no_utf_check = PCRE_NO_UTF8_CHECK; + } +#endif + +#ifdef PCRE_EXTRA_MARK + extra->mark = &mark; + extra->flags |= PCRE_EXTRA_MARK; +#endif + + while (1) { + /* Execute the regular expression. */ +#ifdef HAVE_PCRE_JIT_SUPPORT + if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) + && no_utf_check && !g_notempty) { + count = pcre_jit_exec(pce->re, extra, subject, subject_len, start_offset, + no_utf_check|g_notempty, offsets, size_offsets, jit_stack); + } else +#endif + count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, + no_utf_check|g_notempty, offsets, size_offsets); + + /* the string was already proved to be valid UTF-8 */ + no_utf_check = PCRE_NO_UTF8_CHECK; + + /* Check for too many substrings condition. */ + if (UNEXPECTED(count == 0)) { + php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings"); + count = size_offsets / 3; + } + + piece = subject + start_offset; + + /* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */ + if (count > 0 && (offsets[1] - offsets[0] >= 0) && limit) { + if (replace_count) { + ++*replace_count; + } + + /* Set the match location in subject */ + match = subject + offsets[0]; + + new_len = result_len + offsets[0] - start_offset; /* part before the match */ + + /* Use custom function to get replacement string and its length. 
*/ + eval_result = preg_do_repl_func(fci, fcc, subject, offsets, subpat_names, count, mark); + ZEND_ASSERT(eval_result); + new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result), new_len); + if (new_len >= alloc_len) { + alloc_len = zend_safe_address_guarded(2, new_len, alloc_len); + if (result == NULL) { + result = zend_string_alloc(alloc_len, 0); + } else { + result = zend_string_extend(result, alloc_len, 0); + } + } + + if (match-piece > 0) { /* copy the part of the string before the match */ memcpy(ZSTR_VAL(result) + result_len, piece, match-piece); result_len += (int)(match-piece); - - /* copy replacement and backrefs */ - walkbuf = ZSTR_VAL(result) + result_len; - - /* If using custom function, copy result to the buffer and clean up. */ - memcpy(walkbuf, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result)); - result_len += (int)ZSTR_LEN(eval_result); - zend_string_release(eval_result); } + /* If using custom function, copy result to the buffer and clean up. */ + memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result)); + result_len += (int)ZSTR_LEN(eval_result); + zend_string_release(eval_result); + if (limit) { limit--; } + + /* Advance to the next piece. */ + start_offset = offsets[1]; + + /* If we have matched an empty string, mimic what Perl's /g options does. + This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try + the match again at the same point. If this fails (picked up above) we + advance to the next character. */ + g_notempty = (start_offset == offsets[0]) ? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0; + } else if (count == PCRE_ERROR_NOMATCH || limit == 0) { /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match, this is not necessarily the end. 
We need to advance @@ -1458,10 +1656,10 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su if (g_notempty != 0 && start_offset < subject_len) { int unit_len = calculate_unit_length(pce, piece); - offsets[0] = start_offset; - offsets[1] = start_offset + unit_len; + start_offset += unit_len; memcpy(ZSTR_VAL(result) + result_len, piece, unit_len); result_len += unit_len; + g_notempty = 0; } else { if (!result && subject_str) { result = zend_string_copy(subject_str); @@ -1486,20 +1684,11 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su } else { pcre_handle_exec_error(count); if (result) { - zend_string_free(result); + zend_string_release(result); result = NULL; } break; } - - /* If we have matched an empty string, mimic what Perl's /g options does. - This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try - the match again at the same point. If this fails (picked up above) we - advance to the next character. */ - g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0; - - /* Advance to the next piece. */ - start_offset = offsets[1]; } if (size_offsets <= 32) { @@ -1515,166 +1704,245 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su } /* }}} */ -/* {{{ php_replace_in_subject +/* {{{ php_pcre_replace_func */ -static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count) +static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex, + zend_string *subject_str, + zend_fcall_info *fci, zend_fcall_info_cache *fcc, + int limit, int *replace_count) { - zval *regex_entry, - *replace_value, - empty_replace; - zend_string *result; - uint32_t replace_idx; - zend_string *subject_str = zval_get_string(subject); - - /* FIXME: This might need to be changed to ZSTR_EMPTY_ALLOC(). 
Check if this zval could be dtor()'ed somehow */ - ZVAL_EMPTY_STRING(&empty_replace); + pcre_cache_entry *pce; /* Compiled regular expression */ + zend_string *result; /* Function result */ - if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str))) { - php_error_docref(NULL, E_WARNING, "Subject is too long"); - return NULL; + /* Compile regex or get it from cache. */ + if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) { + return NULL; } + pce->refcount++; + result = php_pcre_replace_func_impl(pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc, + limit, replace_count); + pce->refcount--; + + return result; +} +/* }}} */ + +/* {{{ php_pcre_replace_array + */ +static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, int limit, int *replace_count) +{ + zval *regex_entry; + zend_string *result; + zend_string *replace_str; - /* If regex is an array */ - if (Z_TYPE_P(regex) == IS_ARRAY) { - replace_value = replace; - replace_idx = 0; + if (Z_TYPE_P(replace) == IS_ARRAY) { + uint32_t replace_idx = 0; + HashTable *replace_ht = Z_ARRVAL_P(replace); /* For each entry in the regex array, get the entry */ - ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) { - zval replace_str; + ZEND_HASH_FOREACH_VAL(regex, regex_entry) { /* Make sure we're dealing with strings. 
*/ zend_string *regex_str = zval_get_string(regex_entry); + zval *zv; - ZVAL_UNDEF(&replace_str); - /* If replace is an array and not a callable construct */ - if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) { - /* Get current entry */ - while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) { - if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNDEF) { - ZVAL_COPY(&replace_str, &Z_ARRVAL_P(replace)->arData[replace_idx].val); - break; - } - replace_idx++; + /* Get current entry */ + while (1) { + if (replace_idx == replace_ht->nNumUsed) { + replace_str = ZSTR_EMPTY_ALLOC(); + break; } - if (!Z_ISUNDEF(replace_str)) { - if (!is_callable_replace) { - convert_to_string(&replace_str); - } - replace_value = &replace_str; - replace_idx++; - } else { - /* We've run out of replacement strings, so use an empty one */ - replace_value = &empty_replace; + zv = &replace_ht->arData[replace_idx].val; + replace_idx++; + if (Z_TYPE_P(zv) != IS_UNDEF) { + replace_str = zval_get_string(zv); + break; } } /* Do the actual replacement and put the result back into subject_str for further replacements. 
*/ - if ((result = php_pcre_replace(regex_str, - subject_str, - ZSTR_VAL(subject_str), - (int)ZSTR_LEN(subject_str), - replace_value, - is_callable_replace, - limit, - replace_count)) != NULL) { - zend_string_release(subject_str); - subject_str = result; - } else { - zend_string_release(subject_str); - zend_string_release(regex_str); - zval_dtor(&replace_str); - return NULL; + result = php_pcre_replace(regex_str, + subject_str, + ZSTR_VAL(subject_str), + (int)ZSTR_LEN(subject_str), + replace_str, + limit, + replace_count); + zend_string_release(replace_str); + zend_string_release(regex_str); + zend_string_release(subject_str); + subject_str = result; + if (UNEXPECTED(result == NULL)) { + break; } + } ZEND_HASH_FOREACH_END(); + + } else { + replace_str = Z_STR_P(replace); + /* For each entry in the regex array, get the entry */ + ZEND_HASH_FOREACH_VAL(regex, regex_entry) { + /* Make sure we're dealing with strings. */ + zend_string *regex_str = zval_get_string(regex_entry); + + /* Do the actual replacement and put the result back into subject_str + for further replacements. 
*/ + result = php_pcre_replace(regex_str, + subject_str, + ZSTR_VAL(subject_str), + (int)ZSTR_LEN(subject_str), + replace_str, + limit, + replace_count); zend_string_release(regex_str); - zval_dtor(&replace_str); + zend_string_release(subject_str); + subject_str = result; + + if (UNEXPECTED(result == NULL)) { + break; + } } ZEND_HASH_FOREACH_END(); + } - return subject_str; - } else { + return subject_str; +} +/* }}} */ + +/* {{{ php_replace_in_subject + */ +static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int *replace_count) +{ + zend_string *result; + zend_string *subject_str = zval_get_string(subject); + + if (UNEXPECTED(ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str)))) { + zend_string_release(subject_str); + php_error_docref(NULL, E_WARNING, "Subject is too long"); + result = NULL; + } else if (Z_TYPE_P(regex) != IS_ARRAY) { result = php_pcre_replace(Z_STR_P(regex), subject_str, ZSTR_VAL(subject_str), (int)ZSTR_LEN(subject_str), - replace, - is_callable_replace, + Z_STR_P(replace), limit, replace_count); zend_string_release(subject_str); - return result; + } else { + result = php_pcre_replace_array(Z_ARRVAL_P(regex), + replace, + subject_str, + limit, + replace_count); } + return result; } /* }}} */ -/* {{{ preg_replace_impl +/* {{{ php_replace_in_subject_func */ -static int preg_replace_impl(zval *return_value, zval *regex, zval *replace, zval *subject, zend_long limit_val, int is_callable_replace, int is_filter) +static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, int limit, int *replace_count) { - zval *subject_entry; - zend_string *result; - zend_string *string_key; - zend_ulong num_key; - int replace_count = 0, old_replace_count; + zval *regex_entry; + zend_string *result; + zend_string *subject_str = zval_get_string(subject); - if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) 
{ - convert_to_string_ex(replace); + if (UNEXPECTED(ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str)))) { + php_error_docref(NULL, E_WARNING, "Subject is too long"); + return NULL; + } + + if (Z_TYPE_P(regex) != IS_ARRAY) { + result = php_pcre_replace_func(Z_STR_P(regex), + subject_str, + fci, fcc, + limit, + replace_count); + zend_string_release(subject_str); + return result; + } else { + /* If regex is an array */ + + /* For each entry in the regex array, get the entry */ + ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) { + /* Make sure we're dealing with strings. */ + zend_string *regex_str = zval_get_string(regex_entry); + + /* Do the actual replacement and put the result back into subject_str + for further replacements. */ + result = php_pcre_replace_func(regex_str, + subject_str, + fci, fcc, + limit, + replace_count); + zend_string_release(regex_str); + zend_string_release(subject_str); + subject_str = result; + if (UNEXPECTED(result == NULL)) { + break; + } + } ZEND_HASH_FOREACH_END(); + + return subject_str; } +} +/* }}} */ + +/* {{{ preg_replace_func_impl + */ +static int preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val) +{ + zend_string *result; + int replace_count = 0; if (Z_TYPE_P(regex) != IS_ARRAY) { convert_to_string_ex(regex); } - /* if subject is an array */ - if (Z_TYPE_P(subject) == IS_ARRAY) { + if (Z_TYPE_P(subject) != IS_ARRAY) { + result = php_replace_in_subject_func(regex, fci, fcc, subject, limit_val, &replace_count); + if (result != NULL) { + RETVAL_STR(result); + } else { + RETVAL_NULL(); + } + } else { + /* if subject is an array */ + zval *subject_entry, zv; + zend_string *string_key; + zend_ulong num_key; + array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject))); /* For each subject entry, convert it to string, then perform replacement and add the result to the return_value array. 
*/ ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) { - old_replace_count = replace_count; - if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) { - if (!is_filter || replace_count > old_replace_count) { - /* Add to return array */ - zval zv; - - ZVAL_STR(&zv, result); - if (string_key) { - zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv); - } else { - zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv); - } + result = php_replace_in_subject_func(regex, fci, fcc, subject_entry, limit_val, &replace_count); + if (result != NULL) { + /* Add to return array */ + ZVAL_STR(&zv, result); + if (string_key) { + zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv); } else { - zend_string_release(result); + zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv); } } } ZEND_HASH_FOREACH_END(); - } else { - /* if subject is not an array */ - old_replace_count = replace_count; - if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) { - if (!is_filter || replace_count > old_replace_count) { - RETVAL_STR(result); - } else { - zend_string_release(result); - RETVAL_NULL(); - } - } else { - RETVAL_NULL(); - } } return replace_count; } /* }}} */ -/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]]) - Perform Perl-style regular expression replacement. */ -static PHP_FUNCTION(preg_replace) +/* {{{ preg_replace_common + */ +static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter) { zval *regex, *replace, *subject, *zcount = NULL; zend_long limit = -1; - int replace_count; + int replace_count = 0; + zend_string *result; + int old_replace_count; /* Get function parameters and do error-checking. 
*/ ZEND_PARSE_PARAMETERS_START(3, 5) @@ -1686,12 +1954,68 @@ static PHP_FUNCTION(preg_replace) Z_PARAM_ZVAL_DEREF(zcount) ZEND_PARSE_PARAMETERS_END(); - if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) { - php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array"); - RETURN_FALSE; + if (Z_TYPE_P(replace) != IS_ARRAY) { + convert_to_string_ex(replace); + if (Z_TYPE_P(regex) != IS_ARRAY) { + convert_to_string_ex(regex); + } + } else { + if (Z_TYPE_P(regex) != IS_ARRAY) { + php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array"); + RETURN_FALSE; + } + } + + if (Z_TYPE_P(subject) != IS_ARRAY) { + old_replace_count = replace_count; + result = php_replace_in_subject(regex, + replace, + subject, + limit, + &replace_count); + if (result != NULL) { + if (!is_filter || replace_count > old_replace_count) { + RETVAL_STR(result); + } else { + zend_string_release(result); + RETVAL_NULL(); + } + } else { + RETVAL_NULL(); + } + } else { + /* if subject is an array */ + zval *subject_entry, zv; + zend_string *string_key; + zend_ulong num_key; + + array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject))); + + /* For each subject entry, convert it to string, then perform replacement + and add the result to the return_value array. 
*/ + ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) { + old_replace_count = replace_count; + result = php_replace_in_subject(regex, + replace, + subject_entry, + limit, + &replace_count); + if (result != NULL) { + if (!is_filter || replace_count > old_replace_count) { + /* Add to return array */ + ZVAL_STR(&zv, result); + if (string_key) { + zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv); + } else { + zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv); + } + } else { + zend_string_release(result); + } + } + } ZEND_HASH_FOREACH_END(); } - replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 0); if (zcount) { zval_ptr_dtor(zcount); ZVAL_LONG(zcount, replace_count); @@ -1699,6 +2023,14 @@ static PHP_FUNCTION(preg_replace) } /* }}} */ +/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]]) + Perform Perl-style regular expression replacement. */ +static PHP_FUNCTION(preg_replace) +{ + preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); +} +/* }}} */ + /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]]) Perform Perl-style regular expression replacement using replacement callback. */ static PHP_FUNCTION(preg_replace_callback) @@ -1707,6 +2039,8 @@ static PHP_FUNCTION(preg_replace_callback) zend_long limit = -1; zend_string *callback_name; int replace_count; + zend_fcall_info fci; + zend_fcall_info_cache fcc; /* Get function parameters and do error-checking. 
*/ ZEND_PARSE_PARAMETERS_START(3, 5) @@ -1718,7 +2052,7 @@ static PHP_FUNCTION(preg_replace_callback) Z_PARAM_ZVAL_DEREF(zcount) ZEND_PARSE_PARAMETERS_END(); - if (!zend_is_callable(replace, 0, &callback_name)) { + if (!zend_is_callable_ex(replace, NULL, 0, &callback_name, &fcc, NULL)) { php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name)); zend_string_release(callback_name); ZVAL_STR(return_value, zval_get_string(subject)); @@ -1726,7 +2060,11 @@ static PHP_FUNCTION(preg_replace_callback) } zend_string_release(callback_name); - replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 1, 0); + fci.size = sizeof(fci); + fci.object = NULL; + ZVAL_COPY_VALUE(&fci.function_name, replace); + + replace_count = preg_replace_func_impl(return_value, regex, &fci, &fcc, subject, limit); if (zcount) { zval_ptr_dtor(zcount); ZVAL_LONG(zcount, replace_count); @@ -1743,6 +2081,8 @@ static PHP_FUNCTION(preg_replace_callback_array) zend_string *str_idx; zend_string *callback_name; int replace_count = 0; + zend_fcall_info fci; + zend_fcall_info_cache fcc; /* Get function parameters and do error-checking. 
*/ ZEND_PARSE_PARAMETERS_START(2, 4) @@ -1753,6 +2093,9 @@ static PHP_FUNCTION(preg_replace_callback_array) Z_PARAM_ZVAL_DEREF(zcount) ZEND_PARSE_PARAMETERS_END(); + fci.size = sizeof(fci); + fci.object = NULL; + ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) { if (str_idx) { ZVAL_STR_COPY(&regex, str_idx); @@ -1762,7 +2105,7 @@ static PHP_FUNCTION(preg_replace_callback_array) RETURN_NULL(); } - if (!zend_is_callable(replace, 0, &callback_name)) { + if (!zend_is_callable_ex(replace, NULL, 0, &callback_name, &fcc, NULL)) { php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name)); zend_string_release(callback_name); zval_ptr_dtor(&regex); @@ -1772,10 +2115,12 @@ static PHP_FUNCTION(preg_replace_callback_array) } zend_string_release(callback_name); - if (Z_ISNULL_P(return_value)) { - replace_count += preg_replace_impl(&zv, &regex, replace, subject, limit, 1, 0); + ZVAL_COPY_VALUE(&fci.function_name, replace); + + replace_count += preg_replace_func_impl(&zv, &regex, &fci, &fcc, subject, limit); + if (subject != return_value) { + subject = return_value; } else { - replace_count += preg_replace_impl(&zv, &regex, replace, return_value, limit, 1, 0); zval_ptr_dtor(return_value); } @@ -1800,30 +2145,7 @@ static PHP_FUNCTION(preg_replace_callback_array) Perform Perl-style regular expression replacement and only return matches. */ static PHP_FUNCTION(preg_filter) { - zval *regex, *replace, *subject, *zcount = NULL; - zend_long limit = -1; - int replace_count; - - /* Get function parameters and do error-checking. 
*/ - ZEND_PARSE_PARAMETERS_START(3, 5) - Z_PARAM_ZVAL(regex) - Z_PARAM_ZVAL(replace) - Z_PARAM_ZVAL(subject) - Z_PARAM_OPTIONAL - Z_PARAM_LONG(limit) - Z_PARAM_ZVAL_DEREF(zcount) - ZEND_PARSE_PARAMETERS_END(); - - if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) { - php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array"); - RETURN_FALSE; - } - - replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 1); - if (zcount) { - zval_ptr_dtor(zcount); - ZVAL_LONG(zcount, replace_count); - } + preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); } /* }}} */ @@ -1983,14 +2305,24 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec } } } + + /* Advance to the position right after the last full match */ + start_offset = offsets[1]; + + /* If we have matched an empty string, mimic what Perl's /g options does. + This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try + the match again at the same point. If this fails (picked up above) we + advance to the next character. */ + g_notempty = (start_offset == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0; + } else if (count == PCRE_ERROR_NOMATCH) { /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match, this is not necessarily the end. We need to advance the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. */ if (g_notempty != 0 && start_offset < subject_len) { - offsets[0] = start_offset; - offsets[1] = start_offset + calculate_unit_length(pce, subject + start_offset); + start_offset += calculate_unit_length(pce, subject + start_offset); + g_notempty = 0; } else { break; } @@ -1998,15 +2330,6 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec pcre_handle_exec_error(count); break; } - - /* If we have matched an empty string, mimic what Perl's /g options does. 
- This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try - the match again at the same point. If this fails (picked up above) we - advance to the next character. */ - g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0; - - /* Advance to the position right after the last full match */ - start_offset = offsets[1]; } @@ -2113,7 +2436,6 @@ static PHP_FUNCTION(preg_quote) RETURN_STR_COPY(str); } -do_quote: /* Allocate enough memory so that even if each character is quoted, we won't run out of room */ out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0); diff --git a/ext/pcre/php_pcre.h b/ext/pcre/php_pcre.h index fed47c91a4..9265831d0e 100644 --- a/ext/pcre/php_pcre.h +++ b/ext/pcre/php_pcre.h @@ -33,7 +33,7 @@ #include <locale.h> #endif -PHPAPI zend_string *php_pcre_replace(zend_string *regex, zend_string *subject_str, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int limit, int *replace_count); +PHPAPI zend_string *php_pcre_replace(zend_string *regex, zend_string *subject_str, char *subject, int subject_len, zend_string *replace_str, int limit, int *replace_count); PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *options); PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *coptions); @@ -61,8 +61,8 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex); PHPAPI void php_pcre_match_impl( pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset); -PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *return_value, - int is_callable_replace, int limit, int *replace_count); +PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, 
zend_string *replace_str, + int limit, int *replace_count); PHPAPI void php_pcre_split_impl( pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, zend_long limit_val, zend_long flags); diff --git a/ext/spl/spl_iterators.c b/ext/spl/spl_iterators.c index 0048f2107d..4af3d12c22 100644 --- a/ext/spl/spl_iterators.c +++ b/ext/spl/spl_iterators.c @@ -2091,7 +2091,7 @@ SPL_METHOD(RegexIterator, accept) convert_to_string(&tmp_replacement); replacement = &tmp_replacement; } - result = php_pcre_replace_impl(intern->u.regex.pce, subject, ZSTR_VAL(subject), ZSTR_LEN(subject), replacement, 0, -1, &count); + result = php_pcre_replace_impl(intern->u.regex.pce, subject, ZSTR_VAL(subject), ZSTR_LEN(subject), Z_STR_P(replacement), -1, &count); if (intern->u.regex.flags & REGIT_USE_KEY) { zval_ptr_dtor(&intern->current.key); diff --git a/win32/sendmail.c b/win32/sendmail.c index fb1a8f2299..c71ab2201f 100644 --- a/win32/sendmail.c +++ b/win32/sendmail.c @@ -134,47 +134,45 @@ static zend_string *php_win32_mail_trim_header(char *header) #if HAVE_PCRE || HAVE_BUNDLED_PCRE zend_string *result, *result2; - zval replace; + zend_string *replace; zend_string *regex; if (!header) { return NULL; } - ZVAL_STRINGL(&replace, PHP_WIN32_MAIL_UNIFY_REPLACE, strlen(PHP_WIN32_MAIL_UNIFY_REPLACE)); + replace = zend_string_init(PHP_WIN32_MAIL_UNIFY_REPLACE, strlen(PHP_WIN32_MAIL_UNIFY_REPLACE), 0); regex = zend_string_init(PHP_WIN32_MAIL_UNIFY_PATTERN, sizeof(PHP_WIN32_MAIL_UNIFY_PATTERN)-1, 0); result = php_pcre_replace(regex, NULL, header, (int)strlen(header), - &replace, - 0, + replace, -1, NULL); - zval_ptr_dtor(&replace); + zend_string_release(replace); zend_string_release(regex); if (NULL == result) { return NULL; } - ZVAL_STRING(&replace, PHP_WIN32_MAIL_RMVDBL_PATTERN); + replace = zend_string_init(PHP_WIN32_MAIL_RMVDBL_PATTERN, strlen(PHP_WIN32_MAIL_RMVDBL_PATTERN), 0); regex = zend_string_init(PHP_WIN32_MAIL_RMVDBL_PATTERN, sizeof(PHP_WIN32_MAIL_RMVDBL_PATTERN)-1, 0); 
result2 = php_pcre_replace(regex, result, ZSTR_VAL(result), (int)ZSTR_LEN(result), - &replace, - 0, + replace, -1, NULL); - zval_ptr_dtor(&replace); + zend_string_release(replace); zend_string_release(regex); zend_string_release(result); return result2; #else /* In case we don't have PCRE support (for whatever reason...) simply do nothing and return the unmodified header */ - return estrdup(header); + return zend_string_init(header, strlen(header), 0); #endif } |