diff options
author | Nuno Lopes <nlopess@php.net> | 2007-10-07 11:48:27 +0000 |
---|---|---|
committer | Nuno Lopes <nlopess@php.net> | 2007-10-07 11:48:27 +0000 |
commit | 424db6feb93a2834184bda8fcd7984e30ed5bbb6 (patch) | |
tree | 2c55b8b8c9aa8dd6e2f526483b449e5cf6a49468 /ext/pcre/php_pcre.c | |
parent | 41dcd7d0b5315d6192652812800989218ed3c119 (diff) | |
download | php-git-424db6feb93a2834184bda8fcd7984e30ed5bbb6.tar.gz |
fix bug #37911: preg_replace_callback ignores named groups
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r-- | ext/pcre/php_pcre.c | 122 |
1 files changed, 74 insertions, 48 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 28b6b21b15..deaca774eb 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -174,6 +174,49 @@ static int pcre_clean_cache(void *data, void *arg TSRMLS_DC) } /* }}} */ +/* {{{ static make_subpats_table */ +static char **make_subpats_table(int num_subpats, int rc, pcre_cache_entry *pce) +{ + pcre_extra *extra = pce->extra; + int name_cnt = 0, name_size, ni = 0; + char *name_table; + unsigned short name_idx; + char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *)); + + rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt); + if (rc < 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); + efree(subpat_names); + return NULL; + } + if (name_cnt > 0) { + int rc1, rc2; + + rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table); + rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size); + rc = rc2 ? rc2 : rc1; + if (rc < 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); + efree(subpat_names); + return NULL; + } + + while (ni++ < name_cnt) { + name_idx = 0xff * name_table[0] + name_table[1]; + subpat_names[name_idx] = name_table + 2; + if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed"); + efree(subpat_names); + return NULL; + } + name_table += name_size; + } + } + + return subpat_names; +} +/* }}} */ + /* {{{ pcre_get_compiled_regex_cache */ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC) @@ -484,7 +527,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec int g_notempty = 0; /* If the match should not be empty */ const char **stringlist; /* Holds list of subpatterns */ char *match; /* The current match */ - char **subpat_names = NULL;/* Array for named subpatterns */ + char **subpat_names; /* Array for named subpatterns */ int i, rc; int subpats_order; /* Order of subpattern matches */ int offset_capture; /* Capture match offsets: yes/no */ @@ -539,54 +582,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } num_subpats++; size_offsets = num_subpats * 3; - offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); /* * Build a mapping from subpattern numbers to their names. We will always * allocate the table, even though there may be no named subpatterns. This * avoids somewhat more complicated logic in the inner loops. */ - subpat_names = (char **)safe_emalloc(num_subpats, sizeof(char *), 0); - memset(subpat_names, 0, sizeof(char *) * num_subpats); - { - int name_cnt = 0, name_size, ni = 0; - char *name_table; - unsigned short name_idx; - - rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt); - if (rc < 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); - efree(offsets); - efree(subpat_names); - RETURN_FALSE; - } - if (name_cnt > 0) { - int rc1, rc2; - - rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table); - rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size); - rc = rc2 ? rc2 : rc1; - if (rc < 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); - efree(offsets); - efree(subpat_names); - RETURN_FALSE; - } - - while (ni++ < name_cnt) { - name_idx = 0xff * name_table[0] + name_table[1]; - subpat_names[name_idx] = name_table + 2; - if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed"); - efree(offsets); - efree(subpat_names); - RETURN_FALSE; - } - name_table += name_size; - } - } + subpat_names = make_subpats_table(num_subpats, rc, pce); + if (!subpat_names) { + RETURN_FALSE; } + offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); + /* Allocate match sets array and initialize the values. */ if (global && subpats_order == PREG_PATTERN_ORDER) { match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0); @@ -794,7 +802,7 @@ static int preg_get_backref(char **str, int *backref) /* {{{ preg_do_repl_func */ -static int preg_do_repl_func(zval *function, char *subject, int *offsets, int count, char **result TSRMLS_DC) +static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC) { zval *retval_ptr; /* Function return value */ zval **args[1]; /* Argument to pass to function */ @@ -804,8 +812,12 @@ static int preg_do_repl_func(zval *function, char *subject, int *offsets, int co MAKE_STD_ZVAL(subpats); array_init(subpats); - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { + if (subpat_names[i]) { + add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1); + } add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1); + } args[0] = &subpats; if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) { @@ -944,6 +956,8 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub int exoptions = 0; /* Execution options */ int count = 0; /* Count of matched subpatterns */ int *offsets; /* Array of subpattern offsets */ + char **subpat_names; /* Array for named subpatterns */ + int num_subpats; /* Number of captured subpatterns */ int size_offsets; /* Size of the offsets array */ int new_len; /* Length of needed storage */ int alloc_len; /* Actual allocated length */ @@ -987,12 +1001,24 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub } /* Calculate the size of the offsets array, and allocate memory for it. */ - rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); + rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats); if (rc < 0) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); return NULL; } - size_offsets = (size_offsets + 1) * 3; + num_subpats++; + size_offsets = num_subpats * 3; + + /* + * Build a mapping from subpattern numbers to their names. We will always + * allocate the table, even though there may be no named subpatterns. This + * avoids somewhat more complicated logic in the inner loops. + */ + subpat_names = make_subpats_table(num_subpats, rc, pce); + if (!subpat_names) { + return NULL; + } + offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); alloc_len = 2 * subject_len + 1; @@ -1033,8 +1059,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub new_len += eval_result_len; } else if (is_callable_replace) { /* Use custom function to get replacement string and its length. */ - eval_result_len = preg_do_repl_func(replace_val, subject, offsets, - count, &eval_result TSRMLS_CC); + eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC); new_len += eval_result_len; } else { /* do regular substitution */ walk = replace; @@ -1149,8 +1174,9 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub /* Advance to the next piece. */ start_offset = offsets[1]; } - + efree(offsets); + efree(subpat_names); return result; } |