diff options
author | Andrei Zmievski <andrei@php.net> | 2000-11-23 17:25:13 +0000 |
---|---|---|
committer | Andrei Zmievski <andrei@php.net> | 2000-11-23 17:25:13 +0000 |
commit | 0af5c36d9402be4fa2952f5e137abd68f26bc503 (patch) | |
tree | c8aae309774bd48b1fff78b1f1a986af812a9700 /ext/pcre/php_pcre.c | |
parent | f03b949134ee2179963fbf88ddca312134642b4f (diff) | |
download | php-git-0af5c36d9402be4fa2952f5e137abd68f26bc503.tar.gz |
This submission introduces two features.
The first one is support for Perl-style matching regexp delimiters, i.e.
using the bracket pairs (), [], {} or <> to delimit the regular expressions.
The second one is a new 'F' modifier that allows you to specify a function name
in the replacement argument to preg_replace(). This function will be called
when the replacement needs to be made. It is passed an array containing the
full matched pattern and the captured subpatterns, and it is expected to return
a string that will be used as the replacement. The 'e' and 'F' modifiers cannot be used together.
@- Implemented support for Perl-style matching regexp delimiters in PCRE.
@ You can use <{[( and )]}> to delimit your expressions now. (Andrei)
@- Introduced new 'F' modifier in PCRE that lets you specify a function
@ name in the replacement argument to preg_replace() that will be called
@ at run-time to provide the replacement string. It is passed an array of
@ the matched pattern and its subpatterns. (Andrei)
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r-- | ext/pcre/php_pcre.c | 121 |
1 files changed, 101 insertions, 20 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index d4a022bbdc..53cf7da06d 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -41,6 +41,7 @@ #define PREG_SPLIT_NO_EMPTY (1<<0) #define PREG_REPLACE_EVAL (1<<0) +#define PREG_REPLACE_FUNC (1<<1) #ifdef ZTS int pcre_globals_id; @@ -147,6 +148,8 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o const char *error; int erroffset; char delimiter; + char start_delimiter; + char end_delimiter; char *p, *pp; char *pattern; int regex_len; @@ -192,20 +195,47 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o zend_error(E_WARNING, "Delimiter must not be alphanumeric or backslash"); return NULL; } - - /* We need to iterate through the pattern, searching for the ending delimiter, - but skipping the backslashed delimiters. If the ending delimiter is not - found, display a warning. */ - pp = p; - while (*pp != 0) { - if (*pp == '\\' && pp[1] != 0) pp++; - else if (*pp == delimiter) - break; - pp++; - } - if (*pp == 0) { - zend_error(E_WARNING, "No ending delimiter found"); - return NULL; + + start_delimiter = delimiter; + if ((pp = strchr("([{< )]}> )]}>", delimiter))) + delimiter = pp[5]; + end_delimiter = delimiter; + + if (start_delimiter == end_delimiter) { + /* We need to iterate through the pattern, searching for the ending delimiter, + but skipping the backslashed delimiters. If the ending delimiter is not + found, display a warning. */ + pp = p; + while (*pp != 0) { + if (*pp == '\\' && pp[1] != 0) pp++; + else if (*pp == delimiter) + break; + pp++; + } + if (*pp == 0) { + zend_error(E_WARNING, "No ending delimiter '%c' found", delimiter); + return NULL; + } + } else { + /* We iterate through the pattern, searching for the matching ending + * delimiter. For each matching starting delimiter, we increment nesting + * level, and decrement it for each matching ending delimiter. 
If we + * reach the end of the pattern without matching, display a warning. + */ + int brackets = 1; /* brackets nesting level */ + pp = p; + while (*pp != 0) { + if (*pp == '\\' && pp[1] != 0) pp++; + else if (*pp == end_delimiter && --brackets <= 0) + break; + else if (*pp == start_delimiter) + brackets++; + pp++; + } + if (*pp == 0) { + zend_error(E_WARNING, "No ending matching delimiter '%c' found", end_delimiter); + return NULL; + } } /* Make a copy of the actual pattern. */ @@ -235,7 +265,8 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o case 'X': coptions |= PCRE_EXTRA; break; /* Custom preg options */ - case 'e': poptions |= PREG_REPLACE_EVAL; break; + case 'e': poptions |= PREG_REPLACE_EVAL; break; + case 'F': poptions |= PREG_REPLACE_FUNC; break; case ' ': case '\n': @@ -247,6 +278,12 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o return NULL; } } + + if ((poptions & PREG_REPLACE_EVAL) && (poptions & PREG_REPLACE_FUNC)) { + zend_error(E_WARNING, "'e' and 'F' modifiers cannot be used together"); + efree(pattern); + return NULL; + } #if HAVE_SETLOCALE if (strcmp(locale, "C")) @@ -526,6 +563,40 @@ static inline int preg_get_backref(const char *walk, int *backref) return 1; } +static int preg_do_repl_func(char *function_name, char *subject, int *offsets, int count, char **result) +{ + zval *retval_ptr; /* Function return value */ + zval function; /* Function to call */ + zval *function_ptr = &function; /* Pointer to function to call */ + zval **args[0]; /* Argument to pass to function */ + zval *subpats; /* Captured subpatterns */ + int result_len; /* Return value length */ + int i; + CLS_FETCH(); + + ZVAL_STRING(function_ptr, function_name, 0); + + MAKE_STD_ZVAL(subpats); + array_init(subpats); + for (i = 0; i < count; i++) + add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1); + args[0] = &subpats; + + if 
(call_user_function_ex(CG(function_table), NULL, function_ptr, &retval_ptr, 1, args, 0, NULL) == SUCCESS && retval_ptr) { + convert_to_string_ex(&retval_ptr); + *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr)); + result_len = Z_STRLEN_P(retval_ptr); + zval_ptr_dtor(&retval_ptr); + } else { + php_error(E_WARNING, "Unable to call custom replacement function %s()", function_name); + *result = empty_string; + result_len = 0; + } + zval_dtor(subpats); + FREE_ZVAL(subpats); + + return result_len; +} static int preg_do_eval(char *eval_str, int eval_str_len, char *subject, int *offsets, int count, char **result) @@ -630,10 +701,13 @@ char *php_pcre_replace(char *regex, int regex_len, int size_offsets; /* Size of the offsets array */ int new_len; /* Length of needed storage */ int alloc_len; /* Actual allocated length */ - int eval_result_len=0; /* Length of the eval'ed string */ + int eval_result_len=0; /* Length of the eval'ed or + function-returned string */ int match_len; /* Length of the current match */ int backref; /* Backreference number */ int eval; /* If the replacement string should be eval'ed */ + int use_func; /* If the matches should be run through + a function to get the replacement string */ int start_offset; /* Where the new search starts */ int g_notempty = 0; /* If the match should not be empty */ char *result, /* Result of replacement */ @@ -643,7 +717,7 @@ char *php_pcre_replace(char *regex, int regex_len, *match, /* The current match */ *piece, /* The current piece of subject */ *replace_end, /* End of replacement string */ - *eval_result, /* Result of eval */ + *eval_result, /* Result of eval or custom function */ walk_last; /* Last walked character */ /* Compile regex or get it from cache. 
*/ @@ -670,6 +744,7 @@ char *php_pcre_replace(char *regex, int regex_len, start_offset = 0; replace_end = replace + replace_len; eval = preg_options & PREG_REPLACE_EVAL; + use_func = preg_options & PREG_REPLACE_FUNC; while (1) { /* Execute the regular expression. */ @@ -695,6 +770,11 @@ char *php_pcre_replace(char *regex, int regex_len, eval_result_len = preg_do_eval(replace, replace_len, subject, offsets, count, &eval_result); new_len += eval_result_len; + } else if (use_func) { + /* Use custom function to get replacement string and its length. */ + eval_result_len = preg_do_repl_func(replace, subject, offsets, + count, &eval_result); + new_len += eval_result_len; } else { /* do regular substitution */ walk = replace; walk_last = 0; @@ -726,11 +806,12 @@ char *php_pcre_replace(char *regex, int regex_len, /* copy replacement and backrefs */ walkbuf = result + *result_len; - /* If evaluating, copy result to the buffer and clean up */ - if (eval) { + /* If evaluating or using custom function, copy result to the buffer + * and clean up. */ + if (eval || use_func) { memcpy(walkbuf, eval_result, eval_result_len); *result_len += eval_result_len; - efree(eval_result); + STR_FREE(eval_result); } else { /* do regular backreference copying */ walk = replace; walk_last = 0; |