summaryrefslogtreecommitdiff
path: root/ext/pcre/php_pcre.c
diff options
context:
space:
mode:
authorAndrei Zmievski <andrei@php.net>2000-11-23 17:25:13 +0000
committerAndrei Zmievski <andrei@php.net>2000-11-23 17:25:13 +0000
commit0af5c36d9402be4fa2952f5e137abd68f26bc503 (patch)
treec8aae309774bd48b1fff78b1f1a986af812a9700 /ext/pcre/php_pcre.c
parentf03b949134ee2179963fbf88ddca312134642b4f (diff)
downloadphp-git-0af5c36d9402be4fa2952f5e137abd68f26bc503.tar.gz
This submission introduces two features.
The first one is support for Perl-style matching regexp delimiters, i.e. using <[{( and )}]> to delimit the regular expressions. The second one is a new 'F' modifier that allows you to specify a function name in the replacement argument to preg_replace(). This function will be called when the replacement needs to be made. It is passed an array of full matched pattern and captured subpatterns and it is expected to return a string that will be used for replacement. 'e' and 'F' modifiers cannot be used together. @- Implemented support for Perl-style matching regexp delimiters in PCRE. @ You can use <{[( and )]}> to delimit your expressions now. (Andrei) @- Introduced new 'F' modifier in PCRE that lets you specify a function @ name in the replacement argument to preg_replace() that will be called @ at run-time to provide the replacement string. It is passed an array of @ matched pattern and subpatterns. (Andrei)
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r--ext/pcre/php_pcre.c121
1 files changed, 101 insertions, 20 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index d4a022bbdc..53cf7da06d 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -41,6 +41,7 @@
#define PREG_SPLIT_NO_EMPTY (1<<0)
#define PREG_REPLACE_EVAL (1<<0)
+#define PREG_REPLACE_FUNC (1<<1)
#ifdef ZTS
int pcre_globals_id;
@@ -147,6 +148,8 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
const char *error;
int erroffset;
char delimiter;
+ char start_delimiter;
+ char end_delimiter;
char *p, *pp;
char *pattern;
int regex_len;
@@ -192,20 +195,47 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
zend_error(E_WARNING, "Delimiter must not be alphanumeric or backslash");
return NULL;
}
-
- /* We need to iterate through the pattern, searching for the ending delimiter,
- but skipping the backslashed delimiters. If the ending delimiter is not
- found, display a warning. */
- pp = p;
- while (*pp != 0) {
- if (*pp == '\\' && pp[1] != 0) pp++;
- else if (*pp == delimiter)
- break;
- pp++;
- }
- if (*pp == 0) {
- zend_error(E_WARNING, "No ending delimiter found");
- return NULL;
+
+ start_delimiter = delimiter;
+ if ((pp = strchr("([{< )]}> )]}>", delimiter)))
+ delimiter = pp[5];
+ end_delimiter = delimiter;
+
+ if (start_delimiter == end_delimiter) {
+ /* We need to iterate through the pattern, searching for the ending delimiter,
+ but skipping the backslashed delimiters. If the ending delimiter is not
+ found, display a warning. */
+ pp = p;
+ while (*pp != 0) {
+ if (*pp == '\\' && pp[1] != 0) pp++;
+ else if (*pp == delimiter)
+ break;
+ pp++;
+ }
+ if (*pp == 0) {
+ zend_error(E_WARNING, "No ending delimiter '%c' found", delimiter);
+ return NULL;
+ }
+ } else {
+ /* We iterate through the pattern, searching for the matching ending
+ * delimiter. For each matching starting delimiter, we increment nesting
+ * level, and decrement it for each matching ending delimiter. If we
+ * reach the end of the pattern without matching, display a warning.
+ */
+ int brackets = 1; /* brackets nesting level */
+ pp = p;
+ while (*pp != 0) {
+ if (*pp == '\\' && pp[1] != 0) pp++;
+ else if (*pp == end_delimiter && --brackets <= 0)
+ break;
+ else if (*pp == start_delimiter)
+ brackets++;
+ pp++;
+ }
+ if (*pp == 0) {
+ zend_error(E_WARNING, "No ending matching delimiter '%c' found", end_delimiter);
+ return NULL;
+ }
}
/* Make a copy of the actual pattern. */
@@ -235,7 +265,8 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
case 'X': coptions |= PCRE_EXTRA; break;
/* Custom preg options */
- case 'e': poptions |= PREG_REPLACE_EVAL; break;
+ case 'e': poptions |= PREG_REPLACE_EVAL; break;
+ case 'F': poptions |= PREG_REPLACE_FUNC; break;
case ' ':
case '\n':
@@ -247,6 +278,12 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
return NULL;
}
}
+
+ if ((poptions & PREG_REPLACE_EVAL) && (poptions & PREG_REPLACE_FUNC)) {
+ zend_error(E_WARNING, "'e' and 'F' modifiers cannot be used together");
+ efree(pattern);
+ return NULL;
+ }
#if HAVE_SETLOCALE
if (strcmp(locale, "C"))
@@ -526,6 +563,40 @@ static inline int preg_get_backref(const char *walk, int *backref)
return 1;
}
+static int preg_do_repl_func(char *function_name, char *subject, int *offsets, int count, char **result)
+{
+ zval *retval_ptr; /* Function return value */
+ zval function; /* Function to call */
+ zval *function_ptr = &function; /* Pointer to function to call */
+ zval **args[0]; /* Argument to pass to function */
+ zval *subpats; /* Captured subpatterns */
+ int result_len; /* Return value length */
+ int i;
+ CLS_FETCH();
+
+ ZVAL_STRING(function_ptr, function_name, 0);
+
+ MAKE_STD_ZVAL(subpats);
+ array_init(subpats);
+ for (i = 0; i < count; i++)
+ add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
+ args[0] = &subpats;
+
+ if (call_user_function_ex(CG(function_table), NULL, function_ptr, &retval_ptr, 1, args, 0, NULL) == SUCCESS && retval_ptr) {
+ convert_to_string_ex(&retval_ptr);
+ *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
+ result_len = Z_STRLEN_P(retval_ptr);
+ zval_ptr_dtor(&retval_ptr);
+ } else {
+ php_error(E_WARNING, "Unable to call custom replacement function %s()", function_name);
+ *result = empty_string;
+ result_len = 0;
+ }
+ zval_dtor(subpats);
+ FREE_ZVAL(subpats);
+
+ return result_len;
+}
static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
int *offsets, int count, char **result)
@@ -630,10 +701,13 @@ char *php_pcre_replace(char *regex, int regex_len,
int size_offsets; /* Size of the offsets array */
int new_len; /* Length of needed storage */
int alloc_len; /* Actual allocated length */
- int eval_result_len=0; /* Length of the eval'ed string */
+ int eval_result_len=0; /* Length of the eval'ed or
+ function-returned string */
int match_len; /* Length of the current match */
int backref; /* Backreference number */
int eval; /* If the replacement string should be eval'ed */
+ int use_func; /* If the matches should be run through
+ a function to get the replacement string */
int start_offset; /* Where the new search starts */
int g_notempty = 0; /* If the match should not be empty */
char *result, /* Result of replacement */
@@ -643,7 +717,7 @@ char *php_pcre_replace(char *regex, int regex_len,
*match, /* The current match */
*piece, /* The current piece of subject */
*replace_end, /* End of replacement string */
- *eval_result, /* Result of eval */
+ *eval_result, /* Result of eval or custom function */
walk_last; /* Last walked character */
/* Compile regex or get it from cache. */
@@ -670,6 +744,7 @@ char *php_pcre_replace(char *regex, int regex_len,
start_offset = 0;
replace_end = replace + replace_len;
eval = preg_options & PREG_REPLACE_EVAL;
+ use_func = preg_options & PREG_REPLACE_FUNC;
while (1) {
/* Execute the regular expression. */
@@ -695,6 +770,11 @@ char *php_pcre_replace(char *regex, int regex_len,
eval_result_len = preg_do_eval(replace, replace_len, subject,
offsets, count, &eval_result);
new_len += eval_result_len;
+ } else if (use_func) {
+ /* Use custom function to get replacement string and its length. */
+ eval_result_len = preg_do_repl_func(replace, subject, offsets,
+ count, &eval_result);
+ new_len += eval_result_len;
} else { /* do regular substitution */
walk = replace;
walk_last = 0;
@@ -726,11 +806,12 @@ char *php_pcre_replace(char *regex, int regex_len,
/* copy replacement and backrefs */
walkbuf = result + *result_len;
- /* If evaluating, copy result to the buffer and clean up */
- if (eval) {
+ /* If evaluating or using custom function, copy result to the buffer
+ * and clean up. */
+ if (eval || use_func) {
memcpy(walkbuf, eval_result, eval_result_len);
*result_len += eval_result_len;
- efree(eval_result);
+ STR_FREE(eval_result);
} else { /* do regular backreference copying */
walk = replace;
walk_last = 0;