summaryrefslogtreecommitdiff
path: root/ext/pcre/php_pcre.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r--ext/pcre/php_pcre.c196
1 files changed, 128 insertions, 68 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index 5b136428d4..d80c174ae3 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -18,12 +18,18 @@
/* $Id$ */
+/*
+ TODO:
+ - Allow user to set PCRE_NOTEMPTY, PCRE_ANCHORED at execution time
+ - Have an option for preg_split() to not return empty strings
+*/
+
#include "php.h"
#include "php_globals.h"
+#include "php_pcre.h"
-#if HAVE_PCRE
+#if HAVE_PCRE || HAVE_BUNDLED_PCRE
-#include "php_pcre.h"
#include "ext/standard/php3_string.h"
#define PREG_PATTERN_ORDER 0
@@ -307,13 +313,13 @@ static void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
int *offsets; /* Array of subpattern offsets */
int num_subpats; /* Number of captured subpatterns */
int size_offsets; /* Size of the offsets array */
+ int start_offset; /* Where the new search starts */
int matched; /* Has anything matched */
int i;
int subpats_order_val = 0; /* Integer value of subpats_order */
+ int g_notempty = 0; /* If the match should not be empty */
const char **stringlist; /* Used to hold list of subpatterns */
- char *match, /* The current match */
- *piece, /* The current piece of subject */
- *subject_end; /* Points to the end of the subject */
+ char *match; /* The current match */
/* Get function parameters and do error-checking. */
@@ -389,17 +395,15 @@ static void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
}
/* Start from the beginning of the string */
- piece = subject->value.str.val;
- subject_end = piece + subject->value.str.len;
+ start_offset = 0;
match = NULL;
matched = 0;
do {
/* Execute the regular expression. */
- count = pcre_exec(re, extra, piece,
- subject_end-piece, subject->value.str.val,
- (piece==subject->value.str.val ? exoptions : exoptions|PCRE_NOTBOL),
- offsets, size_offsets, (piece == match));
+ count = pcre_exec(re, extra, subject->value.str.val,
+ subject->value.str.len, start_offset,
+ exoptions|g_notempty, offsets, size_offsets);
/* Check for too many substrings condition. */
if (count == 0) {
@@ -410,12 +414,13 @@ static void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
/* If something has matched */
if (count >= 0) {
matched++;
- match = piece + offsets[0];
+ match = subject->value.str.val + offsets[0];
/* If subpatters array has been passed, fill it in with values. */
if (subpats != NULL) {
/* Try to get the list of substrings and display a warning if failed. */
- if (pcre_get_substring_list(piece, offsets, count, &stringlist) < 0) {
+ if (pcre_get_substring_list(subject->value.str.val,
+ offsets, count, &stringlist) < 0) {
efree(offsets);
efree(re);
zend_error(E_WARNING, "Get subpatterns list failed");
@@ -452,12 +457,29 @@ static void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
}
php_pcre_free((void *) stringlist);
-
- /* Advance to the position right after the last full match */
- piece += offsets[1];
}
}
- } while (global && count >= 0);
+ else { /* Failed to match */
+ /* If we previously set PCRE_NOTEMPTY after a null match,
+ this is not necessarily the end. We need to advance
+ the start offset, and continue. Fudge the offset values
+ to achieve this, unless we're already at the end of the string. */
+ if (g_notempty != 0 && start_offset < subject->value.str.len) {
+ offsets[0] = start_offset;
+ offsets[1] = start_offset + 1;
+ } else
+ break;
+ }
+
+ /* If we have matched an empty string, mimic what Perl's /g options does.
+ This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
+ the match again at the same point. If this fails (picked up above) we
+ advance to the next character. */
+ g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY : 0;
+
+ /* Advance to the position right after the last full match */
+ start_offset = offsets[1];
+ } while (global);
/* Add the match sets to the output array and clean up */
if (global && subpats_order_val == PREG_PATTERN_ORDER) {
@@ -593,13 +615,14 @@ char *_php_pcre_replace(char *regex, char *subject, char *replace)
int match_len; /* Length of the current match */
int backref; /* Backreference number */
int eval; /* If the replacement string should be eval'ed */
+ int start_offset; /* Where the new search starts */
+ int g_notempty = 0; /* If the match should not be empty */
char *result, /* Result of replacement */
*new_buf, /* Temporary buffer for re-allocation */
*walkbuf, /* Location of current replacement in the result */
*walk, /* Used to walk the replacement string */
*match, /* The current match */
*piece, /* The current piece of subject */
- *subject_end, /* Points to the end of the subject */
*eval_result; /* Result of eval */
/* Compile regex or get it from cache. */
@@ -625,16 +648,13 @@ char *_php_pcre_replace(char *regex, char *subject, char *replace)
/* Initialize */
match = NULL;
result[0] = '\0';
- piece = subject;
- subject_end = subject + subject_len;
+ start_offset = 0;
eval = preg_options & PREG_REPLACE_EVAL;
- while (count >= 0) {
+ while (1) {
/* Execute the regular expression. */
- count = pcre_exec(re, extra, piece,
- subject_end-piece, subject,
- (piece==subject ? exoptions : exoptions|PCRE_NOTBOL),
- offsets, size_offsets, (piece == match));
+ count = pcre_exec(re, extra, subject, subject_len, start_offset,
+ exoptions|g_notempty, offsets, size_offsets);
/* Check for too many substrings condition. */
if (count == 0) {
@@ -642,11 +662,13 @@ char *_php_pcre_replace(char *regex, char *subject, char *replace)
count = size_offsets/3;
}
+ piece = &subject[start_offset];
+
if (count > 0) {
- /* Set the match location in piece */
- match = piece + offsets[0];
+ /* Set the match location in subject */
+ match = subject + offsets[0];
- new_len = strlen(result) + offsets[0]; /* part before the match */
+ new_len = strlen(result) + offsets[0] - start_offset; /* part before the match */
/* If evaluating, do it and add the return string's length */
if (eval) {
@@ -679,7 +701,7 @@ char *_php_pcre_replace(char *regex, char *subject, char *replace)
strncat(result, piece, match-piece);
/* copy replacement and backrefs */
- walkbuf = &result[result_len + offsets[0]];
+ walkbuf = &result[result_len + offsets[0] - start_offset];
/* If evaluating, copy result to the buffer and clean up */
if (eval) {
@@ -702,21 +724,39 @@ char *_php_pcre_replace(char *regex, char *subject, char *replace)
*walkbuf++ = *walk++;
}
*walkbuf = '\0';
-
- /* Advance to the next piece */
- piece += offsets[1];
- } else {
- new_len = strlen(result) + subject_end-piece;
- if (new_len + 1 > alloc_len) {
- alloc_len = new_len + 1; /* now we know exactly how long it is */
- new_buf = emalloc(alloc_len * sizeof(char));
- strcpy(new_buf, result);
- efree(result);
- result = new_buf;
+ } else { /* Failed to match */
+ /* If we previously set PCRE_NOTEMPTY after a null match,
+ this is not necessarily the end. We need to advance
+ the start offset, and continue. Fudge the offset values
+ to achieve this, unless we're already at the end of the string. */
+ if (g_notempty != 0 && start_offset < subject_len) {
+ offsets[0] = start_offset;
+ offsets[1] = start_offset + 1;
+ strncat(result, piece, 1);
+ }
+ else {
+ new_len = strlen(result) + subject_len - start_offset;
+ if (new_len + 1 > alloc_len) {
+ alloc_len = new_len + 1; /* now we know exactly how long it is */
+ new_buf = emalloc(alloc_len * sizeof(char));
+ strcpy(new_buf, result);
+ efree(result);
+ result = new_buf;
+ }
+ /* stick that last bit of string on our output */
+ strcat(result, piece);
+ break;
}
- /* stick that last bit of string on our output */
- strcat(result, piece);
}
+
+ /* If we have matched an empty string, mimic what Perl's /g options does.
+ This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
+ the match again at the same point. If this fails (picked up above) we
+ advance to the next character. */
+ g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY : 0;
+
+ /* Advance to the next piece */
+ start_offset = offsets[1];
}
efree(offsets);
@@ -862,9 +902,10 @@ PHP_FUNCTION(preg_split)
int argc; /* Argument count */
int limit_val; /* Integer value of limit */
int count = 0; /* Count of matched subpatterns */
+ int start_offset; /* Where the new search starts */
+ int g_notempty = 0; /* If the match should not be empty */
char *match, /* The current match */
- *piece, /* The current piece of subject */
- *subject_end; /* Points to the end of subject string */
+ *last_match; /* Location of last match */
/* Get function parameters and do error checking */
argc = ARG_COUNT(ht);
@@ -896,45 +937,60 @@ PHP_FUNCTION(preg_split)
offsets = (int *)emalloc(size_offsets * sizeof(int));
/* Start at the beginning of the string */
- piece = subject->value.str.val;
- subject_end = piece + subject->value.str.len;
+ start_offset = 0;
+ last_match = subject->value.str.val;
match = NULL;
/* Get next piece if no limit or limit not yet reached and something matched*/
- while ((limit_val == -1 || limit_val > 1) && count >= 0) {
- count = pcre_exec(re, extra, piece,
- subject_end-piece, subject->value.str.val,
- (piece==subject->value.str.val ? exoptions : exoptions|PCRE_NOTBOL),
- offsets, size_offsets, (piece==match));
+ while ((limit_val == -1 || limit_val > 1)) {
+ count = pcre_exec(re, extra, subject->value.str.val,
+ subject->value.str.len, start_offset,
+ exoptions|g_notempty, offsets, size_offsets);
/* Check for too many substrings condition. */
if (count == 0) {
zend_error(E_NOTICE, "Matched, but too many substrings\n");
count = size_offsets/3;
}
-
+
/* If something matched */
if (count > 0) {
- match = piece + offsets[0];
-
+ match = subject->value.str.val + offsets[0];
+
/* Add the piece to the return value */
- add_next_index_stringl(return_value,
- piece,
- offsets[0], 1);
+ add_next_index_stringl(return_value, last_match,
+ &subject->value.str.val[offsets[0]]-last_match, 1);
- /* Advance to next position */
- piece += offsets[1];
+ last_match = &subject->value.str.val[offsets[1]];
/* One less left to do */
if (limit_val != -1)
limit_val--;
+ } else { /* Failed to match */
+ /* If we previously set PCRE_NOTEMPTY after a null match,
+ this is not necessarily the end. We need to advance
+ the start offset, and continue. Fudge the offset values
+ to achieve this, unless we're already at the end of the string. */
+ if (g_notempty != 0 && start_offset < subject->value.str.len) {
+ offsets[0] = start_offset;
+ offsets[1] = start_offset + 1;
+ } else
+ break;
}
+
+ /* If we have matched an empty string, mimic what Perl's /g options does.
+ This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
+ the match again at the same point. If this fails (picked up above) we
+ advance to the next character. */
+ g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY : 0;
+
+ /* Advance to the position right after the last full match */
+ start_offset = offsets[1];
}
/* Add the last piece to the return value */
- add_next_index_stringl(return_value,
- piece,
- subject_end-piece, 1);
+ add_next_index_string(return_value,
+ &subject->value.str.val[start_offset], 1);
/* Clean up */
efree(offsets);
@@ -1064,8 +1120,8 @@ PHP_FUNCTION(preg_grep)
/* Perform the match */
count = pcre_exec(re, extra, entry->value.str.val,
- entry->value.str.len, entry->value.str.val,
- 0, offsets, size_offsets, 0);
+ entry->value.str.len, 0,
+ 0, offsets, size_offsets);
/* Check for too many substrings condition. */
if (count == 0) {
@@ -1119,15 +1175,19 @@ function_entry pcre_functions[] = {
};
zend_module_entry pcre_module_entry = {
- "PCRE", pcre_functions, PHP_MINIT(pcre), PHP_MSHUTDOWN(pcre),
- PHP_RINIT(pcre), NULL,
- PHP_MINFO(pcre), STANDARD_MODULE_PROPERTIES
+ "PCRE", pcre_functions,
+ PHP_MINIT(pcre),
+ PHP_MSHUTDOWN(pcre),
+ PHP_RINIT(pcre),
+ NULL,
+ PHP_MINFO(pcre),
+ STANDARD_MODULE_PROPERTIES
};
/* }}} */
-#endif /* HAVE_PCRE */
+#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
/*
* Local variables: