summaryrefslogtreecommitdiff
path: root/ext/pcre/php_pcre.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r--ext/pcre/php_pcre.c202
1 files changed, 119 insertions, 83 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index 22e8a65a7e..19d2124ded 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -317,7 +317,7 @@ PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_
/* }}} */
/* {{{ add_offset_pair */
-static inline void add_offset_pair(zval *result, char *str, int len, int offset)
+static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
{
zval *match_pair;
@@ -329,6 +329,10 @@ static inline void add_offset_pair(zval *result, char *str, int len, int offset)
add_next_index_stringl(match_pair, str, len, 1);
add_next_index_long(match_pair, offset);
+ if (name) {
+ zval_add_ref(&match_pair);
+ zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
+ }
zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
}
/* }}} */
@@ -337,11 +341,15 @@ static inline void add_offset_pair(zval *result, char *str, int len, int offset)
*/
static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
{
- zval **regex, /* Regular expression */
- **subject, /* String to match against */
- **subpats = NULL, /* Array for subpatterns */
- **flags, /* Match control flags */
- *result_set, /* Holds a set of subpatterns after
+ /* parameters */
+ char *regex; /* Regular expression */
+ char *subject; /* String to match against */
+ int regex_len;
+ int subject_len;
+ zval *subpats = NULL; /* Array for subpatterns */
+ int flags; /* Match control flags */
+
+ zval *result_set, /* Holds a set of subpatterns after
a global match */
**match_sets = NULL; /* An array of sets of matches for each
subpattern after a global match */
@@ -353,63 +361,56 @@ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
int *offsets; /* Array of subpattern offsets */
int num_subpats; /* Number of captured subpatterns */
int size_offsets; /* Size of the offsets array */
- int start_offset; /* Where the new search starts */
+ int start_offset = 0; /* Where the new search starts */
int matched; /* Has anything matched */
- int i;
- int subpats_order = 0; /* Order of subpattern matches */
+ int subpats_order = 0; /* Order of subpattern matches */
int offset_capture = 0;/* Capture match offsets: yes/no */
int g_notempty = 0; /* If the match should not be empty */
- const char **stringlist; /* Used to hold list of subpatterns */
+ const char **stringlist; /* Holds list of subpatterns */
char *match; /* The current match */
+ char **subpat_names = NULL;/* Array for named subpatterns */
+ int i;
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|zll", &regex, &regex_len,
+ &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) {
+ RETURN_FALSE;
+ }
- /* Get function parameters and do error-checking. */
- switch(ZEND_NUM_ARGS()) {
- case 2:
- if (global || zend_get_parameters_ex(2, &regex, &subject) == FAILURE) {
- WRONG_PARAM_COUNT;
- }
- break;
-
- case 3:
- if (zend_get_parameters_ex(3, &regex, &subject, &subpats) == FAILURE) {
- WRONG_PARAM_COUNT;
- }
- if (global)
- subpats_order = PREG_PATTERN_ORDER;
- break;
+ if (global)
+ subpats_order = PREG_PATTERN_ORDER;
- case 4:
- if (zend_get_parameters_ex(4, &regex, &subject, &subpats, &flags) == FAILURE) {
- WRONG_PARAM_COUNT;
- }
-
- convert_to_long_ex(flags);
- offset_capture = Z_LVAL_PP(flags) & PREG_OFFSET_CAPTURE;
- subpats_order = Z_LVAL_PP(flags) & 0xff;
- if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
- (!global && subpats_order != 0)) {
- php_error_docref(NULL TSRMLS_CC,E_WARNING, "Wrong value for parameter 4");
- return;
- }
- break;
-
- default:
- WRONG_PARAM_COUNT;
+ if (ZEND_NUM_ARGS() > 3) {
+ offset_capture = flags & PREG_OFFSET_CAPTURE;
+ /*
+ * subpats_order is pre-set to pattern mode so we change it only if
+ * necessary.
+ */
+ if (flags & 0xff) {
+ subpats_order = flags & 0xff;
+ }
+ if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
+ (!global && subpats_order != 0)) {
+ zend_error(E_WARNING, "Wrong value for parameter 4 in call to %s()", get_active_function_name(TSRMLS_C));
+ return;
+ }
}
- /* Make sure we're dealing with strings. */
- convert_to_string_ex(regex);
- convert_to_string_ex(subject);
-
- /* Make sure to clean up the passed array and initialize it. */
+ /* Overwrite the passed-in value for subpatterns with an empty array. */
if (subpats != NULL) {
- zval_dtor(*subpats);
- array_init(*subpats);
+ zval_dtor(subpats);
+ array_init(subpats);
+ }
+
+ /* Negative offset counts from the end of the string. */
+ if (start_offset < 0) {
+ start_offset = subject_len + start_offset;
+ if (start_offset < 0) {
+ start_offset = 0;
+ }
}
/* Compile regex or get it from cache. */
- if ((re = pcre_get_compiled_regex(Z_STRVAL_PP(regex), &extra, &preg_options TSRMLS_CC)) == NULL) {
+ if ((re = pcre_get_compiled_regex(regex, &extra, &preg_options TSRMLS_CC)) == NULL) {
RETURN_FALSE;
}
@@ -417,11 +418,36 @@ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
num_subpats++;
size_offsets = num_subpats * 3;
- offsets = (int *)emalloc(size_offsets * sizeof(int));
+ offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
+
+ /*
+ * Build a mapping from subpattern numbers to their names. We will always
+ * allocate the table, even though they may be no named subpatterns. This
+ * avoids somewhat more complicated logic in the inner loops.
+ */
+ subpat_names = (char **)safe_emalloc(num_subpats, sizeof(char *), 0);
+ memset(subpat_names, 0, sizeof(char *) * num_subpats);
+ {
+ int name_cnt, name_size, ni = 0;
+ char *name_table;
+ unsigned short name_idx;
+
+ pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
+ if (name_cnt > 0) {
+ pcre_fullinfo(re, extra, PCRE_INFO_NAMETABLE, &name_table);
+ pcre_fullinfo(re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
+
+ while (ni++ < name_cnt) {
+ name_idx = 0xff * name_table[0] + name_table[1];
+ subpat_names[name_idx] = name_table + 2;
+ name_table += name_size;
+ }
+ }
+ }
- /* Allocate match sets array and initialize the values */
+ /* Allocate match sets array and initialize the values. */
if (global && subpats_order == PREG_PATTERN_ORDER) {
- match_sets = (zval **)emalloc(num_subpats * sizeof(zval *));
+ match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
for (i=0; i<num_subpats; i++) {
ALLOC_ZVAL(match_sets[i]);
array_init(match_sets[i]);
@@ -429,36 +455,33 @@ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
}
}
- /* Start from the beginning of the string */
- start_offset = 0;
match = NULL;
matched = 0;
do {
/* Execute the regular expression. */
- count = pcre_exec(re, extra, Z_STRVAL_PP(subject),
- Z_STRLEN_PP(subject), start_offset,
+ count = pcre_exec(re, extra, subject, subject_len, start_offset,
exoptions|g_notempty, offsets, size_offsets);
/* Check for too many substrings condition. */
if (count == 0) {
- php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
+ zend_error(E_NOTICE, "Matched, but too many substrings");
count = size_offsets/3;
}
/* If something has matched */
if (count >= 0) {
matched++;
- match = Z_STRVAL_PP(subject) + offsets[0];
+ match = subject + offsets[0];
/* If subpatterns array has been passed, fill it in with values. */
if (subpats != NULL) {
/* Try to get the list of substrings and display a warning if failed. */
- if (pcre_get_substring_list(Z_STRVAL_PP(subject),
- offsets, count, &stringlist) < 0) {
+ if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
+ efree(subpat_names);
efree(offsets);
efree(re);
- php_error_docref(NULL TSRMLS_CC,E_WARNING, "Get subpatterns list failed");
+ zend_error(E_WARNING, "Get subpatterns list failed");
return;
}
@@ -468,7 +491,7 @@ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
for (i = 0; i < count; i++) {
if (offset_capture) {
add_offset_pair(match_sets[i], (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1]);
+ offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
} else {
add_next_index_stringl(match_sets[i], (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], 1);
@@ -494,24 +517,32 @@ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
for (i = 0; i < count; i++) {
if (offset_capture) {
add_offset_pair(result_set, (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1]);
+ offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
} else {
+ if (subpat_names[i]) {
+ add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
+ offsets[(i<<1)+1] - offsets[i<<1], 1);
+ }
add_next_index_stringl(result_set, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], 1);
}
}
/* And add it to the output array */
- zend_hash_next_index_insert(Z_ARRVAL_PP(subpats), &result_set,
- sizeof(zval *), NULL);
+ zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
}
} else { /* single pattern matching */
/* For each subpattern, insert it into the subpatterns array. */
for (i = 0; i < count; i++) {
if (offset_capture) {
- add_offset_pair(*subpats, (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1]);
+ add_offset_pair(subpats, (char *)stringlist[i],
+ offsets[(i<<1)+1] - offsets[i<<1],
+ offsets[i<<1], subpat_names[i]);
} else {
- add_next_index_stringl(*subpats, (char *)stringlist[i],
+ if (subpat_names[i]) {
+ add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
+ offsets[(i<<1)+1] - offsets[i<<1], 1);
+ }
+ add_next_index_stringl(subpats, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], 1);
}
}
@@ -525,7 +556,7 @@ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
this is not necessarily the end. We need to advance
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */
- if (g_notempty != 0 && start_offset < Z_STRLEN_PP(subject)) {
+ if (g_notempty != 0 && start_offset < subject_len) {
offsets[0] = start_offset;
offsets[1] = start_offset + 1;
} else
@@ -544,19 +575,24 @@ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
/* Add the match sets to the output array and clean up */
if (global && subpats_order == PREG_PATTERN_ORDER) {
- for (i=0; i<num_subpats; i++) {
- zend_hash_next_index_insert(Z_ARRVAL_PP(subpats), &match_sets[i], sizeof(zval *), NULL);
+ for (i = 0; i < num_subpats; i++) {
+ if (subpat_names[i]) {
+ zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i],
+ strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL);
+ }
+ zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
}
efree(match_sets);
}
efree(offsets);
+ efree(subpat_names);
RETVAL_LONG(matched);
}
/* }}} */
-/* {{{ proto int preg_match(string pattern, string subject [, array subpatterns [, int flags]])
+/* {{{ proto int preg_match(string pattern, string subject [, array subpatterns [, int flags [, int offset]]])
Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)
{
@@ -564,7 +600,7 @@ PHP_FUNCTION(preg_match)
}
/* }}} */
-/* {{{ proto int preg_match_all(string pattern, string subject, array subpatterns [, int flags])
+/* {{{ proto int preg_match_all(string pattern, string subject, array subpatterns [, int flags [, int offset]])
Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)
{
@@ -787,10 +823,10 @@ PHPAPI char *php_pcre_replace(char *regex, int regex_len,
/* Calculate the size of the offsets array, and allocate memory for it. */
size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3;
- offsets = (int *)emalloc(size_offsets * sizeof(int));
+ offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
alloc_len = 2 * subject_len + 1;
- result = emalloc(alloc_len * sizeof(char));
+ result = safe_emalloc(alloc_len, sizeof(char), 0);
/* Initialize */
match = NULL;
@@ -912,7 +948,7 @@ PHPAPI char *php_pcre_replace(char *regex, int regex_len,
new_len = *result_len + subject_len - start_offset;
if (new_len + 1 > alloc_len) {
alloc_len = new_len + 1; /* now we know exactly how long it is */
- new_buf = emalloc(alloc_len * sizeof(char));
+ new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
memcpy(new_buf, result, *result_len);
efree(result);
result = new_buf;
@@ -1184,7 +1220,7 @@ PHP_FUNCTION(preg_split)
/* Calculate the size of the offsets array, and allocate memory for it. */
size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3;
- offsets = (int *)emalloc(size_offsets * sizeof(int));
+ offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
/* Start at the beginning of the string */
start_offset = 0;
@@ -1212,7 +1248,7 @@ PHP_FUNCTION(preg_split)
if (offset_capture) {
/* Add (match, offset) pair to the return value */
- add_offset_pair(return_value, last_match, &Z_STRVAL_PP(subject)[offsets[0]]-last_match, next_offset);
+ add_offset_pair(return_value, last_match, &Z_STRVAL_PP(subject)[offsets[0]]-last_match, next_offset, NULL);
} else {
/* Add the piece to the return value */
add_next_index_stringl(return_value, last_match,
@@ -1234,7 +1270,7 @@ PHP_FUNCTION(preg_split)
/* If we have matched a delimiter */
if (!no_empty || match_len > 0) {
if (offset_capture) {
- add_offset_pair(return_value, &Z_STRVAL_PP(subject)[offsets[i<<1]], match_len, offsets[i<<1]);
+ add_offset_pair(return_value, &Z_STRVAL_PP(subject)[offsets[i<<1]], match_len, offsets[i<<1], NULL);
} else {
add_next_index_stringl(return_value,
&Z_STRVAL_PP(subject)[offsets[i<<1]],
@@ -1270,7 +1306,7 @@ PHP_FUNCTION(preg_split)
{
if (offset_capture) {
/* Add the last (match, offset) pair to the return value */
- add_offset_pair(return_value, &Z_STRVAL_PP(subject)[start_offset], Z_STRLEN_PP(subject) - start_offset, start_offset);
+ add_offset_pair(return_value, &Z_STRVAL_PP(subject)[start_offset], Z_STRLEN_PP(subject) - start_offset, start_offset, NULL);
} else {
/* Add the last piece to the return value */
add_next_index_stringl(return_value, last_match, Z_STRVAL_PP(subject) + Z_STRLEN_PP(subject) - last_match, 1);
@@ -1324,7 +1360,7 @@ PHP_FUNCTION(preg_quote)
/* Allocate enough memory so that even if each character
is quoted, we won't run out of room */
- out_str = emalloc(2 * Z_STRLEN_PP(in_str_arg) + 1);
+ out_str = safe_emalloc(2, Z_STRLEN_PP(in_str_arg), 1);
/* Go through the string and quote necessary characters */
for(p = in_str, q = out_str; p != in_str_end; p++) {
@@ -1415,7 +1451,7 @@ PHP_FUNCTION(preg_grep)
/* Calculate the size of the offsets array, and allocate memory for it. */
size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3;
- offsets = (int *)emalloc(size_offsets * sizeof(int));
+ offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
/* Initialize return array */
array_init(return_value);