summary refs log tree commit diff
path: root/ext/pcre/pcre.c
diff options
context:
space:
mode:
author Andrey Hristov <andrey@php.net> 1999-05-26 15:22:02 +0000
committer Andrey Hristov <andrey@php.net> 1999-05-26 15:22:02 +0000
commit 0062ae621834c38b7cb065e37dc8d8bbce8448f3 (patch)
tree 6690e6424da5e4dde5ec79124324ab7e42f368e7 /ext/pcre/pcre.c
parent 033858e13d77186b2a4ce1b1833955e43d9d4b74 (diff)
download php-git-0062ae621834c38b7cb065e37dc8d8bbce8448f3.tar.gz
A few changes here.
First of all, as per extensive discussion on the list, the functions are now prefixed with "preg" instead of "pcre".

Secondly, global matching is now possible using preg_match_all. Please give suggestions for a better name if this one doesn't sit well with you; possible names are preg_global_match and preg_gmatch.

preg_match_all takes 4 arguments: a regex pattern, a subject string, the array for capturing subpatterns, and a parameter that tells how the results in the subpatterns array are arranged. Basically, preg_match_all will go through the subject string and try to capture all the matches that it finds, not just the first one like preg_match. The 4th parameter can be PREG_PATTERN_ORDER (default) or PREG_SET_ORDER.

Example:
    preg_match_all("|</?([^>]+)>|", "<div align=left>a test</div>", $out, PREG_PATTERN_ORDER);
This returns results so that $out[0] is an array of full pattern matches, $out[1] is an array of first captured subpattern matches, and so on.
    $out[0] -> ("<div align=left>", "</div>")
    $out[1] -> ("div align=left", "div")

Example:
    preg_match_all("|</?([^>]+)>|", "<div align=left>a test</div>", $out, PREG_SET_ORDER);
This returns results so that $out[0] is an array holding the first full pattern match and its subpatterns, and $out[1] is an array holding the second full pattern match and its subpatterns.
    $out[0] -> ("<div align=left>", "div align=left")
    $out[1] -> ("</div>", "div")

If anyone has better names for these PREG_ constants, or an opinion on which one should be the default, I'd like to hear it.
Diffstat (limited to 'ext/pcre/pcre.c')
-rw-r--r-- ext/pcre/pcre.c 210
1 files changed, 158 insertions, 52 deletions
diff --git a/ext/pcre/pcre.c b/ext/pcre/pcre.c
index 1831f7bc16..461e5cd890 100644
--- a/ext/pcre/pcre.c
+++ b/ext/pcre/pcre.c
@@ -37,13 +37,17 @@
#include "php_pcre.h"
+#define PREG_PATTERN_ORDER 0
+#define PREG_SET_ORDER 1
+
/* {{{ module definition structures */
unsigned char third_arg_force_ref[] = { 3, BYREF_NONE, BYREF_NONE, BYREF_FORCE };
function_entry pcre_functions[] = {
- PHP_FE(pcre_match, third_arg_force_ref)
- PHP_FE(pcre_replace, NULL)
+ PHP_FE(preg_match, third_arg_force_ref)
+ PHP_FE(preg_match_all, third_arg_force_ref)
+ PHP_FE(preg_replace, NULL)
{NULL, NULL, NULL}
};
@@ -55,6 +59,7 @@ php3_module_entry pcre_module_entry = {
/* }}} */
+
#ifdef ZTS
int pcre_globals_id;
#else
@@ -118,6 +123,9 @@ int php_minit_pcre(INIT_FUNC_ARGS)
#else
zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, _php_free_pcre_cache, 1);
#endif
+
+ REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
return SUCCESS;
}
/* }}} */
@@ -268,23 +276,31 @@ static pcre* _pcre_get_compiled_regex(char *regex, pcre_extra *extra) {
/* }}} */
-/* {{{ proto pcre_match(string pattern, string subject [, array subpatterns ])
- Perform a Perl-style regular expression match */
-PHP_FUNCTION(pcre_match)
+/* {{{ void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) */
+void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
{
zval *regex, /* Regular expression */
*subject, /* String to match against */
- *subpats = NULL; /* Array for subpatterns */
+ *subpats = NULL, /* Array for subpatterns */
+ *subpats_order, /* Order of the results in the subpatterns
+ array for global match */
+ *result_set, /* Holds a set of subpatterns after
+ a global match */
+ **match_sets; /* An array of sets of matches for each
+ subpattern after a global match */
pcre *re = NULL; /* Compiled regular expression */
pcre_extra *extra = NULL; /* Holds results of studying */
int exoptions = 0; /* Execution options */
- int count; /* Count of matched subpatterns */
+ int count = 0; /* Count of matched subpatterns */
int *offsets; /* Array of subpattern offsets */
+ int num_subpats; /* Number of captured subpatterns */
int size_offsets; /* Size of the offsets array */
int matched; /* Has anything matched */
int i;
+ int subpats_order_val; /* Integer value of subpats_order */
const char **stringlist; /* Used to hold list of subpatterns */
-
+ int subject_offset; /* Current position in the subject string */
+
/* Get function parameters and do error-checking. */
switch(ARG_COUNT(ht)) {
case 2:
@@ -297,10 +313,30 @@ PHP_FUNCTION(pcre_match)
if (getParameters(ht, 3, &regex, &subject, &subpats) == FAILURE) {
WRONG_PARAM_COUNT;
}
+ if (global)
+ subpats_order_val = PREG_PATTERN_ORDER;
+ if (!ParameterPassedByReference(ht, 3)) {
+ zend_error(E_WARNING, "Array to be filled with matches must be passed by reference.");
+ RETURN_FALSE;
+ }
+ break;
+
+ case 4:
+ if (getParameters(ht, 4, &regex, &subject, &subpats, &subpats_order) == FAILURE) {
+ WRONG_PARAM_COUNT;
+ }
if (!ParameterPassedByReference(ht, 3)) {
zend_error(E_WARNING, "Array to be filled with matches must be passed by reference.");
RETURN_FALSE;
}
+
+ /* Make sure subpats_order is a number */
+ convert_to_long(subpats_order);
+ subpats_order_val = subpats_order->value.lval;
+ if (subpats_order_val < PREG_PATTERN_ORDER ||
+ subpats_order_val > PREG_SET_ORDER) {
+ zend_error(E_WARNING, "Wrong value for parameter 4 in call to preg_match_all()");
+ }
break;
default:
@@ -311,68 +347,138 @@ PHP_FUNCTION(pcre_match)
convert_to_string(regex);
convert_to_string(subject);
+ /* Make sure to clean up the passed array and initialize it. */
+ if (subpats != NULL) {
+ zval_dtor(subpats);
+ array_init(subpats);
+ }
+
/* Compile regex or get it from cache. */
if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra)) == NULL)
return;
-
+
/* Calculate the size of the offsets array, and allocate memory for it. */
- size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3;
+ num_subpats = pcre_info(re, NULL, NULL) + 1;
+ size_offsets = num_subpats * 3;
offsets = (int *)emalloc(size_offsets * sizeof(int));
-
- /* Execute the regular expression. */
- count = pcre_exec(re, extra, subject->value.str.val, subject->value.str.len,
- exoptions, offsets, size_offsets);
-
- /* Check for too many substrings condition. */
- if (count == 0) {
- zend_error(E_NOTICE, "Matched, but too many substrings\n");
- count = size_offsets/3;
+
+ /* Allocate match sets array and initialize the values */
+ if (global && subpats_order_val == PREG_PATTERN_ORDER) {
+ match_sets = (zval **)emalloc(num_subpats * sizeof(zval *));
+ for (i=0; i<num_subpats; i++) {
+ match_sets[i] = (zval *)emalloc(sizeof(zval));
+ array_init(match_sets[i]);
+ match_sets[i]->is_ref = 0;
+ match_sets[i]->refcount = 1;
+ }
}
+
+ /* Start from the beginning of the string */
+ subject_offset = 0;
- /* If something has matched */
- if (count >= 0) {
- matched = 1;
-
- /* If subpatters array has been passed, fill it in with values. */
- if (subpats != NULL) {
- /* Try to get the list of substrings and display a warning if failed. */
- if (pcre_get_substring_list(subject->value.str.val, offsets, count, &stringlist) < 0) {
- efree(offsets);
- efree(re);
- zend_error(E_WARNING, "Get subpatterns list failed");
- return;
- }
+ do {
+ /* Execute the regular expression. */
+ count = pcre_exec(re, extra, &subject->value.str.val[subject_offset],
+ subject->value.str.len-subject_offset,
+ (subject_offset ? exoptions|PCRE_NOTBOL : exoptions),
+ offsets, size_offsets);
- /* Make sure to clean up the passed array and initialize it. */
- zval_dtor(subpats);
- array_init(subpats);
+ /* Check for too many substrings condition. */
+ if (count == 0) {
+ zend_error(E_NOTICE, "Matched, but too many substrings\n");
+ count = size_offsets/3;
+ }
+
+ /* If something has matched */
+ if (count >= 0) {
+ matched = 1;
+
+ /* If subpatters array has been passed, fill it in with values. */
+ if (subpats != NULL) {
+ /* Try to get the list of substrings and display a warning if failed. */
+ if (pcre_get_substring_list(&subject->value.str.val[subject_offset],
+ offsets, count, &stringlist) < 0) {
+ efree(offsets);
+ efree(re);
+ zend_error(E_WARNING, "Get subpatterns list failed");
+ return;
+ }
- /* For each subpattern, insert it into the subpatterns array. */
- for (i=0; i<count; i++) {
- add_next_index_string(subpats, (char *)stringlist[i], 1);
+ if (global) { /* global pattern matching */
+ if (subpats_order_val == PREG_PATTERN_ORDER) {
+ /* For each subpattern, insert it into the appropriate array */
+ for (i=0; i<count; i++) {
+ add_next_index_string(match_sets[i], (char *)stringlist[i], 1);
+ }
+ }
+ else {
+ /* Allocate the result set array */
+ result_set = emalloc(sizeof(zval));
+ array_init(result_set);
+ result_set->is_ref = 0;
+ result_set->refcount = 1;
+
+ /* Add all the subpatterns to it */
+ for (i=0; i<count; i++) {
+ add_next_index_string(result_set, (char *)stringlist[i], 1);
+ }
+ /* And add it to the output array */
+ zend_hash_next_index_insert(subpats->value.ht, &result_set,
+ sizeof(zval *), NULL);
+ }
+ }
+ else { /* single pattern matching */
+ /* For each subpattern, insert it into the subpatterns array. */
+ for (i=0; i<count; i++) {
+ add_next_index_string(subpats, (char *)stringlist[i], 1);
+ }
+ }
+
+ php_pcre_free(stringlist);
+
+ /* Advance to the position right after the last full match */
+ subject_offset += offsets[1];
}
-
- php_pcre_free(stringlist);
}
- }
- /* If nothing matched */
- else {
- matched = 0;
+ /* If nothing matched */
+ else {
+ matched = 0;
+ }
+ } while (global && count >= 0);
- /* Make sure to clean up the passed array and initialize it
- to empty since we don't want to leave previous values in it. */
- if (subpats != NULL) {
- zval_dtor(subpats);
- array_init(subpats);
+ /* Add the match sets to the output array and clean up */
+ if (global && subpats_order_val == PREG_PATTERN_ORDER) {
+ for (i=0; i<num_subpats; i++) {
+ zend_hash_next_index_insert(subpats->value.ht, &match_sets[i], sizeof(zval *), NULL);
}
+ efree(match_sets);
}
-
+
efree(offsets);
RETVAL_LONG(matched);
}
/* }}} */
+
+/* {{{ proto preg_match(string pattern, string subject [, array subpatterns ])
+ Perform a Perl-style regular expression match */
+PHP_FUNCTION(preg_match)
+{
+ _pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
+}
+/* }}} */
+
+
+/* {{{ proto preg_match_all(string pattern, string subject, array subpatterns, integer order)
+ Perform a Perl-style global regular expression match */
+PHP_FUNCTION(preg_match_all)
+{
+ _pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
+}
+/* }}} */
+
+
/* {{{ int _pcre_get_backref(const char *walk, int *backref) */
static int _pcre_get_backref(const char *walk, int *backref)
{
@@ -602,9 +708,9 @@ static char *_php_replace_in_subject(zval *regex, zval *replace, zval *subject)
}
-/* {{{ proto pcre_replace(string|array regex, string|array replace, string|array subject)
+/* {{{ proto preg_replace(string|array regex, string|array replace, string|array subject)
Perform Perl-style regular expression replacement */
-PHP_FUNCTION(pcre_replace)
+PHP_FUNCTION(preg_replace)
{
zval *regex,
*replace,