summaryrefslogtreecommitdiff
path: root/ext/pcre/php_pcre.c
diff options
context:
space:
mode:
author Zeev Suraski <zeev@php.net> 1999-07-06 19:24:13 +0000
committer Zeev Suraski <zeev@php.net> 1999-07-06 19:24:13 +0000
commit 8ee9fd57689cd3cd28c57260777403735b12e771 (patch)
tree 4d768e8cb3bc932877427834495a7c2ab09c9d16 /ext/pcre/php_pcre.c
parent 52308cd368bb26146b2e534c68ece91df5656ea2 (diff)
download php-git-8ee9fd57689cd3cd28c57260777403735b12e771.tar.gz
avoid two copies of pcre.c in the repository
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r-- ext/pcre/php_pcre.c 1111
1 files changed, 1111 insertions, 0 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
new file mode 100644
index 0000000000..6a9ca26b90
--- /dev/null
+++ b/ext/pcre/php_pcre.c
@@ -0,0 +1,1111 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP HTML Embedded Scripting Language Version 3.0 |
+ +----------------------------------------------------------------------+
+ | Copyright (c) 1997-1999 PHP Development Team (See Credits file) |
+ +----------------------------------------------------------------------+
+ | This program is free software; you can redistribute it and/or modify |
+ | it under the terms of one of the following licenses: |
+ | |
+ | A) the GNU General Public License as published by the Free Software |
+ | Foundation; either version 2 of the License, or (at your option) |
+ | any later version. |
+ | |
+ | B) the PHP License as published by the PHP Development Team and |
+ | included in the distribution in the file: LICENSE |
+ | |
+ | This program is distributed in the hope that it will be useful, |
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of |
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+ | GNU General Public License for more details. |
+ | |
+ | You should have received a copy of both licenses referred to here. |
+ | If you did not, or have any questions about PHP licensing, please |
+ | contact core@php.net. |
+ +----------------------------------------------------------------------+
+ | Authors: Andrey Zmievski <andrey@ispi.net> |
+ +----------------------------------------------------------------------+
+ */
+
+/* $Id$ */
+
+#include "php.h"
+
+#if HAVE_PCRE
+
+#include "php_pcre.h"
+#include "ext/standard/php3_string.h"
+
+/* Result layouts for preg_match_all(); also exported to scripts as constants
+   of the same names in php_minit_pcre(). */
+#define PREG_PATTERN_ORDER 0
+#define PREG_SET_ORDER 1
+
+/* preg-specific option bit, set by the 'e' pattern modifier: the replacement
+   string is evaluated as code instead of being substituted literally. */
+#define PREG_REPLACE_EVAL (1<<0)
+
+/* {{{ module definition structures */
+
+/* Argument-passing descriptor used by preg_match and preg_match_all: the third
+   argument is forced to be passed by reference (presumably the leading 3 is
+   the number of entries that follow -- confirm against the BYREF_* definition). */
+unsigned char third_arg_force_ref[] = { 3, BYREF_NONE, BYREF_NONE, BYREF_FORCE };
+
+/* Script-visible functions exported by this extension; the all-NULL entry
+   terminates the table. */
+function_entry pcre_functions[] = {
+ PHP_FE(preg_match, third_arg_force_ref)
+ PHP_FE(preg_match_all, third_arg_force_ref)
+ PHP_FE(preg_replace, NULL)
+ PHP_FE(preg_split, NULL)
+ PHP_FE(preg_quote, NULL)
+ PHP_FE(preg_grep, NULL)
+ {NULL, NULL, NULL}
+};
+
+/* Module descriptor for the "PCRE" extension: function table plus the
+   lifecycle hooks defined in this file. The NULL after php_rinit_pcre is
+   presumably the (unused) request-shutdown slot -- confirm against the
+   zend_module_entry declaration. */
+zend_module_entry pcre_module_entry = {
+ "PCRE", pcre_functions, php_minit_pcre, php_mshutdown_pcre,
+ php_rinit_pcre, NULL,
+ php_info_pcre, STANDARD_MODULE_PROPERTIES
+};
+
+/* }}} */
+
+
+/* Storage for the extension globals (which hold the compiled-regex cache).
+   ZTS builds keep only the id handed out by tsrm_allocate_id() in
+   php_minit_pcre(); other builds use one plain global structure. */
+#ifdef ZTS
+int pcre_globals_id;
+#else
+php_pcre_globals pcre_globals;
+#endif
+
+
+/* Allocator installed as pcre_malloc in php_rinit_pcre(): forwards to
+   pemalloc() with its second argument set to 1 (presumably the "persistent"
+   flag -- confirm against the pemalloc definition). */
+static void *php_pcre_malloc(size_t size)
+{
+ return pemalloc(size, 1);
+}
+
+
+/* Deallocator installed as pcre_free in php_rinit_pcre(); the counterpart of
+   php_pcre_malloc(), forwarding to pefree() with the same flag value 1. */
+static void php_pcre_free(void *ptr)
+{
+ pefree(ptr, 1);
+}
+
+
+/* Destructor for one regex-cache entry; registered as the element destructor
+   when the cache hash table is initialised.
+
+   data  points at the pcre_cache_entry being removed.
+
+   Releases the compiled pattern and, when present, the study data kept in the
+   same entry. Both are stored by _pcre_get_compiled_regex() and come from the
+   PCRE library, which allocates through pcre_malloc (php_pcre_malloc here),
+   hence pefree(..., 1). */
+static void _php_free_pcre_cache(void *data)
+{
+	pcre_cache_entry *pce = (pcre_cache_entry *) data;
+
+	pefree(pce->re, 1);
+
+	/* extra is only non-NULL for patterns that were studied ('S' modifier);
+	   previously this block was never released. */
+	if (pce->extra != NULL) {
+		pefree(pce->extra, 1);
+	}
+}
+
+
+#ifdef ZTS
+/* ZTS only: constructor passed to tsrm_allocate_id() in php_minit_pcre().
+   Initialises the regex cache hash table with _php_free_pcre_cache as the
+   element destructor.
+   NOTE(review): PCRE_G() presumably expands to an access through the
+   parameter named pcre_globals in ZTS builds -- do not rename it without
+   checking the macro. */
+static void _php_pcre_init_globals(php_pcre_globals *pcre_globals)
+{
+ zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, _php_free_pcre_cache, 1);
+}
+
+
+/* ZTS only: destructor passed to tsrm_allocate_id() in php_minit_pcre().
+   Destroys the regex cache hash table, which runs _php_free_pcre_cache on
+   every cached entry. */
+static void _php_pcre_shutdown_globals(php_pcre_globals *pcre_globals)
+{
+ zend_hash_destroy(&PCRE_G(pcre_cache));
+}
+#endif
+
+
+/* {{{ void php_info_pcre(ZEND_MODULE_INFO_FUNC_ARGS) */
+/* Module info hook: prints the extension title followed by a one-row HTML
+   table showing the version string reported by the PCRE library. */
+void php_info_pcre(ZEND_MODULE_INFO_FUNC_ARGS)
+{
+	const char *library_version = pcre_version();
+
+	php3_printf("Perl Compatible Regular Expressions");
+	php3_printf(
+		"<table cellpadding=5>"
+		"<tr>"
+		"<td>PCRE library version:</td>"
+		"<td>%s</td>"
+		"</tr>"
+		"</table>",
+		library_version);
+}
+/* }}} */
+
+
+/* {{{ int php_minit_pcre(INIT_FUNC_ARGS) */
+/* Module startup hook. Sets up the compiled-regex cache and registers the
+   PREG_PATTERN_ORDER / PREG_SET_ORDER constants for scripts. Always returns
+   SUCCESS. */
+int php_minit_pcre(INIT_FUNC_ARGS)
+{
+#ifdef ZTS
+ /* Thread-safe build: globals are allocated by TSRM, which calls the
+    init/shutdown callbacks below to create and destroy the cache. */
+ pcre_globals_id = tsrm_allocate_id(
+ sizeof(php_pcre_globals),
+ _php_pcre_init_globals,
+ _php_pcre_shutdown_globals);
+#else
+ /* Single global cache; destroyed again in php_mshutdown_pcre(). */
+ zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, _php_free_pcre_cache, 1);
+#endif
+
+ REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
+ return SUCCESS;
+}
+/* }}} */
+
+
+/* {{{ int php_mshutdown_pcre(SHUTDOWN_FUNC_ARGS) */
+/* Module shutdown hook. In non-ZTS builds destroys the regex cache created in
+   php_minit_pcre(); in ZTS builds the cache is torn down by the globals
+   destructor registered with tsrm_allocate_id() instead. Always returns
+   SUCCESS. */
+int php_mshutdown_pcre(SHUTDOWN_FUNC_ARGS)
+{
+#ifndef ZTS
+ zend_hash_destroy(&PCRE_G(pcre_cache));
+#endif
+ return SUCCESS;
+}
+/* }}} */
+
+
+/* {{{ int php_rinit_pcre(INIT_FUNC_ARGS) */
+/* Request startup hook. Points the PCRE library's allocation hooks at the
+   wrappers defined in this file, so that compiled patterns (and anything else
+   the library allocates) go through pemalloc/pefree. Always returns SUCCESS. */
+int php_rinit_pcre(INIT_FUNC_ARGS)
+{
+ pcre_malloc = php_pcre_malloc;
+ pcre_free = php_pcre_free;
+
+ return SUCCESS;
+}
+/* }}} */
+
+
+/* {{{ static pcre* _pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_options) */
+/* Returns the compiled form of a delimited regex string such as "/foo/i",
+   compiling it and adding it to the per-module cache on first use.
+
+   regex         full pattern string: optional leading whitespace, a delimiter
+                 (not alphanumeric, not a backslash), the pattern, the same
+                 delimiter again, then modifier letters
+   extra         NOTE(review): received by value, so the study data assigned
+                 below (from the cache or from pcre_study) never reaches the
+                 caller; making it effective needs a pcre_extra ** parameter
+                 and matching changes in every caller
+   preg_options  out: preg-specific flags (PREG_REPLACE_EVAL for 'e'); left
+                 untouched when the delimiters cannot be parsed
+
+   Returns the compiled pattern, or NULL after raising an E_WARNING. The
+   returned pattern is owned by the cache and released by the cache's
+   destructor; callers must not free it. */
+static pcre* _pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_options) {
+	pcre *re = NULL;
+	int coptions = 0;
+	int soptions = 0;
+	const char *error;
+	int erroffset;
+	char delimiter;
+	char *p, *pp;
+	char *pattern;
+	int regex_len;
+	int do_study = 0;
+	int poptions = 0;
+	pcre_cache_entry *pce;
+	pcre_cache_entry new_entry;
+	PCRE_LS_FETCH();
+
+	/* Try to lookup the cached regex entry, and if successful, just pass
+	   back the compiled pattern, otherwise go on and compile it. The cache
+	   key is the complete regex string including delimiters and modifiers. */
+	regex_len = strlen(regex);
+	if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
+		extra = pce->extra;
+		*preg_options = pce->preg_options;
+		return pce->re;
+	}
+
+	p = regex;
+
+	/* Parse through the leading whitespace, and display a warning if we
+	   get to the end without encountering a delimiter. The cast keeps the
+	   <ctype.h> argument in the range the C standard requires even where
+	   plain char is signed. */
+	while (isspace((unsigned char)*p)) p++;
+	if (*p == 0) {
+		zend_error(E_WARNING, "Empty regular expression");
+		return NULL;
+	}
+
+	/* Get the delimiter and display a warning if it is alphanumeric
+	   or a backslash. */
+	delimiter = *p++;
+	if (isalnum((unsigned char)delimiter) || delimiter == '\\') {
+		zend_error(E_WARNING, "Delimiter must not be alphanumeric or backslash");
+		return NULL;
+	}
+
+	/* Search for the ending delimiter. A backslash escapes the character
+	   that follows it, so the pair is skipped as a unit: "\/" does not end
+	   the pattern, while the delimiter after "\\" does. (Looking only at the
+	   preceding character gets the second case wrong.) */
+	pp = p;
+	while (*pp != 0) {
+		if (*pp == '\\' && pp[1] != 0) {
+			pp++;
+		} else if (*pp == delimiter) {
+			break;
+		}
+		pp++;
+	}
+	if (*pp == 0) {
+		zend_error(E_WARNING, "No ending delimiter found");
+		return NULL;
+	}
+
+	/* Make a copy of the actual pattern (the text between the delimiters). */
+	pattern = estrndup(p, pp-p);
+
+	/* Move on to the options */
+	pp++;
+
+	/* Clear out preg options */
+	*preg_options = 0;
+
+	/* Parse through the options, setting appropriate flags. Display
+	   a warning if we encounter an unknown option. */
+	while (*pp != 0) {
+		switch (*pp++) {
+			/* Perl compatible options */
+			case 'i':	coptions |= PCRE_CASELESS;			break;
+			case 'm':	coptions |= PCRE_MULTILINE;			break;
+			case 's':	coptions |= PCRE_DOTALL;			break;
+			case 'x':	coptions |= PCRE_EXTENDED;			break;
+
+			/* PCRE specific options */
+			case 'A':	coptions |= PCRE_ANCHORED;			break;
+			case 'D':	coptions |= PCRE_DOLLAR_ENDONLY;	break;
+			case 'S':	do_study = 1;						break;
+			case 'U':	coptions |= PCRE_UNGREEDY;			break;
+			case 'X':	coptions |= PCRE_EXTRA;				break;
+
+			/* Custom preg options */
+			case 'e':	poptions |= PREG_REPLACE_EVAL;		break;
+
+			/* Spaces and newlines among the modifiers are ignored */
+			case ' ':
+			case '\n':
+				break;
+
+			default:
+				zend_error(E_WARNING, "Unknown option '%c'", pp[-1]);
+				efree(pattern);
+				return NULL;
+		}
+	}
+
+	/* Compile pattern and display a warning if compilation failed. */
+	re = pcre_compile(pattern,
+					  coptions,
+					  &error,
+					  &erroffset,
+					  NULL);
+
+	if (re == NULL) {
+		zend_error(E_WARNING, "Compilation failed: %s at offset %d\n", error, erroffset);
+		efree(pattern);
+		return NULL;
+	}
+
+	/* If study option was specified, study the pattern and
+	   store the result in extra for passing to pcre_exec. */
+	if (do_study) {
+		extra = pcre_study(re, soptions, &error);
+		if (error != NULL) {
+			zend_error(E_WARNING, "Error while studying pattern");
+		}
+	}
+
+	*preg_options = poptions;
+
+	efree(pattern);
+
+	/* Store the compiled pattern and extra info in the cache. The entry is
+	   copied into the hash table, which from now on owns re and extra. */
+	new_entry.re = re;
+	new_entry.extra = extra;
+	new_entry.preg_options = poptions;
+	zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
+					 sizeof(pcre_cache_entry), NULL);
+
+	return re;
+}
+/* }}} */
+
+
+/* {{{ void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) */
+/* Shared implementation of preg_match() (global == 0) and preg_match_all()
+   (global != 0).
+
+   Script arguments: regex, subject, optionally an array passed by reference
+   that receives the matched subpatterns, and optionally a result order
+   (PREG_PATTERN_ORDER or PREG_SET_ORDER).
+
+   Sets the return value to the number of matches found, or to FALSE when the
+   regex cannot be compiled or the array was not passed by reference.
+
+   The compiled pattern belongs to the regex cache and is never freed here. */
+static void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
+{
+	zval			*regex,				/* Regular expression */
+					*subject,			/* String to match against */
+					*subpats = NULL,	/* Array for subpatterns */
+					*subpats_order,		/* Order of the results in the subpatterns
+										   array for global match */
+					*result_set,		/* Holds a set of subpatterns after
+										   a global match */
+				   **match_sets = NULL;	/* An array of sets of matches for each
+										   subpattern after a global match */
+	pcre			*re = NULL;			/* Compiled regular expression */
+	pcre_extra		*extra = NULL;		/* Holds results of studying */
+	int				 exoptions = 0;		/* Execution options */
+	int				 preg_options = 0;	/* Custom preg options */
+	int				 count = 0;			/* Count of matched subpatterns */
+	int				*offsets;			/* Array of subpattern offsets */
+	int				 num_subpats;		/* Number of captured subpatterns */
+	int				 size_offsets;		/* Size of the offsets array */
+	int				 matched;			/* Has anything matched */
+	int				 i;
+	int				 subpats_order_val = 0;	/* Integer value of subpats_order */
+	const char	   **stringlist;		/* Used to hold list of subpatterns */
+	char			*match,				/* The current match */
+					*piece,				/* The current piece of subject */
+					*subject_end;		/* Points to the end of the subject */
+
+
+	/* Get function parameters and do error-checking. */
+	switch(ARG_COUNT(ht)) {
+		case 2:
+			if (getParameters(ht, 2, &regex, &subject) == FAILURE) {
+				WRONG_PARAM_COUNT;
+			}
+			break;
+
+		case 3:
+			if (getParameters(ht, 3, &regex, &subject, &subpats) == FAILURE) {
+				WRONG_PARAM_COUNT;
+			}
+			if (global)
+				subpats_order_val = PREG_PATTERN_ORDER;
+			if (!ParameterPassedByReference(ht, 3)) {
+				zend_error(E_WARNING, "Array to be filled with matches must be passed by reference.");
+				RETURN_FALSE;
+			}
+			break;
+
+		case 4:
+			if (getParameters(ht, 4, &regex, &subject, &subpats, &subpats_order) == FAILURE) {
+				WRONG_PARAM_COUNT;
+			}
+			if (!ParameterPassedByReference(ht, 3)) {
+				zend_error(E_WARNING, "Array to be filled with matches must be passed by reference.");
+				RETURN_FALSE;
+			}
+
+			/* Make sure subpats_order is a number */
+			convert_to_long(subpats_order);
+			subpats_order_val = subpats_order->value.lval;
+			if (subpats_order_val < PREG_PATTERN_ORDER ||
+				subpats_order_val > PREG_SET_ORDER) {
+				zend_error(E_WARNING, "Wrong value for parameter 4 in call to preg_match_all()");
+			}
+			break;
+
+		default:
+			WRONG_PARAM_COUNT;
+	}
+
+	/* Make sure we're dealing with strings. */
+	convert_to_string(regex);
+	convert_to_string(subject);
+
+	/* Make sure to clean up the passed array and initialize it. */
+	if (subpats != NULL) {
+		zval_dtor(subpats);
+		array_init(subpats);
+	}
+
+	/* Compile regex or get it from cache. */
+	if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra, &preg_options)) == NULL) {
+		RETURN_FALSE;
+	}
+
+	/* Calculate the size of the offsets array, and allocate memory for it. */
+	num_subpats = pcre_info(re, NULL, NULL) + 1;
+	size_offsets = num_subpats * 3;
+	offsets = (int *)emalloc(size_offsets * sizeof(int));
+
+	/* Allocate match sets array and initialize the values. Only needed when
+	   there is an output array to put them into; without this check a
+	   two-argument global call would later dereference a NULL subpats. */
+	if (global && subpats != NULL && subpats_order_val == PREG_PATTERN_ORDER) {
+		match_sets = (zval **)emalloc(num_subpats * sizeof(zval *));
+		for (i=0; i<num_subpats; i++) {
+			match_sets[i] = (zval *)emalloc(sizeof(zval));
+			array_init(match_sets[i]);
+			match_sets[i]->is_ref = 0;
+			match_sets[i]->refcount = 1;
+		}
+	}
+
+	/* Start from the beginning of the string */
+	piece = subject->value.str.val;
+	subject_end = piece + subject->value.str.len;
+	match = NULL;
+	matched = 0;
+
+	do {
+		/* Execute the regular expression. Offsets come back relative to
+		   piece; the last argument is non-zero when the previous match
+		   started exactly at piece. */
+		count = pcre_exec(re, extra, piece,
+						  subject_end-piece, subject->value.str.val,
+						  (piece==subject->value.str.val ? exoptions : exoptions|PCRE_NOTBOL),
+						  offsets, size_offsets, (piece == match));
+
+		/* Check for too many substrings condition. */
+		if (count == 0) {
+			zend_error(E_NOTICE, "Matched, but too many substrings\n");
+			count = size_offsets/3;
+		}
+
+		/* If something has matched */
+		if (count >= 0) {
+			matched++;
+			match = piece + offsets[0];
+
+			/* If subpatterns array has been passed, fill it in with values. */
+			if (subpats != NULL) {
+				/* Try to get the list of substrings and display a warning if failed. */
+				if (pcre_get_substring_list(piece, offsets, count, &stringlist) < 0) {
+					/* Release only what this call allocated. The compiled
+					   pattern stays in the cache and must not be freed. */
+					if (match_sets != NULL) {
+						for (i=0; i<num_subpats; i++) {
+							zval_dtor(match_sets[i]);
+							efree(match_sets[i]);
+						}
+						efree(match_sets);
+					}
+					efree(offsets);
+					zend_error(E_WARNING, "Get subpatterns list failed");
+					return;
+				}
+
+				if (global) {	/* global pattern matching */
+					if (subpats_order_val == PREG_PATTERN_ORDER) {
+						/* For each subpattern, insert it into the appropriate array */
+						for (i=0; i<count; i++) {
+							add_next_index_string(match_sets[i], (char *)stringlist[i], 1);
+						}
+					}
+					else {
+						/* Allocate the result set array */
+						result_set = emalloc(sizeof(zval));
+						array_init(result_set);
+						result_set->is_ref = 0;
+						result_set->refcount = 1;
+
+						/* Add all the subpatterns to it */
+						for (i=0; i<count; i++) {
+							add_next_index_string(result_set, (char *)stringlist[i], 1);
+						}
+						/* And add it to the output array */
+						zend_hash_next_index_insert(subpats->value.ht, &result_set,
+								sizeof(zval *), NULL);
+					}
+				}
+				else {			/* single pattern matching */
+					/* For each subpattern, insert it into the subpatterns array. */
+					for (i=0; i<count; i++) {
+						add_next_index_string(subpats, (char *)stringlist[i], 1);
+					}
+				}
+
+				php_pcre_free(stringlist);
+			}
+
+			/* Advance to the position right after the last full match. This
+			   has to happen whether or not an output array was supplied,
+			   otherwise a global match without one never makes progress. */
+			piece += offsets[1];
+		}
+	} while (global && count >= 0);
+
+	/* Add the match sets to the output array and clean up */
+	if (match_sets != NULL) {
+		for (i=0; i<num_subpats; i++) {
+			zend_hash_next_index_insert(subpats->value.ht, &match_sets[i], sizeof(zval *), NULL);
+		}
+		efree(match_sets);
+	}
+
+	efree(offsets);
+
+	RETVAL_LONG(matched);
+}
+/* }}} */
+
+
+/* {{{ proto preg_match(string pattern, string subject [, array subpatterns ])
+ Perform a Perl-style regular expression match */
+/* Thin wrapper: runs _pcre_match() in non-global mode, i.e. the matching loop
+   there stops after the first attempt. */
+PHP_FUNCTION(preg_match)
+{
+ _pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
+}
+/* }}} */
+
+
+/* {{{ proto preg_match_all(string pattern, string subject, array subpatterns [, int order ])
+ Perform a Perl-style global regular expression match */
+/* Thin wrapper: runs _pcre_match() in global mode, which keeps matching until
+   the pattern no longer matches. order is PREG_PATTERN_ORDER or
+   PREG_SET_ORDER. */
+PHP_FUNCTION(preg_match_all)
+{
+ _pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
+}
+/* }}} */
+
+
+/* {{{ int _preg_get_backref(const char *walk, int *backref) */
+/* Parses a one- or two-digit decimal backreference number at walk.
+
+   walk     text immediately after the backslash
+   backref  out: the parsed number; written only when a digit is found
+
+   Returns 1 when walk starts with a digit, 0 otherwise. A third digit, if
+   any, is not consumed. */
+static int _preg_get_backref(const char *walk, int *backref)
+{
+	const char first = walk[0];
+	int number;
+
+	/* Also covers the terminating NUL, which is not a digit */
+	if (first < '0' || first > '9') {
+		return 0;
+	}
+
+	number = first - '0';
+
+	/* walk[0] was a digit, so walk[1] is still inside the string */
+	if (walk[1] >= '0' && walk[1] <= '9') {
+		number = number * 10 + (walk[1] - '0');
+	}
+
+	*backref = number;
+	return 1;
+}
+/* }}} */
+
+
+/* {{{ int _preg_do_eval(char *eval_str, char *subject, int *offsets,
+ int count, char **result) */
+/* Implements the 'e' modifier of preg_replace(): substitutes backreferences
+   in the replacement text, evaluates the result as code and hands back the
+   value converted to a string.
+
+   eval_str  replacement text to evaluate (not modified; a copy is edited)
+   subject   start of the text the offsets refer to
+   offsets   offset pairs filled in by pcre_exec
+   count     number of valid offset pairs
+   result    out: newly allocated copy of the evaluation result; the caller
+             frees it with efree()
+
+   Returns the length of *result.
+
+   NOTE(security): the matched text is spliced into the code string without
+   any quoting or escaping, so subject text taken from an untrusted source
+   ends up being executed. */
+static int _preg_do_eval(char *eval_str, char *subject, int *offsets,
+						 int count, char **result)
+{
+	zval		 retval;			/* Return value from evaluation */
+	char		 backref_buf[4],	/* Buffer for string version of backref */
+				*code,				/* PHP code string */
+				*new_code,			/* Code as result of substitution */
+				*match,				/* Current match for a backref */
+				*walk;				/* Used to walk the code string */
+	int			 code_len;			/* Length of the code string */
+	int			 new_code_len;		/* Length of the substituted code string */
+	int			 match_len;			/* Length of the match */
+	int			 result_len;		/* Length of the result of the evaluation */
+	int			 backref;			/* Current backref */
+	CLS_FETCH();
+	ELS_FETCH();
+
+	/* Save string to be evaluated, since we will be modifying it */
+	code = estrdup(eval_str);
+	walk = code;
+	new_code_len = code_len = strlen(code);
+
+	while (*walk) {
+		/* If found a backreference.. */
+		if ('\\' == *walk &&
+			_preg_get_backref(walk+1, &backref) &&
+			backref < count) {
+			/* Find the corresponding string match and substitute it
+			   in instead of the backref */
+			match = subject + offsets[backref<<1];
+			match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
+			/* backref has at most two digits, so "\NN" plus the NUL fits */
+			sprintf(backref_buf, "\\%d", backref);
+			new_code = _php3_str_to_str(code, code_len,
+										backref_buf, (backref > 9) ? 3 : 2,
+										match, match_len, &new_code_len);
+
+			/* Adjust the walk pointer: same position in the new buffer,
+			   moved past the text that was just inserted */
+			walk = new_code + (walk - code) + match_len;
+
+			/* Clean up and reassign */
+			efree(code);
+			code = new_code;
+			code_len = new_code_len;
+		} else {
+			walk++;
+		}
+	}
+
+	/* Run the code */
+	zend_eval_string(code, &retval CLS_CC ELS_CC);
+	convert_to_string(&retval);
+
+	/* Save the return value and its length. The copy is made by length so
+	   that it really holds result_len bytes even when the value contains an
+	   embedded NUL; the caller copies exactly that many bytes out of it. */
+	result_len = retval.value.str.len;
+	*result = estrndup(retval.value.str.val, result_len);
+
+	/* Clean up */
+	zval_dtor(&retval);
+	efree(code);
+
+	return result_len;
+}
+/* }}} */
+
+
+/* {{{ char *_php_pcre_replace(char *regex, char *subject, char *replace) */
+/* Replaces every match of regex in subject with replace.
+
+   regex    delimited pattern string, compiled (or fetched from the cache) here
+   subject  NUL-terminated input text
+   replace  replacement text; "\N" / "\NN" refer to captured subpatterns. When
+            the pattern carries the 'e' modifier the text is evaluated as code
+            instead.
+
+   Returns a newly allocated string the caller must efree(), or NULL when the
+   regex cannot be compiled.
+
+   The compiled pattern belongs to the regex cache and is never freed here. */
+char *_php_pcre_replace(char *regex, char *subject, char *replace)
+{
+	pcre			*re = NULL;			/* Compiled regular expression */
+	pcre_extra		*extra = NULL;		/* Holds results of studying */
+	int				 exoptions = 0;		/* Execution options */
+	int				 preg_options = 0;	/* Custom preg options */
+	int				 count = 0;			/* Count of matched subpatterns */
+	int				*offsets;			/* Array of subpattern offsets */
+	int				 size_offsets;		/* Size of the offsets array */
+	int				 new_len;			/* Length of needed storage */
+	int				 alloc_len;			/* Actual allocated length */
+	int				 subject_len;		/* Length of the subject string */
+	int				 eval_result_len=0;	/* Length of the eval'ed string */
+	int				 result_len;		/* Current length of the result */
+	int				 match_len;			/* Length of the current match */
+	int				 backref;			/* Backreference number */
+	int				 eval;				/* If the replacement string should be eval'ed */
+	char			*result,			/* Result of replacement */
+					*new_buf,			/* Temporary buffer for re-allocation */
+					*walkbuf,			/* Location of current replacement in the result */
+					*walk,				/* Used to walk the replacement string */
+					*match,				/* The current match */
+					*piece,				/* The current piece of subject */
+					*subject_end,		/* Points to the end of the subject */
+					*eval_result;		/* Result of eval */
+
+	/* Compile regex or get it from cache. */
+	if ((re = _pcre_get_compiled_regex(regex, extra, &preg_options)) == NULL) {
+		return NULL;
+	}
+
+	/* Calculate the size of the offsets array, and allocate memory for it. */
+	size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3;
+	offsets = (int *)emalloc(size_offsets * sizeof(int));
+
+	subject_len = strlen(subject);
+
+	/* Initial guess for the output size; grown below when needed */
+	alloc_len = 2 * subject_len + 1;
+	result = emalloc(alloc_len * sizeof(char));
+	if (!result) {
+		zend_error(E_WARNING, "Unable to allocate memory in pcre_replace");
+		/* Only the offsets array is ours to release; re stays in the cache */
+		efree(offsets);
+		return NULL;
+	}
+
+	/* Initialize */
+	match = NULL;
+	result[0] = '\0';
+	piece = subject;
+	subject_end = subject + subject_len;
+	eval = preg_options & PREG_REPLACE_EVAL;
+
+	while (count >= 0) {
+		/* Execute the regular expression. Offsets come back relative to
+		   piece. */
+		count = pcre_exec(re, extra, piece,
+						  subject_end-piece, subject,
+						  (piece==subject ? exoptions : exoptions|PCRE_NOTBOL),
+						  offsets, size_offsets, (piece == match));
+
+		/* Check for too many substrings condition. */
+		if (count == 0) {
+			zend_error(E_NOTICE, "Matched, but too many substrings\n");
+			count = size_offsets/3;
+		}
+
+		if (count > 0) {
+			/* Set the match location in piece */
+			match = piece + offsets[0];
+
+			new_len = strlen(result) + offsets[0]; /* part before the match */
+
+			/* If evaluating, do it and add the return string's length */
+			if (eval) {
+				eval_result_len = _preg_do_eval(replace, piece, offsets,
+												count, &eval_result);
+				new_len += eval_result_len;
+			} else { /* do regular substitution */
+				/* First pass over the replacement: only measure it */
+				walk = replace;
+				while (*walk) {
+					if ('\\' == *walk &&
+						_preg_get_backref(walk+1, &backref) &&
+						backref < count) {
+						new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
+						walk += (backref > 9) ? 3 : 2;
+					} else {
+						new_len++;
+						walk++;
+					}
+				}
+			}
+
+			/* Grow the output buffer if this replacement would not fit */
+			if (new_len + 1 > alloc_len) {
+				alloc_len = 1 + alloc_len + 2 * new_len;
+				new_buf = emalloc(alloc_len);
+				strcpy(new_buf, result);
+				efree(result);
+				result = new_buf;
+			}
+			result_len = strlen(result);
+			/* copy the part of the string before the match */
+			strncat(result, piece, match-piece);
+
+			/* copy replacement and backrefs */
+			walkbuf = &result[result_len + offsets[0]];
+
+			/* If evaluating, copy result to the buffer and clean up */
+			if (eval) {
+				memcpy(walkbuf, eval_result, eval_result_len);
+				walkbuf += eval_result_len;
+				efree(eval_result);
+			} else { /* do regular backreference copying */
+				/* Second pass over the replacement: actually write it */
+				walk = replace;
+				while (*walk) {
+					if ('\\' == *walk &&
+						_preg_get_backref(walk+1, &backref) &&
+						backref < count) {
+						match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
+						memcpy (walkbuf,
+								piece + offsets[backref<<1],
+								match_len);
+						walkbuf += match_len;
+						walk += (backref > 9) ? 3 : 2;
+					} else {
+						*walkbuf++ = *walk++;
+					}
+				}
+			}
+			*walkbuf = '\0';
+
+			/* Advance to the next piece */
+			piece += offsets[1];
+		} else {
+			/* No further match: append the remainder of the subject. count
+			   is negative here, which also ends the loop. */
+			new_len = strlen(result) + subject_end-piece;
+			if (new_len + 1 > alloc_len) {
+				alloc_len = new_len + 1; /* now we know exactly how long it is */
+				new_buf = emalloc(alloc_len * sizeof(char));
+				strcpy(new_buf, result);
+				efree(result);
+				result = new_buf;
+			}
+			/* stick that last bit of string on our output */
+			strcat(result, piece);
+		}
+	}
+
+	efree(offsets);
+
+	return result;
+}
+/* }}} */
+
+
+/* Applies one regex, or each regex of an array in turn, to a single subject.
+
+   regex    string, or array of strings applied in array order
+   replace  string, or array whose entries are paired with the regex entries
+            in order; once the array runs out, the empty string is used
+   subject  value to search; converted to a string in place
+
+   Returns a newly allocated string the caller must efree(). With a single
+   regex the result is NULL when the replacement fails; with an array, a
+   failing regex is skipped and the text produced so far is carried on. */
+static char *_php_replace_in_subject(zval *regex, zval *replace, zval *subject)
+{
+	zval		**regex_entry_ptr,
+				 *regex_entry,
+				**replace_entry_ptr,
+				 *replace_entry;
+	char		 *replace_value = NULL,
+				 *subject_value,
+				 *result;
+
+	/* Make sure we're dealing with strings. */
+	convert_to_string(subject);
+
+	/* If regex is an array */
+	if (regex->type == IS_ARRAY) {
+		/* Duplicating subject string for repeated replacement */
+		subject_value = estrdup(subject->value.str.val);
+
+		zend_hash_internal_pointer_reset(regex->value.ht);
+
+		if (replace->type == IS_ARRAY) {
+			zend_hash_internal_pointer_reset(replace->value.ht);
+		}
+		else {
+			/* A scalar replacement is shared by all regexes. Convert it
+			   first: for a non-string value, value.str.val is not a valid
+			   string pointer. */
+			convert_to_string(replace);
+			replace_value = replace->value.str.val;
+		}
+
+		/* For each entry in the regex array, get the entry */
+		while (zend_hash_get_current_data(regex->value.ht, (void **)&regex_entry_ptr) == SUCCESS) {
+			regex_entry = *regex_entry_ptr;
+
+			/* Make sure we're dealing with strings. */
+			convert_to_string(regex_entry);
+
+			/* If replace is an array */
+			if (replace->type == IS_ARRAY) {
+				/* Get current entry */
+				if (zend_hash_get_current_data(replace->value.ht, (void **)&replace_entry_ptr) == SUCCESS) {
+					replace_entry = *replace_entry_ptr;
+
+					/* Make sure we're dealing with strings. */
+					convert_to_string(replace_entry);
+
+					/* Set replacement value to the one we got from array */
+					replace_value = replace_entry->value.str.val;
+
+					zend_hash_move_forward(replace->value.ht);
+				}
+				else {
+					/* We've run out of replacement strings, so use an empty one */
+					replace_value = empty_string;
+				}
+			}
+
+			/* Do the actual replacement and put the result back into subject_value
+			   for further replacements. */
+			if ((result = _php_pcre_replace(regex_entry->value.str.val,
+											subject_value,
+											replace_value)) != NULL) {
+				efree(subject_value);
+				subject_value = result;
+			}
+
+			zend_hash_move_forward(regex->value.ht);
+		}
+
+		return subject_value;
+	}
+	else {
+		/* Make sure we're dealing with strings and do the replacement */
+		convert_to_string(regex);
+		convert_to_string(replace);
+		result = _php_pcre_replace(regex->value.str.val,
+								   subject->value.str.val,
+								   replace->value.str.val);
+		return result;
+	}
+}
+
+
+/* {{{ proto preg_replace(string|array regex, string|array replace, string|array subject)
+ Perform Perl-style regular expression replacement */
+/* For a scalar subject the return value is the replaced string. For an array
+   subject it is a new, sequentially indexed array holding the replaced form
+   of every entry whose replacement succeeded. */
+PHP_FUNCTION(preg_replace)
+{
+	zval	 *regex,
+			 *replace,
+			 *subject;
+	zval	**entry_ptr;
+	char	 *replaced;
+
+	/* Exactly three arguments are required */
+	if (ARG_COUNT(ht) != 3 || getParameters(ht, 3, &regex, &replace, &subject) == FAILURE) {
+		WRONG_PARAM_COUNT;
+	}
+
+	/* Scalar subject: one replacement, returned as a string */
+	if (subject->type != IS_ARRAY) {
+		replaced = _php_replace_in_subject(regex, replace, subject);
+		if (replaced != NULL) {
+			/* The return value gets its own copy, so release ours */
+			RETVAL_STRING(replaced, 1);
+			efree(replaced);
+		}
+		return;
+	}
+
+	/* Array subject: replace in every entry and collect the results */
+	array_init(return_value);
+	zend_hash_internal_pointer_reset(subject->value.ht);
+
+	for (;
+		 zend_hash_get_current_data(subject->value.ht, (void **)&entry_ptr) == SUCCESS;
+		 zend_hash_move_forward(subject->value.ht)) {
+		replaced = _php_replace_in_subject(regex, replace, *entry_ptr);
+		if (replaced != NULL) {
+			/* Ownership of the buffer passes to the array (no copy) */
+			add_next_index_string(return_value, replaced, 0);
+		}
+	}
+}
+/* }}} */
+
+
+/* {{{ proto preg_split(string pattern, string subject [, int limit ]) */
+/* Splits subject at every match of pattern and returns the pieces as an
+   array. With a limit, at most that many pieces are returned and the last
+   one holds the unsplit remainder; without one (or with -1) there is no
+   limit. Returns FALSE when the pattern cannot be compiled. */
+PHP_FUNCTION(preg_split)
+{
+	zval			*regex,				/* Regular expression to split by */
+					*subject,			/* Subject string to split */
+					*limit;				/* Number of pieces to return */
+	pcre			*re = NULL;			/* Compiled regular expression */
+	pcre_extra		*extra = NULL;		/* Holds results of studying */
+	int				*offsets;			/* Array of subpattern offsets */
+	int				 size_offsets;		/* Size of the offsets array */
+	int				 exoptions = 0;		/* Execution options */
+	int				 preg_options = 0;	/* Custom preg options */
+	int				 argc;				/* Argument count */
+	int				 limit_val;			/* Integer value of limit */
+	int				 count = 0;			/* Count of matched subpatterns */
+	char			*match,				/* The current match */
+					*piece,				/* The current piece of subject */
+					*subject_end;		/* Points to the end of subject string */
+
+	/* Get function parameters and do error checking. Both pattern and
+	   subject are required: with a single argument, subject would be used
+	   below without ever having been set. */
+	argc = ARG_COUNT(ht);
+	if (argc < 2 || argc > 3 || getParameters(ht, argc, &regex, &subject, &limit) == FAILURE) {
+		WRONG_PARAM_COUNT;
+	}
+
+	if (argc == 3) {
+		convert_to_long(limit);
+		limit_val = limit->value.lval;
+	}
+	else
+		limit_val = -1;
+
+	/* Make sure we're dealing with strings */
+	convert_to_string(regex);
+	convert_to_string(subject);
+
+	/* Compile regex or get it from cache. */
+	if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra, &preg_options)) == NULL) {
+		RETURN_FALSE;
+	}
+
+	/* Initialize return value */
+	array_init(return_value);
+
+	/* Calculate the size of the offsets array, and allocate memory for it. */
+	size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3;
+	offsets = (int *)emalloc(size_offsets * sizeof(int));
+
+	/* Start at the beginning of the string */
+	piece = subject->value.str.val;
+	subject_end = piece + subject->value.str.len;
+	match = NULL;
+
+	/* Get next piece if no limit or limit not yet reached and something matched*/
+	while ((limit_val == -1 || limit_val > 1) && count >= 0) {
+		count = pcre_exec(re, extra, piece,
+						  subject_end-piece, subject->value.str.val,
+						  (piece==subject->value.str.val ? exoptions : exoptions|PCRE_NOTBOL),
+						  offsets, size_offsets, (piece==match));
+
+		/* Check for too many substrings condition. */
+		if (count == 0) {
+			zend_error(E_NOTICE, "Matched, but too many substrings\n");
+			count = size_offsets/3;
+		}
+
+		/* If something matched */
+		if (count > 0) {
+			match = piece + offsets[0];
+
+			/* Add the piece (the text in front of the match) to the return value */
+			add_next_index_stringl(return_value,
+								   piece,
+								   offsets[0], 1);
+
+			/* Advance to next position */
+			piece += offsets[1];
+
+			/* One less left to do */
+			if (limit_val != -1)
+				limit_val--;
+		}
+	}
+
+	/* Add the last piece to the return value */
+	add_next_index_stringl(return_value,
+						   piece,
+						   subject_end-piece, 1);
+
+	/* Clean up */
+	efree(offsets);
+}
+/* }}} */
+
+
+/* {{{ proto string preg_quote(string str) */
+/* Returns str with a backslash placed in front of every character that is
+   special in a regular expression:  . \ + * ? [ ^ ] $ ( ) { } = ! > < | :
+   Processing stops at the first NUL byte of the input. */
+PHP_FUNCTION(preg_quote)
+{
+	zval		*in_str_arg;	/* Input string argument */
+	char		*in_str,		/* Input string */
+				*out_str,		/* Output string with quoted characters */
+				*p,				/* Iterator for input string */
+				*q,				/* Iterator for output string */
+				 c;				/* Current character */
+
+	/* Get the arguments and check for errors */
+	if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &in_str_arg) == FAILURE) {
+		WRONG_PARAM_COUNT;
+	}
+
+	/* Make sure we're working with strings */
+	convert_to_string(in_str_arg);
+	in_str = in_str_arg->value.str.val;
+
+	/* Nothing to do if we got an empty string. Return right away instead of
+	   falling through and setting the return value a second time; the result
+	   is a private copy, the same kind of value the general path produces. */
+	if (!*in_str) {
+		RETVAL_STRING(empty_string, 1);
+		return;
+	}
+
+	/* Allocate enough memory so that even if each character
+	   is quoted, we won't run out of room */
+	out_str = emalloc(2 * in_str_arg->value.str.len + 1);
+
+	/* Go through the string and quote necessary characters */
+	for(p = in_str, q = out_str; (c = *p); p++) {
+		switch(c) {
+			case '.':
+			case '\\':
+			case '+':
+			case '*':
+			case '?':
+			case '[':
+			case '^':
+			case ']':
+			case '$':
+			case '(':
+			case ')':
+			case '{':
+			case '}':
+			case '=':
+			case '!':
+			case '>':
+			case '<':
+			case '|':
+			case ':':
+				*q++ = '\\';
+				/* break is missing _intentionally_ */
+			default:
+				*q++ = c;
+		}
+	}
+	*q = '\0';
+
+	/* Shrink the buffer to the size actually used and hand it over as the
+	   return value (no copy) */
+	RETVAL_STRING(erealloc(out_str, q - out_str + 1), 0);
+}
+/* }}} */
+
+
+/* {{{ proto array preg_grep(string regex, array input)
+ Searches array and returns entries which match regex */
+/* Only string entries of input are tested; everything else is skipped.
+   Matching entries are shared with the result array (their refcount is
+   raised) rather than copied. String keys are kept; entries with numeric
+   keys are appended and therefore renumbered. Returns FALSE when the regex
+   cannot be compiled. */
+PHP_FUNCTION(preg_grep)
+{
+	zval			*regex,				/* Regular expression */
+					*input,				/* Input array */
+				   **entry;				/* An entry in the input array */
+	pcre			*re = NULL;			/* Compiled regular expression */
+	pcre_extra		*extra = NULL;		/* Holds results of studying */
+	int				 preg_options = 0;	/* Custom preg options */
+	int				*offsets;			/* Array of subpattern offsets */
+	int				 size_offsets;		/* Size of the offsets array */
+	int				 count = 0;			/* Count of matched subpatterns */
+	char			*string_key;
+	ulong			 num_key;
+
+	/* Get arguments and do error checking */
+
+	if (ARG_COUNT(ht) != 2 || getParameters(ht, ARG_COUNT(ht), &regex, &input) == FAILURE) {
+		WRONG_PARAM_COUNT;
+	}
+
+	if (input->type != IS_ARRAY) {
+		zend_error(E_WARNING, "Second argument to preg_grep() should be an array");
+		return;
+	}
+
+	/* Make sure regex is a string */
+	convert_to_string(regex);
+
+	/* Compile regex or get it from cache. */
+	if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra, &preg_options)) == NULL) {
+		RETURN_FALSE;
+	}
+
+	/* Calculate the size of the offsets array, and allocate memory for it. */
+	size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3;
+	offsets = (int *)emalloc(size_offsets * sizeof(int));
+
+	/* Initialize return array */
+	array_init(return_value);
+
+	/* Go through the input array */
+	zend_hash_internal_pointer_reset(input->value.ht);
+	while(zend_hash_get_current_data(input->value.ht, (void **)&entry) == SUCCESS) {
+
+		/* Only match against strings */
+		if ((*entry)->type == IS_STRING) {
+			/* Perform the match */
+			count = pcre_exec(re, extra, (*entry)->value.str.val,
+							  (*entry)->value.str.len, (*entry)->value.str.val,
+							  0, offsets, size_offsets, 0);
+
+			/* Check for too many substrings condition. */
+			if (count == 0) {
+				zend_error(E_NOTICE, "Matched, but too many substrings\n");
+				count = size_offsets/3;
+			}
+
+			/* If something matched */
+			if (count > 0) {
+				/* The entry is about to be referenced from a second array */
+				(*entry)->refcount++;
+
+				/* Add to return array */
+				switch(zend_hash_get_current_key(input->value.ht, &string_key, &num_key))
+				{
+					case HASH_KEY_IS_STRING:
+						zend_hash_update(return_value->value.ht, string_key,
+										 strlen(string_key)+1, entry, sizeof(zval *), NULL);
+						/* The key string is released here once it has been used */
+						efree(string_key);
+						break;
+
+					case HASH_KEY_IS_LONG:
+						zend_hash_next_index_insert(return_value->value.ht, entry,
+													sizeof(zval *), NULL);
+						break;
+				}
+			}
+		}
+
+		zend_hash_move_forward(input->value.ht);
+	}
+
+	efree(offsets);
+}
+/* }}} */
+
+
+#endif /* HAVE_PCRE */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ */