diff options
author | Zeev Suraski <zeev@php.net> | 1999-07-06 19:24:13 +0000 |
---|---|---|
committer | Zeev Suraski <zeev@php.net> | 1999-07-06 19:24:13 +0000 |
commit | 8ee9fd57689cd3cd28c57260777403735b12e771 (patch) | |
tree | 4d768e8cb3bc932877427834495a7c2ab09c9d16 /ext/pcre/php_pcre.c | |
parent | 52308cd368bb26146b2e534c68ece91df5656ea2 (diff) | |
download | php-git-8ee9fd57689cd3cd28c57260777403735b12e771.tar.gz |
avoid two copies of pcre.c in the repository
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r-- | ext/pcre/php_pcre.c | 1111 |
1 files changed, 1111 insertions, 0 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c new file mode 100644 index 0000000000..6a9ca26b90 --- /dev/null +++ b/ext/pcre/php_pcre.c @@ -0,0 +1,1111 @@ +/* + +----------------------------------------------------------------------+ + | PHP HTML Embedded Scripting Language Version 3.0 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-1999 PHP Development Team (See Credits file) | + +----------------------------------------------------------------------+ + | This program is free software; you can redistribute it and/or modify | + | it under the terms of one of the following licenses: | + | | + | A) the GNU General Public License as published by the Free Software | + | Foundation; either version 2 of the License, or (at your option) | + | any later version. | + | | + | B) the PHP License as published by the PHP Development Team and | + | included in the distribution in the file: LICENSE | + | | + | This program is distributed in the hope that it will be useful, | + | but WITHOUT ANY WARRANTY; without even the implied warranty of | + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | + | GNU General Public License for more details. | + | | + | You should have received a copy of both licenses referred to here. | + | If you did not, or have any questions about PHP licensing, please | + | contact core@php.net. | + +----------------------------------------------------------------------+ + | Authors: Andrey Zmievski <andrey@ispi.net> | + +----------------------------------------------------------------------+ + */ + +/* $Id$ */ + +#include "php.h" + +#if HAVE_PCRE + +#include "php_pcre.h" +#include "ext/standard/php3_string.h" + +#define PREG_PATTERN_ORDER 0 +#define PREG_SET_ORDER 1 + +#define PREG_REPLACE_EVAL (1<<0) + +/* {{{ module definition structures */ + +unsigned char third_arg_force_ref[] = { 3, BYREF_NONE, BYREF_NONE, BYREF_FORCE }; + +function_entry pcre_functions[] = { + PHP_FE(preg_match, third_arg_force_ref) + PHP_FE(preg_match_all, third_arg_force_ref) + PHP_FE(preg_replace, NULL) + PHP_FE(preg_split, NULL) + PHP_FE(preg_quote, NULL) + PHP_FE(preg_grep, NULL) + {NULL, NULL, NULL} +}; + +zend_module_entry pcre_module_entry = { + "PCRE", pcre_functions, php_minit_pcre, php_mshutdown_pcre, + php_rinit_pcre, NULL, + php_info_pcre, STANDARD_MODULE_PROPERTIES +}; + +/* }}} */ + + +#ifdef ZTS +int pcre_globals_id; +#else +php_pcre_globals pcre_globals; +#endif + + +static void *php_pcre_malloc(size_t size) +{ + return pemalloc(size, 1); +} + + +static void php_pcre_free(void *ptr) +{ + pefree(ptr, 1); +} + + +static void _php_free_pcre_cache(void *data) +{ + pcre_cache_entry *pce = (pcre_cache_entry *) data; + pefree(pce->re, 1); +} + + +#ifdef ZTS +static void _php_pcre_init_globals(php_pcre_globals *pcre_globals) +{ + zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, _php_free_pcre_cache, 1); +} + + +static void _php_pcre_shutdown_globals(php_pcre_globals *pcre_globals) +{ + zend_hash_destroy(&PCRE_G(pcre_cache)); +} +#endif + + +/* {{{ void php_info_pcre(ZEND_MODULE_INFO_FUNC_ARGS) */ +void php_info_pcre(ZEND_MODULE_INFO_FUNC_ARGS) +{ + php3_printf("Perl Compatible Regular Expressions"); + php3_printf("<table cellpadding=5>" + "<tr><td>PCRE library version:</td>" + "<td>%s</td></tr>" + "</table>", pcre_version()); +} +/* }}} */ + + +/* {{{ int php_minit_pcre(INIT_FUNC_ARGS) */ +int php_minit_pcre(INIT_FUNC_ARGS) +{ +#ifdef ZTS + pcre_globals_id = tsrm_allocate_id( + sizeof(php_pcre_globals), + _php_pcre_init_globals, + _php_pcre_shutdown_globals); +#else + zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, _php_free_pcre_cache, 1); +#endif + + REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT); + return SUCCESS; +} +/* }}} */ + + +/* {{{ int php_mshutdown_pcre(void) */ +int php_mshutdown_pcre(SHUTDOWN_FUNC_ARGS) +{ +#ifndef ZTS + zend_hash_destroy(&PCRE_G(pcre_cache)); +#endif + return SUCCESS; +} +/* }}} */ + + +/* {{{ int php_rinit_pcre(INIT_FUNC_ARGS) */ +int php_rinit_pcre(INIT_FUNC_ARGS) +{ + pcre_malloc = php_pcre_malloc; + pcre_free = php_pcre_free; + + return SUCCESS; +} +/* }}} */ + + +/* {{{ static pcre* _pcre_get_compiled_regex(char *regex, pcre_extra *extra) */ +static pcre* _pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_options) { + pcre *re = NULL; + int coptions = 0; + int soptions = 0; + const char *error; + int erroffset; + char delimiter; + char *p, *pp; + char *pattern; + int regex_len; + int do_study = 0; + int poptions = 0; + pcre_cache_entry *pce; + pcre_cache_entry new_entry; + PCRE_LS_FETCH(); + + /* Try to lookup the cached regex entry, and if successful, just pass + back the compiled pattern, otherwise go on and compile it. */ + regex_len = strlen(regex); + if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) { + extra = pce->extra; + *preg_options = pce->preg_options; + return pce->re; + } + + p = regex; + + /* Parse through the leading whitespace, and display a warning if we + get to the end without encountering a delimiter. */ + while (isspace((int)*p)) p++; + if (*p == 0) { + zend_error(E_WARNING, "Empty regular expression"); + return NULL; + } + + /* Get the delimiter and display a warning if it is alphanumeric + or a backslash. */ + delimiter = *p++; + if (isalnum((int)delimiter) || delimiter == '\\') { + zend_error(E_WARNING, "Delimiter must not be alphanumeric or backslash"); + return NULL; + } + + /* We need to iterate through the pattern, searching for the ending delimiter, + but skipping the backslashed delimiters. If the ending delimiter is not + found, display a warning. */ + pp = p; + while (*pp != 0) { + if (*pp == delimiter && pp[-1] != '\\') + break; + pp++; + } + if (*pp == 0) { + zend_error(E_WARNING, "No ending delimiter found"); + return NULL; + } + + /* Make a copy of the actual pattern. */ + pattern = estrndup(p, pp-p); + + /* Move on to the options */ + pp++; + + /* Clear out preg options */ + *preg_options = 0; + + /* Parse through the options, setting appropriate flags. Display + a warning if we encounter an unknown option. */ + while (*pp != 0) { + switch (*pp++) { + /* Perl compatible options */ + case 'i': coptions |= PCRE_CASELESS; break; + case 'm': coptions |= PCRE_MULTILINE; break; + case 's': coptions |= PCRE_DOTALL; break; + case 'x': coptions |= PCRE_EXTENDED; break; + + /* PCRE specific options */ + case 'A': coptions |= PCRE_ANCHORED; break; + case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break; + case 'S': do_study = 1; break; + case 'U': coptions |= PCRE_UNGREEDY; break; + case 'X': coptions |= PCRE_EXTRA; break; + + /* Custom preg options */ + case 'e': poptions |= PREG_REPLACE_EVAL; break; + + case ' ': + case '\n': + break; + + default: + zend_error(E_WARNING, "Unknown option '%c'", pp[-1]); + efree(pattern); + return NULL; + } + } + + /* Compile pattern and display a warning if compilation failed. */ + re = pcre_compile(pattern, + coptions, + &error, + &erroffset, + NULL); + + if (re == NULL) { + zend_error(E_WARNING, "Compilation failed: %s at offset %d\n", error, erroffset); + efree(pattern); + return NULL; + } + + /* If study option was specified, study the pattern and + store the result in extra for passing to pcre_exec. */ + if (do_study) { + extra = pcre_study(re, soptions, &error); + if (error != NULL) { + zend_error(E_WARNING, "Error while studying pattern"); + } + } + + *preg_options = poptions; + + efree(pattern); + + /* Store the compiled pattern and extra info in the cache. */ + new_entry.re = re; + new_entry.extra = extra; + new_entry.preg_options = poptions; + zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry, + sizeof(pcre_cache_entry), NULL); + + return re; +} +/* }}} */ + + +/* {{{ void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) */ +static void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) +{ + zval *regex, /* Regular expression */ + *subject, /* String to match against */ + *subpats = NULL, /* Array for subpatterns */ + *subpats_order, /* Order of the results in the subpatterns + array for global match */ + *result_set, /* Holds a set of subpatterns after + a global match */ + **match_sets = NULL; /* An array of sets of matches for each + subpattern after a global match */ + pcre *re = NULL; /* Compiled regular expression */ + pcre_extra *extra = NULL; /* Holds results of studying */ + int exoptions = 0; /* Execution options */ + int preg_options = 0; /* Custom preg options */ + int count = 0; /* Count of matched subpatterns */ + int *offsets; /* Array of subpattern offsets */ + int num_subpats; /* Number of captured subpatterns */ + int size_offsets; /* Size of the offsets array */ + int matched; /* Has anything matched */ + int i; + int subpats_order_val = 0; /* Integer value of subpats_order */ + const char **stringlist; /* Used to hold list of subpatterns */ + char *match, /* The current match */ + *piece, /* The current piece of subject */ + *subject_end; /* Points to the end of the subject */ + + + /* Get function parameters and do error-checking. */ + switch(ARG_COUNT(ht)) { + case 2: + if (getParameters(ht, 2, ®ex, &subject) == FAILURE) { + WRONG_PARAM_COUNT; + } + break; + + case 3: + if (getParameters(ht, 3, ®ex, &subject, &subpats) == FAILURE) { + WRONG_PARAM_COUNT; + } + if (global) + subpats_order_val = PREG_PATTERN_ORDER; + if (!ParameterPassedByReference(ht, 3)) { + zend_error(E_WARNING, "Array to be filled with matches must be passed by reference."); + RETURN_FALSE; + } + break; + + case 4: + if (getParameters(ht, 4, ®ex, &subject, &subpats, &subpats_order) == FAILURE) { + WRONG_PARAM_COUNT; + } + if (!ParameterPassedByReference(ht, 3)) { + zend_error(E_WARNING, "Array to be filled with matches must be passed by reference."); + RETURN_FALSE; + } + + /* Make sure subpats_order is a number */ + convert_to_long(subpats_order); + subpats_order_val = subpats_order->value.lval; + if (subpats_order_val < PREG_PATTERN_ORDER || + subpats_order_val > PREG_SET_ORDER) { + zend_error(E_WARNING, "Wrong value for parameter 4 in call to preg_match_all()"); + } + break; + + default: + WRONG_PARAM_COUNT; + } + + /* Make sure we're dealing with strings. */ + convert_to_string(regex); + convert_to_string(subject); + + /* Make sure to clean up the passed array and initialize it. */ + if (subpats != NULL) { + zval_dtor(subpats); + array_init(subpats); + } + + /* Compile regex or get it from cache. */ + if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra, &preg_options)) == NULL) { + RETURN_FALSE; + } + + /* Calculate the size of the offsets array, and allocate memory for it. */ + num_subpats = pcre_info(re, NULL, NULL) + 1; + size_offsets = num_subpats * 3; + offsets = (int *)emalloc(size_offsets * sizeof(int)); + + /* Allocate match sets array and initialize the values */ + if (global && subpats_order_val == PREG_PATTERN_ORDER) { + match_sets = (zval **)emalloc(num_subpats * sizeof(zval *)); + for (i=0; i<num_subpats; i++) { + match_sets[i] = (zval *)emalloc(sizeof(zval)); + array_init(match_sets[i]); + match_sets[i]->is_ref = 0; + match_sets[i]->refcount = 1; + } + } + + /* Start from the beginning of the string */ + piece = subject->value.str.val; + subject_end = piece + subject->value.str.len; + match = NULL; + matched = 0; + + do { + /* Execute the regular expression. */ + count = pcre_exec(re, extra, piece, + subject_end-piece, subject->value.str.val, + (piece==subject->value.str.val ? exoptions : exoptions|PCRE_NOTBOL), + offsets, size_offsets, (piece == match)); + + /* Check for too many substrings condition. */ + if (count == 0) { + zend_error(E_NOTICE, "Matched, but too many substrings\n"); + count = size_offsets/3; + } + + /* If something has matched */ + if (count >= 0) { + matched++; + match = piece + offsets[0]; + + /* If subpatters array has been passed, fill it in with values. */ + if (subpats != NULL) { + /* Try to get the list of substrings and display a warning if failed. */ + if (pcre_get_substring_list(piece, offsets, count, &stringlist) < 0) { + efree(offsets); + efree(re); + zend_error(E_WARNING, "Get subpatterns list failed"); + return; + } + + if (global) { /* global pattern matching */ + if (subpats_order_val == PREG_PATTERN_ORDER) { + /* For each subpattern, insert it into the appropriate array */ + for (i=0; i<count; i++) { + add_next_index_string(match_sets[i], (char *)stringlist[i], 1); + } + } + else { + /* Allocate the result set array */ + result_set = emalloc(sizeof(zval)); + array_init(result_set); + result_set->is_ref = 0; + result_set->refcount = 1; + + /* Add all the subpatterns to it */ + for (i=0; i<count; i++) { + add_next_index_string(result_set, (char *)stringlist[i], 1); + } + /* And add it to the output array */ + zend_hash_next_index_insert(subpats->value.ht, &result_set, + sizeof(zval *), NULL); + } + } + else { /* single pattern matching */ + /* For each subpattern, insert it into the subpatterns array. */ + for (i=0; i<count; i++) { + add_next_index_string(subpats, (char *)stringlist[i], 1); + } + } + + php_pcre_free(stringlist); + + /* Advance to the position right after the last full match */ + piece += offsets[1]; + } + } + } while (global && count >= 0); + + /* Add the match sets to the output array and clean up */ + if (global && subpats_order_val == PREG_PATTERN_ORDER) { + for (i=0; i<num_subpats; i++) { + zend_hash_next_index_insert(subpats->value.ht, &match_sets[i], sizeof(zval *), NULL); + } + efree(match_sets); + } + + efree(offsets); + + RETVAL_LONG(matched); +} +/* }}} */ + + +/* {{{ proto preg_match(string pattern, string subject [, array subpatterns ]) + Perform a Perl-style regular expression match */ +PHP_FUNCTION(preg_match) +{ + _pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); +} +/* }}} */ + + +/* {{{ proto preg_match_all(string pattern, string subject, array subpatterns [, int order ]) + Perform a Perl-style global regular expression match */ +PHP_FUNCTION(preg_match_all) +{ + _pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); +} +/* }}} */ + + +/* {{{ int _preg_get_backref(const char *walk, int *backref) */ +static int _preg_get_backref(const char *walk, int *backref) +{ + if (*walk && *walk >= '0' && *walk <= '9') + *backref = *walk - '0'; + else + return 0; + + if (walk[1] && walk[1] >= '0' && walk[1] <= '9') + *backref = *backref * 10 + walk[1] - '0'; + + return 1; +} +/* }}} */ + + +/* {{{ int _preg_do_eval(char *eval_str, char *subject, int *offsets, + int count, char **result) */ +static int _preg_do_eval(char *eval_str, char *subject, int *offsets, + int count, char **result) +{ + zval retval; /* Return value from evaluation */ + char backref_buf[4], /* Buffer for string version of backref */ + *code, /* PHP code string */ + *new_code, /* Code as result of substitution */ + *match, /* Current match for a backref */ + *walk; /* Used to walk the code string */ + int code_len; /* Length of the code string */ + int new_code_len; /* Length of the substituted code string */ + int match_len; /* Length of the match */ + int result_len; /* Length of the result of the evaluation */ + int backref; /* Current backref */ + CLS_FETCH(); + ELS_FETCH(); + + /* Save string to be evaluated, since we will be modifying it */ + code = estrdup(eval_str); + walk = code; + new_code_len = code_len = strlen(code); + + while (*walk) { + /* If found a backreference.. */ + if ('\\' == *walk && + _preg_get_backref(walk+1, &backref) && + backref < count) { + /* Find the corresponding string match and substitute it + in instead of the backref */ + match = subject + offsets[backref<<1]; + match_len = offsets[(backref<<1)+1] - offsets[backref<<1]; + sprintf(backref_buf, "\\%d", backref); + new_code = _php3_str_to_str(code, code_len, + backref_buf, (backref > 9) ? 3 : 2, + match, match_len, &new_code_len); + + /* Adjust the walk pointer */ + walk = new_code + (walk - code) + match_len; + + /* Clean up and reassign */ + efree(code); + code = new_code; + code_len = new_code_len; + } else { + walk++; + } + } + + /* Run the code */ + zend_eval_string(code, &retval CLS_CC ELS_CC); + convert_to_string(&retval); + + /* Save the return value and its length */ + *result = estrdup(retval.value.str.val); + result_len = retval.value.str.len; + + /* Clean up */ + zval_dtor(&retval); + efree(code); + + return result_len; +} +/* }}} */ + + +/* {{{ char *_php_pcre_replace(char *regex, char *subject, char *replace) */ +char *_php_pcre_replace(char *regex, char *subject, char *replace) +{ + pcre *re = NULL; /* Compiled regular expression */ + pcre_extra *extra = NULL; /* Holds results of studying */ + int exoptions = 0; /* Execution options */ + int preg_options = 0; /* Custom preg options */ + int count = 0; /* Count of matched subpatterns */ + int *offsets; /* Array of subpattern offsets */ + int size_offsets; /* Size of the offsets array */ + int new_len; /* Length of needed storage */ + int alloc_len; /* Actual allocated length */ + int subject_len; /* Length of the subject string */ + int eval_result_len=0; /* Length of the eval'ed string */ + int result_len; /* Current length of the result */ + int match_len; /* Length of the current match */ + int backref; /* Backreference number */ + int eval; /* If the replacement string should be eval'ed */ + char *result, /* Result of replacement */ + *new_buf, /* Temporary buffer for re-allocation */ + *walkbuf, /* Location of current replacement in the result */ + *walk, /* Used to walk the replacement string */ + *match, /* The current match */ + *piece, /* The current piece of subject */ + *subject_end, /* Points to the end of the subject */ + *eval_result; /* Result of eval */ + + /* Compile regex or get it from cache. */ + if ((re = _pcre_get_compiled_regex(regex, extra, &preg_options)) == NULL) { + return NULL; + } + + /* Calculate the size of the offsets array, and allocate memory for it. */ + size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3; + offsets = (int *)emalloc(size_offsets * sizeof(int)); + + subject_len = strlen(subject); + + alloc_len = 2 * subject_len + 1; + result = emalloc(alloc_len * sizeof(char)); + if (!result) { + zend_error(E_WARNING, "Unable to allocate memory in pcre_replace"); + efree(re); + efree(offsets); + return NULL; + } + + /* Initialize */ + match = NULL; + result[0] = '\0'; + piece = subject; + subject_end = subject + subject_len; + eval = preg_options & PREG_REPLACE_EVAL; + + while (count >= 0) { + /* Execute the regular expression. */ + count = pcre_exec(re, extra, piece, + subject_end-piece, subject, + (piece==subject ? exoptions : exoptions|PCRE_NOTBOL), + offsets, size_offsets, (piece == match)); + + /* Check for too many substrings condition. */ + if (count == 0) { + zend_error(E_NOTICE, "Matched, but too many substrings\n"); + count = size_offsets/3; + } + + if (count > 0) { + /* Set the match location in piece */ + match = piece + offsets[0]; + + new_len = strlen(result) + offsets[0]; /* part before the match */ + + /* If evaluating, do it and add the return string's length */ + if (eval) { + eval_result_len = _preg_do_eval(replace, piece, offsets, + count, &eval_result); + new_len += eval_result_len; + } else { /* do regular substitution */ + walk = replace; + while (*walk) + if ('\\' == *walk && + _preg_get_backref(walk+1, &backref) && + backref < count) { + new_len += offsets[(backref<<1)+1] - offsets[backref<<1]; + walk += (backref > 9) ? 3 : 2; + } else { + new_len++; + walk++; + } + } + + if (new_len + 1 > alloc_len) { + alloc_len = 1 + alloc_len + 2 * new_len; + new_buf = emalloc(alloc_len); + strcpy(new_buf, result); + efree(result); + result = new_buf; + } + result_len = strlen(result); + /* copy the part of the string before the match */ + strncat(result, piece, match-piece); + + /* copy replacement and backrefs */ + walkbuf = &result[result_len + offsets[0]]; + + /* If evaluating, copy result to the buffer and clean up */ + if (eval) { + memcpy(walkbuf, eval_result, eval_result_len); + walkbuf += eval_result_len; + efree(eval_result); + } else { /* do regular backreference copying */ + walk = replace; + while (*walk) + if ('\\' == *walk && + _preg_get_backref(walk+1, &backref) && + backref < count) { + match_len = offsets[(backref<<1)+1] - offsets[backref<<1]; + memcpy (walkbuf, + piece + offsets[backref<<1], + match_len); + walkbuf += match_len; + walk += (backref > 9) ? 3 : 2; + } else + *walkbuf++ = *walk++; + } + *walkbuf = '\0'; + + /* Advance to the next piece */ + piece += offsets[1]; + } else { + new_len = strlen(result) + subject_end-piece; + if (new_len + 1 > alloc_len) { + alloc_len = new_len + 1; /* now we know exactly how long it is */ + new_buf = emalloc(alloc_len * sizeof(char)); + strcpy(new_buf, result); + efree(result); + result = new_buf; + } + /* stick that last bit of string on our output */ + strcat(result, piece); + } + } + + efree(offsets); + + return result; +} +/* }}} */ + + +static char *_php_replace_in_subject(zval *regex, zval *replace, zval *subject) +{ + zval **regex_entry_ptr, + *regex_entry, + **replace_entry_ptr, + *replace_entry; + char *replace_value = NULL, + *subject_value, + *result; + + /* Make sure we're dealing with strings. */ + convert_to_string(subject); + + /* If regex is an array */ + if (regex->type == IS_ARRAY) { + /* Duplicating subject string for repeated replacement */ + subject_value = estrdup(subject->value.str.val); + + zend_hash_internal_pointer_reset(regex->value.ht); + + if (replace->type == IS_ARRAY) + zend_hash_internal_pointer_reset(replace->value.ht); + else + /* Set replacement value to the passed one */ + replace_value = replace->value.str.val; + + /* For each entry in the regex array, get the entry */ + while (zend_hash_get_current_data(regex->value.ht, (void **)®ex_entry_ptr) == SUCCESS) { + regex_entry = *regex_entry_ptr; + + /* Make sure we're dealing with strings. */ + convert_to_string(regex_entry); + + /* If replace is an array */ + if (replace->type == IS_ARRAY) { + /* Get current entry */ + if (zend_hash_get_current_data(replace->value.ht, (void **)&replace_entry_ptr) == SUCCESS) { + replace_entry = *replace_entry_ptr; + + /* Make sure we're dealing with strings. */ + convert_to_string(replace_entry); + + /* Set replacement value to the one we got from array */ + replace_value = replace_entry->value.str.val; + + zend_hash_move_forward(replace->value.ht); + } + else + /* We've run out of replacement strings, so use an empty one */ + replace_value = empty_string; + } + + /* Do the actual replacement and put the result back into subject_value + for further replacements. */ + if ((result = _php_pcre_replace(regex_entry->value.str.val, + subject_value, + replace_value)) != NULL) { + efree(subject_value); + subject_value = result; + } + + zend_hash_move_forward(regex->value.ht); + } + + return subject_value; + } + else { + /* Make sure we're dealing with strings and do the replacement */ + convert_to_string(regex); + convert_to_string(replace); + result = _php_pcre_replace(regex->value.str.val, + subject->value.str.val, + replace->value.str.val); + return result; + } +} + + +/* {{{ proto preg_replace(string|array regex, string|array replace, string|array subject) + Perform Perl-style regular expression replacement */ +PHP_FUNCTION(preg_replace) +{ + zval *regex, + *replace, + *subject, + **subject_entry_ptr, + *subject_entry; + char *result; + + /* Get function parameters and do error-checking. */ + if (ARG_COUNT(ht) != 3 || getParameters(ht, 3, ®ex, &replace, &subject) == FAILURE) { + WRONG_PARAM_COUNT; + } + + /* if subject is an array */ + if (subject->type == IS_ARRAY) { + array_init(return_value); + zend_hash_internal_pointer_reset(subject->value.ht); + + /* For each subject entry, convert it to string, then perform replacement + and add the result to the return_value array. */ + while (zend_hash_get_current_data(subject->value.ht, (void **)&subject_entry_ptr) == SUCCESS) { + subject_entry = *subject_entry_ptr; + + if ((result = _php_replace_in_subject(regex, replace, subject_entry)) != NULL) + add_next_index_string(return_value, result, 0); + + zend_hash_move_forward(subject->value.ht); + } + } + else { /* if subject is not an array */ + if ((result = _php_replace_in_subject(regex, replace, subject)) != NULL) { + RETVAL_STRING(result, 1); + efree(result); + } + } +} +/* }}} */ + + +/* {{{ proto preg_split(string pattern, string subject [, int limit ]) */ +PHP_FUNCTION(preg_split) +{ + zval *regex, /* Regular expression to split by */ + *subject, /* Subject string to split */ + *limit; /* Number of pieces to return */ + pcre *re = NULL; /* Compiled regular expression */ + pcre_extra *extra = NULL; /* Holds results of studying */ + int *offsets; /* Array of subpattern offsets */ + int size_offsets; /* Size of the offsets array */ + int exoptions = 0; /* Execution options */ + int preg_options = 0; /* Custom preg options */ + int argc; /* Argument count */ + int limit_val; /* Integer value of limit */ + int count = 0; /* Count of matched subpatterns */ + char *match, /* The current match */ + *piece, /* The current piece of subject */ + *subject_end; /* Points to the end of subject string */ + + /* Get function parameters and do error checking */ + argc = ARG_COUNT(ht); + if (argc < 1 || argc > 3 || getParameters(ht, argc, ®ex, &subject, &limit) == FAILURE) { + WRONG_PARAM_COUNT; + } + + if (argc == 3) { + convert_to_long(limit); + limit_val = limit->value.lval; + } + else + limit_val = -1; + + /* Make sure we're dealing with strings */ + convert_to_string(regex); + convert_to_string(subject); + + /* Compile regex or get it from cache. */ + if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra, &preg_options)) == NULL) { + RETURN_FALSE; + } + + /* Initialize return value */ + array_init(return_value); + + /* Calculate the size of the offsets array, and allocate memory for it. */ + size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3; + offsets = (int *)emalloc(size_offsets * sizeof(int)); + + /* Start at the beginning of the string */ + piece = subject->value.str.val; + subject_end = piece + subject->value.str.len; + match = NULL; + + /* Get next piece if no limit or limit not yet reached and something matched*/ + while ((limit_val == -1 || limit_val > 1) && count >= 0) { + count = pcre_exec(re, extra, piece, + subject_end-piece, subject->value.str.val, + (piece==subject->value.str.val ? exoptions : exoptions|PCRE_NOTBOL), + offsets, size_offsets, (piece==match)); + + /* Check for too many substrings condition. */ + if (count == 0) { + zend_error(E_NOTICE, "Matched, but too many substrings\n"); + count = size_offsets/3; + } + + /* If something matched */ + if (count > 0) { + match = piece + offsets[0]; + + /* Add the piece to the return value */ + add_next_index_stringl(return_value, + piece, + offsets[0], 1); + + /* Advance to next position */ + piece += offsets[1]; + + /* One less left to do */ + if (limit_val != -1) + limit_val--; + } + } + + /* Add the last piece to the return value */ + add_next_index_stringl(return_value, + piece, + subject_end-piece, 1); + + /* Clean up */ + efree(offsets); +} +/* }}} */ + + +/* {{{ proto string preg_quote(string str) */ +PHP_FUNCTION(preg_quote) +{ + zval *in_str_arg; /* Input string argument */ + char *in_str, /* Input string */ + *out_str, /* Output string with quoted characters */ + *p, /* Iterator for input string */ + *q, /* Iterator for output string */ + c; /* Current character */ + + /* Get the arguments and check for errors */ + if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &in_str_arg) == FAILURE) { + WRONG_PARAM_COUNT; + } + + /* Make sure we're working with strings */ + convert_to_string(in_str_arg); + in_str = in_str_arg->value.str.val; + + /* Nothing to do if we got an empty string */ + if (!*in_str) { + RETVAL_STRING(empty_string, 0); + } + + /* Allocate enough memory so that even if each character + is quoted, we won't run out of room */ + out_str = emalloc(2 * in_str_arg->value.str.len + 1); + + /* Go through the string and quote necessary characters */ + for(p = in_str, q = out_str; (c = *p); p++) { + switch(c) { + case '.': + case '\\': + case '+': + case '*': + case '?': + case '[': + case '^': + case ']': + case '$': + case '(': + case ')': + case '{': + case '}': + case '=': + case '!': + case '>': + case '<': + case '|': + case ':': + *q++ = '\\'; + /* break is missing _intentionally_ */ + default: + *q++ = c; + } + } + *q = '\0'; + + /* Reallocate string and return it */ + RETVAL_STRING(erealloc(out_str, q - out_str + 1), 0); +} +/* }}} */ + + +/* {{{ proto array preg_grep(string regex, array input) + Searches array and returns entries which match regex */ +PHP_FUNCTION(preg_grep) +{ + zval *regex, /* Regular expression */ + *input, /* Input array */ + **entry; /* An entry in the input array */ + pcre *re = NULL; /* Compiled regular expression */ + pcre_extra *extra = NULL; /* Holds results of studying */ + int preg_options = 0; /* Custom preg options */ + int *offsets; /* Array of subpattern offsets */ + int size_offsets; /* Size of the offsets array */ + int count = 0; /* Count of matched subpatterns */ + char *string_key; + ulong num_key; + + /* Get arguments and do error checking */ + + if (ARG_COUNT(ht) != 2 || getParameters(ht, ARG_COUNT(ht), ®ex, &input) == FAILURE) { + WRONG_PARAM_COUNT; + } + + if (input->type != IS_ARRAY) { + zend_error(E_WARNING, "Secong argument to preg_grep() should be an array"); + return; + } + + /* Make sure regex is a string */ + convert_to_string(regex); + + /* Compile regex or get it from cache. */ + if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra, &preg_options)) == NULL) { + RETURN_FALSE; + } + + /* Calculate the size of the offsets array, and allocate memory for it. */ + size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3; + offsets = (int *)emalloc(size_offsets * sizeof(int)); + + /* Initialize return array */ + array_init(return_value); + + /* Go through the input array */ + zend_hash_internal_pointer_reset(input->value.ht); + while(zend_hash_get_current_data(input->value.ht, (void **)&entry) == SUCCESS) { + + /* Only match against strings */ + if ((*entry)->type == IS_STRING) { + /* Perform the match */ + count = pcre_exec(re, extra, (*entry)->value.str.val, + (*entry)->value.str.len, (*entry)->value.str.val, + 0, offsets, size_offsets, 0); + + /* Check for too many substrings condition. */ + if (count == 0) { + zend_error(E_NOTICE, "Matched, but too many substrings\n"); + count = size_offsets/3; + } + + /* If something matched */ + if (count > 0) { + (*entry)->refcount++; + + /* Add to return array */ + switch(zend_hash_get_current_key(input->value.ht, &string_key, &num_key)) + { + case HASH_KEY_IS_STRING: + zend_hash_update(return_value->value.ht, string_key, + strlen(string_key)+1, entry, sizeof(zval *), NULL); + efree(string_key); + break; + + case HASH_KEY_IS_LONG: + zend_hash_next_index_insert(return_value->value.ht, entry, + sizeof(zval *), NULL); + break; + } + } + } + + zend_hash_move_forward(input->value.ht); + } + + efree(offsets); +} +/* }}} */ + + +#endif /* HAVE_PCRE */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + */ |