diff options
author | Andrey Hristov <andrey@php.net> | 1999-05-21 13:17:23 +0000 |
---|---|---|
committer | Andrey Hristov <andrey@php.net> | 1999-05-21 13:17:23 +0000 |
commit | 17bbbf29636908d39c398206a3af6e262707e617 (patch) | |
tree | cc0d43c8b4422e63df5d5d64d165df084c888d7d /ext/pcre | |
parent | b57dc275950b228f2399990471c4f22b7d154c6c (diff) | |
download | php-git-17bbbf29636908d39c398206a3af6e262707e617.tar.gz |
Initial check-in of PCRE (Perl Compatible Regular Expressions) extension.
PCRE library can be found at ftp://ftp.cus.cam.ac.uk/pub/software/programs/pcre/
config.m4 will be updated to be more robust later on.
pcre_match() takes a regular expression, the subject string, and (optionally,
passed by reference) the array to be filled with subpattern matches.
pcre_replace() takes a regular expression, the replacement string, and the
subject string.
The regular expression is specified using delimiters and options. Example:
pcre_match("/<[a-z]*>/i", $text, $tags);
More stuff is coming soon.
Diffstat (limited to 'ext/pcre')
-rw-r--r-- | ext/pcre/Makefile.am | 6 | ||||
-rw-r--r-- | ext/pcre/config.h.stub | 2 | ||||
-rw-r--r-- | ext/pcre/config.m4 | 18 | ||||
-rw-r--r-- | ext/pcre/config0.m4 | 18 | ||||
-rw-r--r-- | ext/pcre/pcre.c | 495 | ||||
-rw-r--r-- | ext/pcre/php_pcre.h | 56 | ||||
-rw-r--r-- | ext/pcre/setup.stub | 6 |
7 files changed, 601 insertions, 0 deletions
diff --git a/ext/pcre/Makefile.am b/ext/pcre/Makefile.am new file mode 100644 index 0000000000..8a7bc21258 --- /dev/null +++ b/ext/pcre/Makefile.am @@ -0,0 +1,6 @@ +# $Id$ + +INCLUDES=@INCLUDES@ -I@top_srcdir@ -I@top_srcdir@/libzend +noinst_LIBRARIES=libphpext_pcre.a +libphpext_pcre_a_SOURCES=pcre.c + diff --git a/ext/pcre/config.h.stub b/ext/pcre/config.h.stub new file mode 100644 index 0000000000..334cff6697 --- /dev/null +++ b/ext/pcre/config.h.stub @@ -0,0 +1,2 @@ +/* define if you want to use the pcre extension */ +#define HAVE_LIBPCRE 0 diff --git a/ext/pcre/config.m4 b/ext/pcre/config.m4 new file mode 100644 index 0000000000..f564884b9c --- /dev/null +++ b/ext/pcre/config.m4 @@ -0,0 +1,18 @@ +dnl $Id$ +dnl config.m4 for extension pcre + +AC_MSG_CHECKING(for PCRE support) +AC_ARG_WITH(pcre, +[ --with-pcre Include Perl Compatible Regular Expressions support],[ + if test "$withval" = "yes"; then + PCRE_LIBS="-lpcre" + AC_DEFINE(HAVE_LIBPCRE, 1) + AC_MSG_RESULT(yes) + PHP_EXTENSION(pcre) + EXTRA_LIBS="$EXTRA_LIBS $PCRE_LIBS" + else + AC_MSG_RESULT(no) + fi +],[ + AC_MSG_RESULT(no) +]) diff --git a/ext/pcre/config0.m4 b/ext/pcre/config0.m4 new file mode 100644 index 0000000000..f564884b9c --- /dev/null +++ b/ext/pcre/config0.m4 @@ -0,0 +1,18 @@ +dnl $Id$ +dnl config.m4 for extension pcre + +AC_MSG_CHECKING(for PCRE support) +AC_ARG_WITH(pcre, +[ --with-pcre Include Perl Compatible Regular Expressions support],[ + if test "$withval" = "yes"; then + PCRE_LIBS="-lpcre" + AC_DEFINE(HAVE_LIBPCRE, 1) + AC_MSG_RESULT(yes) + PHP_EXTENSION(pcre) + EXTRA_LIBS="$EXTRA_LIBS $PCRE_LIBS" + else + AC_MSG_RESULT(no) + fi +],[ + AC_MSG_RESULT(no) +]) diff --git a/ext/pcre/pcre.c b/ext/pcre/pcre.c new file mode 100644 index 0000000000..ce9253765e --- /dev/null +++ b/ext/pcre/pcre.c @@ -0,0 +1,495 @@ +/* + +----------------------------------------------------------------------+ + | PHP HTML Embedded Scripting Language Version 3.0 | + 
+----------------------------------------------------------------------+ + | Copyright (c) 1997-1999 PHP Development Team (See Credits file) | + +----------------------------------------------------------------------+ + | This program is free software; you can redistribute it and/or modify | + | it under the terms of one of the following licenses: | + | | + | A) the GNU General Public License as published by the Free Software | + | Foundation; either version 2 of the License, or (at your option) | + | any later version. | + | | + | B) the PHP License as published by the PHP Development Team and | + | included in the distribution in the file: LICENSE | + | | + | This program is distributed in the hope that it will be useful, | + | but WITHOUT ANY WARRANTY; without even the implied warranty of | + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | + | GNU General Public License for more details. | + | | + | You should have received a copy of both licenses referred to here. | + | If you did not, or have any questions about PHP licensing, please | + | contact core@php.net. 
| + +----------------------------------------------------------------------+ + | Authors: Andrey Zmievski <andrey@ispi.net> | + +----------------------------------------------------------------------+ + */ + +/* $Id$ */ + +/* Get PCRE library from ftp://ftp.cus.cam.ac.uk/pub/software/programs/pcre/ */ + +#include "php.h" + +#if HAVE_LIBPCRE + +#include "php_pcre.h" + +/* {{{ module definition structures */ + +unsigned char third_arg_force_ref[] = { 3, BYREF_NONE, BYREF_NONE, BYREF_FORCE }; + +function_entry pcre_functions[] = { + PHP_FE(pcre_match, third_arg_force_ref) + PHP_FE(pcre_replace, NULL) + {NULL, NULL, NULL} +}; + +php3_module_entry pcre_module_entry = { + "PCRE", pcre_functions, NULL, NULL, php_rinit_pcre, NULL, + php_info_pcre, STANDARD_MODULE_PROPERTIES +}; + +/* }}} */ + + +static void *php_pcre_malloc(size_t size) +{ + return emalloc(size); +} + +static void php_pcre_free(void *ptr) +{ + efree(ptr); +} + + +/* {{{ void php_info_pcre(ZEND_MODULE_INFO_FUNC_ARGS) */ +void php_info_pcre(ZEND_MODULE_INFO_FUNC_ARGS) +{ + php3_printf("Perl Compatible Regular Expressions"); + php3_printf("<table cellpadding=5>" + "<tr><td>PCRE library version:</td>" + "<td>%s</td></tr>" + "</table>", pcre_version()); +} +/* }}} */ + + +/* {{{ int php_rinit_pcre(INIT_FUNC_ARGS) */ +int php_rinit_pcre(INIT_FUNC_ARGS) +{ + pcre_malloc = php_pcre_malloc; + pcre_free = php_pcre_free; + + return SUCCESS; +} +/* }}} */ + + +/* {{{ static pcre* _pcre_get_compiled_regex(char *regex, pcre_extra *extra) */ +static pcre* _pcre_get_compiled_regex(char *regex, pcre_extra *extra) { + pcre *re = NULL; + int coptions = 0; + int soptions = 0; + const char *error; + int erroffset; + char delimiter; + unsigned char *p, *pp; + char *pattern; + int do_study = 0; + + p = regex; + + /* Parse through the leading whitespace, and display a warning if we + get to the end without encountering a delimiter. 
*/ + while (isspace(*p)) p++; + if (*p == 0) { + zend_error(E_WARNING, "Empty regular expression"); + return NULL; + } + + /* Get the delimiter and display a warning if it is alphanumeric + or a backslash. */ + delimiter = *p++; + if (isalnum(delimiter) || delimiter == '\\') { + zend_error(E_WARNING, "Delimiter must not be alphanumeric or backslash"); + return NULL; + } + + /* We need to iterate through the pattern, searching for the ending delimiter, + but skipping the backslashed delimiters. If the ending delimiter is not + found, display a warning. */ + pp = p; + while (*pp != 0) { + if (*pp == delimiter && pp[-1] != '\\') + break; + pp++; + } + if (*pp == 0) { + zend_error(E_WARNING, "No ending delimiter found"); + return NULL; + } + + /* Make a copy of the actual pattern. */ + pattern = estrndup(p, pp-p); + + /* Move on to the options */ + pp++; + + /* Parse through the options, setting appropriate flags. Display + a warning if we encounter an unknown option. */ + while (*pp != 0) { + switch (*pp++) { + case 'i': coptions |= PCRE_CASELESS; break; + case 'm': coptions |= PCRE_MULTILINE; break; + case 's': coptions |= PCRE_DOTALL; break; + case 'x': coptions |= PCRE_EXTENDED; break; + + case 'A': coptions |= PCRE_ANCHORED; break; + case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break; + case 'S': do_study = 1; break; + case 'U': coptions |= PCRE_UNGREEDY; break; + case 'X': coptions |= PCRE_EXTRA; break; + + case ' ': + case '\n': + break; + + default: + zend_error(E_WARNING, "Unknown option '%c'", pp[-1]); + efree(pattern); + return NULL; + } + } + + /* Compile pattern and display a warning if compilation failed. */ + re = pcre_compile(pattern, + coptions, + &error, + &erroffset, + NULL); + + if (re == NULL) { + zend_error(E_WARNING, "Compilation failed: %s at offset %d\n", error, erroffset); + efree(pattern); + return NULL; + } + + /* If study option was specified, study the pattern and + store the result in extra for passing to pcre_exec. 
*/ + if (do_study) { + extra = pcre_study(re, soptions, &error); + if (error != NULL) { + zend_error(E_WARNING, "Error while studying pattern"); + } + } + + efree(pattern); + + return re; +} +/* }}} */ + + +/* {{{ proto pcre_match(string pattern, string subject [, array subpatterns ]) + Perform a Perl-style regular expression match */ +PHP_FUNCTION(pcre_match) +{ + zval *regex, + *subject, + *subpats = NULL; + pcre *re = NULL; + pcre_extra *extra = NULL; + int exoptions = 0; + const char *error; + int count; + int *offsets; + int size_offsets; + int matched; + int i; + zval *entry; + const char **stringlist; + + /* Get function parameters and do error-checking. */ + switch(ARG_COUNT(ht)) { + case 2: + if (getParameters(ht, 2, ®ex, &subject) == FAILURE) { + WRONG_PARAM_COUNT; + } + break; + + case 3: + if (getParameters(ht, 3, ®ex, &subject, &subpats) == FAILURE) { + WRONG_PARAM_COUNT; + } + if (!ParameterPassedByReference(ht, 3)) { + zend_error(E_WARNING, "Array to be filled with matches must be passed by reference."); + RETURN_FALSE; + } + break; + + default: + WRONG_PARAM_COUNT; + } + + /* Make sure we're dealing with strings. */ + convert_to_string(regex); + convert_to_string(subject); + + /* Compile regex or get it from cache. */ + if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra)) == NULL) + return; + + /* Calculate the size of the offsets array, and allocate memory for it. */ + size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3; + offsets = (int *)emalloc(size_offsets * sizeof(int)); + + /* Execute the regular expression. */ + count = pcre_exec(re, extra, subject->value.str.val, subject->value.str.len, + exoptions, offsets, size_offsets); + + /* Check for too many substrings condition. 
*/ + if (count == 0) { + zend_error(E_NOTICE, "Matched, but too many substrings\n"); + count = size_offsets/3; + } + + /* If something has matched */ + if (count >= 0) { + matched = 1; + + for(i=0; i<count; i++) + { + php3_printf("so = %d, eo = %d\n", offsets[i<<1], offsets[(i<<1)+1]); + } + /* If subpatters array has been passed, fill it in with values. */ + if (subpats != NULL) { + /* Try to get the list of substrings and display a warning if failed. */ + if (pcre_get_substring_list(subject->value.str.val, offsets, count, &stringlist) < 0) { + efree(offsets); + efree(re); + zend_error(E_WARNING, "Get subpatterns list failed"); + return; + } + + /* Make sure to clean up the passed array and initialize it. */ + zval_dtor(subpats); + array_init(subpats); + + /* For each subpattern, allocate an array entry, initialize it, + and fill in the matched pattern. Then insert it into the + subpatterns array. */ + for (i=0; i<count; i++) { + entry = (zval *)emalloc(sizeof(zval)); + entry->type = IS_STRING; + entry->value.str.val = estrdup(stringlist[i]); + entry->value.str.len = offsets[(i<<1)+1] - offsets[i<<1]; + entry->is_ref = 0; + entry->refcount = 1; + + zend_hash_index_update(subpats->value.ht, i, &entry, sizeof(zval *), NULL); + } + + efree(stringlist); + } + } + /* If nothing matched */ + else { + matched = 0; + + /* Make sure to clean up the passed array and initialize it + to empty since we don't want to leave previous values in it. 
*/ + if (subpats != NULL) { + zval_dtor(subpats); + array_init(subpats); + } + } + + efree(offsets); + efree(re); + + RETVAL_LONG(matched); +} +/* }}} */ + +/* {{{ int _pcre_get_backref(const char *walk, int *backref) */ +static int _pcre_get_backref(const char *walk, int *backref) +{ + if (*walk < '0' && *walk > '9') + return 0; + else + *backref = *walk - '0'; + + if (walk[1] >= '0' && walk[1] <= '9') + *backref = *backref * 10 + walk[1] - '0'; + + php3_printf("backref = %d\n", *backref); + return 1; +} +/* }}} */ + + +/* {{{ proto pcre_replace() + */ +PHP_FUNCTION(pcre_replace) +{ + zval *regex, + *replace, + *subject; + pcre *re = NULL; + pcre_extra *extra = NULL; + int exoptions = 0; + const char *error; + int count = 0; + int *offsets; + int size_offsets; + int matched; + const char ***stringlist; + int new_len; + int alloc_len; + int subject_len; + int subject_offset; + int result_len; + int backref; + char *result, + *new_buf, + *walkbuf, + *walk; + + if (ARG_COUNT(ht) != 3 || getParameters(ht, 3, ®ex, &replace, &subject) == FAILURE) { + WRONG_PARAM_COUNT; + } + + /* Make sure we're dealing with strings. */ + convert_to_string(regex); + convert_to_string(replace); + convert_to_string(subject); + + /* Compile regex or get it from cache. */ + if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra)) == NULL) + return; + + /* Calculate the size of the offsets array, and allocate memory for it. */ + size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3; + offsets = (int *)emalloc(size_offsets * sizeof(int)); + + subject_len = subject->value.str.len; + + alloc_len = 2 * subject_len + 1; + result = emalloc(alloc_len * sizeof(char)); + if (!result) { + zend_error(E_WARNING, "Unable to allocate memory in pcre_replace"); + efree(re); + efree(offsets); + return; + } + + subject_offset = 0; + result[0] = '\0'; + + while (count >= 0) { + /* Execute the regular expression. 
*/ + count = pcre_exec(re, extra, &subject->value.str.val[subject_offset], + subject->value.str.len-subject_offset, + (subject_offset ? PCRE_NOTBOL : 0), offsets, size_offsets); + + /* Check for too many substrings condition. */ + if (count == 0) { + zend_error(E_NOTICE, "Matched, but too many substrings\n"); + count = size_offsets/3; + } + + if (count > 0) { + new_len = strlen(result) + offsets[0]; /* part before the match */ + walk = replace->value.str.val; + while (*walk) + if ('\\' == *walk && + _pcre_get_backref(walk+1, &backref) && + backref < count) { + new_len += offsets[2*backref+1] - offsets[2*backref]; + walk += (backref > 9) ? 3 : 2; + } else { + new_len++; + walk++; + } + + if (new_len + 1 > alloc_len) { + alloc_len = 1 + alloc_len + 2 * new_len; + new_buf = emalloc(alloc_len); + strcpy(new_buf, result); + efree(result); + result = new_buf; + } + result_len = strlen(result); + /* copy the part of the string before the match */ + strncat(result, &subject->value.str.val[subject_offset], offsets[0]); + + /* copy replacement and backrefs */ + walkbuf = &result[result_len + offsets[0]]; + walk = replace->value.str.val; + while (*walk) + if ('\\' == *walk && + _pcre_get_backref(walk+1, &backref) && + backref < count) { + result_len = offsets[2*backref+1] - offsets[2*backref]; + memcpy (walkbuf, + &subject->value.str.val[subject_offset + offsets[2*backref]], + result_len); + walkbuf += result_len; + walk += (backref > 9) ? 
3 : 2; + } else + *walkbuf++ = *walk++; + *walkbuf = '\0'; + + /* and get ready to keep looking for replacements */ + if (offsets[0] == offsets[1]) { + if (offsets[0] + subject_offset >= subject_len) + break; + new_len = strlen (result) + 1; + if (new_len + 1 > alloc_len) { + alloc_len = 1 + alloc_len + 2 * new_len; + new_buf = emalloc(alloc_len * sizeof(char)); + strcpy(new_buf, result); + efree(result); + result = new_buf; + } + subject_offset += offsets[1] + 1; + result [new_len-1] = subject->value.str.val [subject_offset-1]; + result [new_len] = '\0'; + } else { + subject_offset += offsets[1]; + } + } else { /* REG_NOMATCH */ + new_len = strlen(result) + strlen(&subject->value.str.val[subject_offset]); + if (new_len + 1 > alloc_len) { + alloc_len = new_len + 1; /* now we know exactly how long it is */ + new_buf = emalloc(alloc_len * sizeof(char)); + strcpy(new_buf, result); + efree(result); + result = new_buf; + } + /* stick that last bit of string on our output */ + strcat(result, &subject->value.str.val[subject_offset]); + } + } + + efree(offsets); + efree(re); + + RETVAL_STRING(result, 1); + efree(result); +} +/* }}} */ + + +#endif /* HAVE_PCRE */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + */ diff --git a/ext/pcre/php_pcre.h b/ext/pcre/php_pcre.h new file mode 100644 index 0000000000..a5b5671111 --- /dev/null +++ b/ext/pcre/php_pcre.h @@ -0,0 +1,56 @@ +/* + +----------------------------------------------------------------------+ + | PHP HTML Embedded Scripting Language Version 3.0 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-1999 PHP Development Team (See Credits file) | + +----------------------------------------------------------------------+ + | This program is free software; you can redistribute it and/or modify | + | it under the terms of one of the following licenses: | + | | + | A) the GNU General Public License as published by the Free Software | + | Foundation; 
either version 2 of the License, or (at your option) | + | any later version. | + | | + | B) the PHP License as published by the PHP Development Team and | + | included in the distribution in the file: LICENSE | + | | + | This program is distributed in the hope that it will be useful, | + | but WITHOUT ANY WARRANTY; without even the implied warranty of | + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | + | GNU General Public License for more details. | + | | + | You should have received a copy of both licenses referred to here. | + | If you did not, or have any questions about PHP licensing, please | + | contact core@php.net. | + +----------------------------------------------------------------------+ + | Authors: Andrey Zmievski <andrey@ispi.net> | + +----------------------------------------------------------------------+ + */ + +/* $Id$ */ + +#ifndef _PHP_PCRE_H +#define _PHP_PCRE_H + +#if HAVE_LIBPCRE + +#include "pcre.h" + +extern void php_info_pcre(ZEND_MODULE_INFO_FUNC_ARGS); +extern int php_rinit_pcre(INIT_FUNC_ARGS); + +PHP_FUNCTION(pcre_match); +PHP_FUNCTION(pcre_replace); + +extern zend_module_entry pcre_module_entry; +#define pcre_module_ptr &pcre_module_entry + +#else + +#define pcre_module_ptr NULL + +#endif /* HAVE_LIBPCRE */ + +#define phpext_pcre_ptr pcre_module_ptr + +#endif /* _PHP_PCRE_H */ diff --git a/ext/pcre/setup.stub b/ext/pcre/setup.stub new file mode 100644 index 0000000000..74a845d0cb --- /dev/null +++ b/ext/pcre/setup.stub @@ -0,0 +1,6 @@ +# $Source$ +# $Id$ + +define_option with-pcre 'pcre support?' yesnodir no \ +' Whether to build the pcre extension.' + |