summary | refs | log | tree | commit | diff
path: root/ext/standard/reg.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/standard/reg.c')
-rw-r--r-- ext/standard/reg.c 668
1 files changed, 0 insertions, 668 deletions
diff --git a/ext/standard/reg.c b/ext/standard/reg.c
deleted file mode 100644
index 0b87b18416..0000000000
--- a/ext/standard/reg.c
+++ /dev/null
@@ -1,668 +0,0 @@
-/*
- +----------------------------------------------------------------------+
- | PHP Version 5 |
- +----------------------------------------------------------------------+
- | Copyright (c) 1997-2005 The PHP Group |
- +----------------------------------------------------------------------+
- | This source file is subject to version 3.0 of the PHP license, |
- | that is bundled with this package in the file LICENSE, and is |
- | available through the world-wide-web at the following url: |
- | http://www.php.net/license/3_0.txt. |
- | If you did not receive a copy of the PHP license and are unable to |
- | obtain it through the world-wide-web, please send a note to |
- | license@php.net so we can mail you a copy immediately. |
- +----------------------------------------------------------------------+
- | Authors: Rasmus Lerdorf <rasmus@php.net> |
- | Jim Winstead <jimw@php.net> |
- | Jaakko Hyvätti <jaakko@hyvatti.iki.fi> |
- +----------------------------------------------------------------------+
- */
-/* $Id$ */
-
-#include <stdio.h>
-#include <ctype.h>
-#include "php.h"
-#include "php_string.h"
-#include "reg.h"
-#include "ext/standard/info.h"
-
-ZEND_DECLARE_MODULE_GLOBALS(reg)
-
-typedef struct {
- regex_t preg;
- int cflags;
-} reg_cache;
-
-static int reg_magic = 0;
-
-/* {{{ _php_regcomp
- * Caching front end for regcomp().
- *
- * The pattern text (including its terminating NUL) is the key into the
- * REG(ht_rc) hash. A cached entry is only reused when it was stored with the
- * same cflags; in that case the cached regex_t is copied byte-for-byte into
- * *preg. Otherwise the pattern is compiled with regcomp() and, when that
- * succeeds, a byte-for-byte copy of the result is stored under the pattern.
- *
- * When HAVE_REGEX_T_RE_MAGIC is defined, a cached entry is additionally
- * checked against the saved reg_magic value; on mismatch the whole cache is
- * emptied and the pattern is compiled again.
- *
- * Returns 0 on success (cache hit or successful compile), otherwise the
- * non-zero value returned by regcomp().
- */
-static int _php_regcomp(regex_t *preg, const char *pattern, int cflags)
-{
- int r = 0;
- int patlen = strlen(pattern); /* NUL excluded here; the hash calls below pass patlen+1 */
- reg_cache *rc = NULL;
- TSRMLS_FETCH();
-
- if(zend_hash_find(&REG(ht_rc), (char *) pattern, patlen+1, (void **) &rc) == SUCCESS
- && rc->cflags == cflags) {
-#ifdef HAVE_REGEX_T_RE_MAGIC
- /*
- * We use a saved magic number to see whether cache is corrupted, and if it
- * is, we flush it and compile the pattern from scratch.
- */
- if (rc->preg.re_magic != reg_magic) {
- zend_hash_clean(&REG(ht_rc));
- } else {
- memcpy(preg, &rc->preg, sizeof(*preg));
- return r; /* cache hit: r is still 0 */
- }
- }
-
- r = regcomp(preg, pattern, cflags); /* not yet redirected: the regcomp -> _php_regcomp macro is defined after this function */
- if(!r) {
- reg_cache rcp;
-
- rcp.cflags = cflags;
- memcpy(&rcp.preg, preg, sizeof(*preg));
- /*
- * Since we don't have access to the actual MAGIC1 definition in the private
- * header file, we save the magic value immediately after compilation. Hopefully,
- * it's good.
- */
- if (!reg_magic) reg_magic = preg->re_magic;
- zend_hash_update(&REG(ht_rc), (char *) pattern, patlen+1,
- (void *) &rcp, sizeof(rcp), NULL);
- }
-#else
- memcpy(preg, &rc->preg, sizeof(*preg)); /* cache hit: hand back a copy of the cached regex_t */
- } else {
- r = regcomp(preg, pattern, cflags); /* miss, or hit with different cflags: compile */
- if(!r) {
- reg_cache rcp;
-
- rcp.cflags = cflags;
- memcpy(&rcp.preg, preg, sizeof(*preg));
- zend_hash_update(&REG(ht_rc), (char *) pattern, patlen+1,
- (void *) &rcp, sizeof(rcp), NULL);
- }
- }
-#endif
- return r;
-}
-/* }}} */
-
-/* Element destructor for the pattern cache (installed by
- * php_reg_init_globals): releases the compiled regex held by one entry. */
-static void _free_reg_cache(reg_cache *rc)
-{
-	regex_t *compiled = &rc->preg;
-
-	regfree(compiled);
-}
-
-#undef regfree /* from here on regfree(x) expands to a lone ';' (see the #define on the next line), so the code below never frees a regex_t itself */
-#define regfree(a);
-#undef regcomp /* from here on every regcomp() call in this file goes through the caching _php_regcomp() */
-#define regcomp(a, b, c) _php_regcomp(a, b, c)
-
-/* Module-globals constructor: initialises the compiled-pattern cache table
- * and installs _free_reg_cache as its per-entry destructor. */
-static void php_reg_init_globals(zend_reg_globals *reg_globals TSRMLS_DC)
-{
-	void (*entry_dtor)(void *) = (void (*)(void *)) _free_reg_cache;
-
-	zend_hash_init(&reg_globals->ht_rc, 0, NULL, entry_dtor, 1);
-}
-
-static void php_reg_destroy_globals(zend_reg_globals *reg_globals TSRMLS_DC) /* module-globals destructor */
-{
- zend_hash_destroy(&reg_globals->ht_rc); /* tears down the pattern cache initialised by php_reg_init_globals() */
-}
-
-PHP_MINIT_FUNCTION(regex) /* module startup hook; always reports SUCCESS */
-{
- ZEND_INIT_MODULE_GLOBALS(reg, php_reg_init_globals, php_reg_destroy_globals); /* hooks up the globals constructor/destructor defined above */
- return SUCCESS;
-}
-
-PHP_MSHUTDOWN_FUNCTION(regex) /* module shutdown hook; always reports SUCCESS */
-{
-#ifndef ZTS /* only when ZTS is not defined: destroy the globals explicitly here */
- php_reg_destroy_globals(&reg_globals TSRMLS_CC);
-#endif
-
- return SUCCESS;
-}
-
-PHP_MINFO_FUNCTION(regex) /* prints one info-table row naming the regex library in use */
-{
-#if HSREGEX /* non-zero HSREGEX selects the "Bundled library" text, otherwise "System library" */
- php_info_print_table_row(2, "Regex Library", "Bundled library enabled");
-#else
- php_info_print_table_row(2, "Regex Library", "System library enabled");
-#endif
-}
-
-
-/* {{{ php_reg_eprint
- * php_reg_eprint - report a regex error as a PHP warning
- *
- * err is the error code handed back by regcomp()/regexec() and re the
- * regex_t it belongs to. When the regex library provides REG_ITOA, the
- * symbolic name of the error is put in front of the descriptive text
- * ("NAME: description"); otherwise only the description is reported.
- * The text is emitted with php_error_docref() at E_WARNING level.
- */
-static void php_reg_eprint(int err, regex_t *re) {
-	char *buf = NULL, *message = NULL;
-	size_t len;
-	size_t buf_len = 0;    /* size of the symbolic name, terminating NUL included */
-	size_t prefix_len = 0; /* characters of "NAME: " actually placed in message */
-
-#ifdef REG_ITOA
-	/* get the length of the message */
-	buf_len = regerror(REG_ITOA | err, re, NULL, 0);
-	if (buf_len) {
-		buf = (char *)safe_emalloc(buf_len, sizeof(char), 0);
-		if (!buf) return; /* fail silently */
-		/* finally, get the error message */
-		regerror(REG_ITOA | err, re, buf, buf_len);
-	}
-#endif
-	len = regerror(err, re, NULL, 0);
-	if (len) {
-		size_t message_size = buf_len + len + 2;
-		TSRMLS_FETCH();
-
-		message = (char *)safe_emalloc(message_size, sizeof(char), 0);
-		if (!message) {
-			STR_FREE(buf); /* do not leak the symbolic name */
-			return; /* fail silently */
-		}
-		if (buf_len) {
-			/*
-			 * Bound the write by the size of the destination. Bounding it by
-			 * buf_len (which already counts the name's NUL) cut off the ": "
-			 * separator and left a NUL in front of the description, so only
-			 * the symbolic name ever reached the warning.
-			 */
-			snprintf(message, message_size, "%s: ", buf);
-			prefix_len = strlen(message);
-		}
-		/* drop the message into place, right behind the prefix */
-		regerror(err, re, message + prefix_len, len);
-
-		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", message);
-	}
-
-	STR_FREE(buf);
-	STR_FREE(message);
-}
-/* }}} */
-
-/* {{{ php_ereg
- * Shared implementation of ereg() and eregi().
- *
- * Accepts two or three PHP arguments: the pattern, the subject string and an
- * optional register array. A non-zero icase adds REG_ICASE. A string pattern
- * is compiled with REG_EXTENDED; any other type is converted to a string
- * first (doubles are turned into integers beforehand) and compiled without
- * REG_EXTENDED. With only two arguments REG_NOSUB is added.
- *
- * Result placed in return_value:
- *   FALSE  on a compile error, an execution error or when nothing matched;
- *   the length of the whole match (0 is reported as 1) when a register
- *          array was passed;
- *   1      on a match without a register array.
- * On a match the register array is re-initialised and gets one element per
- * (sub-)expression: the matched text, or FALSE when the group did not take
- * part in the match or matched an empty string.
- */
-static void php_ereg(INTERNAL_FUNCTION_PARAMETERS, int icase)
-{
-	pval **regex,			/* Regular expression */
-		**findin,		/* String to apply expression to */
-		**array = NULL;		/* Optional register array */
-	regex_t re;
-	regmatch_t *subs;
-	int err, match_len, string_len;
-	uint i;
-	int copts = 0;
-	off_t start, end;
-	char *string = NULL;
-	int argc = ZEND_NUM_ARGS();
-
-	if (argc < 2 || argc > 3 ||
-		zend_get_parameters_ex(argc, &regex, &findin, &array) == FAILURE) {
-		WRONG_PARAM_COUNT;
-	}
-
-	if (icase) {
-		copts |= REG_ICASE;
-	}
-
-	if (argc == 2) {
-		copts |= REG_NOSUB;
-	}
-
-	/* compile the regular expression from the supplied regex */
-	if (Z_TYPE_PP(regex) == IS_STRING) {
-		err = regcomp(&re, Z_STRVAL_PP(regex), REG_EXTENDED | copts);
-	} else {
-		/* we convert numbers to integers and treat them as a string */
-		if (Z_TYPE_PP(regex) == IS_DOUBLE) {
-			convert_to_long_ex(regex);	/* get rid of decimal places */
-		}
-		convert_to_string_ex(regex);
-		/* don't bother doing an extended regex with just a number */
-		err = regcomp(&re, Z_STRVAL_PP(regex), copts);
-	}
-
-	if (err) {
-		php_reg_eprint(err, &re);
-		RETURN_FALSE;
-	}
-
-	/* make a copy of the string we're looking in */
-	convert_to_string_ex(findin);
-	string = estrndup(Z_STRVAL_PP(findin), Z_STRLEN_PP(findin));
-
-	/* allocate storage for (sub-)expression-matches */
-	subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
-
-	/* actually execute the regular expression */
-	err = regexec(&re, string, re.re_nsub+1, subs, 0);
-	if (err && err != REG_NOMATCH) {
-		php_reg_eprint(err, &re);
-		regfree(&re);
-		efree(subs);
-		efree(string);	/* the subject copy must be released on this path too */
-		RETURN_FALSE;
-	}
-	match_len = 1;
-
-	if (array && err != REG_NOMATCH) {
-		match_len = (int) (subs[0].rm_eo - subs[0].rm_so);
-		/* one past the subject length: upper bound for valid offsets below */
-		string_len = Z_STRLEN_PP(findin) + 1;
-
-		zval_dtor(*array);	/* start with clean array */
-		array_init(*array);
-
-		for (i = 0; i <= re.re_nsub; i++) {
-			start = subs[i].rm_so;
-			end = subs[i].rm_eo;
-			if (start != -1 && end > 0 && start < string_len && end < string_len && start < end) {
-				add_index_stringl(*array, i, string+start, end-start, 1);
-			} else {
-				add_index_bool(*array, i, 0);
-			}
-		}
-	}
-
-	efree(subs);
-	efree(string);
-	if (err == REG_NOMATCH) {
-		RETVAL_FALSE;
-	} else {
-		if (match_len == 0) {
-			match_len = 1;	/* keep the result truthy for an empty match */
-		}
-		RETVAL_LONG(match_len);
-	}
-	regfree(&re);
-}
-/* }}} */
-
-/* {{{ proto int ereg(string pattern, string string [, array registers])
- Regular expression match.
- Thin wrapper: forwards its arguments to php_ereg() with icase = 0
- (case-sensitive matching). */
-PHP_FUNCTION(ereg)
-{
- php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
-}
-/* }}} */
-
-/* {{{ proto int eregi(string pattern, string string [, array registers])
- Case-insensitive regular expression match.
- Thin wrapper: forwards its arguments to php_ereg() with icase = 1, which
- makes php_ereg() add REG_ICASE to the compile flags. */
-PHP_FUNCTION(eregi)
-{
- php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
-}
-/* }}} */
-
-/* {{{ php_reg_replace
- * this is the meat and potatoes of regex replacement!
- *
- * Replaces every match of pattern in string by replace and returns the
- * result in a freshly allocated, NUL-terminated buffer that the caller has
- * to free. A non-zero icase adds REG_ICASE, a non-zero extended adds
- * REG_EXTENDED to the compile flags.
- *
- * Inside replace, a backslash followed by a digit that does not exceed the
- * number of sub-expressions is a back reference and is substituted by the
- * text of that sub-match (nothing is inserted when the group did not match);
- * every other character is copied literally.
- *
- * On a compile or execution error the error is reported through
- * php_reg_eprint() and (char *) -1 is returned.
- */
-PHPAPI char *php_reg_replace(const char *pattern, const char *replace, const char *string, int icase, int extended)
-{
-	regex_t re;
-	regmatch_t *subs;
-
-	char *buf,	/* buf is where we build the replaced string */
-	     *nbuf,	/* nbuf is used when we grow the buffer */
-	     *walkbuf;	/* used to walk buf when replacing backrefs */
-	const char *walk;	/* used to walk replacement string for backrefs */
-	int buf_len;
-	int pos, tmp, string_len, new_l;
-	int err, copts = 0;
-	int ref;	/* index of the back reference currently looked at */
-
-	string_len = strlen(string);
-
-	if (icase) {
-		copts = REG_ICASE;
-	}
-	if (extended) {
-		copts |= REG_EXTENDED;
-	}
-
-	err = regcomp(&re, pattern, copts);
-	if (err) {
-		php_reg_eprint(err, &re);
-		return ((char *) -1);
-	}
-
-
-	/* allocate storage for (sub-)expression-matches */
-	subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
-
-	/* start with a buffer that is twice the size of the string
-	   we're doing replacements in */
-	buf_len = 2 * string_len + 1;
-	buf = safe_emalloc(buf_len, sizeof(char), 0);
-
-	err = pos = 0;
-	buf[0] = '\0';
-	while (!err) {
-		/* after the first round we are no longer at the beginning of the line */
-		err = regexec(&re, &string[pos], re.re_nsub+1, subs, (pos ? REG_NOTBOL : 0));
-
-		if (err && err != REG_NOMATCH) {
-			php_reg_eprint(err, &re);
-			efree(subs);
-			efree(buf);
-			regfree(&re);
-			return ((char *) -1);
-		}
-
-		if (!err) {
-			/* backref replacement is done in two passes:
-			   1) find out how long the string will be, and allocate buf
-			   2) copy the part before match, replacement and backrefs to buf
-
-			   Jaakko Hyvätti <Jaakko.Hyvatti@iki.fi>
-			   */
-
-			new_l = strlen(buf) + subs[0].rm_so; /* part before the match */
-			walk = replace;
-			while (*walk) {
-				if ('\\' == *walk && isdigit((unsigned char)walk[1]) && walk[1] - '0' <= (int)re.re_nsub) {
-					ref = walk[1] - '0';
-					/* same test as in the copy pass below, so that the
-					   size computed here can never fall short of what
-					   is actually written */
-					if (subs[ref].rm_so > -1 && subs[ref].rm_eo > -1
-						&& subs[ref].rm_so <= subs[ref].rm_eo) {
-						new_l += subs[ref].rm_eo - subs[ref].rm_so;
-					}
-					walk += 2;
-				} else {
-					new_l++;
-					walk++;
-				}
-			}
-			if (new_l + 1 > buf_len) {
-				buf_len = 1 + buf_len + 2 * new_l;
-				nbuf = safe_emalloc(buf_len, sizeof(char), 0);
-				strcpy(nbuf, buf);
-				efree(buf);
-				buf = nbuf;
-			}
-			tmp = strlen(buf);
-			/* copy the part of the string before the match */
-			strncat(buf, &string[pos], subs[0].rm_so);
-
-			/* copy replacement and backrefs */
-			walkbuf = &buf[tmp + subs[0].rm_so];
-			walk = replace;
-			while (*walk) {
-				/* isdigit() needs a value in unsigned char range */
-				if ('\\' == *walk && isdigit((unsigned char)walk[1]) && walk[1] - '0' <= (int)re.re_nsub) {
-					ref = walk[1] - '0';
-					if (subs[ref].rm_so > -1 && subs[ref].rm_eo > -1
-						/* this next case shouldn't happen. it does. */
-						&& subs[ref].rm_so <= subs[ref].rm_eo) {
-
-						tmp = subs[ref].rm_eo - subs[ref].rm_so;
-						memcpy (walkbuf, &string[pos + subs[ref].rm_so], tmp);
-						walkbuf += tmp;
-					}
-					walk += 2;
-				} else {
-					*walkbuf++ = *walk++;
-				}
-			}
-			*walkbuf = '\0';
-
-			/* and get ready to keep looking for replacements */
-			if (subs[0].rm_so == subs[0].rm_eo) {
-				/* empty match: copy one subject character and step over it,
-				   otherwise the loop would never advance */
-				if (subs[0].rm_so + pos >= string_len) {
-					break;
-				}
-				new_l = strlen (buf) + 1;
-				if (new_l + 1 > buf_len) {
-					buf_len = 1 + buf_len + 2 * new_l;
-					nbuf = safe_emalloc(buf_len, sizeof(char), 0);
-					strcpy(nbuf, buf);
-					efree(buf);
-					buf = nbuf;
-				}
-				pos += subs[0].rm_eo + 1;
-				buf [new_l-1] = string [pos-1];
-				buf [new_l] = '\0';
-			} else {
-				pos += subs[0].rm_eo;
-			}
-		} else { /* REG_NOMATCH */
-			new_l = strlen(buf) + strlen(&string[pos]);
-			if (new_l + 1 > buf_len) {
-				buf_len = new_l + 1; /* now we know exactly how long it is */
-				nbuf = safe_emalloc(buf_len, sizeof(char), 0);
-				strcpy(nbuf, buf);
-				efree(buf);
-				buf = nbuf;
-			}
-			/* stick that last bit of string on our output */
-			strcat(buf, &string[pos]);
-		}
-	}
-
-	/* don't want to leak memory .. */
-	efree(subs);
-	regfree(&re);
-
-	/* whew. */
-	return (buf);
-}
-/* }}} */
-
-/* Returns a private, NUL-terminated copy of a pattern/replacement argument.
- * A non-empty string is duplicated, an empty string yields an empty
- * allocation, and any other type is converted to an integer whose value is
- * stored as a single character. */
-static char *php_ereg_arg_to_cstr(pval **arg)
-{
-	char *copy;
-
-	if (Z_TYPE_PP(arg) != IS_STRING) {
-		convert_to_long_ex(arg);
-		copy = emalloc(2);
-		copy[0] = (char) Z_LVAL_PP(arg);
-		copy[1] = '\0';
-		return copy;
-	}
-
-	if (Z_STRVAL_PP(arg) && Z_STRLEN_PP(arg)) {
-		return estrndup(Z_STRVAL_PP(arg), Z_STRLEN_PP(arg));
-	}
-	return STR_EMPTY_ALLOC();
-}
-
-/* {{{ php_ereg_replace
- * Shared implementation of ereg_replace() and eregi_replace(): takes exactly
- * three PHP arguments (pattern, replacement, subject), runs
- * php_reg_replace() on private copies of them with extended syntax enabled
- * and returns the resulting string, or FALSE when php_reg_replace() failed.
- */
-static void php_ereg_replace(INTERNAL_FUNCTION_PARAMETERS, int icase)
-{
-	pval **arg_pattern, **arg_replace, **arg_string;
-	char *pattern, *replace, *string;
-	char *ret;
-
-	if (ZEND_NUM_ARGS() != 3 ||
-		zend_get_parameters_ex(3, &arg_pattern, &arg_replace, &arg_string) == FAILURE) {
-		WRONG_PARAM_COUNT;
-	}
-
-	pattern = php_ereg_arg_to_cstr(arg_pattern);
-	replace = php_ereg_arg_to_cstr(arg_replace);
-
-	/* the subject is always treated as a string */
-	convert_to_string_ex(arg_string);
-	if (!Z_STRVAL_PP(arg_string) || !Z_STRLEN_PP(arg_string)) {
-		string = STR_EMPTY_ALLOC();
-	} else {
-		string = estrndup(Z_STRVAL_PP(arg_string), Z_STRLEN_PP(arg_string));
-	}
-
-	/* do the actual work */
-	ret = php_reg_replace(pattern, replace, string, icase, 1);
-	if (ret != (char *) -1) {
-		/* return_value receives its own copy, so ret is released here */
-		RETVAL_STRING(ret, 1);
-		STR_FREE(ret);
-	} else {
-		RETVAL_FALSE;
-	}
-
-	STR_FREE(string);
-	STR_FREE(replace);
-	STR_FREE(pattern);
-}
-/* }}} */
-
-/* {{{ proto string ereg_replace(string pattern, string replacement, string string)
- Replace regular expression.
- Thin wrapper: forwards its arguments to php_ereg_replace() with icase = 0
- (case-sensitive matching). */
-PHP_FUNCTION(ereg_replace)
-{
- php_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
-}
-/* }}} */
-
-/* {{{ proto string eregi_replace(string pattern, string replacement, string string)
- Case insensitive replace regular expression.
- Thin wrapper: forwards its arguments to php_ereg_replace() with icase = 1
- (case-insensitive matching). */
-PHP_FUNCTION(eregi_replace)
-{
- php_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
-}
-/* }}} */
-
-/* {{{ php_split
- * Shared implementation of split() and spliti().
- *
- * Takes the delimiter pattern, the subject string and an optional limit.
- * The pattern is compiled with REG_EXTENDED (plus REG_ICASE when icase is
- * non-zero) and the subject is cut at every match. The pieces are returned
- * as an array; with a limit, splitting stops once limit - 1 pieces have been
- * produced and the rest of the subject becomes the final element.
- * FALSE is returned on a compile or execution error and when the pattern
- * matches the empty string at the current position.
- */
-static void php_split(INTERNAL_FUNCTION_PARAMETERS, int icase)
-{
-	zval **spliton, **str, **arg_count = NULL;
-	regex_t re;
-	regmatch_t match[1];
-	char *cursor, *subject_end;
-	int err, size;
-	int limit = -1;	/* -1 means "no limit" */
-	int copts = 0;
-	int argc = ZEND_NUM_ARGS();
-
-	if (argc < 2 || argc > 3 ||
-		zend_get_parameters_ex(argc, &spliton, &str, &arg_count) == FAILURE) {
-		WRONG_PARAM_COUNT;
-	}
-
-	if (argc > 2) {
-		convert_to_long_ex(arg_count);
-		limit = Z_LVAL_PP(arg_count);
-	}
-
-	if (icase) {
-		copts = REG_ICASE;
-	}
-
-	convert_to_string_ex(spliton);
-	convert_to_string_ex(str);
-
-	cursor = Z_STRVAL_PP(str);
-	subject_end = cursor + Z_STRLEN_PP(str);
-
-	err = regcomp(&re, Z_STRVAL_PP(spliton), REG_EXTENDED | copts);
-	if (err) {
-		php_reg_eprint(err, &re);
-		RETURN_FALSE;
-	}
-
-	array_init(return_value);
-
-	/* walk through the subject, emitting one array entry per delimiter hit */
-	for (;;) {
-		if (limit != -1 && limit <= 1) {
-			break;	/* limit reached: the remainder is added below */
-		}
-
-		err = regexec(&re, cursor, 1, match, 0);
-		if (err) {
-			break;	/* no further match, or a real error (checked below) */
-		}
-
-		if (match[0].rm_so != 0) {
-			/* ordinary hit: the text in front of the delimiter is a piece */
-			size = match[0].rm_so;
-			add_next_index_stringl(return_value, cursor, size, 1);
-
-			/* continue right behind the delimiter */
-			cursor = cursor + match[0].rm_eo;
-		} else if (match[0].rm_eo) {
-			/* delimiter sits at the very start: yields an empty piece */
-			add_next_index_stringl(return_value, "", 0, 1);
-			cursor += match[0].rm_eo;
-		} else {
-			/* empty match at the current position: give up */
-			regfree(&re);
-			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid Regular Expression to split()");
-			zend_hash_destroy(Z_ARRVAL_P(return_value));
-			efree(Z_ARRVAL_P(return_value));
-			RETURN_FALSE;
-		}
-
-		/* a limited split counts down with every piece produced */
-		if (limit != -1) {
-			limit--;
-		}
-	}
-
-	/* anything but "no match" at this point is a genuine error */
-	if (err && err != REG_NOMATCH) {
-		php_reg_eprint(err, &re);
-		regfree(&re);
-		zend_hash_destroy(Z_ARRVAL_P(return_value));
-		efree(Z_ARRVAL_P(return_value));
-		RETURN_FALSE;
-	}
-
-	/* whatever is left of the subject forms the last element */
-	size = subject_end - cursor;
-	add_next_index_stringl(return_value, cursor, size, 1);
-
-	regfree(&re);
-}
-/* }}} */
-
-/* {{{ proto array split(string pattern, string string [, int limit])
- Split string into array by regular expression.
- Thin wrapper: forwards its arguments to php_split() with icase = 0
- (case-sensitive matching). */
-PHP_FUNCTION(split)
-{
- php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
-}
-/* }}} */
-
-/* {{{ proto array spliti(string pattern, string string [, int limit])
- Split string into array by regular expression case-insensitive.
- Thin wrapper: forwards its arguments to php_split() with icase = 1
- (case-insensitive matching). */
-
-PHP_FUNCTION(spliti)
-{
- php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
-}
-
-/* }}} */
-
-/* {{{ proto string sql_regcase(string string)
-   Make regular expression for case insensitive match.
-   Every alphabetic byte of the argument is replaced by a bracket expression
-   holding its upper-case and lower-case form (e.g. "a" becomes "[Aa]");
-   all other bytes are copied unchanged. */
-PHPAPI PHP_FUNCTION(sql_regcase)
-{
-	zval **string;
-	char *out;
-	const char *src;
-	int src_len, pos;
-	int out_len = 0;
-
-	if (ZEND_NUM_ARGS()!=1 || zend_get_parameters_ex(1, &string)==FAILURE) {
-		WRONG_PARAM_COUNT;
-	}
-	convert_to_string_ex(string);
-
-	src = Z_STRVAL_PP(string);
-	src_len = Z_STRLEN_PP(string);
-
-	/* worst case: four output bytes per input byte, plus the terminator */
-	out = safe_emalloc(src_len, 4, 1);
-
-	for (pos = 0; pos < src_len; pos++) {
-		unsigned char ch = (unsigned char) src[pos];
-
-		if (!isalpha(ch)) {
-			out[out_len++] = ch;
-			continue;
-		}
-		out[out_len++] = '[';
-		out[out_len++] = toupper(ch);
-		out[out_len++] = tolower(ch);
-		out[out_len++] = ']';
-	}
-	out[out_len] = 0;
-
-	/* return_value gets its own copy, so the scratch buffer is released */
-	RETVAL_STRINGL(out, out_len, 1);
-	efree(out);
-}
-/* }}} */
-
-/*
- * Local variables:
- * tab-width: 4
- * c-basic-offset: 4
- * End:
- * vim600: noet sw=4 ts=4 fdm=marker
- * vim<600: noet sw=4 ts=4
- */