diff options
Diffstat (limited to 'ext/standard/reg.c')
-rw-r--r-- | ext/standard/reg.c | 569 |
1 files changed, 569 insertions, 0 deletions
diff --git a/ext/standard/reg.c b/ext/standard/reg.c new file mode 100644 index 0000000000..42d36493d6 --- /dev/null +++ b/ext/standard/reg.c @@ -0,0 +1,569 @@ +/* + +----------------------------------------------------------------------+ + | PHP HTML Embedded Scripting Language Version 3.0 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997,1998 PHP Development Team (See Credits file) | + +----------------------------------------------------------------------+ + | This program is free software; you can redistribute it and/or modify | + | it under the terms of one of the following licenses: | + | | + | A) the GNU General Public License as published by the Free Software | + | Foundation; either version 2 of the License, or (at your option) | + | any later version. | + | | + | B) the PHP License as published by the PHP Development Team and | + | included in the distribution in the file: LICENSE | + | | + | This program is distributed in the hope that it will be useful, | + | but WITHOUT ANY WARRANTY; without even the implied warranty of | + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | + | GNU General Public License for more details. | + | | + | You should have received a copy of both licenses referred to here. | + | If you did not, or have any questions about PHP licensing, please | + | contact core@php.net. | + +----------------------------------------------------------------------+ + | Authors: Rasmus Lerdorf <rasmus@lerdorf.on.ca> | + | Jim Winstead <jimw@php.net> | + | Jaakko Hyvätti <jaakko@hyvatti.iki.fi> | + +----------------------------------------------------------------------+ + */ +/* $Id$ */ +#ifdef THREAD_SAFE +#include "tls.h" +#endif +#include <stdio.h> +#include "php.h" +#include "php3_string.h" +#include "reg.h" + +unsigned char third_argument_force_ref[] = { 3, BYREF_NONE, BYREF_NONE, BYREF_FORCE }; + +function_entry reg_functions[] = { + {"ereg", php3_ereg, third_argument_force_ref }, + {"ereg_replace", php3_eregreplace, NULL }, + {"eregi", php3_eregi, third_argument_force_ref }, + {"eregi_replace", php3_eregireplace, NULL }, + {"split", php3_split, NULL}, + {"join", php3_implode, NULL}, + {"sql_regcase", php3_sql_regcase, NULL}, + {NULL, NULL, NULL} +}; + +php3_module_entry regexp_module_entry = { + "Regular Expressions", reg_functions, NULL, NULL, NULL, NULL, NULL, STANDARD_MODULE_PROPERTIES +}; + +/* This is the maximum number of (..) constructs we'll generate from a + call to ereg() or eregi() with the optional third argument. */ +#define NS 10 + +/* + * _php3_reg_eprint - convert error number to name + */ +static void _php3_reg_eprint(int err, regex_t *re) { + char *buf = NULL, *message = NULL; + size_t len; + size_t buf_len; + +#ifdef REG_ITOA + /* get the length of the message */ + buf_len = regerror(REG_ITOA | err, re, NULL, 0); + if (buf_len) { + buf = (char *)emalloc(buf_len * sizeof(char)); + if (!buf) return; /* fail silently */ + /* finally, get the error message */ + regerror(REG_ITOA | err, re, buf, buf_len); + } +#else + buf_len = 0; +#endif + len = regerror(err, re, NULL, 0); + if (len) { + message = (char *)emalloc((buf_len + len + 2) * sizeof(char)); + if (!message) { + return; /* fail silently */ + } + if (buf_len) { + snprintf(message, buf_len, "%s: ", buf); + buf_len += 1; /* so pointer math below works */ + } + /* drop the message into place */ + regerror(err, re, message + buf_len, len); + + php3_error(E_WARNING, "%s", message); + } + + STR_FREE(buf); + STR_FREE(message); +} + +static void _php3_ereg(INTERNAL_FUNCTION_PARAMETERS, int icase) +{ + pval *regex, /* Regular expression */ + *findin, /* String to apply expression to */ + *array = NULL; /* Optional register array */ + regex_t re; + regmatch_t subs[NS]; + int err, i, match_len, string_len; + int copts = 0; + off_t start, end; + char *buf = NULL; + char *string = NULL; + TLS_VARS; + + if (icase) + copts |= REG_ICASE; + + switch(ARG_COUNT(ht)) { + case 2: + if (getParameters(ht, 2, ®ex, &findin) == FAILURE) { + WRONG_PARAM_COUNT; + } + /* don't bother doing substring matching if we're not going + to make use of the information */ + copts |= REG_NOSUB; + break; + case 3: + if (getParameters(ht, 3, ®ex, &findin, &array) == FAILURE) { + WRONG_PARAM_COUNT; + } + if (!ParameterPassedByReference(ht, 3)) { + php3_error(E_WARNING, "Array to be filled with values must be passed by reference."); + RETURN_FALSE; + } + break; + default: + WRONG_PARAM_COUNT; + } + + + /* compile the regular expression from the supplied regex */ + if (regex->type == IS_STRING) { + err = regcomp(&re, regex->value.str.val, REG_EXTENDED | copts); + } else { + /* we convert numbers to integers and treat them as a string */ + if (regex->type == IS_DOUBLE) + convert_to_long(regex); /* get rid of decimal places */ + convert_to_string(regex); + /* don't bother doing an extended regex with just a number */ + err = regcomp(&re, regex->value.str.val, copts); + } + + if (err) { + _php3_reg_eprint(err, &re); + RETURN_FALSE; + } + + /* make a copy of the string we're looking in */ + convert_to_string(findin); + string = estrndup(findin->value.str.val, findin->value.str.len); + + /* actually execute the regular expression */ + err = regexec(&re, string, (size_t) NS, subs, 0); + if (err && err != REG_NOMATCH) { + _php3_reg_eprint(err, &re); + regfree(&re); + RETURN_FALSE; + } + match_len = 1; + + if (array && err != REG_NOMATCH) { + match_len = (int) (subs[0].rm_eo - subs[0].rm_so); + string_len = strlen(string) + 1; + + buf = emalloc(string_len); + if (!buf) { + php3_error(E_WARNING, "Unable to allocate memory in _php3_ereg"); + RETURN_FALSE; + } + + pval_destructor(array _INLINE_TLS); /* start with clean array */ + array_init(array); + + for (i = 0; i < NS; i++) { + start = subs[i].rm_so; + end = subs[i].rm_eo; + if (start != -1 && end > 0 && start < string_len && end < string_len && start < end) { + add_index_stringl(array, i, string+start, end-start, 1); + } + } + efree(buf); + } + + efree(string); + if (err == REG_NOMATCH) { + RETVAL_FALSE; + } else { + if (match_len == 0) + match_len = 1; + RETVAL_LONG(match_len); + } + regfree(&re); +} + +/* {{{ proto int ereg(string pattern, string string [, array registers]) + Regular expression match */ +void php3_ereg(INTERNAL_FUNCTION_PARAMETERS) +{ + _php3_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); +} +/* }}} */ + +/* {{{ proto int eregi(string pattern, string string [, array registers]) + Case-insensitive regular expression match */ +void php3_eregi(INTERNAL_FUNCTION_PARAMETERS) +{ + _php3_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); +} +/* }}} */ + +/* this is the meat and potatoes of regex replacement! */ +char *_php3_regreplace(const char *pattern, const char *replace, const char *string, int icase, int extended) +{ + regex_t re; + regmatch_t subs[NS]; + + char *buf, /* buf is where we build the replaced string */ + *nbuf, /* nbuf is used when we grow the buffer */ + *walkbuf; /* used to walk buf when replacing backrefs */ + const char *walk; /* used to walk replacement string for backrefs */ + int buf_len; + int pos, tmp, string_len, new_l; + int err, copts = 0; + + string_len = strlen(string); + if (!string_len) + return (char *)string; + + if (icase) + copts = REG_ICASE; + if (extended) + copts |= REG_EXTENDED; + err = regcomp(&re, pattern, copts); + if (err) { + _php3_reg_eprint(err, &re); + return ((char *) -1); + } + + /* start with a buffer that is twice the size of the stringo + we're doing replacements in */ + buf_len = 2 * string_len + 1; + buf = emalloc(buf_len * sizeof(char)); + if (!buf) { + php3_error(E_WARNING, "Unable to allocate memory in _php3_regreplace"); + regfree(&re); + return ((char *) -1); + } + + err = pos = 0; + buf[0] = '\0'; + + while (!err) { + err = regexec(&re, &string[pos], (size_t) NS, subs, (pos ? REG_NOTBOL : 0)); + + if (err && err != REG_NOMATCH) { + _php3_reg_eprint(err, &re); + regfree(&re); + return ((char *) -1); + } + if (!err) { + /* backref replacement is done in two passes: + 1) find out how long the string will be, and allocate buf + 2) copy the part before match, replacement and backrefs to buf + + Jaakko Hyvätti <Jaakko.Hyvatti@iki.fi> + */ + + new_l = strlen(buf) + subs[0].rm_so; /* part before the match */ + walk = replace; + while (*walk) + if ('\\' == *walk + && '0' <= walk[1] && '9' >= walk[1] + && subs[walk[1] - '0'].rm_so > -1 + && subs[walk[1] - '0'].rm_eo > -1) { + new_l += subs[walk[1] - '0'].rm_eo + - subs[walk[1] - '0'].rm_so; + walk += 2; + } else { + new_l++; + walk++; + } + + if (new_l + 1 > buf_len) { + buf_len = 1 + buf_len + 2 * new_l; + nbuf = emalloc(buf_len); + strcpy(nbuf, buf); + efree(buf); + buf = nbuf; + } + tmp = strlen(buf); + /* copy the part of the string before the match */ + strncat(buf, &string[pos], subs[0].rm_so); + + /* copy replacement and backrefs */ + walkbuf = &buf[tmp + subs[0].rm_so]; + walk = replace; + while (*walk) + if ('\\' == *walk + && '0' <= walk[1] && '9' >= walk[1] + && subs[walk[1] - '0'].rm_so > -1 + && subs[walk[1] - '0'].rm_eo > -1) { + tmp = subs[walk[1] - '0'].rm_eo + - subs[walk[1] - '0'].rm_so; + memcpy (walkbuf, + &string[pos + subs[walk[1] - '0'].rm_so], + tmp); + walkbuf += tmp; + walk += 2; + } else + *walkbuf++ = *walk++; + *walkbuf = '\0'; + + /* and get ready to keep looking for replacements */ + if (subs[0].rm_so == subs[0].rm_eo) { + if (subs[0].rm_so + pos >= string_len) + break; + new_l = strlen (buf) + 1; + if (new_l + 1 > buf_len) { + buf_len = 1 + buf_len + 2 * new_l; + nbuf = emalloc(buf_len * sizeof(char)); + strcpy(nbuf, buf); + efree(buf); + buf = nbuf; + } + pos += subs[0].rm_eo + 1; + buf [new_l-1] = string [pos-1]; + buf [new_l] = '\0'; + } else { + pos += subs[0].rm_eo; + } + } else { /* REG_NOMATCH */ + new_l = strlen(buf) + strlen(&string[pos]); + if (new_l + 1 > buf_len) { + buf_len = new_l + 1; /* now we know exactly how long it is */ + nbuf = emalloc(buf_len * sizeof(char)); + strcpy(nbuf, buf); + efree(buf); + buf = nbuf; + } + /* stick that last bit of string on our output */ + strcat(buf, &string[pos]); + } + } + + /* don't want to leak memory .. */ + regfree(&re); + + /* whew. */ + return (buf); +} + +static void _php3_eregreplace(INTERNAL_FUNCTION_PARAMETERS, int icase) +{ + pval *arg_pattern, + *arg_replace, + *arg_string; + char *pattern; + char *string; + char *replace; + char *ret; + TLS_VARS; + + if (ARG_COUNT(ht) != 3 || getParameters(ht, 3, &arg_pattern, &arg_replace, &arg_string) == FAILURE) { + WRONG_PARAM_COUNT; + } + + if (arg_pattern->type == IS_STRING) { + if (arg_pattern->value.str.val && arg_pattern->value.str.len) + pattern = estrndup(arg_pattern->value.str.val,arg_pattern->value.str.len); + else + pattern = empty_string; + } else { + convert_to_long(arg_pattern); + pattern = emalloc(2); + pattern[0] = (char) arg_pattern->value.lval; + pattern[1] = '\0'; + } + + if (arg_replace->type == IS_STRING) { + if (arg_replace->value.str.val && arg_replace->value.str.len) + replace = estrndup(arg_replace->value.str.val, arg_replace->value.str.len); + else + replace = empty_string; + } else { + convert_to_long(arg_replace); + replace = emalloc(2); + replace[0] = (char) arg_replace->value.lval; + replace[1] = '\0'; + } + + convert_to_string(arg_string); + if (arg_string->value.str.val && arg_string->value.str.len) + string = estrndup(arg_string->value.str.val, arg_string->value.str.len); + else + string = empty_string; + + /* do the actual work */ + ret = _php3_regreplace(pattern, replace, string, icase, 1); + if (ret == (char *) -1) { + RETVAL_FALSE; + } else { + RETVAL_STRING(ret,1); + STR_FREE(ret); + } + STR_FREE(string); + STR_FREE(replace); + STR_FREE(pattern); +} + +/* {{{ proto string ereg_replace(string pattern, string string [, array registers]) + Replace regular expression */ +void php3_eregreplace(INTERNAL_FUNCTION_PARAMETERS) +{ + _php3_eregreplace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); +} +/* }}} */ + +/* {{{ proto string eregi_replace(string pattern, string string [, array registers]) + Case insensitive replace regular expression */ +void php3_eregireplace(INTERNAL_FUNCTION_PARAMETERS) +{ + _php3_eregreplace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); +} +/* }}} */ + +/* ("root", "passwd", "uid", "gid", "other:stuff:like:/bin/sh") + = split(":", $passwd_file, 5); */ +/* {{{ proto array split(string pattern, string string [, int limit]) + split string into array by regular expression */ +void php3_split(INTERNAL_FUNCTION_PARAMETERS) +{ + pval *spliton, *str, *arg_count = NULL; + regex_t re; + regmatch_t subs[1]; + char *strp, *endp; + int err, size, count; + TLS_VARS; + + switch (ARG_COUNT(ht)) { + case 2: + if (getParameters(ht, 2, &spliton, &str) == FAILURE) + WRONG_PARAM_COUNT; + count = -1; + break; + case 3: + if (getParameters(ht, 3, &spliton, &str, &arg_count) == FAILURE) + WRONG_PARAM_COUNT; + convert_to_long(arg_count); + count = arg_count->value.lval; + break; + default: + WRONG_PARAM_COUNT; + } + + convert_to_string(spliton); + convert_to_string(str); + + strp = str->value.str.val; + endp = str->value.str.val + strlen(str->value.str.val); + + err = regcomp(&re, spliton->value.str.val, REG_EXTENDED); + if (err) { + php3_error(E_WARNING, "unexpected regex error (%d)", err); + RETURN_FALSE; + } + + if (array_init(return_value) == FAILURE) { + regfree(&re); + RETURN_FALSE; + } + + /* churn through str, generating array entries as we go */ + while ((count == -1 || count > 1) && !(err = regexec(&re, strp, 1, subs, 0))) { + if (subs[0].rm_so == 0 && subs[0].rm_eo) { + /* match is at start of string, return empty string */ + add_next_index_stringl(return_value, empty_string, 0, 1); + /* skip ahead the length of the regex match */ + strp+=subs[0].rm_eo; + } else if (subs[0].rm_so==0 && subs[0].rm_eo==0) { + /* No more matches */ + regfree(&re); + php3_error(E_WARNING, "bad regular expression for split()"); + _php3_hash_destroy(return_value->value.ht); + efree(return_value->value.ht); + RETURN_FALSE; + } else { + /* On a real match */ + + /* make a copy of the substring */ + size = subs[0].rm_so; + + /* add it to the array */ + add_next_index_stringl(return_value, strp, size, 1); + + /* point at our new starting point */ + strp = strp + subs[0].rm_eo; + } + + /* if we're only looking for a certain number of points, + stop looking once we hit it */ + if (count != -1) count--; + } + + /* see if we encountered an error */ + if (err && err != REG_NOMATCH) { + php3_error(E_WARNING, "unexpected regex error (%d)", err); + regfree(&re); + _php3_hash_destroy(return_value->value.ht); + efree(return_value->value.ht); + RETURN_FALSE; + } + + /* otherwise we just have one last element to add to the array */ + size = endp - strp; + + add_next_index_stringl(return_value, strp, size, 1); + + regfree(&re); + + return; +} +/* }}} */ + +/* {{{ proto string sql_regcase(string string) + Make regular expression for case insensitive match */ +PHPAPI void php3_sql_regcase(INTERNAL_FUNCTION_PARAMETERS) +{ + pval *string; + char *tmp; + register int i; + + if (ARG_COUNT(ht)!=1 || getParameters(ht, 1, &string)==FAILURE) { + WRONG_PARAM_COUNT; + } + + convert_to_string(string); + + tmp = (char *) emalloc(string->value.str.len*4+1); + + for (i=0; i<string->value.str.len; i++) { + tmp[i*4] = '['; + tmp[i*4+1]=toupper((unsigned char)string->value.str.val[i]); + tmp[i*4+2]=tolower((unsigned char)string->value.str.val[i]); + tmp[i*4+3]=']'; + } + tmp[string->value.str.len*4]=0; + + return_value->value.str.val = tmp; + return_value->value.str.len = string->value.str.len*4; + return_value->type = IS_STRING; +} +/* }}} */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + */ |