diff options
author | Dmitry Stogov <dmitry@zend.com> | 2014-09-11 11:06:57 +0400 |
---|---|---|
committer | Dmitry Stogov <dmitry@zend.com> | 2014-09-11 11:06:57 +0400 |
commit | 1981dd9c73ada7ea893b4636b84211f68838ba23 (patch) | |
tree | 10685f35448e5afd4b970ccebf5fa9ce01953e48 | |
parent | 98b8050433d25cc9c173360e01272836acd7307f (diff) | |
download | php-git-1981dd9c73ada7ea893b4636b84211f68838ba23.tar.gz |
Revert "Removing ext/ereg and dependencies"
This reverts commit 86de7963fe69638431c0aa8f413d25e01bf99d68.
122 files changed, 15068 insertions, 40 deletions
diff --git a/ext/ereg/CREDITS b/ext/ereg/CREDITS new file mode 100644 index 0000000000..9cd0ac2d1b --- /dev/null +++ b/ext/ereg/CREDITS @@ -0,0 +1,2 @@ +ereg +Rasmus Lerdorf, Jim Winstead, Jaakko Hyvätti diff --git a/ext/ereg/config.w32 b/ext/ereg/config.w32 new file mode 100644 index 0000000000..18a002b44d --- /dev/null +++ b/ext/ereg/config.w32 @@ -0,0 +1,12 @@ +// $Id$ +// vim:ft=javascript + +ARG_WITH("ereg", "POSIX extended regular expressions", "yes"); +if (PHP_EREG != "no") { + + EXTENSION("ereg", "ereg.c", PHP_EREG_SHARED, "-Dregexec=php_regexec -Dregerror=php_regerror -Dregfree=php_regfree -Dregcomp=php_regcomp -Iext/ereg/regex"); + ADD_SOURCES("ext/ereg/regex", "regcomp.c regexec.c regerror.c regfree.c", "ereg"); + AC_DEFINE('REGEX', 1, 'Bundled regex'); + AC_DEFINE('HSREGEX', 1, 'Bundled regex'); + PHP_INSTALL_HEADERS("ext/ereg", "php_ereg.h php_regex.h regex/"); +} diff --git a/ext/ereg/config0.m4 b/ext/ereg/config0.m4 new file mode 100644 index 0000000000..caec39d285 --- /dev/null +++ b/ext/ereg/config0.m4 @@ -0,0 +1,56 @@ +dnl $Id$ +dnl config.m4 for extension ereg + +dnl +dnl Check for regex library type +dnl +PHP_ARG_WITH(regex,, +[ --with-regex=TYPE Regex library type: system, php. [TYPE=php] + WARNING: Do NOT use unless you know what you are doing!], php, no) + +case $PHP_REGEX in + system) + if test "$PHP_SAPI" = "apache" || test "$PHP_SAPI" = "apache2filter" || test "$PHP_SAPI" = "apache2handler"; then + REGEX_TYPE=php + else + REGEX_TYPE=system + fi + ;; + yes | php) + REGEX_TYPE=php + ;; + *) + REGEX_TYPE=php + AC_MSG_WARN([Invalid regex library type selected. 
Using default value: php]) + ;; +esac + +AC_MSG_CHECKING([which regex library to use]) +AC_MSG_RESULT([$REGEX_TYPE]) + +if test "$REGEX_TYPE" = "php"; then + ereg_regex_sources="regex/regcomp.c regex/regexec.c regex/regerror.c regex/regfree.c" + ereg_regex_headers="regex/" + PHP_EREG_CFLAGS="-Dregexec=php_regexec -Dregerror=php_regerror -Dregfree=php_regfree -Dregcomp=php_regcomp" +fi + +PHP_NEW_EXTENSION(ereg, ereg.c $ereg_regex_sources, no,,$PHP_EREG_CFLAGS) +PHP_INSTALL_HEADERS([ext/ereg], [php_ereg.h php_regex.h $ereg_regex_headers]) + +if test "$REGEX_TYPE" = "php"; then + AC_DEFINE(HAVE_REGEX_T_RE_MAGIC, 1, [ ]) + AC_DEFINE(HSREGEX,1,[ ]) + AC_DEFINE(REGEX,1,[ ]) + PHP_ADD_BUILD_DIR([$ext_builddir/regex], 1) + PHP_ADD_INCLUDE([$ext_srcdir/regex]) +elif test "$REGEX_TYPE" = "system"; then + AC_DEFINE(REGEX,0,[ ]) + dnl Check if field re_magic exists in struct regex_t + AC_CACHE_CHECK([whether field re_magic exists in struct regex_t], ac_cv_regex_t_re_magic, [ + AC_TRY_COMPILE([#include <sys/types.h> +#include <regex.h>], [regex_t rt; rt.re_magic;], + [ac_cv_regex_t_re_magic=yes], [ac_cv_regex_t_re_magic=no])]) + if test "$ac_cv_regex_t_re_magic" = "yes"; then + AC_DEFINE([HAVE_REGEX_T_RE_MAGIC], [ ], 1) + fi +fi diff --git a/ext/ereg/ereg.c b/ext/ereg/ereg.c new file mode 100644 index 0000000000..c1d92ba4e7 --- /dev/null +++ b/ext/ereg/ereg.c @@ -0,0 +1,771 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2014 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | 
+ | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Rasmus Lerdorf <rasmus@php.net> | + | Jim Winstead <jimw@php.net> | + | Jaakko Hyvätti <jaakko@hyvatti.iki.fi> | + +----------------------------------------------------------------------+ + */ +/* $Id$ */ + +#include <stdio.h> +#include <ctype.h> +#include "php.h" +#include "ext/standard/php_string.h" +#include "php_ereg.h" +#include "ext/standard/info.h" + +/* {{{ arginfo */ +ZEND_BEGIN_ARG_INFO_EX(arginfo_ereg, 0, 0, 2) + ZEND_ARG_INFO(0, pattern) + ZEND_ARG_INFO(0, string) + ZEND_ARG_INFO(1, registers) /* ARRAY_INFO(1, registers, 1) */ +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO(arginfo_ereg_replace, 0) + ZEND_ARG_INFO(0, pattern) + ZEND_ARG_INFO(0, replacement) + ZEND_ARG_INFO(0, string) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_split, 0, 0, 2) + ZEND_ARG_INFO(0, pattern) + ZEND_ARG_INFO(0, string) + ZEND_ARG_INFO(0, limit) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO(arginfo_sql_regcase, 0) + ZEND_ARG_INFO(0, string) +ZEND_END_ARG_INFO() +/* }}} */ + +/* {{{ Function table */ +const zend_function_entry ereg_functions[] = { + PHP_DEP_FE(ereg, arginfo_ereg) + PHP_DEP_FE(ereg_replace, arginfo_ereg_replace) + PHP_DEP_FE(eregi, arginfo_ereg) + PHP_DEP_FE(eregi_replace, arginfo_ereg_replace) + PHP_DEP_FE(split, arginfo_split) + PHP_DEP_FE(spliti, arginfo_split) + PHP_DEP_FE(sql_regcase, arginfo_sql_regcase) + PHP_FE_END +}; +/* }}} */ + +/* {{{ reg_cache */ +typedef struct { + regex_t preg; + int cflags; + unsigned long lastuse; +} reg_cache; +static int reg_magic = 0; +#define EREG_CACHE_SIZE 4096 +/* }}} */ + +ZEND_DECLARE_MODULE_GLOBALS(ereg) +static PHP_GINIT_FUNCTION(ereg); +static PHP_GSHUTDOWN_FUNCTION(ereg); + +/* {{{ Module entry */ +zend_module_entry ereg_module_entry = { + STANDARD_MODULE_HEADER, + "ereg", + ereg_functions, + NULL, 
+ NULL, + NULL, + NULL, + PHP_MINFO(ereg), + NO_VERSION_YET, + PHP_MODULE_GLOBALS(ereg), + PHP_GINIT(ereg), + PHP_GSHUTDOWN(ereg), + NULL, + STANDARD_MODULE_PROPERTIES_EX +}; +/* }}} */ + +/* {{{ COMPILE_DL_EREG */ +#ifdef COMPILE_DL_EREG +ZEND_GET_MODULE(ereg) +#endif +/* }}} */ + +/* {{{ ereg_lru_cmp */ +static int ereg_lru_cmp(const void *a, const void *b TSRMLS_DC) +{ + Bucket *f = (Bucket *) a; + Bucket *s = (Bucket *) b; + + if (((reg_cache *)Z_PTR(f->val))->lastuse < + ((reg_cache *)Z_PTR(s->val))->lastuse) { + return -1; + } else if (((reg_cache *)Z_PTR(f->val))->lastuse == + ((reg_cache *)Z_PTR(s->val))->lastuse) { + return 0; + } else { + return 1; + } +} +/* }}} */ + +/* {{{ static ereg_clean_cache */ +static int ereg_clean_cache(zval *data, void *arg TSRMLS_DC) +{ + int *num_clean = (int *)arg; + + if (*num_clean > 0) { + (*num_clean)--; + return ZEND_HASH_APPLY_REMOVE; + } else { + return ZEND_HASH_APPLY_STOP; + } +} +/* }}} */ + +/* {{{ _php_regcomp + */ +static int _php_regcomp(regex_t *preg, const char *pattern, int cflags TSRMLS_DC) +{ + int r = 0; + int patlen = strlen(pattern); + reg_cache *rc = NULL; + + if (zend_hash_num_elements(&EREG(ht_rc)) >= EREG_CACHE_SIZE) { + /* easier than dealing with overflow as it happens */ + if (EREG(lru_counter) >= (1 << 31) || zend_hash_sort(&EREG(ht_rc), zend_qsort, ereg_lru_cmp, 0 TSRMLS_CC) == FAILURE) { + zend_hash_clean(&EREG(ht_rc)); + EREG(lru_counter) = 0; + } else { + int num_clean = EREG_CACHE_SIZE / 4; + zend_hash_apply_with_argument(&EREG(ht_rc), ereg_clean_cache, &num_clean TSRMLS_CC); + } + } + + rc = zend_hash_str_find_ptr(&EREG(ht_rc), pattern, patlen); + if (rc + && rc->cflags == cflags) { +#ifdef HAVE_REGEX_T_RE_MAGIC + /* + * We use a saved magic number to see whether cache is corrupted, and if it + * is, we flush it and compile the pattern from scratch. 
+ */ + if (rc->preg.re_magic != reg_magic) { + zend_hash_clean(&EREG(ht_rc)); + EREG(lru_counter) = 0; + } else { + memcpy(preg, &rc->preg, sizeof(*preg)); + return r; + } + } + + r = regcomp(preg, pattern, cflags); + if(!r) { + reg_cache rcp; + + rcp.cflags = cflags; + rcp.lastuse = ++(EREG(lru_counter)); + memcpy(&rcp.preg, preg, sizeof(*preg)); + /* + * Since we don't have access to the actual MAGIC1 definition in the private + * header file, we save the magic value immediately after compilation. Hopefully, + * it's good. + */ + if (!reg_magic) reg_magic = preg->re_magic; + zend_hash_str_update_mem(&EREG(ht_rc), pattern, patlen, + &rcp, sizeof(rcp)); + } +#else + memcpy(preg, &rc->preg, sizeof(*preg)); + } else { + r = regcomp(preg, pattern, cflags); + if(!r) { + reg_cache rcp; + + rcp.cflags = cflags; + rcp.lastuse = ++(EREG(lru_counter)); + memcpy(&rcp.preg, preg, sizeof(*preg)); + zend_hash_str_update_mem(&EREG(ht_rc), pattern, patlen, + &rcp, sizeof(rcp)); + } + } +#endif + return r; +} +/* }}} */ + +static void _free_ereg_cache(zval *zv) +{ + reg_cache *rc = Z_PTR_P(zv); + regfree(&rc->preg); + free(rc); +} + +#undef regfree +#define regfree(a); +#undef regcomp +#define regcomp(a, b, c) _php_regcomp(a, b, c TSRMLS_CC) + +/* {{{ PHP_GINIT_FUNCTION + */ +static PHP_GINIT_FUNCTION(ereg) +{ + zend_hash_init(&ereg_globals->ht_rc, 0, NULL, _free_ereg_cache, 1); + ereg_globals->lru_counter = 0; +} +/* }}} */ + +/* {{{ PHP_GSHUTDOWN_FUNCTION + */ +static PHP_GSHUTDOWN_FUNCTION(ereg) +{ + zend_hash_destroy(&ereg_globals->ht_rc); +} +/* }}} */ + +PHP_MINFO_FUNCTION(ereg) +{ + php_info_print_table_start(); +#if HSREGEX + php_info_print_table_row(2, "Regex Library", "Bundled library enabled"); +#else + php_info_print_table_row(2, "Regex Library", "System library enabled"); +#endif + php_info_print_table_end(); +} + + +/* {{{ php_ereg_eprint + * php_ereg_eprint - convert error number to name + */ +static void php_ereg_eprint(int err, regex_t *re TSRMLS_DC) { + char *buf 
= NULL, *message = NULL; + size_t len; + size_t buf_len; + +#ifdef REG_ITOA + /* get the length of the message */ + buf_len = regerror(REG_ITOA | err, re, NULL, 0); + if (buf_len) { + buf = (char *)safe_emalloc(buf_len, sizeof(char), 0); + if (!buf) return; /* fail silently */ + /* finally, get the error message */ + regerror(REG_ITOA | err, re, buf, buf_len); + } +#else + buf_len = 0; +#endif + len = regerror(err, re, NULL, 0); + if (len) { + message = (char *)safe_emalloc((buf_len + len + 2), sizeof(char), 0); + if (!message) { + return; /* fail silently */ + } + if (buf_len) { + snprintf(message, buf_len, "%s: ", buf); + buf_len += 1; /* so pointer math below works */ + } + /* drop the message into place */ + regerror(err, re, message + buf_len, len); + + php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", message); + } + + if (buf) efree(buf); + if (message) efree(message); +} +/* }}} */ + +/* {{{ php_ereg + */ +static void php_ereg(INTERNAL_FUNCTION_PARAMETERS, int icase) +{ + zval *regex, /* Regular expression */ + *array = NULL; /* Optional register array */ + char *findin; /* String to apply expression to */ + size_t findin_len; + regex_t re; + regmatch_t *subs; + int err, match_len, string_len; + uint i; + int copts = 0; + off_t start, end; + char *buf = NULL; + char *string = NULL; + int argc = ZEND_NUM_ARGS(); + + if (zend_parse_parameters(argc TSRMLS_CC, "zs|z/", &regex, &findin, &findin_len, &array) == FAILURE) { + return; + } + + if (icase) { + copts |= REG_ICASE; + } + + if (argc == 2) { + copts |= REG_NOSUB; + } + + /* compile the regular expression from the supplied regex */ + if (Z_TYPE_P(regex) == IS_STRING) { + err = regcomp(&re, Z_STRVAL_P(regex), REG_EXTENDED | copts); + } else { + /* we convert numbers to integers and treat them as a string */ + if (Z_TYPE_P(regex) == IS_DOUBLE) { + convert_to_long_ex(regex); /* get rid of decimal places */ + } + convert_to_string_ex(regex); + /* don't bother doing an extended regex with just a number */ + err = 
regcomp(&re, Z_STRVAL_P(regex), copts); + } + + if (err) { + php_ereg_eprint(err, &re TSRMLS_CC); + RETURN_FALSE; + } + + /* make a copy of the string we're looking in */ + string = estrndup(findin, findin_len); + + /* allocate storage for (sub-)expression-matches */ + subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1); + + /* actually execute the regular expression */ + err = regexec(&re, string, re.re_nsub+1, subs, 0); + if (err && err != REG_NOMATCH) { + php_ereg_eprint(err, &re TSRMLS_CC); + regfree(&re); + efree(subs); + RETURN_FALSE; + } + match_len = 1; + + if (array && err != REG_NOMATCH) { + match_len = (int) (subs[0].rm_eo - subs[0].rm_so); + string_len = findin_len + 1; + + buf = emalloc(string_len); + + zval_dtor(array); /* start with clean array */ + array_init(array); + + for (i = 0; i <= re.re_nsub; i++) { + start = subs[i].rm_so; + end = subs[i].rm_eo; + if (start != -1 && end > 0 && start < string_len && end < string_len && start < end) { + add_index_stringl(array, i, string+start, end-start); + } else { + add_index_bool(array, i, 0); + } + } + efree(buf); + } + + efree(subs); + efree(string); + if (err == REG_NOMATCH) { + RETVAL_FALSE; + } else { + if (match_len == 0) + match_len = 1; + RETVAL_LONG(match_len); + } + regfree(&re); +} +/* }}} */ + +/* {{{ proto int ereg(string pattern, string string [, array registers]) + Regular expression match */ +PHP_FUNCTION(ereg) +{ + php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); +} +/* }}} */ + +/* {{{ proto int eregi(string pattern, string string [, array registers]) + Case-insensitive regular expression match */ +PHP_FUNCTION(eregi) +{ + php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); +} +/* }}} */ + +/* {{{ php_ereg_replace + * this is the meat and potatoes of regex replacement! 
*/ +PHP_EREG_API char *php_ereg_replace(const char *pattern, const char *replace, const char *string, int icase, int extended TSRMLS_DC) +{ + regex_t re; + regmatch_t *subs; + + char *buf, /* buf is where we build the replaced string */ + *nbuf, /* nbuf is used when we grow the buffer */ + *walkbuf; /* used to walk buf when replacing backrefs */ + const char *walk; /* used to walk replacement string for backrefs */ + int buf_len; + int pos, tmp, string_len, new_l; + int err, copts = 0; + + string_len = strlen(string); + + if (icase) { + copts = REG_ICASE; + } + if (extended) { + copts |= REG_EXTENDED; + } + + err = regcomp(&re, pattern, copts); + if (err) { + php_ereg_eprint(err, &re TSRMLS_CC); + return ((char *) -1); + } + + + /* allocate storage for (sub-)expression-matches */ + subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1); + + /* start with a buffer that is twice the size of the stringo + we're doing replacements in */ + buf_len = 2 * string_len + 1; + buf = safe_emalloc(buf_len, sizeof(char), 0); + + err = pos = 0; + buf[0] = '\0'; + while (!err) { + err = regexec(&re, &string[pos], re.re_nsub+1, subs, (pos ? 
REG_NOTBOL : 0)); + + if (err && err != REG_NOMATCH) { + php_ereg_eprint(err, &re TSRMLS_CC); + efree(subs); + efree(buf); + regfree(&re); + return ((char *) -1); + } + + if (!err) { + /* backref replacement is done in two passes: + 1) find out how long the string will be, and allocate buf + 2) copy the part before match, replacement and backrefs to buf + + Jaakko Hyvätti <Jaakko.Hyvatti@iki.fi> + */ + + new_l = strlen(buf) + subs[0].rm_so; /* part before the match */ + walk = replace; + while (*walk) { + if ('\\' == *walk && isdigit((unsigned char)walk[1]) && ((unsigned char)walk[1]) - '0' <= (int)re.re_nsub) { + if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1) { + new_l += subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so; + } + walk += 2; + } else { + new_l++; + walk++; + } + } + if (new_l + 1 > buf_len) { + buf_len = 1 + buf_len + 2 * new_l; + nbuf = emalloc(buf_len); + strncpy(nbuf, buf, buf_len - 1); + nbuf[buf_len - 1] = '\0'; + efree(buf); + buf = nbuf; + } + tmp = strlen(buf); + /* copy the part of the string before the match */ + strncat(buf, &string[pos], subs[0].rm_so); + + /* copy replacement and backrefs */ + walkbuf = &buf[tmp + subs[0].rm_so]; + walk = replace; + while (*walk) { + if ('\\' == *walk && isdigit((unsigned char)walk[1]) && (unsigned char)walk[1] - '0' <= (int)re.re_nsub) { + if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1 + /* this next case shouldn't happen. it does. 
*/ + && subs[walk[1] - '0'].rm_so <= subs[walk[1] - '0'].rm_eo) { + + tmp = subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so; + memcpy (walkbuf, &string[pos + subs[walk[1] - '0'].rm_so], tmp); + walkbuf += tmp; + } + walk += 2; + } else { + *walkbuf++ = *walk++; + } + } + *walkbuf = '\0'; + + /* and get ready to keep looking for replacements */ + if (subs[0].rm_so == subs[0].rm_eo) { + if (subs[0].rm_so + pos >= string_len) { + break; + } + new_l = strlen (buf) + 1; + if (new_l + 1 > buf_len) { + buf_len = 1 + buf_len + 2 * new_l; + nbuf = safe_emalloc(buf_len, sizeof(char), 0); + strncpy(nbuf, buf, buf_len-1); + efree(buf); + buf = nbuf; + } + pos += subs[0].rm_eo + 1; + buf [new_l-1] = string [pos-1]; + buf [new_l] = '\0'; + } else { + pos += subs[0].rm_eo; + } + } else { /* REG_NOMATCH */ + new_l = strlen(buf) + strlen(&string[pos]); + if (new_l + 1 > buf_len) { + buf_len = new_l + 1; /* now we know exactly how long it is */ + nbuf = safe_emalloc(buf_len, sizeof(char), 0); + strncpy(nbuf, buf, buf_len-1); + efree(buf); + buf = nbuf; + } + /* stick that last bit of string on our output */ + strlcat(buf, &string[pos], buf_len); + } + } + + /* don't want to leak memory .. */ + efree(subs); + regfree(&re); + + /* whew. 
*/ + return (buf); +} +/* }}} */ + +/* {{{ php_do_ereg_replace + */ +static void php_do_ereg_replace(INTERNAL_FUNCTION_PARAMETERS, int icase) +{ + zval *arg_pattern, + *arg_replace; + zend_string *pattern, *arg_string; + zend_string *string; + zend_string *replace; + char *ret; + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zzS", &arg_pattern, &arg_replace, &arg_string) == FAILURE) { + return; + } + + if (Z_TYPE_P(arg_pattern) == IS_STRING) { + if (Z_STRVAL_P(arg_pattern) && Z_STRLEN_P(arg_pattern)) { + pattern = zend_string_copy(Z_STR_P(arg_pattern)); + } else { + pattern = STR_EMPTY_ALLOC(); + } + } else { + convert_to_long_ex(arg_pattern); + pattern = zend_string_alloc(1, 0); + pattern->val[0] = (char) Z_LVAL_P(arg_pattern); + pattern->val[1] = '\0'; + } + + if (Z_TYPE_P(arg_replace) == IS_STRING) { + if (Z_STRVAL_P(arg_replace) && Z_STRLEN_P(arg_replace)) { + replace = zend_string_copy(Z_STR_P(arg_replace)); + } else { + replace = STR_EMPTY_ALLOC(); + } + } else { + convert_to_long_ex(arg_replace); + replace = zend_string_alloc(1, 0); + replace->val[0] = (char) Z_LVAL_P(arg_replace); + replace->val[1] = '\0'; + } + + if (arg_string) { + string = zend_string_copy(arg_string); + } else { + string = STR_EMPTY_ALLOC(); + } + + /* do the actual work */ + ret = php_ereg_replace(pattern->val, replace->val, string->val, icase, 1 TSRMLS_CC); + if (ret == (char *) -1) { + RETVAL_FALSE; + } else { + RETVAL_STRING(ret); + efree(ret); + } + + zend_string_release(string); + zend_string_release(replace); + zend_string_release(pattern); +} +/* }}} */ + +/* {{{ proto string ereg_replace(string pattern, string replacement, string string) + Replace regular expression */ +PHP_FUNCTION(ereg_replace) +{ + php_do_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); +} +/* }}} */ + +/* {{{ proto string eregi_replace(string pattern, string replacement, string string) + Case insensitive replace regular expression */ +PHP_FUNCTION(eregi_replace) +{ + 
php_do_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); +} +/* }}} */ + +/* {{{ php_split + */ +static void php_split(INTERNAL_FUNCTION_PARAMETERS, int icase) +{ + zend_long count = -1; + regex_t re; + regmatch_t subs[1]; + char *spliton, *str, *strp, *endp; + size_t spliton_len, str_len; + int err, size, copts = 0; + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &spliton, &spliton_len, &str, &str_len, &count) == FAILURE) { + return; + } + + if (icase) { + copts = REG_ICASE; + } + + strp = str; + endp = strp + str_len; + + err = regcomp(&re, spliton, REG_EXTENDED | copts); + if (err) { + php_ereg_eprint(err, &re TSRMLS_CC); + RETURN_FALSE; + } + + array_init(return_value); + + /* churn through str, generating array entries as we go */ + while ((count == -1 || count > 1) && !(err = regexec(&re, strp, 1, subs, 0))) { + if (subs[0].rm_so == 0 && subs[0].rm_eo) { + /* match is at start of string, return empty string */ + add_next_index_stringl(return_value, "", 0); + /* skip ahead the length of the regex match */ + strp += subs[0].rm_eo; + } else if (subs[0].rm_so == 0 && subs[0].rm_eo == 0) { + /* No more matches */ + regfree(&re); + + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid Regular Expression"); + + zend_hash_destroy(Z_ARRVAL_P(return_value)); + efree(Z_ARR_P(return_value)); + RETURN_FALSE; + } else { + /* On a real match */ + + /* make a copy of the substring */ + size = subs[0].rm_so; + + /* add it to the array */ + add_next_index_stringl(return_value, strp, size); + + /* point at our new starting point */ + strp = strp + subs[0].rm_eo; + } + + /* if we're only looking for a certain number of points, + stop looking once we hit it */ + if (count != -1) { + count--; + } + } + + /* see if we encountered an error */ + if (err && err != REG_NOMATCH) { + php_ereg_eprint(err, &re TSRMLS_CC); + regfree(&re); + zend_hash_destroy(Z_ARRVAL_P(return_value)); + efree(Z_ARR_P(return_value)); + RETURN_FALSE; + } + + /* otherwise we just have one 
last element to add to the array */ + size = endp - strp; + + add_next_index_stringl(return_value, strp, size); + + regfree(&re); +} +/* }}} */ + +/* {{{ proto array split(string pattern, string string [, int limit]) + Split string into array by regular expression */ +PHP_FUNCTION(split) +{ + php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); +} +/* }}} */ + +/* {{{ proto array spliti(string pattern, string string [, int limit]) + Split string into array by regular expression case-insensitive */ + +PHP_FUNCTION(spliti) +{ + php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); +} + +/* }}} */ + +/* {{{ proto string sql_regcase(string string) + Make regular expression for case insensitive match */ +PHP_EREG_API PHP_FUNCTION(sql_regcase) +{ + char *string, *tmp; + size_t string_len; + unsigned char c; + register int i, j; + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &string, &string_len) == FAILURE) { + return; + } + + tmp = safe_emalloc(string_len, 4, 1); + + for (i = j = 0; i < string_len; i++) { + c = (unsigned char) string[i]; + if (isalpha(c)) { + tmp[j++] = '['; + tmp[j++] = toupper(c); + tmp[j++] = tolower(c); + tmp[j++] = ']'; + } else { + tmp[j++] = c; + } + } + tmp[j] = 0; + + RETVAL_STRINGL(tmp, j); + efree(tmp); +} +/* }}} */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ diff --git a/ext/ereg/php_ereg.h b/ext/ereg/php_ereg.h new file mode 100644 index 0000000000..d34d19855f --- /dev/null +++ b/ext/ereg/php_ereg.h @@ -0,0 +1,64 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2014 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and 
is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: Rasmus Lerdorf <rasmus@lerdorf.on.ca> | + +----------------------------------------------------------------------+ +*/ + + +/* $Id$ */ + +#ifndef EREG_H +#define EREG_H + +#include "php_regex.h" + +extern zend_module_entry ereg_module_entry; +#define phpext_ereg_ptr &ereg_module_entry + +#ifdef PHP_WIN32 +# define PHP_EREG_API __declspec(dllexport) +#elif defined(__GNUC__) && __GNUC__ >= 4 +# define PHP_EREG_API __attribute__ ((visibility("default"))) +#else +# define PHP_EREG_API +#endif + +PHP_EREG_API char *php_ereg_replace(const char *pattern, const char *replace, const char *string, int icase, int extended TSRMLS_DC); + +PHP_FUNCTION(ereg); +PHP_FUNCTION(eregi); +PHP_FUNCTION(eregi_replace); +PHP_FUNCTION(ereg_replace); +PHP_FUNCTION(split); +PHP_FUNCTION(spliti); +PHP_EREG_API PHP_FUNCTION(sql_regcase); + +ZEND_BEGIN_MODULE_GLOBALS(ereg) + HashTable ht_rc; + unsigned int lru_counter; +ZEND_END_MODULE_GLOBALS(ereg) + +/* Module functions */ +PHP_MINFO_FUNCTION(ereg); + +#ifdef ZTS +#define EREG(v) TSRMG(ereg_globals_id, zend_ereg_globals *, v) +#else +#define EREG(v) (ereg_globals.v) +#endif + +ZEND_EXTERN_MODULE_GLOBALS(ereg) + +#endif /* REG_H */ diff --git a/ext/ereg/php_regex.h b/ext/ereg/php_regex.h new file mode 100644 index 0000000000..bb83b89fc9 --- /dev/null +++ b/ext/ereg/php_regex.h @@ -0,0 +1,65 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2014 The PHP Group | + 
+----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: | + +----------------------------------------------------------------------+ +*/ + +/* $Id$ */ + +#ifndef PHP_REGEX_H +#define PHP_REGEX_H + +/* + * REGEX means: + * 0.. system regex + * 1.. bundled regex + */ + +#if (REGEX == 1) +/* Define aliases */ +#define regexec php_regexec +#define regerror php_regerror +#define regfree php_regfree +#define regcomp php_regcomp + +#include "ext/ereg/regex/regex.h" + +#undef _PCREPOSIX_H +#define _PCREPOSIX_H 1 + +#ifndef _REGEX_H +#define _REGEX_H 1 /* this should stop Apache from loading the system version of regex.h */ +#endif +#ifndef _REGEX_H_ +#define _REGEX_H_ 1 +#endif +#ifndef _RX_H +#define _RX_H 1 /* Try defining these for Linux to */ +#endif +#ifndef __REGEXP_LIBRARY_H__ +#define __REGEXP_LIBRARY_H__ 1 /* avoid Apache including regex.h */ +#endif +#ifndef _H_REGEX +#define _H_REGEX 1 /* This one is for AIX */ +#endif + +#elif REGEX == 0 +#include <regex.h> +#ifndef _REGEX_H_ +#define _REGEX_H_ 1 +#endif +#endif + +#endif /* PHP_REGEX_H */ diff --git a/ext/ereg/regex.patch b/ext/ereg/regex.patch new file mode 100644 index 0000000000..864e6bb6d8 --- /dev/null +++ b/ext/ereg/regex.patch @@ -0,0 +1,72 @@ +diff -u regex.orig/regerror.c regex/regerror.c +--- regex.orig/regerror.c 2011-08-09 19:49:30.000000000 +0800 ++++ regex/regerror.c 2011-08-12 10:45:57.000000000 +0800 +@@ -8,6 +8,7 @@ + #include "regex.h" + #include 
"utils.h" + #include "regerror.ih" ++#include "php.h" + + /* + = #define REG_OKAY 0 +@@ -74,17 +75,19 @@ + char convbuf[50]; + + if (errcode == REG_ATOI) +- s = regatoi(preg, convbuf); ++ s = regatoi(preg, convbuf, sizeof(convbuf)); + else { + for (r = rerrs; r->code >= 0; r++) + if (r->code == target) + break; + + if (errcode&REG_ITOA) { +- if (r->code >= 0) +- (void) strcpy(convbuf, r->name); +- else +- sprintf(convbuf, "REG_0x%x", target); ++ if (r->code >= 0) { ++ (void) strncpy(convbuf, r->name, sizeof(convbuf) - 1); ++ convbuf[sizeof(convbuf) - 1] = '\0'; ++ } else { ++ snprintf(convbuf, sizeof(convbuf), "REG_0x%x", target); ++ } + assert(strlen(convbuf) < sizeof(convbuf)); + s = convbuf; + } else +@@ -106,12 +109,13 @@ + + /* + - regatoi - internal routine to implement REG_ATOI +- == static char *regatoi(const regex_t *preg, char *localbuf); ++ == static char *regatoi(const regex_t *preg, char *localbuf, int bufsize); + */ + static char * +-regatoi(preg, localbuf) ++regatoi(preg, localbuf, bufsize) + const regex_t *preg; + char *localbuf; ++int bufsize; + { + register const struct rerr *r; + +@@ -121,6 +125,6 @@ + if (r->code < 0) + return("0"); + +- sprintf(localbuf, "%d", r->code); ++ snprintf(localbuf, bufsize, "%d", r->code); + return(localbuf); + } +diff -u regex.orig/regerror.ih regex/regerror.ih +--- regex.orig/regerror.ih 2011-08-09 19:49:00.000000000 +0800 ++++ regex/regerror.ih 2011-08-09 19:41:07.000000000 +0800 +@@ -4,7 +4,7 @@ + #endif + + /* === regerror.c === */ +-static char *regatoi(const regex_t *preg, char *localbuf); ++static char *regatoi(const regex_t *preg, char *localbuf, int bufsize); + + #ifdef __cplusplus + } diff --git a/ext/ereg/regex/COPYRIGHT b/ext/ereg/regex/COPYRIGHT new file mode 100644 index 0000000000..d43362fbfc --- /dev/null +++ b/ext/ereg/regex/COPYRIGHT @@ -0,0 +1,20 @@ +Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved. 
+This software is not subject to any license of the American Telephone +and Telegraph Company or of the Regents of the University of California. + +Permission is granted to anyone to use this software for any purpose on +any computer system, and to alter it and redistribute it, subject +to the following restrictions: + +1. The author is not responsible for the consequences of use of this + software, no matter how awful, even if they arise from flaws in it. + +2. The origin of this software must not be misrepresented, either by + explicit claim or by omission. Since few users ever read sources, + credits must appear in the documentation. + +3. Altered versions must be plainly marked as such, and must not be + misrepresented as being the original software. Since few users + ever read sources, credits must appear in the documentation. + +4. This notice may not be removed or altered. diff --git a/ext/ereg/regex/README b/ext/ereg/regex/README new file mode 100644 index 0000000000..cea9b67b66 --- /dev/null +++ b/ext/ereg/regex/README @@ -0,0 +1,32 @@ +alpha3.4 release. +Thu Mar 17 23:17:18 EST 1994 +henry@zoo.toronto.edu + +See WHATSNEW for change listing. + +installation notes: +-------- +Read the comments at the beginning of Makefile before running. + +Utils.h contains some things that just might have to be modified on +some systems, as well as a nested include (ugh) of <assert.h>. + +The "fake" directory contains quick-and-dirty fakes for some header +files and routines that old systems may not have. Note also that +-DUSEBCOPY will make utils.h substitute bcopy() for memmove(). + +After that, "make r" will build regcomp.o, regexec.o, regfree.o, +and regerror.o (the actual routines), bundle them together into a test +program, and run regression tests on them. No output is good output. + +"make lib" builds just the .o files for the actual routines (when +you're happy with testing and have adjusted CFLAGS for production), +and puts them together into libregex.a. 
You can pick up either the +library or *.o ("make lib" makes sure there are no other .o files left +around to confuse things). + +Main.c, debug.c, split.c are used for regression testing but are not part +of the RE routines themselves. + +Regex.h goes in /usr/include. All other .h files are internal only. +-------- diff --git a/ext/ereg/regex/WHATSNEW b/ext/ereg/regex/WHATSNEW new file mode 100644 index 0000000000..6e82e1dae0 --- /dev/null +++ b/ext/ereg/regex/WHATSNEW @@ -0,0 +1,92 @@ +New in alpha3.4: The complex bug alluded to below has been fixed (in a +slightly kludgey temporary way that may hurt efficiency a bit; this is +another "get it out the door for 4.4" release). The tests at the end of +the tests file have accordingly been uncommented. The primary sign of +the bug was that something like a?b matching ab matched b rather than ab. +(The bug was essentially specific to this exact situation, else it would +have shown up earlier.) + +New in alpha3.3: The definition of word boundaries has been altered +slightly, to more closely match the usual programming notion that "_" +is an alphabetic. Stuff used for pre-ANSI systems is now in a subdir, +and the makefile no longer alludes to it in mysterious ways. The +makefile has generally been cleaned up some. Fixes have been made +(again!) so that the regression test will run without -DREDEBUG, at +the cost of weaker checking. A workaround for a bug in some folks' +<assert.h> has been added. And some more things have been added to +tests, including a couple right at the end which are commented out +because the code currently flunks them (complex bug; fix coming). +Plus the usual minor cleanup. + +New in alpha3.2: Assorted bits of cleanup and portability improvement +(the development base is now a BSDI system using GCC instead of an ancient +Sun system, and the newer compiler exposed some glitches). 
Fix for a +serious bug that affected REs using many [] (including REG_ICASE REs +because of the way they are implemented), *sometimes*, depending on +memory-allocation patterns. The header-file prototypes no longer name +the parameters, avoiding possible name conflicts. The possibility that +some clot has defined CHAR_MIN as (say) `-128' instead of `(-128)' is +now handled gracefully. "uchar" is no longer used as an internal type +name (too many people have the same idea). Still the same old lousy +performance, alas. + +New in alpha3.1: Basically nothing, this release is just a bookkeeping +convenience. Stay tuned. + +New in alpha3.0: Performance is no better, alas, but some fixes have been +made and some functionality has been added. (This is basically the "get +it out the door in time for 4.4" release.) One bug fix: regfree() didn't +free the main internal structure (how embarrassing). It is now possible +to put NULs in either the RE or the target string, using (resp.) a new +REG_PEND flag and the old REG_STARTEND flag. The REG_NOSPEC flag to +regcomp() makes all characters ordinary, so you can match a literal +string easily (this will become more useful when performance improves!). +There are now primitives to match beginnings and ends of words, although +the syntax is disgusting and so is the implementation. The REG_ATOI +debugging interface has changed a bit. And there has been considerable +internal cleanup of various kinds. + +New in alpha2.3: Split change list out of README, and moved flags notes +into Makefile. Macro-ized the name of regex(7) in regex(3), since it has +to change for 4.4BSD. Cleanup work in engine.c, and some new regression +tests to catch tricky cases thereof. + +New in alpha2.2: Out-of-date manpages updated. Regerror() acquires two +small extensions -- REG_ITOA and REG_ATOI -- which avoid debugging kludges +in my own test program and might be useful to others for similar purposes. 
+The regression test will now compile (and run) without REDEBUG. The +BRE \$ bug is fixed. Most uses of "uchar" are gone; it's all chars now. +Char/uchar parameters are now written int/unsigned, to avoid possible +portability problems with unpromoted parameters. Some unsigned casts have +been introduced to minimize portability problems with shifting into sign +bits. + +New in alpha2.1: Lots of little stuff, cleanup and fixes. The one big +thing is that regex.h is now generated, using mkh, rather than being +supplied in the distribution; due to circularities in dependencies, +you have to build regex.h explicitly by "make h". The two known bugs +have been fixed (and the regression test now checks for them), as has a +problem with assertions not being suppressed in the absence of REDEBUG. +No performance work yet. + +New in alpha2: Backslash-anything is an ordinary character, not an +error (except, of course, for the handful of backslashed metacharacters +in BREs), which should reduce script breakage. The regression test +checks *where* null strings are supposed to match, and has generally +been tightened up somewhat. Small bug fixes in parameter passing (not +harmful, but technically errors) and some other areas. Debugging +invoked by defining REDEBUG rather than not defining NDEBUG. + +New in alpha+3: full prototyping for internal routines, using a little +helper program, mkh, which extracts prototypes given in stylized comments. +More minor cleanup. Buglet fix: it's CHAR_BIT, not CHAR_BITS. Simple +pre-screening of input when a literal string is known to be part of the +RE; this does wonders for performance. + +New in alpha+2: minor bits of cleanup. Notably, the number "32" for the +word width isn't hardwired into regexec.c any more, the public header +file prototypes the functions if __STDC__ is defined, and some small typos +in the manpages have been fixed. 
+ +New in alpha+1: improvements to the manual pages, and an important +extension, the REG_STARTEND option to regexec(). diff --git a/ext/ereg/regex/cclass.h b/ext/ereg/regex/cclass.h new file mode 100644 index 0000000000..d6b5760eef --- /dev/null +++ b/ext/ereg/regex/cclass.h @@ -0,0 +1,30 @@ +/* character-class table */ +static const struct cclass { + const unsigned char *name; + const unsigned char *chars; + const unsigned char *multis; +} cclasses[] = { + {"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", ""}, + {"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", + ""}, + {"blank", " \t", ""}, + {"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ +\25\26\27\30\31\32\33\34\35\36\37\177", ""}, + {"digit", "0123456789", ""}, + {"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ""}, + {"lower", "abcdefghijklmnopqrstuvwxyz", + ""}, + {"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", + ""}, + {"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ""}, + {"space", "\t\n\v\f\r ", ""}, + {"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + ""}, + {"xdigit", "0123456789ABCDEFabcdef", + ""}, + {NULL, 0, ""} +}; diff --git a/ext/ereg/regex/cname.h b/ext/ereg/regex/cname.h new file mode 100644 index 0000000000..c2855dad5d --- /dev/null +++ b/ext/ereg/regex/cname.h @@ -0,0 +1,102 @@ +/* character-name table */ +static const struct cname { + const char *name; + char code; +} cnames[] = { + {"NUL", '\0'}, + {"SOH", '\001'}, + {"STX", '\002'}, + {"ETX", '\003'}, + {"EOT", '\004'}, + {"ENQ", '\005'}, + {"ACK", '\006'}, + {"BEL", '\007'}, + {"alert", '\007'}, + {"BS", '\010'}, + {"backspace", '\b'}, + {"HT", '\011'}, + {"tab", '\t'}, + {"LF", '\012'}, + {"newline", '\n'}, + {"VT", '\013'}, + {"vertical-tab", '\v'}, + {"FF", '\014'}, + {"form-feed", '\f'}, + {"CR", '\015'}, + {"carriage-return", '\r'}, + {"SO", '\016'}, + 
{"SI", '\017'}, + {"DLE", '\020'}, + {"DC1", '\021'}, + {"DC2", '\022'}, + {"DC3", '\023'}, + {"DC4", '\024'}, + {"NAK", '\025'}, + {"SYN", '\026'}, + {"ETB", '\027'}, + {"CAN", '\030'}, + {"EM", '\031'}, + {"SUB", '\032'}, + {"ESC", '\033'}, + {"IS4", '\034'}, + {"FS", '\034'}, + {"IS3", '\035'}, + {"GS", '\035'}, + {"IS2", '\036'}, + {"RS", '\036'}, + {"IS1", '\037'}, + {"US", '\037'}, + {"space", ' '}, + {"exclamation-mark", '!'}, + {"quotation-mark", '"'}, + {"number-sign", '#'}, + {"dollar-sign", '$'}, + {"percent-sign", '%'}, + {"ampersand", '&'}, + {"apostrophe", '\''}, + {"left-parenthesis", '('}, + {"right-parenthesis", ')'}, + {"asterisk", '*'}, + {"plus-sign", '+'}, + {"comma", ','}, + {"hyphen", '-'}, + {"hyphen-minus", '-'}, + {"period", '.'}, + {"full-stop", '.'}, + {"slash", '/'}, + {"solidus", '/'}, + {"zero", '0'}, + {"one", '1'}, + {"two", '2'}, + {"three", '3'}, + {"four", '4'}, + {"five", '5'}, + {"six", '6'}, + {"seven", '7'}, + {"eight", '8'}, + {"nine", '9'}, + {"colon", ':'}, + {"semicolon", ';'}, + {"less-than-sign", '<'}, + {"equals-sign", '='}, + {"greater-than-sign", '>'}, + {"question-mark", '?'}, + {"commercial-at", '@'}, + {"left-square-bracket", '['}, + {"backslash", '\\'}, + {"reverse-solidus", '\\'}, + {"right-square-bracket", ']'}, + {"circumflex", '^'}, + {"circumflex-accent", '^'}, + {"underscore", '_'}, + {"low-line", '_'}, + {"grave-accent", '`'}, + {"left-brace", '{'}, + {"left-curly-bracket", '{'}, + {"vertical-line", '|'}, + {"right-brace", '}'}, + {"right-curly-bracket", '}'}, + {"tilde", '~'}, + {"DEL", '\177'}, + {NULL, 0}, +}; diff --git a/ext/ereg/regex/debug.c b/ext/ereg/regex/debug.c new file mode 100644 index 0000000000..3db93ef293 --- /dev/null +++ b/ext/ereg/regex/debug.c @@ -0,0 +1,242 @@ +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <limits.h> +#include <stdlib.h> +#include <sys/types.h> +#include <regex.h> + +#include "utils.h" +#include "regex2.h" +#include "debug.ih" + +/* + - 
regprint - print a regexp for debugging + == void regprint(regex_t *r, FILE *d); + */ +void +regprint(r, d) +regex_t *r; +FILE *d; +{ + register struct re_guts *g = r->re_g; + register int i; + register int c; + register int last; + int nincat[NC]; + + fprintf(d, "%ld states, %d categories", (long)g->nstates, + g->ncategories); + fprintf(d, ", first %ld last %ld", (long)g->firststate, + (long)g->laststate); + if (g->iflags&USEBOL) + fprintf(d, ", USEBOL"); + if (g->iflags&USEEOL) + fprintf(d, ", USEEOL"); + if (g->iflags&BAD) + fprintf(d, ", BAD"); + if (g->nsub > 0) + fprintf(d, ", nsub=%ld", (long)g->nsub); + if (g->must != NULL) + fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen, + g->must); + if (g->backrefs) + fprintf(d, ", backrefs"); + if (g->nplus > 0) + fprintf(d, ", nplus %ld", (long)g->nplus); + fprintf(d, "\n"); + s_print(g, d); + for (i = 0; i < g->ncategories; i++) { + nincat[i] = 0; + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (g->categories[c] == i) + nincat[i]++; + } + fprintf(d, "cc0#%d", nincat[0]); + for (i = 1; i < g->ncategories; i++) + if (nincat[i] == 1) { + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (g->categories[c] == i) + break; + fprintf(d, ", %d=%s", i, regchar(c)); + } + fprintf(d, "\n"); + for (i = 1; i < g->ncategories; i++) + if (nincat[i] != 1) { + fprintf(d, "cc%d\t", i); + last = -1; + for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */ + if (c <= CHAR_MAX && g->categories[c] == i) { + if (last < 0) { + fprintf(d, "%s", regchar(c)); + last = c; + } + } else { + if (last >= 0) { + if (last != c-1) + fprintf(d, "-%s", + regchar(c-1)); + last = -1; + } + } + fprintf(d, "\n"); + } +} + +/* + - s_print - print the strip for debugging + == static void s_print(register struct re_guts *g, FILE *d); + */ +static void +s_print(g, d) +register struct re_guts *g; +FILE *d; +{ + register sop *s; + register cset *cs; + register int i; + register int done = 0; + register sop opnd; + register int col = 0; + register int 
last; + register sopno offset = 2; +# define GAP() { if (offset % 5 == 0) { \ + if (col > 40) { \ + fprintf(d, "\n\t"); \ + col = 0; \ + } else { \ + fprintf(d, " "); \ + col++; \ + } \ + } else \ + col++; \ + offset++; \ + } + + if (OP(g->strip[0]) != OEND) + fprintf(d, "missing initial OEND!\n"); + for (s = &g->strip[1]; !done; s++) { + opnd = OPND(*s); + switch (OP(*s)) { + case OEND: + fprintf(d, "\n"); + done = 1; + break; + case OCHAR: + if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL) + fprintf(d, "\\%c", (unsigned char)opnd); + else + fprintf(d, "%s", regchar((unsigned char)opnd)); + break; + case OBOL: + fprintf(d, "^"); + break; + case OEOL: + fprintf(d, "$"); + break; + case OBOW: + fprintf(d, "\\{"); + break; + case OEOW: + fprintf(d, "\\}"); + break; + case OANY: + fprintf(d, "."); + break; + case OANYOF: + fprintf(d, "[(%ld)", (long)opnd); + cs = &g->sets[opnd]; + last = -1; + for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */ + if (CHIN(cs, i) && i < g->csetsize) { + if (last < 0) { + fprintf(d, "%s", regchar(i)); + last = i; + } + } else { + if (last >= 0) { + if (last != i-1) + fprintf(d, "-%s", + regchar(i-1)); + last = -1; + } + } + fprintf(d, "]"); + break; + case OBACK_: + fprintf(d, "(\\<%ld>", (long)opnd); + break; + case O_BACK: + fprintf(d, "<%ld>\\)", (long)opnd); + break; + case OPLUS_: + fprintf(d, "(+"); + if (OP(*(s+opnd)) != O_PLUS) + fprintf(d, "<%ld>", (long)opnd); + break; + case O_PLUS: + if (OP(*(s-opnd)) != OPLUS_) + fprintf(d, "<%ld>", (long)opnd); + fprintf(d, "+)"); + break; + case OQUEST_: + fprintf(d, "(?"); + if (OP(*(s+opnd)) != O_QUEST) + fprintf(d, "<%ld>", (long)opnd); + break; + case O_QUEST: + if (OP(*(s-opnd)) != OQUEST_) + fprintf(d, "<%ld>", (long)opnd); + fprintf(d, "?)"); + break; + case OLPAREN: + fprintf(d, "((<%ld>", (long)opnd); + break; + case ORPAREN: + fprintf(d, "<%ld>))", (long)opnd); + break; + case OCH_: + fprintf(d, "<"); + if (OP(*(s+opnd)) != OOR2) + fprintf(d, "<%ld>", (long)opnd); + break; 
+ case OOR1: + if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_) + fprintf(d, "<%ld>", (long)opnd); + fprintf(d, "|"); + break; + case OOR2: + fprintf(d, "|"); + if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH) + fprintf(d, "<%ld>", (long)opnd); + break; + case O_CH: + if (OP(*(s-opnd)) != OOR1) + fprintf(d, "<%ld>", (long)opnd); + fprintf(d, ">"); + break; + default: + fprintf(d, "!%ld(%ld)!", OP(*s), opnd); + break; + } + if (!done) + GAP(); + } +} + +/* + - regchar - make a character printable + == static char *regchar(int ch); + */ +static unsigned char * /* -> representation */ +regchar(ch) +int ch; +{ + static unsigned char buf[10]; + + if (isprint(ch) || ch == ' ') + sprintf(buf, "%c", ch); + else + sprintf(buf, "\\%o", ch); + return(buf); +} diff --git a/ext/ereg/regex/debug.ih b/ext/ereg/regex/debug.ih new file mode 100644 index 0000000000..5f40ff7917 --- /dev/null +++ b/ext/ereg/regex/debug.ih @@ -0,0 +1,14 @@ +/* ========= begin header generated by ./mkh ========= */ +#ifdef __cplusplus +extern "C" { +#endif + +/* === debug.c === */ +void regprint(regex_t *r, FILE *d); +static void s_print(register struct re_guts *g, FILE *d); +static char *regchar(int ch); + +#ifdef __cplusplus +} +#endif +/* ========= end header generated by ./mkh ========= */ diff --git a/ext/ereg/regex/engine.c b/ext/ereg/regex/engine.c new file mode 100644 index 0000000000..0682267f61 --- /dev/null +++ b/ext/ereg/regex/engine.c @@ -0,0 +1,1019 @@ +/* + * The matching engine and friends. This file is #included by regexec.c + * after suitable #defines of a variety of macros used herein, so that + * different state representations can be used without duplicating masses + * of code. 
+ */ + +#ifdef SNAMES +#define matcher smatcher +#define fast sfast +#define slow sslow +#define dissect sdissect +#define backref sbackref +#define step sstep +#define print sprint +#define at sat +#define match smat +#endif +#ifdef LNAMES +#define matcher lmatcher +#define fast lfast +#define slow lslow +#define dissect ldissect +#define backref lbackref +#define step lstep +#define print lprint +#define at lat +#define match lmat +#endif + +/* another structure passed up and down to avoid zillions of parameters */ +struct match { + struct re_guts *g; + int eflags; + regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ + unsigned char *offp; /* offsets work from here */ + unsigned char *beginp; /* start of string -- virtual NUL precedes */ + unsigned char *endp; /* end of string -- virtual NUL here */ + unsigned char *coldp; /* can be no match starting before here */ + unsigned char **lastpos; /* [nplus+1] */ + STATEVARS; + states st; /* current states */ + states fresh; /* states for a fresh start */ + states tmp; /* temporary */ + states empty; /* empty set of states */ +}; + +#include "engine.ih" + +#ifdef REDEBUG +#define SP(t, s, c) print(m, t, s, c, stdout) +#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) +#define NOTE(str) { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); } +#else +#define SP(t, s, c) /* nothing */ +#define AT(t, p1, p2, s1, s2) /* nothing */ +#define NOTE(s) /* nothing */ +#endif + +/* + - matcher - the actual matching engine + == static int matcher(register struct re_guts *g, char *string, \ + == size_t nmatch, regmatch_t pmatch[], int eflags); + */ +static int /* 0 success, REG_NOMATCH failure */ +matcher(g, string, nmatch, pmatch, eflags) +register struct re_guts *g; +unsigned char *string; +size_t nmatch; +regmatch_t pmatch[]; +int eflags; +{ + register unsigned char *endp; + register size_t i; + struct match mv; + register struct match *m = &mv; + register unsigned char *dp; + const register sopno gf = g->firststate+1; /* +1
for OEND */ + const register sopno gl = g->laststate; + unsigned char *start; + unsigned char *stop; + + /* simplify the situation where possible */ + if (g->cflags&REG_NOSUB) + nmatch = 0; + if (eflags&REG_STARTEND) { + start = string + pmatch[0].rm_so; + stop = string + pmatch[0].rm_eo; + } else { + start = string; + stop = start + strlen(start); + } + if (stop < start) + return(REG_INVARG); + + /* prescreening; this does wonders for this rather slow code */ + if (g->must != NULL) { + for (dp = start; dp < stop; dp++) + if (*dp == g->must[0] && stop - dp >= g->mlen && + memcmp(dp, g->must, (size_t)g->mlen) == 0) + break; + if (dp == stop) /* we didn't find g->must */ + return(REG_NOMATCH); + } + + /* match struct setup */ + m->g = g; + m->eflags = eflags; + m->pmatch = NULL; + m->lastpos = NULL; + m->offp = string; + m->beginp = start; + m->endp = stop; + STATESETUP(m, 4); + SETUP(m->st); + SETUP(m->fresh); + SETUP(m->tmp); + SETUP(m->empty); + CLEAR(m->empty); + + /* this loop does only one repetition except for backrefs */ + for (;;) { + endp = fast(m, start, stop, gf, gl); + if (endp == NULL) { /* a miss */ + STATETEARDOWN(m); + return(REG_NOMATCH); + } + if (nmatch == 0 && !g->backrefs) + break; /* no further info needed */ + + /* where? */ + assert(m->coldp != NULL); + for (;;) { + NOTE("finding start"); + endp = slow(m, m->coldp, stop, gf, gl); + if (endp != NULL) + break; + assert(m->coldp < m->endp); + m->coldp++; + } + if (nmatch == 1 && !g->backrefs) + break; /* no further info needed */ + + /* oh my, he wants the subexpressions...
*/ + if (m->pmatch == NULL) + m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * + sizeof(regmatch_t)); + if (m->pmatch == NULL) { + STATETEARDOWN(m); + return(REG_ESPACE); + } + for (i = 1; i <= m->g->nsub; i++) + m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; + if (!g->backrefs && !(m->eflags&REG_BACKR)) { + NOTE("dissecting"); + dp = dissect(m, m->coldp, endp, gf, gl); + } else { + if (g->nplus > 0 && m->lastpos == NULL) + m->lastpos = (unsigned char **)malloc((g->nplus+1) * + sizeof(unsigned char *)); + if (g->nplus > 0 && m->lastpos == NULL) { + free((char *)m->pmatch); + STATETEARDOWN(m); + return(REG_ESPACE); + } + NOTE("backref dissect"); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); + } + if (dp != NULL) + break; + + /* uh-oh... we couldn't find a subexpression-level match */ + assert(g->backrefs); /* must be back references doing it */ + assert(g->nplus == 0 || m->lastpos != NULL); + for (;;) { + if (dp != NULL || endp <= m->coldp) + break; /* defeat */ + NOTE("backoff"); + endp = slow(m, m->coldp, endp-1, gf, gl); + if (endp == NULL) + break; /* defeat */ + /* try it on a shorter possibility */ +#ifndef NDEBUG + for (i = 1; i <= m->g->nsub; i++) { + assert(m->pmatch[i].rm_so == -1); + assert(m->pmatch[i].rm_eo == -1); + } +#endif + NOTE("backoff dissect"); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); + } + assert(dp == NULL || dp == endp); + if (dp != NULL) /* found a shorter one */ + break; + + /* despite initial appearances, there is no match here */ + NOTE("false alarm"); + start = m->coldp + 1; /* recycle starting later */ + assert(start <= stop); + } + + /* fill in the details if requested */ + if (nmatch > 0) { + pmatch[0].rm_so = m->coldp - m->offp; + pmatch[0].rm_eo = endp - m->offp; + } + if (nmatch > 1) { + assert(m->pmatch != NULL); + for (i = 1; i < nmatch; i++) + if (i <= m->g->nsub) + pmatch[i] = m->pmatch[i]; + else { + pmatch[i].rm_so = -1; + pmatch[i].rm_eo = -1; + } + } + + if (m->pmatch != NULL) + free((char *)m->pmatch); +
if (m->lastpos != NULL) + free((char *)m->lastpos); + STATETEARDOWN(m); + return(0); +} + +/* + - dissect - figure out what matched what, no back references + == static unsigned char *dissect(register struct match *m, unsigned char *start, \ + == unsigned char *stop, sopno startst, sopno stopst); + */ +static unsigned char * /* == stop (success) always */ +dissect(m, start, stop, startst, stopst) +register struct match *m; +unsigned char *start; +unsigned char *stop; +sopno startst; +sopno stopst; +{ + register int i; + register sopno ss; /* start sop of current subRE */ + register sopno es; /* end sop of current subRE */ + register unsigned char *sp; /* start of string matched by it */ + register unsigned char *stp; /* string matched by it cannot pass here */ + register unsigned char *rest; /* start of rest of string */ + register unsigned char *tail; /* string unmatched by rest of RE */ + register sopno ssub; /* start sop of subsubRE */ + register sopno esub; /* end sop of subsubRE */ + register unsigned char *ssp; /* start of string matched by subsubRE */ + register unsigned char *sep; /* end of string matched by subsubRE */ + register unsigned char *oldssp; /* previous ssp */ + register unsigned char *dp; + + AT("diss", start, stop, startst, stopst); + sp = start; + for (ss = startst; ss < stopst; ss = es) { + /* identify end of subRE */ + es = ss; + switch (OP(m->g->strip[es])) { + case OPLUS_: + case OQUEST_: + es += OPND(m->g->strip[es]); + break; + case OCH_: + while (OP(m->g->strip[es]) != O_CH) + es += OPND(m->g->strip[es]); + break; + } + es++; + + /* figure out what it matched */ + switch (OP(m->g->strip[ss])) { + case OEND: + assert(PHP_REGEX_NOPE); + break; + case OCHAR: + sp++; + break; + case OBOL: + case OEOL: + case OBOW: + case OEOW: + break; + case OANY: + case OANYOF: + sp++; + break; + case OBACK_: + case O_BACK: + assert(PHP_REGEX_NOPE); + break; + /* cases where length of match is hard to find */ + case OQUEST_: + stp = stop; + for (;;) { + 
/* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! */ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = es - 1; + /* did innards match? */ + if (slow(m, sp, rest, ssub, esub) != NULL) { + dp = dissect(m, sp, rest, ssub, esub); + assert(dp == rest); + } else /* no */ + assert(sp == rest); + sp = rest; + break; + case OPLUS_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! */ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = es - 1; + ssp = sp; + oldssp = ssp; + for (;;) { /* find last match of innards */ + sep = slow(m, ssp, rest, ssub, esub); + if (sep == NULL || sep == ssp) + break; /* failed or matched null */ + oldssp = ssp; /* on to next try */ + ssp = sep; + } + if (sep == NULL) { + /* last successful match */ + sep = ssp; + ssp = oldssp; + } + assert(sep == rest); /* must exhaust substring */ + assert(slow(m, ssp, sep, ssub, esub) == rest); + dp = dissect(m, ssp, sep, ssub, esub); + assert(dp == sep); + sp = rest; + break; + case OCH_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! 
*/ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = ss + OPND(m->g->strip[ss]) - 1; + assert(OP(m->g->strip[esub]) == OOR1); + for (;;) { /* find first matching branch */ + if (slow(m, sp, rest, ssub, esub) == rest) + break; /* it matched all of it */ + /* that one missed, try next one */ + assert(OP(m->g->strip[esub]) == OOR1); + esub++; + assert(OP(m->g->strip[esub]) == OOR2); + ssub = esub + 1; + esub += OPND(m->g->strip[esub]); + if (OP(m->g->strip[esub]) == OOR2) + esub--; + else + assert(OP(m->g->strip[esub]) == O_CH); + } + dp = dissect(m, sp, rest, ssub, esub); + assert(dp == rest); + sp = rest; + break; + case O_PLUS: + case O_QUEST: + case OOR1: + case OOR2: + case O_CH: + assert(PHP_REGEX_NOPE); + break; + case OLPAREN: + i = OPND(m->g->strip[ss]); + assert(0 < i && i <= m->g->nsub); + m->pmatch[i].rm_so = sp - m->offp; + break; + case ORPAREN: + i = OPND(m->g->strip[ss]); + assert(0 < i && i <= m->g->nsub); + m->pmatch[i].rm_eo = sp - m->offp; + break; + default: /* uh oh */ + assert(PHP_REGEX_NOPE); + break; + } + } + + assert(sp == stop); + return(sp); +} + +/* + - backref - figure out what matched what, figuring in back references + == static unsigned char *backref(register struct match *m, unsigned char *start, \ + == unsigned char *stop, sopno startst, sopno stopst, sopno lev); + */ +static unsigned char * /* == stop (success) or NULL (failure) */ +backref(m, start, stop, startst, stopst, lev) +register struct match *m; +unsigned char *start; +unsigned char *stop; +sopno startst; +sopno stopst; +sopno lev; /* PLUS nesting level */ +{ + register int i; + register sopno ss; /* start sop of current subRE */ + register unsigned char *sp; /* start of string matched by it */ + register sopno ssub; /* start sop of subsubRE */ + register sopno esub; /* end sop of subsubRE */ + register unsigned char *ssp; /* start of string matched by subsubRE */ + register unsigned char 
*dp; + register size_t len; + register int hard; + register sop s; + register regoff_t offsave; + register cset *cs; + + AT("back", start, stop, startst, stopst); + sp = start; + + /* get as far as we can with easy stuff */ + hard = 0; + for (ss = startst; !hard && ss < stopst; ss++) + switch (OP(s = m->g->strip[ss])) { + case OCHAR: + if (sp == stop || *sp++ != (unsigned char)OPND(s)) + return(NULL); + break; + case OANY: + if (sp == stop) + return(NULL); + sp++; + break; + case OANYOF: + cs = &m->g->sets[OPND(s)]; + if (sp == stop || !CHIN(cs, *sp++)) + return(NULL); + break; + case OBOL: + if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) || + (sp < m->endp && *(sp-1) == '\n' && + (m->g->cflags&REG_NEWLINE)) ) + { /* yes */ } + else + return(NULL); + break; + case OEOL: + if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) || + (sp < m->endp && *sp == '\n' && + (m->g->cflags&REG_NEWLINE)) ) + { /* yes */ } + else + return(NULL); + break; + case OBOW: + if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) || + (sp < m->endp && *(sp-1) == '\n' && + (m->g->cflags&REG_NEWLINE)) || + (sp > m->beginp && + !ISWORD(*(sp-1))) ) && + (sp < m->endp && ISWORD(*sp)) ) + { /* yes */ } + else + return(NULL); + break; + case OEOW: + if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) || + (sp < m->endp && *sp == '\n' && + (m->g->cflags&REG_NEWLINE)) || + (sp < m->endp && !ISWORD(*sp)) ) && + (sp > m->beginp && ISWORD(*(sp-1))) ) + { /* yes */ } + else + return(NULL); + break; + case O_QUEST: + break; + case OOR1: /* matches null but needs to skip */ + ss++; + s = m->g->strip[ss]; + do { + assert(OP(s) == OOR2); + ss += OPND(s); + } while (OP(s = m->g->strip[ss]) != O_CH); + /* note that the ss++ gets us past the O_CH */ + break; + default: /* have to make a choice */ + hard = 1; + break; + } + if (!hard) { /* that was it!
*/ + if (sp != stop) + return(NULL); + return(sp); + } + ss--; /* adjust for the for's final increment */ + + /* the hard stuff */ + AT("hard", sp, stop, ss, stopst); + s = m->g->strip[ss]; + switch (OP(s)) { + case OBACK_: /* the vilest depths */ + i = OPND(s); + assert(0 < i && i <= m->g->nsub); + if (m->pmatch[i].rm_eo == -1) + return(NULL); + assert(m->pmatch[i].rm_so != -1); + len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; + assert(stop - m->beginp >= len); + if (sp > stop - len) + return(NULL); /* not enough left to match */ + ssp = m->offp + m->pmatch[i].rm_so; + if (memcmp(sp, ssp, len) != 0) + return(NULL); + while (m->g->strip[ss] != SOP(O_BACK, i)) + ss++; + return(backref(m, sp+len, stop, ss+1, stopst, lev)); + break; + case OQUEST_: /* to null or not */ + dp = backref(m, sp, stop, ss+1, stopst, lev); + if (dp != NULL) + return(dp); /* not */ + return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); + break; + case OPLUS_: + assert(m->lastpos != NULL); + assert(lev+1 <= m->g->nplus); + m->lastpos[lev+1] = sp; + return(backref(m, sp, stop, ss+1, stopst, lev+1)); + break; + case O_PLUS: + if (sp == m->lastpos[lev]) /* last pass matched null */ + return(backref(m, sp, stop, ss+1, stopst, lev-1)); + /* try another pass */ + m->lastpos[lev] = sp; + dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); + if (dp == NULL) + return(backref(m, sp, stop, ss+1, stopst, lev-1)); + else + return(dp); + break; + case OCH_: /* find the right one, if any */ + ssub = ss + 1; + esub = ss + OPND(s) - 1; + assert(OP(m->g->strip[esub]) == OOR1); + for (;;) { /* find first matching branch */ + dp = backref(m, sp, stop, ssub, esub, lev); + if (dp != NULL) + return(dp); + /* that one missed, try next one */ + if (OP(m->g->strip[esub]) == O_CH) + return(NULL); /* there is none */ + esub++; + assert(OP(m->g->strip[esub]) == OOR2); + ssub = esub + 1; + esub += OPND(m->g->strip[esub]); + if (OP(m->g->strip[esub]) == OOR2) + esub--; + else + assert(OP(m->g->strip[esub]) == O_CH); + 
} + break; + case OLPAREN: /* must undo assignment if rest fails */ + i = OPND(s); + assert(0 < i && i <= m->g->nsub); + offsave = m->pmatch[i].rm_so; + m->pmatch[i].rm_so = sp - m->offp; + dp = backref(m, sp, stop, ss+1, stopst, lev); + if (dp != NULL) + return(dp); + m->pmatch[i].rm_so = offsave; + return(NULL); + break; + case ORPAREN: /* must undo assignment if rest fails */ + i = OPND(s); + assert(0 < i && i <= m->g->nsub); + offsave = m->pmatch[i].rm_eo; + m->pmatch[i].rm_eo = sp - m->offp; + dp = backref(m, sp, stop, ss+1, stopst, lev); + if (dp != NULL) + return(dp); + m->pmatch[i].rm_eo = offsave; + return(NULL); + break; + default: /* uh oh */ + assert(PHP_REGEX_NOPE); + break; + } + + /* "can't happen" */ + assert(PHP_REGEX_NOPE); + /* NOTREACHED */ + return((unsigned char *)NULL); /* dummy */ +} + +/* + - fast - step through the string at top speed + == static unsigned char *fast(register struct match *m, unsigned char *start, \ + == unsigned char *stop, sopno startst, sopno stopst); + */ +static unsigned char * /* where tentative match ended, or NULL */ +fast(m, start, stop, startst, stopst) +register struct match *m; +unsigned char *start; +unsigned char *stop; +sopno startst; +sopno stopst; +{ + register states st = m->st; + register states fresh = m->fresh; + register states tmp = m->tmp; + register unsigned char *p = start; + register int c = (start == m->beginp) ? OUT : *(start-1); + register int lastc; /* previous c */ + register int flagch; + register int i; + register unsigned char *coldp; /* last p after which no match was underway */ + + CLEAR(st); + SET1(st, startst); + st = step(m->g, startst, stopst, st, NOTHING, st); + ASSIGN(fresh, st); + SP("start", st, *p); + coldp = NULL; + for (;;) { + /* next character */ + lastc = c; + c = (p == m->endp) ? OUT : *p; + if (EQ(st, fresh)) + coldp = p; + + /* is there an EOL and/or BOL between lastc and c? 
*/ + flagch = '\0'; + i = 0; + if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) || + (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) { + flagch = BOL; + i = m->g->nbol; + } + if ( (c == '\n' && m->g->cflags&REG_NEWLINE) || + (c == OUT && !(m->eflags&REG_NOTEOL)) ) { + flagch = (flagch == BOL) ? BOLEOL : EOL; + i += m->g->neol; + } + if (i != 0) { + for (; i > 0; i--) + st = step(m->g, startst, stopst, st, flagch, st); + SP("boleol", st, c); + } + + /* how about a word boundary? */ + if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && + (c != OUT && ISWORD(c)) ) { + flagch = BOW; + } + if ( (lastc != OUT && ISWORD(lastc)) && + (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + flagch = EOW; + } + if (flagch == BOW || flagch == EOW) { + st = step(m->g, startst, stopst, st, flagch, st); + SP("boweow", st, c); + } + + /* are we done? */ + if (ISSET(st, stopst) || p == stop) + break; /* NOTE BREAK OUT */ + + /* no, we must deal with this character */ + ASSIGN(tmp, st); + ASSIGN(st, fresh); + assert(c != OUT); + st = step(m->g, startst, stopst, tmp, c, st); + SP("aft", st, c); + assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); + p++; + } + + assert(coldp != NULL); + m->coldp = coldp; + if (ISSET(st, stopst)) + return(p+1); + else + return(NULL); +} + +/* + - slow - step through the string more deliberately + == static unsigned char *slow(register struct match *m, unsigned char *start, \ + == unsigned char *stop, sopno startst, sopno stopst); + */ +static unsigned char * /* where it ended */ +slow(m, start, stop, startst, stopst) +register struct match *m; +unsigned char *start; +unsigned char *stop; +sopno startst; +sopno stopst; +{ + register states st = m->st; + register states empty = m->empty; + register states tmp = m->tmp; + register unsigned char *p = start; + register int c = (start == m->beginp) ?
OUT : *(start-1); + register int lastc; /* previous c */ + register int flagch; + register int i; + register unsigned char *matchp; /* last p at which a match ended */ + + AT("slow", start, stop, startst, stopst); + CLEAR(st); + SET1(st, startst); + SP("sstart", st, *p); + st = step(m->g, startst, stopst, st, NOTHING, st); + matchp = NULL; + for (;;) { + /* next character */ + lastc = c; + c = (p == m->endp) ? OUT : *p; + + /* is there an EOL and/or BOL between lastc and c? */ + flagch = '\0'; + i = 0; + if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) || + (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) { + flagch = BOL; + i = m->g->nbol; + } + if ( (c == '\n' && m->g->cflags&REG_NEWLINE) || + (c == OUT && !(m->eflags&REG_NOTEOL)) ) { + flagch = (flagch == BOL) ? BOLEOL : EOL; + i += m->g->neol; + } + if (i != 0) { + for (; i > 0; i--) + st = step(m->g, startst, stopst, st, flagch, st); + SP("sboleol", st, c); + } + + /* how about a word boundary? */ + if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && + (c != OUT && ISWORD(c)) ) { + flagch = BOW; + } + if ( (lastc != OUT && ISWORD(lastc)) && + (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + flagch = EOW; + } + if (flagch == BOW || flagch == EOW) { + st = step(m->g, startst, stopst, st, flagch, st); + SP("sboweow", st, c); + } + + /* are we done?
*/ + if (ISSET(st, stopst)) + matchp = p; + if (EQ(st, empty) || p == stop) + break; /* NOTE BREAK OUT */ + + /* no, we must deal with this character */ + ASSIGN(tmp, st); + ASSIGN(st, empty); + assert(c != OUT); + st = step(m->g, startst, stopst, tmp, c, st); + SP("saft", st, c); + assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); + p++; + } + + return(matchp); +} + + +/* + - step - map set of states reachable before char to set reachable after + == static states step(register struct re_guts *g, sopno start, sopno stop, \ + == register states bef, int ch, register states aft); + == #define BOL (OUT+1) + == #define EOL (BOL+1) + == #define BOLEOL (BOL+2) + == #define NOTHING (BOL+3) + == #define BOW (BOL+4) + == #define EOW (BOL+5) + == #define CODEMAX (BOL+5) // highest code used + == #define NONCHAR(c) ((c) > UCHAR_MAX) + == #define NNONCHAR (CODEMAX-UCHAR_MAX) + */ +static states +step(g, start, stop, bef, ch, aft) +register struct re_guts *g; +sopno start; /* start state within strip */ +sopno stop; /* state after stop state within strip */ +register states bef; /* states reachable before */ +int ch; /* character or NONCHAR code */ +register states aft; /* states already known reachable after */ +{ + register cset *cs; + register sop s; + register sopno pc; + register onestate here; /* note, macros know this name */ + register sopno look; + register long i; + + for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { + s = g->strip[pc]; + switch (OP(s)) { + case OEND: + assert(pc == stop-1); + break; + case OCHAR: + /* only characters can match */ + assert(!NONCHAR(ch) || ch != (unsigned char)OPND(s)); + if (ch == (unsigned char)OPND(s)) + FWD(aft, bef, 1); + break; + case OBOL: + if (ch == BOL || ch == BOLEOL) + FWD(aft, bef, 1); + break; + case OEOL: + if (ch == EOL || ch == BOLEOL) + FWD(aft, bef, 1); + break; + case OBOW: + if (ch == BOW) + FWD(aft, bef, 1); + break; + case OEOW: + if (ch == EOW) + FWD(aft, bef, 1); + break; + case OANY: 
+ if (!NONCHAR(ch)) + FWD(aft, bef, 1); + break; + case OANYOF: + cs = &g->sets[OPND(s)]; + if (!NONCHAR(ch) && CHIN(cs, ch)) + FWD(aft, bef, 1); + break; + case OBACK_: /* ignored here */ + case O_BACK: + FWD(aft, aft, 1); + break; + case OPLUS_: /* forward, this is just an empty */ + FWD(aft, aft, 1); + break; + case O_PLUS: /* both forward and back */ + FWD(aft, aft, 1); + i = ISSETBACK(aft, OPND(s)); + BACK(aft, aft, OPND(s)); + if (!i && ISSETBACK(aft, OPND(s))) { + /* oho, must reconsider loop body */ + pc -= OPND(s) + 1; + INIT(here, pc); + } + break; + case OQUEST_: /* two branches, both forward */ + FWD(aft, aft, 1); + FWD(aft, aft, OPND(s)); + break; + case O_QUEST: /* just an empty */ + FWD(aft, aft, 1); + break; + case OLPAREN: /* not significant here */ + case ORPAREN: + FWD(aft, aft, 1); + break; + case OCH_: /* mark the first two branches */ + FWD(aft, aft, 1); + assert(OP(g->strip[pc+OPND(s)]) == OOR2); + FWD(aft, aft, OPND(s)); + break; + case OOR1: /* done a branch, find the O_CH */ + if (ISSTATEIN(aft, here)) { + for (look = 1; + OP(s = g->strip[pc+look]) != O_CH; + look += OPND(s)) + assert(OP(s) == OOR2); + FWD(aft, aft, look); + } + break; + case OOR2: /* propagate OCH_'s marking */ + FWD(aft, aft, 1); + if (OP(g->strip[pc+OPND(s)]) != O_CH) { + assert(OP(g->strip[pc+OPND(s)]) == OOR2); + FWD(aft, aft, OPND(s)); + } + break; + case O_CH: /* just empty */ + FWD(aft, aft, 1); + break; + default: /* ooooops... 
*/ + assert(PHP_REGEX_NOPE); + break; + } + } + + return(aft); +} + +#ifdef REDEBUG +/* + - print - print a set of states + == #ifdef REDEBUG + == static void print(struct match *m, unsigned char *caption, states st, \ + == int ch, FILE *d); + == #endif + */ +static void +print(m, caption, st, ch, d) +struct match *m; +unsigned char *caption; +states st; +int ch; +FILE *d; +{ + register struct re_guts *g = m->g; + register int i; + register int first = 1; + + if (!(m->eflags&REG_TRACE)) + return; + + fprintf(d, "%s", caption); + if (ch != '\0') + fprintf(d, " %s", pchar(ch)); + for (i = 0; i < g->nstates; i++) + if (ISSET(st, i)) { + fprintf(d, "%s%d", (first) ? "\t" : ", ", i); + first = 0; + } + fprintf(d, "\n"); +} + +/* + - at - print current situation + == #ifdef REDEBUG + == static void at(struct match *m, unsigned char *title, unsigned char *start, unsigned char *stop, \ + == sopno startst, sopno stopst); + == #endif + */ +static void +at(m, title, start, stop, startst, stopst) +struct match *m; +unsigned char *title; +unsigned char *start; +unsigned char *stop; +sopno startst; +sopno stopst; +{ + if (!(m->eflags&REG_TRACE)) + return; + + printf("%s %s-", title, pchar(*start)); + printf("%s ", pchar(*stop)); + printf("%ld-%ld\n", (long)startst, (long)stopst); +} + +#ifndef PCHARDONE +#define PCHARDONE /* never again */ +/* + - pchar - make a character printable + == #ifdef REDEBUG + == static unsigned char *pchar(int ch); + == #endif + * + * Is this identical to regchar() over in debug.c? Well, yes. But a + * duplicate here avoids having a debugging-capable regexec.o tied to + * a matching debug.o, and this is convenient. It all disappears in + * the non-debug compilation anyway, so it doesn't matter much.
+ */ +static unsigned char * /* -> representation */ +pchar(ch) +int ch; +{ + static unsigned char pbuf[10]; + + if (isprint(ch) || ch == ' ') + sprintf(pbuf, "%c", ch); + else + sprintf(pbuf, "\\%o", ch); + return(pbuf); +} +#endif +#endif + +#undef matcher +#undef fast +#undef slow +#undef dissect +#undef backref +#undef step +#undef print +#undef at +#undef match diff --git a/ext/ereg/regex/engine.ih b/ext/ereg/regex/engine.ih new file mode 100644 index 0000000000..9a301838bc --- /dev/null +++ b/ext/ereg/regex/engine.ih @@ -0,0 +1,35 @@ +/* ========= begin header generated by ./mkh ========= */ +#ifdef __cplusplus +extern "C" { +#endif + +/* === engine.c === */ +static int matcher(register struct re_guts *g, unsigned char *string, size_t nmatch, regmatch_t pmatch[], int eflags); +static unsigned char *dissect(register struct match *m, unsigned char *start, unsigned char *stop, sopno startst, sopno stopst); +static unsigned char *backref(register struct match *m, unsigned char *start, unsigned char *stop, sopno startst, sopno stopst, sopno lev); +static unsigned char *fast(register struct match *m, unsigned char *start, unsigned char *stop, sopno startst, sopno stopst); +static unsigned char *slow(register struct match *m, unsigned char *start, unsigned char *stop, sopno startst, sopno stopst); +static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft); +#define BOL (OUT+1) +#define EOL (BOL+1) +#define BOLEOL (BOL+2) +#define NOTHING (BOL+3) +#define BOW (BOL+4) +#define EOW (BOL+5) +#define CODEMAX (BOL+5) /* highest code used */ +#define NONCHAR(c) ((c) > UCHAR_MAX) +#define NNONCHAR (CODEMAX-UCHAR_MAX) +#ifdef REDEBUG +static void print(struct match *m, unsigned char *caption, states st, int ch, FILE *d); +#endif +#ifdef REDEBUG +static void at(struct match *m, unsigned char *title, unsigned char *start, unsigned char *stop, sopno startst, sopno stopst); +#endif +#ifdef REDEBUG +static unsigned 
char *pchar(int ch); +#endif + +#ifdef __cplusplus +} +#endif +/* ========= end header generated by ./mkh ========= */ diff --git a/ext/ereg/regex/main.c b/ext/ereg/regex/main.c new file mode 100644 index 0000000000..657338a2c1 --- /dev/null +++ b/ext/ereg/regex/main.c @@ -0,0 +1,510 @@ +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <regex.h> +#include <assert.h> +#include <stdlib.h> + +#include "main.ih" + +char *progname; +int debug = 0; +int line = 0; +int status = 0; + +int copts = REG_EXTENDED; +int eopts = 0; +regoff_t startoff = 0; +regoff_t endoff = 0; + + +extern int split(); +extern void regprint(); + +/* + - main - do the simple case, hand off to regress() for regression + */ +int main(argc, argv) +int argc; +char *argv[]; +{ + regex_t re; +# define NS 10 + regmatch_t subs[NS]; + char erbuf[100]; + int err; + size_t len; + int c; + int errflg = 0; + register int i; + extern int optind; + extern char *optarg; + + progname = argv[0]; + + while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF) + switch (c) { + case 'c': /* compile options */ + copts = options('c', optarg); + break; + case 'e': /* execute options */ + eopts = options('e', optarg); + break; + case 'S': /* start offset */ + startoff = (regoff_t)atoi(optarg); + break; + case 'E': /* end offset */ + endoff = (regoff_t)atoi(optarg); + break; + case 'x': /* Debugging. 
*/ + debug++; + break; + case '?': + default: + errflg++; + break; + } + if (errflg) { + fprintf(stderr, "usage: %s ", progname); + fprintf(stderr, "[-c copt][-C][-d] [re]\n"); + exit(2); + } + + if (optind >= argc) { + regress(stdin); + exit(status); + } + + err = regcomp(&re, argv[optind++], copts); + if (err) { + len = regerror(err, &re, erbuf, sizeof(erbuf)); + fprintf(stderr, "error %s, %d/%d `%s'\n", + eprint(err), len, sizeof(erbuf), erbuf); + exit(status); + } + regprint(&re, stdout); + + if (optind >= argc) { + regfree(&re); + exit(status); + } + + if (eopts&REG_STARTEND) { + subs[0].rm_so = startoff; + subs[0].rm_eo = strlen(argv[optind]) - endoff; + } + err = regexec(&re, argv[optind], (size_t)NS, subs, eopts); + if (err) { + len = regerror(err, &re, erbuf, sizeof(erbuf)); + fprintf(stderr, "error %s, %d/%d `%s'\n", + eprint(err), len, sizeof(erbuf), erbuf); + exit(status); + } + if (!(copts&REG_NOSUB)) { + len = (int)(subs[0].rm_eo - subs[0].rm_so); + if (subs[0].rm_so != -1) { + if (len != 0) + printf("match `%.*s'\n", (int)len, + argv[optind] + subs[0].rm_so); + else + printf("match `'@%.1s\n", + argv[optind] + subs[0].rm_so); + } + for (i = 1; i < NS; i++) + if (subs[i].rm_so != -1) + printf("(%d) `%.*s'\n", i, + (int)(subs[i].rm_eo - subs[i].rm_so), + argv[optind] + subs[i].rm_so); + } + exit(status); +} + +/* + - regress - main loop of regression test + == void regress(FILE *in); + */ +void +regress(in) +FILE *in; +{ + char inbuf[1000]; +# define MAXF 10 + char *f[MAXF]; + int nf; + int i; + char erbuf[100]; + size_t ne; + char *badpat = "invalid regular expression"; +# define SHORT 10 + char *bpname = "REG_BADPAT"; + regex_t re; + + while (fgets(inbuf, sizeof(inbuf), in) != NULL) { + line++; + if (inbuf[0] == '#' || inbuf[0] == '\n') + continue; /* NOTE CONTINUE */ + inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */ + if (debug) + fprintf(stdout, "%d:\n", line); + nf = split(inbuf, f, MAXF, "\t\t"); + if (nf < 3) { + fprintf(stderr, "bad input,
line %d\n", line); + exit(1); + } + for (i = 0; i < nf; i++) + if (strcmp(f[i], "\"\"") == 0) + f[i] = ""; + if (nf <= 3) + f[3] = NULL; + if (nf <= 4) + f[4] = NULL; + try(f[0], f[1], f[2], f[3], f[4], options('c', f[1])); + if (opt('&', f[1])) /* try with either type of RE */ + try(f[0], f[1], f[2], f[3], f[4], + options('c', f[1]) &~ REG_EXTENDED); + } + + ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); + if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) { + fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", + erbuf, badpat); + status = 1; + } + ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT); + if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' || + ne != strlen(badpat)+1) { + fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n", + erbuf, SHORT-1, badpat); + status = 1; + } + ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); + if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) { + fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n", + erbuf, bpname); + status = 1; + } + re.re_endp = bpname; + ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf)); + if (atoi(erbuf) != (int)REG_BADPAT) { + fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n", + erbuf, (long)REG_BADPAT); + status = 1; + } else if (ne != strlen(erbuf)+1) { + fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", + erbuf, (long)REG_BADPAT); + status = 1; + } +} + +/* + - try - try it, and report on problems + == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); + */ +void +try(f0, f1, f2, f3, f4, opts) +char *f0; +char *f1; +char *f2; +char *f3; +char *f4; +int opts; /* may not match f1 */ +{ + regex_t re; +# define NSUBS 10 + regmatch_t subs[NSUBS]; +# define NSHOULD 15 + char *should[NSHOULD]; + int nshould; + char erbuf[100]; + int err; + int len; + char *type = (opts & REG_EXTENDED) ?
"ERE" : "BRE"; + register int i; + char *grump; + char f0copy[1000]; + char f2copy[1000]; + + strcpy(f0copy, f0); + re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL; + fixstr(f0copy); + err = regcomp(&re, f0copy, opts); + if (err != 0 && (!opt('C', f1) || err != efind(f2))) { + /* unexpected error or wrong error */ + len = regerror(err, &re, erbuf, sizeof(erbuf)); + fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n", + line, type, eprint(err), len, + sizeof(erbuf), erbuf); + status = 1; + } else if (err == 0 && opt('C', f1)) { + /* unexpected success */ + fprintf(stderr, "%d: %s should have given REG_%s\n", + line, type, f2); + status = 1; + err = 1; /* so we won't try regexec */ + } + + if (err != 0) { + regfree(&re); + return; + } + + strcpy(f2copy, f2); + fixstr(f2copy); + + if (options('e', f1)&REG_STARTEND) { + if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL) + fprintf(stderr, "%d: bad STARTEND syntax\n", line); + subs[0].rm_so = strchr(f2, '(') - f2 + 1; + subs[0].rm_eo = strchr(f2, ')') - f2; + } + err = regexec(&re, f2copy, NSUBS, subs, options('e', f1)); + + if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) { + /* unexpected error or wrong error */ + len = regerror(err, &re, erbuf, sizeof(erbuf)); + fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n", + line, type, eprint(err), len, + sizeof(erbuf), erbuf); + status = 1; + } else if (err != 0) { + /* nothing more to check */ + } else if (f3 == NULL) { + /* unexpected success */ + fprintf(stderr, "%d: %s exec should have failed\n", + line, type); + status = 1; + err = 1; /* just on principle */ + } else if (opts&REG_NOSUB) { + /* nothing more to check */ + } else if ((grump = check(f2, subs[0], f3)) != NULL) { + fprintf(stderr, "%d: %s %s\n", line, type, grump); + status = 1; + err = 1; + } + + if (err != 0 || f4 == NULL) { + regfree(&re); + return; + } + + for (i = 1; i < NSHOULD; i++) + should[i] = NULL; + nshould = split(f4, should+1, NSHOULD-1, ","); + if (nshould == 0) { + nshould = 1; +
should[1] = ""; + } + for (i = 1; i < NSUBS; i++) { + grump = check(f2, subs[i], should[i]); + if (grump != NULL) { + fprintf(stderr, "%d: %s $%d %s\n", line, + type, i, grump); + status = 1; + err = 1; + } + } + + regfree(&re); +} + +/* + - options - pick options out of a regression-test string + == int options(int type, char *s); + */ +int +options(type, s) +int type; /* 'c' compile, 'e' exec */ +char *s; +{ + register char *p; + register int o = (type == 'c') ? copts : eopts; + register char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; + + for (p = s; *p != '\0'; p++) + if (strchr(legal, *p) != NULL) + switch (*p) { + case 'b': + o &= ~REG_EXTENDED; + break; + case 'i': + o |= REG_ICASE; + break; + case 's': + o |= REG_NOSUB; + break; + case 'n': + o |= REG_NEWLINE; + break; + case 'm': + o &= ~REG_EXTENDED; + o |= REG_NOSPEC; + break; + case 'p': + o |= REG_PEND; + break; + case '^': + o |= REG_NOTBOL; + break; + case '$': + o |= REG_NOTEOL; + break; + case '#': + o |= REG_STARTEND; + break; + case 't': /* trace */ + o |= REG_TRACE; + break; + case 'l': /* force long representation */ + o |= REG_LARGE; + break; + case 'r': /* force backref use */ + o |= REG_BACKR; + break; + } + return(o); +} + +/* + - opt - is a particular option in a regression string? 
+ == int opt(int c, char *s); + */ +int /* predicate */ +opt(c, s) +int c; +char *s; +{ + return(strchr(s, c) != NULL); +} + +/* + - fixstr - transform magic characters in strings + == void fixstr(register char *p); + */ +void +fixstr(p) +register char *p; +{ + if (p == NULL) + return; + + for (; *p != '\0'; p++) + if (*p == 'N') + *p = '\n'; + else if (*p == 'T') + *p = '\t'; + else if (*p == 'S') + *p = ' '; + else if (*p == 'Z') + *p = '\0'; +} + +/* + - check - check a substring match + == char *check(char *str, regmatch_t sub, char *should); + */ +char * /* NULL or complaint */ +check(str, sub, should) +char *str; +regmatch_t sub; +char *should; +{ + register int len; + register int shlen; + register char *p; + static char grump[500]; + register char *at = NULL; + + if (should != NULL && strcmp(should, "-") == 0) + should = NULL; + if (should != NULL && should[0] == '@') { + at = should + 1; + should = ""; + } + + /* check rm_so and rm_eo for consistency */ + if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) || + (sub.rm_so != -1 && sub.rm_eo == -1) || + (sub.rm_so != -1 && sub.rm_so < 0) || + (sub.rm_eo != -1 && sub.rm_eo < 0) ) { + sprintf(grump, "start %ld end %ld", (long)sub.rm_so, + (long)sub.rm_eo); + return(grump); + } + + /* check for no match */ + if (sub.rm_so == -1 && should == NULL) + return(NULL); + if (sub.rm_so == -1) + return("did not match"); + + /* check for in range */ + if (sub.rm_eo > strlen(str)) { + sprintf(grump, "start %ld end %ld, past end of string", + (long)sub.rm_so, (long)sub.rm_eo); + return(grump); + } + + len = (int)(sub.rm_eo - sub.rm_so); + shlen = (int)strlen(should); + p = str + sub.rm_so; + + /* check for not supposed to match */ + if (should == NULL) { + sprintf(grump, "matched `%.*s'", len, p); + return(grump); + } + + /* check for wrong match */ + if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) { + sprintf(grump, "matched `%.*s' instead", len, p); + return(grump); + } + if (shlen > 0) + 
return(NULL); + + /* check null match in right place */ + if (at == NULL) + return(NULL); + shlen = strlen(at); + if (shlen == 0) + shlen = 1; /* force check for end-of-string */ + if (strncmp(p, at, shlen) != 0) { + sprintf(grump, "matched null at `%.20s'", p); + return(grump); + } + return(NULL); +} + +/* + - eprint - convert error number to name + == static char *eprint(int err); + */ +static char * +eprint(err) +int err; +{ + static char epbuf[100]; + size_t len; + + len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf)); + assert(len <= sizeof(epbuf)); + return(epbuf); +} + +/* + - efind - convert error name to number + == static int efind(char *name); + */ +static int +efind(name) +char *name; +{ + static char efbuf[100]; + regex_t re; + + sprintf(efbuf, "REG_%s", name); + assert(strlen(efbuf) < sizeof(efbuf)); + re.re_endp = efbuf; + (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf)); + return(atoi(efbuf)); +} diff --git a/ext/ereg/regex/main.ih b/ext/ereg/regex/main.ih new file mode 100644 index 0000000000..5a0118ac44 --- /dev/null +++ b/ext/ereg/regex/main.ih @@ -0,0 +1,19 @@ +/* ========= begin header generated by ./mkh ========= */ +#ifdef __cplusplus +extern "C" { +#endif + +/* === main.c === */ +void regress(FILE *in); +void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); +int options(int type, char *s); +int opt(int c, char *s); +void fixstr(register char *p); +char *check(char *str, regmatch_t sub, char *should); +static char *eprint(int err); +static int efind(char *name); + +#ifdef __cplusplus +} +#endif +/* ========= end header generated by ./mkh ========= */ diff --git a/ext/ereg/regex/mkh b/ext/ereg/regex/mkh new file mode 100644 index 0000000000..252b246c7b --- /dev/null +++ b/ext/ereg/regex/mkh @@ -0,0 +1,76 @@ +#! 
/bin/sh +# mkh - pull headers out of C source +PATH=/bin:/usr/bin ; export PATH + +# egrep pattern to pick out marked lines +egrep='^ =([ ]|$)' + +# Sed program to process marked lines into lines for the header file. +# The markers have already been removed. Two things are done here: removal +# of backslashed newlines, and some fudging of comments. The first is done +# because -o needs to have prototypes on one line to strip them down. +# Getting comments into the output is tricky; we turn C++-style // comments +# into /* */ comments, after altering any existing */'s to avoid trouble. +peel=' /\\$/N + /\\\n[ ]*/s///g + /\/\//s;\*/;* /;g + /\/\//s;//\(.*\);/*\1 */;' + +for a +do + case "$a" in + -o) # old (pre-function-prototype) compiler + # add code to comment out argument lists + peel="$peel + "'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1(/*\2*/);' + shift + ;; + -b) # funny Berkeley __P macro + peel="$peel + "'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1 __P((\2));' + shift + ;; + -s) # compiler doesn't like `static foo();' + # add code to get rid of the `static' + peel="$peel + "'/^static[ ][^\/]*[a-zA-Z0-9_)](.*)/s;static.;;' + shift + ;; + -p) # private declarations + egrep='^ ==([ ]|$)' + shift + ;; + -i) # wrap in #ifndef, argument is name + ifndef="$2" + shift ; shift + ;; + *) break + ;; + esac +done + +if test " $ifndef" != " " +then + echo "#ifndef $ifndef" + echo "#define $ifndef /* never again */" +fi +echo "/* ========= begin header generated by $0 ========= */" +echo '#ifdef __cplusplus' +echo 'extern "C" {' +echo '#endif' +for f +do + echo + echo "/* === $f === */" + egrep "$egrep" $f | sed 's/^ ==*[ ]//;s/^ ==*$//' | sed "$peel" + echo +done +echo '#ifdef __cplusplus' +echo '}' +echo '#endif' +echo "/* ========= end header generated by $0 ========= */" +if test " $ifndef" != " " +then + echo "#endif" +fi +exit 0 diff --git a/ext/ereg/regex/regcomp.c b/ext/ereg/regex/regcomp.c new file mode 100644 index 0000000000..156eee9329 --- /dev/null +++ 
b/ext/ereg/regex/regcomp.c @@ -0,0 +1,1613 @@ +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <limits.h> +#include <stdlib.h> + +#define POSIX_MISTAKE + +#include "utils.h" +#include "regex.h" +#include "regex2.h" + +#include "cclass.h" +#include "cname.h" + +/* + * parse structure, passed up and down to avoid global variables and + * other clumsinesses + */ +struct parse { + unsigned char *next; /* next character in RE */ + unsigned char *end; /* end of string (-> NUL normally) */ + int error; /* has an error been seen? */ + sop *strip; /* malloced strip */ + sopno ssize; /* malloced strip size (allocated) */ + sopno slen; /* malloced strip length (used) */ + int ncsalloc; /* number of csets allocated */ + struct re_guts *g; +# define NPAREN 10 /* we need to remember () 1-9 for back refs */ + sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ + sopno pend[NPAREN]; /* -> ) ([0] unused) */ +}; + +#include "regcomp.ih" + +static unsigned char nuls[10]; /* place to point scanner in event of error */ + +/* + * macros for use with parse structure + * BEWARE: these know that the parse structure is named `p' !!! + */ +#define PEEK() (*p->next) +#define PEEK2() (*(p->next+1)) +#define MORE() (p->next < p->end) +#define MORE2() (p->next+1 < p->end) +#define SEE(c) (MORE() && PEEK() == (c)) +#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) +#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) +#define EATTWO(a, b) ((SEETWO(a, b)) ? 
(NEXT2(), 1) : 0) +#define NEXT() (p->next++) +#define NEXT2() (p->next += 2) +#define NEXTn(n) (p->next += (n)) +#define GETNEXT() (*p->next++) +#define SETERROR(e) seterr(p, (e)) +#define REQUIRE(co, e) (void) ((co) || SETERROR(e)) +#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) +#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) +#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) +#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) +#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) +#define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) +#define ASTERN(sop, pos) EMIT(sop, HERE()-pos) +#define HERE() (p->slen) +#define THERE() (p->slen - 1) +#define THERETHERE() (p->slen - 2) +#define DROP(n) (p->slen -= (n)) + +#ifndef NDEBUG +static int never = 0; /* for use in asserts; shuts lint up */ +#else +#define never 0 /* some <assert.h>s have bugs too */ +#endif + +/* + - regcomp - interface for parser and compilation + = API_EXPORT(int) regcomp(regex_t *, const char *, int); + = #define REG_BASIC 0000 + = #define REG_EXTENDED 0001 + = #define REG_ICASE 0002 + = #define REG_NOSUB 0004 + = #define REG_NEWLINE 0010 + = #define REG_NOSPEC 0020 + = #define REG_PEND 0040 + = #define REG_DUMP 0200 + */ +API_EXPORT(int) /* 0 success, otherwise REG_something */ +regcomp(preg, pattern, cflags) +regex_t *preg; +const char *pattern; +int cflags; +{ + struct parse pa; + register struct re_guts *g; + register struct parse *p = &pa; + register int i; + register size_t len; +#ifdef REDEBUG +# define GOODFLAGS(f) (f) +#else +# define GOODFLAGS(f) ((f)&~REG_DUMP) +#endif + + cflags = GOODFLAGS(cflags); + if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC)) + return(REG_INVARG); + + if (cflags&REG_PEND) { + if (preg->re_endp < pattern) + return(REG_INVARG); + len = preg->re_endp - pattern; + } else + len = strlen((char *)pattern); + + /* do the mallocs early so failure handling is easy */ + g = (struct re_guts *)malloc(sizeof(struct re_guts)
+ + (NC-1)*sizeof(cat_t)); + if (g == NULL) + return(REG_ESPACE); + p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ + p->strip = (sop *)malloc(p->ssize * sizeof(sop)); + p->slen = 0; + if (p->strip == NULL) { + free((char *)g); + return(REG_ESPACE); + } + + /* set things up */ + p->g = g; + p->next = (unsigned char *)pattern; /* convenience; we do not modify it */ + p->end = p->next + len; + p->error = 0; + p->ncsalloc = 0; + for (i = 0; i < NPAREN; i++) { + p->pbegin[i] = 0; + p->pend[i] = 0; + } + g->csetsize = NC; + g->sets = NULL; + g->setbits = NULL; + g->ncsets = 0; + g->cflags = cflags; + g->iflags = 0; + g->nbol = 0; + g->neol = 0; + g->must = NULL; + g->mlen = 0; + g->nsub = 0; + g->ncategories = 1; /* category 0 is "everything else" */ + g->categories = &g->catspace[0]; + (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); + g->backrefs = 0; + + /* do it */ + EMIT(OEND, 0); + g->firststate = THERE(); + if (cflags&REG_EXTENDED) + p_ere(p, OUT); + else if (cflags&REG_NOSPEC) + p_str(p); + else + p_bre(p, OUT, OUT); + EMIT(OEND, 0); + g->laststate = THERE(); + + /* tidy up loose ends and fill things in */ + categorize(p, g); + stripsnug(p, g); + findmust(p, g); + g->nplus = pluscount(p, g); + g->magic = MAGIC2; + preg->re_nsub = g->nsub; + preg->re_g = g; + preg->re_magic = MAGIC1; +#ifndef REDEBUG + /* not debugging, so can't rely on the assert() in regexec() */ + if (g->iflags&BAD) + SETERROR(REG_ASSERT); +#endif + + /* win or lose, we're done */ + if (p->error != 0) /* lose */ + regfree(preg); + return(p->error); +} + +/* + - p_ere - ERE parser top level, concatenation and alternation + == static void p_ere(register struct parse *p, int stop); + */ +static void +p_ere(p, stop) +register struct parse *p; +int stop; /* character this ERE should end at */ +{ + register unsigned char c; + register sopno prevback = 0; + register sopno prevfwd = 0; + register sopno conc; + register int first = 1; /* is this the first alternative?
*/ + + for (;;) { + /* do a bunch of concatenated expressions */ + conc = HERE(); + while (MORE() && (c = PEEK()) != '|' && c != stop) + p_ere_exp(p); + (void) REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ + + if (!EAT('|')) + break; /* NOTE BREAK OUT */ + + if (first) { + INSERT(OCH_, conc); /* offset is wrong */ + prevfwd = conc; + prevback = conc; + first = 0; + } + ASTERN(OOR1, prevback); + prevback = THERE(); + AHEAD(prevfwd); /* fix previous offset */ + prevfwd = HERE(); + EMIT(OOR2, 0); /* offset is very wrong */ + } + + if (!first) { /* tail-end fixups */ + AHEAD(prevfwd); + ASTERN(O_CH, prevback); + } + + assert(!MORE() || SEE(stop)); +} + +/* + - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op + == static void p_ere_exp(register struct parse *p); + */ +static void +p_ere_exp(p) +register struct parse *p; +{ + register unsigned char c; + register sopno pos; + register int count; + register int count2; + register sopno subno; + int wascaret = 0; + + assert(MORE()); /* caller should have ensured this */ + c = GETNEXT(); + + pos = HERE(); + switch (c) { + case '(': + REQUIRE(MORE(), REG_EPAREN); + p->g->nsub++; + subno = p->g->nsub; + if (subno < NPAREN) + p->pbegin[subno] = HERE(); + EMIT(OLPAREN, subno); + if (!SEE(')')) + p_ere(p, ')'); + if (subno < NPAREN) { + p->pend[subno] = HERE(); + assert(p->pend[subno] != 0); + } + EMIT(ORPAREN, subno); + MUSTEAT(')', REG_EPAREN); + break; +#ifndef POSIX_MISTAKE + case ')': /* happens only if no current unmatched ( */ + /* + * You may ask, why the ifndef? Because I didn't notice + * this until slightly too late for 1003.2, and none of the + * other 1003.2 regular-expression reviewers noticed it at + * all. So an unmatched ) is legal POSIX, at least until + * we can get it fixed. 
+ */ + SETERROR(REG_EPAREN); + break; +#endif + case '^': + EMIT(OBOL, 0); + p->g->iflags |= USEBOL; + p->g->nbol++; + wascaret = 1; + break; + case '$': + EMIT(OEOL, 0); + p->g->iflags |= USEEOL; + p->g->neol++; + break; + case '|': + SETERROR(REG_EMPTY); + break; + case '*': + case '+': + case '?': + SETERROR(REG_BADRPT); + break; + case '.': + if (p->g->cflags&REG_NEWLINE) + nonnewline(p); + else + EMIT(OANY, 0); + break; + case '[': + p_bracket(p); + break; + case '\\': + REQUIRE(MORE(), REG_EESCAPE); + c = GETNEXT(); + ordinary(p, c); + break; + case '{': /* okay as ordinary except if digit follows */ + REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); + /* FALLTHROUGH */ + default: + ordinary(p, c); + break; + } + + if (!MORE()) + return; + c = PEEK(); + /* we call { a repetition if followed by a digit */ + if (!( c == '*' || c == '+' || c == '?' || + (c == '{' && MORE2() && isdigit(PEEK2())) )) + return; /* no repetition, we're done */ + NEXT(); + + REQUIRE(!wascaret, REG_BADRPT); + switch (c) { + case '*': /* implemented as +? */ + /* this case does not require the (y|) trick, noKLUDGE */ + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + INSERT(OQUEST_, pos); + ASTERN(O_QUEST, pos); + break; + case '+': + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + break; + case '?': + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, pos); /* offset slightly wrong */ + ASTERN(OOR1, pos); /* this one's right */ + AHEAD(pos); /* fix the OCH_ */ + EMIT(OOR2, 0); /* offset very wrong...
*/ + AHEAD(THERE()); /* ...so fix it */ + ASTERN(O_CH, THERETHERE()); + break; + case '{': + count = p_count(p); + if (EAT(',')) { + if (isdigit(PEEK())) { + count2 = p_count(p); + REQUIRE(count <= count2, REG_BADBR); + } else /* single number with comma */ + count2 = INFINITY; + } else /* just a single number */ + count2 = count; + repeat(p, pos, count, count2); + if (!EAT('}')) { /* error heuristics */ + while (MORE() && PEEK() != '}') + NEXT(); + REQUIRE(MORE(), REG_EBRACE); + SETERROR(REG_BADBR); + } + break; + } + + if (!MORE()) + return; + c = PEEK(); + if (!( c == '*' || c == '+' || c == '?' || + (c == '{' && MORE2() && isdigit(PEEK2())) ) ) + return; + SETERROR(REG_BADRPT); +} + +/* + - p_str - string (no metacharacters) "parser" + == static void p_str(register struct parse *p); + */ +static void +p_str(p) +register struct parse *p; +{ + REQUIRE(MORE(), REG_EMPTY); + while (MORE()) + ordinary(p, GETNEXT()); +} + +/* + - p_bre - BRE parser top level, anchoring and concatenation + == static void p_bre(register struct parse *p, register int end1, \ + == register int end2); + * Giving end1 as OUT essentially eliminates the end1/end2 check. + * + * This implementation is a bit of a kludge, in that a trailing $ is first + * taken as an ordinary character and then revised to be an anchor. The + * only undesirable side effect is that '$' gets included as a character + * category in such cases. This is fairly harmless; not worth fixing. + * The amount of lookahead needed to avoid this kludge is excessive. + */ +static void +p_bre(p, end1, end2) +register struct parse *p; +register int end1; /* first terminating character */ +register int end2; /* second terminating character */ +{ + register sopno start = HERE(); + register int first = 1; /* first subexpression? 
*/ + register int wasdollar = 0; + + if (EAT('^')) { + EMIT(OBOL, 0); + p->g->iflags |= USEBOL; + p->g->nbol++; + } + while (MORE() && !SEETWO(end1, end2)) { + wasdollar = p_simp_re(p, first); + first = 0; + } + if (wasdollar) { /* oops, that was a trailing anchor */ + DROP(1); + EMIT(OEOL, 0); + p->g->iflags |= USEEOL; + p->g->neol++; + } + + REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ +} + +/* + - p_simp_re - parse a simple RE, an atom possibly followed by a repetition + == static int p_simp_re(register struct parse *p, int starordinary); + */ +static int /* was the simple RE an unbackslashed $? */ +p_simp_re(p, starordinary) +register struct parse *p; +int starordinary; /* is a leading * an ordinary character? */ +{ + register int c; + register int count; + register int count2; + register sopno pos; + register int i; + register sopno subno; +# define BACKSL (1<<CHAR_BIT) + + pos = HERE(); /* repetion op, if any, covers from here */ + + assert(MORE()); /* caller should have ensured this */ + c = GETNEXT(); + if (c == '\\') { + REQUIRE(MORE(), REG_EESCAPE); + c = BACKSL | (unsigned char)GETNEXT(); + } + switch (c) { + case '.': + if (p->g->cflags&REG_NEWLINE) + nonnewline(p); + else + EMIT(OANY, 0); + break; + case '[': + p_bracket(p); + break; + case BACKSL|'{': + SETERROR(REG_BADRPT); + break; + case BACKSL|'(': + p->g->nsub++; + subno = p->g->nsub; + if (subno < NPAREN) + p->pbegin[subno] = HERE(); + EMIT(OLPAREN, subno); + /* the MORE here is an error heuristic */ + if (MORE() && !SEETWO('\\', ')')) + p_bre(p, '\\', ')'); + if (subno < NPAREN) { + p->pend[subno] = HERE(); + assert(p->pend[subno] != 0); + } + EMIT(ORPAREN, subno); + REQUIRE(EATTWO('\\', ')'), REG_EPAREN); + break; + case BACKSL|')': /* should not get here -- must be user */ + case BACKSL|'}': + SETERROR(REG_EPAREN); + break; + case BACKSL|'1': + case BACKSL|'2': + case BACKSL|'3': + case BACKSL|'4': + case BACKSL|'5': + case BACKSL|'6': + case BACKSL|'7': + case BACKSL|'8': + case 
BACKSL|'9': + i = (c&~BACKSL) - '0'; + assert(i < NPAREN); + if (p->pend[i] != 0) { + assert(i <= p->g->nsub); + EMIT(OBACK_, i); + assert(p->pbegin[i] != 0); + assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); + assert(OP(p->strip[p->pend[i]]) == ORPAREN); + (void) dupl(p, p->pbegin[i]+1, p->pend[i]); + EMIT(O_BACK, i); + } else + SETERROR(REG_ESUBREG); + p->g->backrefs = 1; + break; + case '*': + REQUIRE(starordinary, REG_BADRPT); + /* FALLTHROUGH */ + default: + ordinary(p, (unsigned char)c); /* takes off BACKSL, if any */ + break; + } + + if (EAT('*')) { /* implemented as +? */ + /* this case does not require the (y|) trick, noKLUDGE */ + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + INSERT(OQUEST_, pos); + ASTERN(O_QUEST, pos); + } else if (EATTWO('\\', '{')) { + count = p_count(p); + if (EAT(',')) { + if (MORE() && isdigit(PEEK())) { + count2 = p_count(p); + REQUIRE(count <= count2, REG_BADBR); + } else /* single number with comma */ + count2 = INFINITY; + } else /* just a single number */ + count2 = count; + repeat(p, pos, count, count2); + if (!EATTWO('\\', '}')) { /* error heuristics */ + while (MORE() && !SEETWO('\\', '}')) + NEXT(); + REQUIRE(MORE(), REG_EBRACE); + SETERROR(REG_BADBR); + } + } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ + return(1); + + return(0); +} + +/* + - p_count - parse a repetition count + == static int p_count(register struct parse *p); + */ +static int /* the value */ +p_count(p) +register struct parse *p; +{ + register int count = 0; + register int ndigits = 0; + + while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { + count = count*10 + (GETNEXT() - '0'); + ndigits++; + } + + REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); + return(count); +} + +/* + - p_bracket - parse a bracketed character list + == static void p_bracket(register struct parse *p); + * + * Note a significant property of this code: if the allocset() did SETERROR, + * no set operations are done. 
+ */ +static void +p_bracket(p) +register struct parse *p; +{ + register cset *cs = allocset(p); + register int invert = 0; + + /* Dept of Truly Sickening Special-Case Kludges */ + if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { + EMIT(OBOW, 0); + NEXTn(6); + return; + } + if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { + EMIT(OEOW, 0); + NEXTn(6); + return; + } + + if (EAT('^')) + invert++; /* make note to invert set at end */ + if (EAT(']')) + CHadd(cs, ']'); + else if (EAT('-')) + CHadd(cs, '-'); + while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) + p_b_term(p, cs); + if (EAT('-')) + CHadd(cs, '-'); + MUSTEAT(']', REG_EBRACK); + + if (p->error != 0) /* don't mess things up further */ + return; + + if (p->g->cflags&REG_ICASE) { + register int i; + register int ci; + + for (i = p->g->csetsize - 1; i >= 0; i--) + if (CHIN(cs, i) && isalpha(i)) { + ci = othercase(i); + if (ci != i) + CHadd(cs, ci); + } + if (cs->multis != NULL) + mccase(p, cs); + } + if (invert) { + register int i; + + for (i = p->g->csetsize - 1; i >= 0; i--) + if (CHIN(cs, i)) + CHsub(cs, i); + else + CHadd(cs, i); + if (p->g->cflags&REG_NEWLINE) + CHsub(cs, '\n'); + if (cs->multis != NULL) + mcinvert(p, cs); + } + + assert(cs->multis == NULL); /* xxx */ + + if (nch(p, cs) == 1) { /* optimize singleton sets */ + ordinary(p, firstch(p, cs)); + freeset(p, cs); + } else + EMIT(OANYOF, freezeset(p, cs)); +} + +/* + - p_b_term - parse one term of a bracketed character list + == static void p_b_term(register struct parse *p, register cset *cs); + */ +static void +p_b_term(p, cs) +register struct parse *p; +register cset *cs; +{ + register unsigned char c; + register unsigned char start, finish; + register int i; + + /* classify what we've got */ + switch ((MORE()) ? PEEK() : '\0') { + case '[': + c = (MORE2()) ? 
PEEK2() : '\0'; + break; + case '-': + SETERROR(REG_ERANGE); + return; /* NOTE RETURN */ + break; + default: + c = '\0'; + break; + } + + switch (c) { + case ':': /* character class */ + NEXT2(); + REQUIRE(MORE(), REG_EBRACK); + c = PEEK(); + REQUIRE(c != '-' && c != ']', REG_ECTYPE); + p_b_cclass(p, cs); + REQUIRE(MORE(), REG_EBRACK); + REQUIRE(EATTWO(':', ']'), REG_ECTYPE); + break; + case '=': /* equivalence class */ + NEXT2(); + REQUIRE(MORE(), REG_EBRACK); + c = PEEK(); + REQUIRE(c != '-' && c != ']', REG_ECOLLATE); + p_b_eclass(p, cs); + REQUIRE(MORE(), REG_EBRACK); + REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); + break; + default: /* symbol, ordinary character, or range */ +/* xxx revision needed for multichar stuff */ + start = p_b_symbol(p); + if (SEE('-') && MORE2() && PEEK2() != ']') { + /* range */ + NEXT(); + if (EAT('-')) + finish = '-'; + else + finish = p_b_symbol(p); + } else + finish = start; +/* xxx what about signed chars here... */ + REQUIRE(start <= finish, REG_ERANGE); + for (i = start; i <= finish; i++) + CHadd(cs, i); + break; + } +} + +/* + - p_b_cclass - parse a character-class name and deal with it + == static void p_b_cclass(register struct parse *p, register cset *cs); + */ +static void +p_b_cclass(p, cs) +register struct parse *p; +register cset *cs; +{ + register unsigned char *sp = p->next; + register const struct cclass *cp; + register size_t len; + register const unsigned char *u; + register unsigned char c; + + while (MORE() && isalpha(PEEK())) + NEXT(); + len = p->next - sp; + for (cp = cclasses; cp->name != NULL; cp++) + if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') + break; + if (cp->name == NULL) { + /* oops, didn't find it */ + SETERROR(REG_ECTYPE); + return; + } + + u = cp->chars; + while ((c = *u++) != '\0') + CHadd(cs, c); + for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) + MCadd(p, cs, u); +} + +/* + - p_b_eclass - parse an equivalence-class name and deal with it + == static void p_b_eclass(register 
struct parse *p, register cset *cs); + * + * This implementation is incomplete. xxx + */ +static void +p_b_eclass(p, cs) +register struct parse *p; +register cset *cs; +{ + register unsigned char c; + + c = p_b_coll_elem(p, '='); + CHadd(cs, c); +} + +/* + - p_b_symbol - parse a character or [..]ed multicharacter collating symbol + == static char p_b_symbol(register struct parse *p); + */ +static unsigned char /* value of symbol */ +p_b_symbol(p) +register struct parse *p; +{ + register unsigned char value; + + REQUIRE(MORE(), REG_EBRACK); + if (!EATTWO('[', '.')) + return(GETNEXT()); + + /* collating symbol */ + value = p_b_coll_elem(p, '.'); + REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); + return(value); +} + +/* + - p_b_coll_elem - parse a collating-element name and look it up + == static char p_b_coll_elem(register struct parse *p, int endc); + */ +static unsigned char /* value of collating element */ +p_b_coll_elem(p, endc) +register struct parse *p; +int endc; /* name ended by endc,']' */ +{ + register unsigned char *sp = p->next; + register const struct cname *cp; + register int len; + + while (MORE() && !SEETWO(endc, ']')) + NEXT(); + if (!MORE()) { + SETERROR(REG_EBRACK); + return(0); + } + len = p->next - sp; + for (cp = cnames; cp->name != NULL; cp++) + if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') + return(cp->code); /* known name */ + if (len == 1) + return(*sp); /* single character */ + SETERROR(REG_ECOLLATE); /* neither */ + return(0); +} + +/* + - othercase - return the case counterpart of an alphabetic + == static char othercase(int ch); + */ +static unsigned char /* if no counterpart, return ch */ +othercase(ch) +int ch; +{ + assert(isalpha(ch)); + if (isupper(ch)) + return(tolower(ch)); + else if (islower(ch)) + return(toupper(ch)); + else /* peculiar, but could happen */ + return(ch); +} + +/* + - bothcases - emit a dualcase version of a two-case character + == static void bothcases(register struct parse *p, int ch); + * + * Boy, is 
this implementation ever a kludge... + */ +static void +bothcases(p, ch) +register struct parse *p; +int ch; +{ + register unsigned char *oldnext = p->next; + register unsigned char *oldend = p->end; + unsigned char bracket[3]; + + assert(othercase(ch) != ch); /* p_bracket() would recurse */ + p->next = bracket; + p->end = bracket+2; + bracket[0] = ch; + bracket[1] = ']'; + bracket[2] = '\0'; + p_bracket(p); + assert(p->next == bracket+2); + p->next = oldnext; + p->end = oldend; +} + +/* + - ordinary - emit an ordinary character + == static void ordinary(register struct parse *p, register int ch); + */ +static void +ordinary(p, ch) +register struct parse *p; +register int ch; +{ + register cat_t *cap = p->g->categories; + + if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch) + bothcases(p, ch); + else { + EMIT(OCHAR, (unsigned char)ch); + if (cap[ch] == 0) + cap[ch] = p->g->ncategories++; + } +} + +/* + - nonnewline - emit REG_NEWLINE version of OANY + == static void nonnewline(register struct parse *p); + * + * Boy, is this implementation ever a kludge... 
+ */ +static void +nonnewline(p) +register struct parse *p; +{ + register unsigned char *oldnext = p->next; + register unsigned char *oldend = p->end; + unsigned char bracket[4]; + + p->next = bracket; + p->end = bracket+3; + bracket[0] = '^'; + bracket[1] = '\n'; + bracket[2] = ']'; + bracket[3] = '\0'; + p_bracket(p); + assert(p->next == bracket+3); + p->next = oldnext; + p->end = oldend; +} + +/* + - repeat - generate code for a bounded repetition, recursively if needed + == static void repeat(register struct parse *p, sopno start, int from, int to); + */ +static void +repeat(p, start, from, to) +register struct parse *p; +sopno start; /* operand from here to end of strip */ +int from; /* repeated from this number */ +int to; /* to this number of times (maybe INFINITY) */ +{ + register sopno finish = HERE(); +# define N 2 +# define INF 3 +# define REP(f, t) ((f)*8 + (t)) +# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) + register sopno copy; + + if (p->error != 0) /* head off possible runaway recursion */ + return; + + assert(from <= to); + + switch (REP(MAP(from), MAP(to))) { + case REP(0, 0): /* must be user doing this */ + DROP(finish-start); /* drop the operand */ + break; + case REP(0, 1): /* as x{1,1}? */ + case REP(0, N): /* as x{1,n}? */ + case REP(0, INF): /* as x{1,}? */ + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, start); /* offset is wrong... */ + repeat(p, start+1, 1, to); + ASTERN(OOR1, start); + AHEAD(start); /* ... fix it */ + EMIT(OOR2, 0); + AHEAD(THERE()); + ASTERN(O_CH, THERETHERE()); + break; + case REP(1, 1): /* trivial case */ + /* done */ + break; + case REP(1, N): /* as x?x{1,n-1} */ + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, start); + ASTERN(OOR1, start); + AHEAD(start); + EMIT(OOR2, 0); /* offset very wrong... 
*/ + AHEAD(THERE()); /* ...so fix it */ + ASTERN(O_CH, THERETHERE()); + copy = dupl(p, start+1, finish+1); + assert(copy == finish+4); + repeat(p, copy, 1, to-1); + break; + case REP(1, INF): /* as x+ */ + INSERT(OPLUS_, start); + ASTERN(O_PLUS, start); + break; + case REP(N, N): /* as xx{m-1,n-1} */ + copy = dupl(p, start, finish); + repeat(p, copy, from-1, to-1); + break; + case REP(N, INF): /* as xx{n-1,INF} */ + copy = dupl(p, start, finish); + repeat(p, copy, from-1, to); + break; + default: /* "can't happen" */ + SETERROR(REG_ASSERT); /* just in case */ + break; + } +} + +/* + - seterr - set an error condition + == static int seterr(register struct parse *p, int e); + */ +static int /* useless but makes type checking happy */ +seterr(p, e) +register struct parse *p; +int e; +{ + if (p->error == 0) /* keep earliest error condition */ + p->error = e; + p->next = nuls; /* try to bring things to a halt */ + p->end = nuls; + return(0); /* make the return value well-defined */ +} + +/* + - allocset - allocate a set of characters for [] + == static cset *allocset(register struct parse *p); + */ +static cset * +allocset(p) +register struct parse *p; +{ + register int no = p->g->ncsets++; + register size_t nc; + register size_t nbytes; + register cset *cs; + register size_t css = (size_t)p->g->csetsize; + register int i; + + if (no >= p->ncsalloc) { /* need another column of space */ + p->ncsalloc += CHAR_BIT; + nc = p->ncsalloc; + assert(nc % CHAR_BIT == 0); + nbytes = nc / CHAR_BIT * css; + if (p->g->sets == NULL) + p->g->sets = (cset *)malloc(nc * sizeof(cset)); + else + p->g->sets = (cset *)realloc((unsigned char *)p->g->sets, + nc * sizeof(cset)); + if (p->g->setbits == NULL) + p->g->setbits = (uch *)malloc(nbytes); + else { + p->g->setbits = (uch *)realloc((unsigned char *)p->g->setbits, + nbytes); + /* xxx this isn't right if setbits is now NULL */ + for (i = 0; i < no; i++) + p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); + } + if (p->g->sets != NULL && 
p->g->setbits != NULL) + (void) memset((unsigned char *)p->g->setbits + (nbytes - css), + 0, css); + else { + no = 0; + SETERROR(REG_ESPACE); + /* caller's responsibility not to do set ops */ + } + } + + assert(p->g->sets != NULL); /* xxx */ + cs = &p->g->sets[no]; + cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); + cs->mask = 1 << ((no) % CHAR_BIT); + cs->hash = 0; + cs->smultis = 0; + cs->multis = NULL; + + return(cs); +} + +/* + - freeset - free a now-unused set + == static void freeset(register struct parse *p, register cset *cs); + */ +static void +freeset(p, cs) +register struct parse *p; +register cset *cs; +{ + register size_t i; + register cset *top = &p->g->sets[p->g->ncsets]; + register size_t css = (size_t)p->g->csetsize; + + for (i = 0; i < css; i++) + CHsub(cs, i); + if (cs == top-1) /* recover only the easy case */ + p->g->ncsets--; +} + +/* + - freezeset - final processing on a set of characters + == static int freezeset(register struct parse *p, register cset *cs); + * + * The main task here is merging identical sets. This is usually a waste + * of time (although the hash code minimizes the overhead), but can win + * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash + * is done using addition rather than xor -- all ASCII [aA] sets xor to + * the same value! 
+ */ +static int /* set number */ +freezeset(p, cs) +register struct parse *p; +register cset *cs; +{ + register uch h = cs->hash; + register size_t i; + register cset *top = &p->g->sets[p->g->ncsets]; + register cset *cs2; + register size_t css = (size_t)p->g->csetsize; + + /* look for an earlier one which is the same */ + for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) + if (cs2->hash == h && cs2 != cs) { + /* maybe */ + for (i = 0; i < css; i++) + if (!!CHIN(cs2, i) != !!CHIN(cs, i)) + break; /* no */ + if (i == css) + break; /* yes */ + } + + if (cs2 < top) { /* found one */ + freeset(p, cs); + cs = cs2; + } + + return((int)(cs - p->g->sets)); +} + +/* + - firstch - return first character in a set (which must have at least one) + == static int firstch(register struct parse *p, register cset *cs); + */ +static int /* character; there is no "none" value */ +firstch(p, cs) +register struct parse *p; +register cset *cs; +{ + register size_t i; + register size_t css = (size_t)p->g->csetsize; + + for (i = 0; i < css; i++) + if (CHIN(cs, i)) + return((unsigned char)i); + assert(never); + return(0); /* arbitrary */ +} + +/* + - nch - number of characters in a set + == static int nch(register struct parse *p, register cset *cs); + */ +static int +nch(p, cs) +register struct parse *p; +register cset *cs; +{ + register size_t i; + register size_t css = (size_t)p->g->csetsize; + register int n = 0; + + for (i = 0; i < css; i++) + if (CHIN(cs, i)) + n++; + return(n); +} + +/* + - mcadd - add a collating element to a cset + == static void mcadd(register struct parse *p, register cset *cs, \ + == register char *cp); + */ +static void +mcadd(p, cs, cp) +register struct parse *p; +register cset *cs; +register const unsigned char *cp; +{ + register size_t oldend = cs->smultis; + + cs->smultis += strlen(cp) + 1; + if (cs->multis == NULL) + cs->multis = malloc(cs->smultis); + else + cs->multis = realloc(cs->multis, cs->smultis); + if (cs->multis == NULL) { + SETERROR(REG_ESPACE); + 
return; + } + + (void) strcpy(cs->multis + oldend - 1, cp); + cs->multis[cs->smultis - 1] = '\0'; +} + +#if 0 +/* + - mcsub - subtract a collating element from a cset + == static void mcsub(register cset *cs, register unsigned char *cp); + */ +static void +mcsub(cs, cp) +register unsigned cset *cs; +register unsigned char *cp; +{ + register unsigned char *fp = mcfind(cs, cp); + register size_t len = strlen(fp); + + assert(fp != NULL); + (void) memmove(fp, fp + len + 1, + cs->smultis - (fp + len + 1 - cs->multis)); + cs->smultis -= len; + + if (cs->smultis == 0) { + free(cs->multis); + cs->multis = NULL; + return; + } + + cs->multis = realloc(cs->multis, cs->smultis); + assert(cs->multis != NULL); +} + +/* + - mcin - is a collating element in a cset? + == static int mcin(register cset *cs, register unsigned char *cp); + */ +static int +mcin(cs, cp) +register cset *cs; +register unsigned char *cp; +{ + return(mcfind(cs, cp) != NULL); +} + + +/* + - mcfind - find a collating element in a cset + == static unsigned char *mcfind(register cset *cs, register unsigned char *cp); + */ +static unsigned char * +mcfind(cs, cp) +register cset *cs; +register unsigned char *cp; +{ + register unsigned char *p; + + if (cs->multis == NULL) + return(NULL); + for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) + if (strcmp(cp, p) == 0) + return(p); + return(NULL); +} +#endif + +/* + - mcinvert - invert the list of collating elements in a cset + == static void mcinvert(register struct parse *p, register cset *cs); + * + * This would have to know the set of possibilities. Implementation + * is deferred. + */ +static void +mcinvert(p, cs) +register struct parse *p; +register cset *cs; +{ + assert(cs->multis == NULL); /* xxx */ +} + +/* + - mccase - add case counterparts of the list of collating elements in a cset + == static void mccase(register struct parse *p, register cset *cs); + * + * This would have to know the set of possibilities. Implementation + * is deferred. 
+ */ +static void +mccase(p, cs) +register struct parse *p; +register cset *cs; +{ + assert(cs->multis == NULL); /* xxx */ +} + +/* + - isinsets - is this character in any sets? + == static int isinsets(register struct re_guts *g, int c); + */ +static int /* predicate */ +isinsets(g, c) +register struct re_guts *g; +int c; +{ + register uch *col; + register int i; + register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; + register unsigned uc = (unsigned char)c; + + for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) + if (col[uc] != 0) + return(1); + return(0); +} + +/* + - samesets - are these two characters in exactly the same sets? + == static int samesets(register struct re_guts *g, int c1, int c2); + */ +static int /* predicate */ +samesets(g, c1, c2) +register struct re_guts *g; +int c1; +int c2; +{ + register uch *col; + register int i; + register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; + register unsigned uc1 = (unsigned char)c1; + register unsigned uc2 = (unsigned char)c2; + + for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) + if (col[uc1] != col[uc2]) + return(0); + return(1); +} + +/* + - categorize - sort out character categories + == static void categorize(struct parse *p, register struct re_guts *g); + */ +static void +categorize(p, g) +struct parse *p; +register struct re_guts *g; +{ + register cat_t *cats = g->categories; + register int c; + register int c2; + register cat_t cat; + + /* avoid making error situations worse */ + if (p->error != 0) + return; + + for (c = 0; c <= UCHAR_MAX; c++) + if (cats[c] == 0 && isinsets(g, c)) { + cat = g->ncategories++; + cats[c] = cat; + for (c2 = c+1; c2 <= UCHAR_MAX; c2++) + if (cats[c2] == 0 && samesets(g, c, c2)) + cats[c2] = cat; + } +} + +/* + - dupl - emit a duplicate of a bunch of sops + == static sopno dupl(register struct parse *p, sopno start, sopno finish); + */ +static sopno /* start of duplicate */ +dupl(p, start, finish) +register struct parse *p; +sopno 
start; /* from here */ +sopno finish; /* to this less one */ +{ + register sopno ret = HERE(); + register sopno len = finish - start; + + assert(finish >= start); + if (len == 0) + return(ret); + enlarge(p, p->ssize + len); /* this many unexpected additions */ + assert(p->ssize >= p->slen + len); + (void) memcpy((char *)(p->strip + p->slen), + (char *)(p->strip + start), (size_t)len*sizeof(sop)); + p->slen += len; + return(ret); +} + +/* + - doemit - emit a strip operator + == static void doemit(register struct parse *p, sop op, size_t opnd); + * + * It might seem better to implement this as a macro with a function as + * hard-case backup, but it's just too big and messy unless there are + * some changes to the data structures. Maybe later. + */ +static void +doemit(p, op, opnd) +register struct parse *p; +sop op; +size_t opnd; +{ + /* avoid making error situations worse */ + if (p->error != 0) + return; + + /* deal with oversize operands ("can't happen", more or less) */ + assert(opnd < 1<<OPSHIFT); + + /* deal with undersized strip */ + if (p->slen >= p->ssize) + enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ + assert(p->slen < p->ssize); + + /* finally, it's all reduced to the easy case */ + p->strip[p->slen++] = SOP(op, opnd); +} + +/* + - doinsert - insert a sop into the strip + == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); + */ +static void +doinsert(p, op, opnd, pos) +register struct parse *p; +sop op; +size_t opnd; +sopno pos; +{ + register sopno sn; + register sop s; + register int i; + + /* avoid making error situations worse */ + if (p->error != 0) + return; + + sn = HERE(); + EMIT(op, opnd); /* do checks, ensure space */ + assert(HERE() == sn+1); + s = p->strip[sn]; + + /* adjust paren pointers */ + assert(pos > 0); + for (i = 1; i < NPAREN; i++) { + if (p->pbegin[i] >= pos) { + p->pbegin[i]++; + } + if (p->pend[i] >= pos) { + p->pend[i]++; + } + } + + memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], + 
(HERE()-pos-1)*sizeof(sop)); + p->strip[pos] = s; +} + +/* + - dofwd - complete a forward reference + == static void dofwd(register struct parse *p, sopno pos, sop value); + */ +static void +dofwd(p, pos, value) +register struct parse *p; +register sopno pos; +sop value; +{ + /* avoid making error situations worse */ + if (p->error != 0) + return; + + assert(value < 1<<OPSHIFT); + p->strip[pos] = OP(p->strip[pos]) | value; +} + +/* + - enlarge - enlarge the strip + == static void enlarge(register struct parse *p, sopno size); + */ +static void +enlarge(p, size) +register struct parse *p; +register sopno size; +{ + register sop *sp; + + if (p->ssize >= size) + return; + + sp = (sop *)realloc(p->strip, size*sizeof(sop)); + if (sp == NULL) { + SETERROR(REG_ESPACE); + return; + } + p->strip = sp; + p->ssize = size; +} + +/* + - stripsnug - compact the strip + == static void stripsnug(register struct parse *p, register struct re_guts *g); + */ +static void +stripsnug(p, g) +register struct parse *p; +register struct re_guts *g; +{ + g->nstates = p->slen; + g->strip = (sop *)realloc((unsigned char *)p->strip, p->slen * sizeof(sop)); + if (g->strip == NULL) { + SETERROR(REG_ESPACE); + g->strip = p->strip; + } +} + +/* + - findmust - fill in must and mlen with longest mandatory literal string + == static void findmust(register struct parse *p, register struct re_guts *g); + * + * This algorithm could do fancy things like analyzing the operands of | + * for common subsequences. Someday. This code is simple and finds most + * of the interesting cases. + * + * Note that must and mlen got initialized during setup. 
+ */ +static void +findmust(p, g) +struct parse *p; +register struct re_guts *g; +{ + register sop *scan; + sop *start = NULL; + register sop *newstart = NULL; + register sopno newlen; + register sop s; + register unsigned char *cp; + register sopno i; + + /* avoid making error situations worse */ + if (p->error != 0) + return; + + /* find the longest OCHAR sequence in strip */ + newlen = 0; + scan = g->strip + 1; + do { + s = *scan++; + switch (OP(s)) { + case OCHAR: /* sequence member */ + if (newlen == 0) /* new sequence */ + newstart = scan - 1; + newlen++; + break; + case OPLUS_: /* things that don't break one */ + case OLPAREN: + case ORPAREN: + break; + case OQUEST_: /* things that must be skipped */ + case OCH_: + scan--; + do { + scan += OPND(s); + s = *scan; + /* assert() interferes w debug printouts */ + if (OP(s) != O_QUEST && OP(s) != O_CH && + OP(s) != OOR2) { + g->iflags |= BAD; + return; + } + } while (OP(s) != O_QUEST && OP(s) != O_CH); + /* fallthrough */ + default: /* things that break a sequence */ + if (newlen > g->mlen) { /* ends one */ + start = newstart; + g->mlen = newlen; + } + newlen = 0; + break; + } + } while (OP(s) != OEND); + + if (g->mlen == 0) /* there isn't one */ + return; + + if (!start) { + g->mlen = 0; + return; + } + + /* turn it into a character string */ + g->must = malloc((size_t)g->mlen + 1); + if (g->must == NULL) { /* argh; just forget it */ + g->mlen = 0; + return; + } + cp = g->must; + scan = start; + for (i = g->mlen; i > 0; i--) { + while (OP(s = *scan++) != OCHAR) + continue; + assert(cp < g->must + g->mlen); + *cp++ = (unsigned char)OPND(s); + } + assert(cp == g->must + g->mlen); + *cp++ = '\0'; /* just on general principles */ +} + +/* + - pluscount - count + nesting + == static sopno pluscount(register struct parse *p, register struct re_guts *g); + */ +static sopno /* nesting depth */ +pluscount(p, g) +struct parse *p; +register struct re_guts *g; +{ + register sop *scan; + register sop s; + register sopno 
plusnest = 0; + register sopno maxnest = 0; + + if (p->error != 0) + return(0); /* there may not be an OEND */ + + scan = g->strip + 1; + do { + s = *scan++; + switch (OP(s)) { + case OPLUS_: + plusnest++; + break; + case O_PLUS: + if (plusnest > maxnest) + maxnest = plusnest; + plusnest--; + break; + } + } while (OP(s) != OEND); + if (plusnest != 0) + g->iflags |= BAD; + return(maxnest); +} diff --git a/ext/ereg/regex/regcomp.ih b/ext/ereg/regex/regcomp.ih new file mode 100644 index 0000000000..606948131b --- /dev/null +++ b/ext/ereg/regex/regcomp.ih @@ -0,0 +1,53 @@ +/* ========= begin header generated by ./mkh ========= */ +#ifdef __cplusplus +extern "C" { +#endif + +/* === regcomp.c === */ +static void p_ere(register struct parse *p, int stop); +static void p_ere_exp(register struct parse *p); +static void p_str(register struct parse *p); +static void p_bre(register struct parse *p, register int end1, register int end2); +static int p_simp_re(register struct parse *p, int starordinary); +static int p_count(register struct parse *p); +static void p_bracket(register struct parse *p); +static void p_b_term(register struct parse *p, register cset *cs); +static void p_b_cclass(register struct parse *p, register cset *cs); +static void p_b_eclass(register struct parse *p, register cset *cs); +static unsigned char p_b_symbol(register struct parse *p); +static unsigned char p_b_coll_elem(register struct parse *p, int endc); +static unsigned char othercase(int ch); +static void bothcases(register struct parse *p, int ch); +static void ordinary(register struct parse *p, register int ch); +static void nonnewline(register struct parse *p); +static void repeat(register struct parse *p, sopno start, int from, int to); +static int seterr(register struct parse *p, int e); +static cset *allocset(register struct parse *p); +static void freeset(register struct parse *p, register cset *cs); +static int freezeset(register struct parse *p, register cset *cs); +static int 
firstch(register struct parse *p, register cset *cs); +static int nch(register struct parse *p, register cset *cs); +static void mcadd(register struct parse *p, register cset *cs, register const unsigned char *cp); +#if 0 +static void mcsub(register cset *cs, register unsigned char *cp); +static int mcin(register cset *cs, register unsigned char *cp); +static unsigned char *mcfind(register cset *cs, register unsigned char *cp); +#endif +static void mcinvert(register struct parse *p, register cset *cs); +static void mccase(register struct parse *p, register cset *cs); +static int isinsets(register struct re_guts *g, int c); +static int samesets(register struct re_guts *g, int c1, int c2); +static void categorize(struct parse *p, register struct re_guts *g); +static sopno dupl(register struct parse *p, sopno start, sopno finish); +static void doemit(register struct parse *p, sop op, size_t opnd); +static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); +static void dofwd(register struct parse *p, sopno pos, sop value); +static void enlarge(register struct parse *p, sopno size); +static void stripsnug(register struct parse *p, register struct re_guts *g); +static void findmust(register struct parse *p, register struct re_guts *g); +static sopno pluscount(register struct parse *p, register struct re_guts *g); + +#ifdef __cplusplus +} +#endif +/* ========= end header generated by ./mkh ========= */ diff --git a/ext/ereg/regex/regerror.c b/ext/ereg/regex/regerror.c new file mode 100644 index 0000000000..7bf741967d --- /dev/null +++ b/ext/ereg/regex/regerror.c @@ -0,0 +1,130 @@ +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <limits.h> +#include <stdlib.h> + +#include "regex.h" +#include "utils.h" +#include "regerror.ih" +#include "php.h" + +/* + = #define REG_OKAY 0 + = #define REG_NOMATCH 1 + = #define REG_BADPAT 2 + = #define REG_ECOLLATE 3 + = #define REG_ECTYPE 4 + = #define REG_EESCAPE 5 + = 
#define REG_ESUBREG 6 + = #define REG_EBRACK 7 + = #define REG_EPAREN 8 + = #define REG_EBRACE 9 + = #define REG_BADBR 10 + = #define REG_ERANGE 11 + = #define REG_ESPACE 12 + = #define REG_BADRPT 13 + = #define REG_EMPTY 14 + = #define REG_ASSERT 15 + = #define REG_INVARG 16 + = #define REG_ATOI 255 // convert name to number (!) + = #define REG_ITOA 0400 // convert number to name (!) + */ +static const struct rerr { + int code; + const char *name; + const char *explain; +} rerrs[] = { + {REG_OKAY, "REG_OKAY", "no errors detected"}, + {REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match"}, + {REG_BADPAT, "REG_BADPAT", "invalid regular expression"}, + {REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element"}, + {REG_ECTYPE, "REG_ECTYPE", "invalid character class"}, + {REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)"}, + {REG_ESUBREG, "REG_ESUBREG", "invalid backreference number"}, + {REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced"}, + {REG_EPAREN, "REG_EPAREN", "parentheses not balanced"}, + {REG_EBRACE, "REG_EBRACE", "braces not balanced"}, + {REG_BADBR, "REG_BADBR", "invalid repetition count(s)"}, + {REG_ERANGE, "REG_ERANGE", "invalid character range"}, + {REG_ESPACE, "REG_ESPACE", "out of memory"}, + {REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid"}, + {REG_EMPTY, "REG_EMPTY", "empty (sub)expression"}, + {REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug"}, + {REG_INVARG, "REG_INVARG", "invalid argument to regex routine"}, + {-1, "", "*** unknown regexp error code ***"}, +}; + +/* + - regerror - the interface to error numbers + = API_EXPORT(size_t) regerror(int, const regex_t *, char *, size_t); + */ +/* ARGSUSED */ +API_EXPORT(size_t) +regerror( +int errcode, +const regex_t *preg, +char *errbuf, +size_t errbuf_size) +{ + register const struct rerr *r; + register size_t len; + register int target = errcode &~ REG_ITOA; + register const char *s; + char convbuf[50]; + + if (errcode == REG_ATOI) + s = regatoi(preg, 
convbuf, sizeof(convbuf)); + else { + for (r = rerrs; r->code >= 0; r++) + if (r->code == target) + break; + + if (errcode&REG_ITOA) { + if (r->code >= 0) { + (void) strncpy(convbuf, r->name, sizeof(convbuf) - 1); + convbuf[sizeof(convbuf) - 1] = '\0'; + } else { + snprintf(convbuf, sizeof(convbuf), "REG_0x%x", target); + } + assert(strlen(convbuf) < sizeof(convbuf)); + s = convbuf; + } else + s = r->explain; + } + + len = strlen(s) + 1; + if (errbuf_size > 0) { + if (errbuf_size > len) + (void) strcpy(errbuf, s); + else { + (void) strncpy(errbuf, s, errbuf_size-1); + errbuf[errbuf_size-1] = '\0'; + } + } + + return(len); +} + +/* + - regatoi - internal routine to implement REG_ATOI + == static char *regatoi(const regex_t *preg, char *localbuf, int bufsize); + */ +static char * +regatoi(preg, localbuf, bufsize) +const regex_t *preg; +char *localbuf; +int bufsize; +{ + register const struct rerr *r; + + for (r = rerrs; r->code >= 0; r++) + if (strcmp(r->name, preg->re_endp) == 0) + break; + if (r->code < 0) + return("0"); + + snprintf(localbuf, bufsize, "%d", r->code); + return(localbuf); +} diff --git a/ext/ereg/regex/regerror.ih b/ext/ereg/regex/regerror.ih new file mode 100644 index 0000000000..5ff158e57d --- /dev/null +++ b/ext/ereg/regex/regerror.ih @@ -0,0 +1,12 @@ +/* ========= begin header generated by ./mkh ========= */ +#ifdef __cplusplus +extern "C" { +#endif + +/* === regerror.c === */ +static char *regatoi(const regex_t *preg, char *localbuf, int bufsize); + +#ifdef __cplusplus +} +#endif +/* ========= end header generated by ./mkh ========= */ diff --git a/ext/ereg/regex/regex.3 b/ext/ereg/regex/regex.3 new file mode 100644 index 0000000000..100c8a7f71 --- /dev/null +++ b/ext/ereg/regex/regex.3 @@ -0,0 +1,502 @@ +.TH REGEX 3 "17 May 1993" +.BY "Henry Spencer" +.de ZR +.\" one other place knows this name: the SEE ALSO section +.IR regex (7) \\$1 +.. 
+.SH NAME +regcomp, regexec, regerror, regfree \- regular-expression library +.SH SYNOPSIS +.ft B +.\".na +#include <sys/types.h> +.br +#include <regex.h> +.HP 10 +int regcomp(regex_t\ *preg, const\ char\ *pattern, int\ cflags); +.HP +int\ regexec(const\ regex_t\ *preg, const\ char\ *string, +size_t\ nmatch, regmatch_t\ pmatch[], int\ eflags); +.HP +size_t\ regerror(int\ errcode, const\ regex_t\ *preg, +char\ *errbuf, size_t\ errbuf_size); +.HP +void\ regfree(regex_t\ *preg); +.\".ad +.ft +.SH DESCRIPTION +These routines implement POSIX 1003.2 regular expressions (``RE''s); +see +.ZR . +.I Regcomp +compiles an RE written as a string into an internal form, +.I regexec +matches that internal form against a string and reports results, +.I regerror +transforms error codes from either into human-readable messages, +and +.I regfree +frees any dynamically-allocated storage used by the internal form +of an RE. +.PP +The header +.I <regex.h> +declares two structure types, +.I regex_t +and +.IR regmatch_t , +the former for compiled internal forms and the latter for match reporting. +It also declares the four functions, +a type +.IR regoff_t , +and a number of constants with names starting with ``REG_''. +.PP +.I Regcomp +compiles the regular expression contained in the +.I pattern +string, +subject to the flags in +.IR cflags , +and places the results in the +.I regex_t +structure pointed to by +.IR preg . +.I Cflags +is the bitwise OR of zero or more of the following flags: +.IP REG_EXTENDED \w'REG_EXTENDED'u+2n +Compile modern (``extended'') REs, +rather than the obsolete (``basic'') REs that +are the default. +.IP REG_BASIC +This is a synonym for 0, +provided as a counterpart to REG_EXTENDED to improve readability. +.IP REG_NOSPEC +Compile with recognition of all special characters turned off. +All characters are thus considered ordinary, +so the ``RE'' is a literal string. 
+This is an extension, +compatible with but not specified by POSIX 1003.2, +and should be used with +caution in software intended to be portable to other systems. +REG_EXTENDED and REG_NOSPEC may not be used +in the same call to +.IR regcomp . +.IP REG_ICASE +Compile for matching that ignores upper/lower case distinctions. +See +.ZR . +.IP REG_NOSUB +Compile for matching that need only report success or failure, +not what was matched. +.IP REG_NEWLINE +Compile for newline-sensitive matching. +By default, newline is a completely ordinary character with no special +meaning in either REs or strings. +With this flag, +`[^' bracket expressions and `.' never match newline, +a `^' anchor matches the null string after any newline in the string +in addition to its normal function, +and the `$' anchor matches the null string before any newline in the +string in addition to its normal function. +.IP REG_PEND +The regular expression ends, +not at the first NUL, +but just before the character pointed to by the +.I re_endp +member of the structure pointed to by +.IR preg . +The +.I re_endp +member is of type +.IR const\ char\ * . +This flag permits inclusion of NULs in the RE; +they are considered ordinary characters. +This is an extension, +compatible with but not specified by POSIX 1003.2, +and should be used with +caution in software intended to be portable to other systems. +.PP +When successful, +.I regcomp +returns 0 and fills in the structure pointed to by +.IR preg . +One member of that structure +(other than +.IR re_endp ) +is publicized: +.IR re_nsub , +of type +.IR size_t , +contains the number of parenthesized subexpressions within the RE +(except that the value of this member is undefined if the +REG_NOSUB flag was used). +If +.I regcomp +fails, it returns a non-zero error code; +see DIAGNOSTICS. 
+.PP +.I Regexec +matches the compiled RE pointed to by +.I preg +against the +.IR string , +subject to the flags in +.IR eflags , +and reports results using +.IR nmatch , +.IR pmatch , +and the returned value. +The RE must have been compiled by a previous invocation of +.IR regcomp . +The compiled form is not altered during execution of +.IR regexec , +so a single compiled RE can be used simultaneously by multiple threads. +.PP +By default, +the NUL-terminated string pointed to by +.I string +is considered to be the text of an entire line, minus any terminating +newline. +The +.I eflags +argument is the bitwise OR of zero or more of the following flags: +.IP REG_NOTBOL \w'REG_STARTEND'u+2n +The first character of +the string +is not the beginning of a line, so the `^' anchor should not match before it. +This does not affect the behavior of newlines under REG_NEWLINE. +.IP REG_NOTEOL +The NUL terminating +the string +does not end a line, so the `$' anchor should not match before it. +This does not affect the behavior of newlines under REG_NEWLINE. +.IP REG_STARTEND +The string is considered to start at +\fIstring\fR\ + \fIpmatch\fR[0].\fIrm_so\fR +and to have a terminating NUL located at +\fIstring\fR\ + \fIpmatch\fR[0].\fIrm_eo\fR +(there need not actually be a NUL at that location), +regardless of the value of +.IR nmatch . +See below for the definition of +.IR pmatch +and +.IR nmatch . +This is an extension, +compatible with but not specified by POSIX 1003.2, +and should be used with +caution in software intended to be portable to other systems. +Note that a non-zero \fIrm_so\fR does not imply REG_NOTBOL; +REG_STARTEND affects only the location of the string, +not how it is matched. +.PP +See +.ZR +for a discussion of what is matched in situations where an RE or a +portion thereof could match any of several substrings of +.IR string . +.PP +Normally, +.I regexec +returns 0 for success and the non-zero code REG_NOMATCH for failure. 
+Other non-zero error codes may be returned in exceptional situations; +see DIAGNOSTICS. +.PP +If REG_NOSUB was specified in the compilation of the RE, +or if +.I nmatch +is 0, +.I regexec +ignores the +.I pmatch +argument (but see below for the case where REG_STARTEND is specified). +Otherwise, +.I pmatch +points to an array of +.I nmatch +structures of type +.IR regmatch_t . +Such a structure has at least the members +.I rm_so +and +.IR rm_eo , +both of type +.I regoff_t +(a signed arithmetic type at least as large as an +.I off_t +and a +.IR ssize_t ), +containing respectively the offset of the first character of a substring +and the offset of the first character after the end of the substring. +Offsets are measured from the beginning of the +.I string +argument given to +.IR regexec . +An empty substring is denoted by equal offsets, +both indicating the character following the empty substring. +.PP +The 0th member of the +.I pmatch +array is filled in to indicate what substring of +.I string +was matched by the entire RE. +Remaining members report what substring was matched by parenthesized +subexpressions within the RE; +member +.I i +reports subexpression +.IR i , +with subexpressions counted (starting at 1) by the order of their opening +parentheses in the RE, left to right. +Unused entries in the array\(emcorresponding either to subexpressions that +did not participate in the match at all, or to subexpressions that do not +exist in the RE (that is, \fIi\fR\ > \fIpreg\fR\->\fIre_nsub\fR)\(emhave both +.I rm_so +and +.I rm_eo +set to \-1. +If a subexpression participated in the match several times, +the reported substring is the last one it matched. +(Note, as an example in particular, that when the RE `(b*)+' matches `bbb', +the parenthesized subexpression matches each of the three `b's and then +an infinite number of empty strings following the last `b', +so the reported substring is one of the empties.) 
+.PP +If REG_STARTEND is specified, +.I pmatch +must point to at least one +.I regmatch_t +(even if +.I nmatch +is 0 or REG_NOSUB was specified), +to hold the input offsets for REG_STARTEND. +Use for output is still entirely controlled by +.IR nmatch ; +if +.I nmatch +is 0 or REG_NOSUB was specified, +the value of +.IR pmatch [0] +will not be changed by a successful +.IR regexec . +.PP +.I Regerror +maps a non-zero +.I errcode +from either +.I regcomp +or +.I regexec +to a human-readable, printable message. +If +.I preg +is non-NULL, +the error code should have arisen from use of +the +.I regex_t +pointed to by +.IR preg , +and if the error code came from +.IR regcomp , +it should have been the result from the most recent +.I regcomp +using that +.IR regex_t . +.RI ( Regerror +may be able to supply a more detailed message using information +from the +.IR regex_t .) +.I Regerror +places the NUL-terminated message into the buffer pointed to by +.IR errbuf , +limiting the length (including the NUL) to at most +.I errbuf_size +bytes. +If the whole message won't fit, +as much of it as will fit before the terminating NUL is supplied. +In any case, +the returned value is the size of buffer needed to hold the whole +message (including terminating NUL). +If +.I errbuf_size +is 0, +.I errbuf +is ignored but the return value is still correct. +.PP +If the +.I errcode +given to +.I regerror +is first ORed with REG_ITOA, +the ``message'' that results is the printable name of the error code, +e.g. ``REG_NOMATCH'', +rather than an explanation thereof. +If +.I errcode +is REG_ATOI, +then +.I preg +shall be non-NULL and the +.I re_endp +member of the structure it points to +must point to the printable name of an error code; +in this case, the result in +.I errbuf +is the decimal digits of +the numeric value of the error code +(0 if the name is not recognized). 
+REG_ITOA and REG_ATOI are intended primarily as debugging facilities; +they are extensions, +compatible with but not specified by POSIX 1003.2, +and should be used with +caution in software intended to be portable to other systems. +Be warned also that they are considered experimental and changes are possible. +.PP +.I Regfree +frees any dynamically-allocated storage associated with the compiled RE +pointed to by +.IR preg . +The remaining +.I regex_t +is no longer a valid compiled RE +and the effect of supplying it to +.I regexec +or +.I regerror +is undefined. +.PP +None of these functions references global variables except for tables +of constants; +all are safe for use from multiple threads if the arguments are safe. +.SH IMPLEMENTATION CHOICES +There are a number of decisions that 1003.2 leaves up to the implementor, +either by explicitly saying ``undefined'' or by virtue of them being +forbidden by the RE grammar. +This implementation treats them as follows. +.PP +See +.ZR +for a discussion of the definition of case-independent matching. +.PP +There is no particular limit on the length of REs, +except insofar as memory is limited. +Memory usage is approximately linear in RE size, and largely insensitive +to RE complexity, except for bounded repetitions. +See BUGS for one short RE using them +that will run almost any system out of memory. +.PP +A backslashed character other than one specifically given a magic meaning +by 1003.2 (such magic meanings occur only in obsolete [``basic''] REs) +is taken as an ordinary character. +.PP +Any unmatched [ is a REG_EBRACK error. +.PP +Equivalence classes cannot begin or end bracket-expression ranges. +The endpoint of one range cannot begin another. +.PP +RE_DUP_MAX, the limit on repetition counts in bounded repetitions, is 255. +.PP +A repetition operator (?, *, +, or bounds) cannot follow another +repetition operator. +A repetition operator cannot begin an expression or subexpression +or follow `^' or `|'. 
+.PP +`|' cannot appear first or last in a (sub)expression or after another `|', +i.e. an operand of `|' cannot be an empty subexpression. +An empty parenthesized subexpression, `()', is legal and matches an +empty (sub)string. +An empty string is not a legal RE. +.PP +A `{' followed by a digit is considered the beginning of bounds for a +bounded repetition, which must then follow the syntax for bounds. +A `{' \fInot\fR followed by a digit is considered an ordinary character. +.PP +`^' and `$' beginning and ending subexpressions in obsolete (``basic'') +REs are anchors, not ordinary characters. +.SH SEE ALSO +grep(1), regex(7) +.PP +POSIX 1003.2, sections 2.8 (Regular Expression Notation) +and +B.5 (C Binding for Regular Expression Matching). +.SH DIAGNOSTICS +Non-zero error codes from +.I regcomp +and +.I regexec +include the following: +.PP +.nf +.ta \w'REG_ECOLLATE'u+3n +REG_NOMATCH regexec() failed to match +REG_BADPAT invalid regular expression +REG_ECOLLATE invalid collating element +REG_ECTYPE invalid character class +REG_EESCAPE \e applied to unescapable character +REG_ESUBREG invalid backreference number +REG_EBRACK brackets [ ] not balanced +REG_EPAREN parentheses ( ) not balanced +REG_EBRACE braces { } not balanced +REG_BADBR invalid repetition count(s) in { } +REG_ERANGE invalid character range in [ ] +REG_ESPACE ran out of memory +REG_BADRPT ?, *, or + operand invalid +REG_EMPTY empty (sub)expression +REG_ASSERT ``can't happen''\(emyou found a bug +REG_INVARG invalid argument, e.g. negative-length string +.fi +.SH HISTORY +Written by Henry Spencer at University of Toronto, +henry@zoo.toronto.edu. +.SH BUGS +This is an alpha release with known defects. +Please report problems. +.PP +There is one known functionality bug. +The implementation of internationalization is incomplete: +the locale is always assumed to be the default one of 1003.2, +and only the collating elements etc. of that locale are available. 
+.PP +The back-reference code is subtle and doubts linger about its correctness +in complex cases. +.PP +.I Regexec +performance is poor. +This will improve with later releases. +.I Nmatch +exceeding 0 is expensive; +.I nmatch +exceeding 1 is worse. +.I Regexec +is largely insensitive to RE complexity \fIexcept\fR that back +references are massively expensive. +RE length does matter; in particular, there is a strong speed bonus +for keeping RE length under about 30 characters, +with most special characters counting roughly double. +.PP +.I Regcomp +implements bounded repetitions by macro expansion, +which is costly in time and space if counts are large +or bounded repetitions are nested. +An RE like, say, +`((((a{1,100}){1,100}){1,100}){1,100}){1,100}' +will (eventually) run almost any existing machine out of swap space. +.PP +There are suspected problems with response to obscure error conditions. +Notably, +certain kinds of internal overflow, +produced only by truly enormous REs or by multiply nested bounded repetitions, +are probably not handled well. +.PP +Due to a mistake in 1003.2, things like `a)b' are legal REs because `)' is +a special character only in the presence of a previous unmatched `('. +This can't be fixed until the spec is fixed. +.PP +The standard's definition of back references is vague. +For example, does +`a\e(\e(b\e)*\e2\e)*d' match `abbbd'? +Until the standard is clarified, +behavior in such cases should not be relied on. +.PP +The implementation of word-boundary matching is a bit of a kludge, +and bugs may lurk in combinations of word-boundary matching and anchoring. 
diff --git a/ext/ereg/regex/regex.7 b/ext/ereg/regex/regex.7 new file mode 100644 index 0000000000..d89012bda1 --- /dev/null +++ b/ext/ereg/regex/regex.7 @@ -0,0 +1,233 @@ +.TH REGEX 7 "7 Feb 1994" +.BY "Henry Spencer" +.SH NAME +regex \- POSIX 1003.2 regular expressions +.SH DESCRIPTION +Regular expressions (``RE''s), +as defined in POSIX 1003.2, come in two forms: +modern REs (roughly those of +.IR egrep ; +1003.2 calls these ``extended'' REs) +and obsolete REs (roughly those of +.IR ed ; +1003.2 ``basic'' REs). +Obsolete REs mostly exist for backward compatibility in some old programs; +they will be discussed at the end. +1003.2 leaves some aspects of RE syntax and semantics open; +`\(dg' marks decisions on these aspects that +may not be fully portable to other 1003.2 implementations. +.PP +A (modern) RE is one\(dg or more non-empty\(dg \fIbranches\fR, +separated by `|'. +It matches anything that matches one of the branches. +.PP +A branch is one\(dg or more \fIpieces\fR, concatenated. +It matches a match for the first, followed by a match for the second, etc. +.PP +A piece is an \fIatom\fR possibly followed +by a single\(dg `*', `+', `?', or \fIbound\fR. +An atom followed by `*' matches a sequence of 0 or more matches of the atom. +An atom followed by `+' matches a sequence of 1 or more matches of the atom. +An atom followed by `?' matches a sequence of 0 or 1 matches of the atom. +.PP +A \fIbound\fR is `{' followed by an unsigned decimal integer, +possibly followed by `,' +possibly followed by another unsigned decimal integer, +always followed by `}'. +The integers must lie between 0 and RE_DUP_MAX (255\(dg) inclusive, +and if there are two of them, the first may not exceed the second. +An atom followed by a bound containing one integer \fIi\fR +and no comma matches +a sequence of exactly \fIi\fR matches of the atom. +An atom followed by a bound +containing one integer \fIi\fR and a comma matches +a sequence of \fIi\fR or more matches of the atom. 
+An atom followed by a bound +containing two integers \fIi\fR and \fIj\fR matches +a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom. +.PP +An atom is a regular expression enclosed in `()' (matching a match for the +regular expression), +an empty set of `()' (matching the null string)\(dg, +a \fIbracket expression\fR (see below), `.' +(matching any single character), `^' (matching the null string at the +beginning of a line), `$' (matching the null string at the +end of a line), a `\e' followed by one of the characters +`^.[$()|*+?{\e' +(matching that character taken as an ordinary character), +a `\e' followed by any other character\(dg +(matching that character taken as an ordinary character, +as if the `\e' had not been present\(dg), +or a single character with no other significance (matching that character). +A `{' followed by a character other than a digit is an ordinary +character, not the beginning of a bound\(dg. +It is illegal to end an RE with `\e'. +.PP +A \fIbracket expression\fR is a list of characters enclosed in `[]'. +It normally matches any single character from the list (but see below). +If the list begins with `^', +it matches any single character +(but see below) \fInot\fR from the rest of the list. +If two characters in the list are separated by `\-', this is shorthand +for the full \fIrange\fR of characters between those two (inclusive) in the +collating sequence, +e.g. `[0-9]' in ASCII matches any decimal digit. +It is illegal\(dg for two ranges to share an +endpoint, e.g. `a-c-e'. +Ranges are very collating-sequence-dependent, +and portable programs should avoid relying on them. +.PP +To include a literal `]' in the list, make it the first character +(following a possible `^'). +To include a literal `\-', make it the first or last character, +or the second endpoint of a range. +To use a literal `\-' as the first endpoint of a range, +enclose it in `[.' and `.]' to make it a collating element (see below). 
+With the exception of these and some combinations using `[' (see next +paragraphs), all other special characters, including `\e', lose their +special significance within a bracket expression. +.PP +Within a bracket expression, a collating element (a character, +a multi-character sequence that collates as if it were a single character, +or a collating-sequence name for either) +enclosed in `[.' and `.]' stands for the +sequence of characters of that collating element. +The sequence is a single element of the bracket expression's list. +A bracket expression containing a multi-character collating element +can thus match more than one character, +e.g. if the collating sequence includes a `ch' collating element, +then the RE `[[.ch.]]*c' matches the first five characters +of `chchcc'. +.PP +Within a bracket expression, a collating element enclosed in `[=' and +`=]' is an equivalence class, standing for the sequences of characters +of all collating elements equivalent to that one, including itself. +(If there are no other equivalent collating elements, +the treatment is as if the enclosing delimiters were `[.' and `.]'.) +For example, if o and \o'o^' are the members of an equivalence class, +then `[[=o=]]', `[[=\o'o^'=]]', and `[o\o'o^']' are all synonymous. +An equivalence class may not\(dg be an endpoint +of a range. +.PP +Within a bracket expression, the name of a \fIcharacter class\fR enclosed +in `[:' and `:]' stands for the list of all characters belonging to that +class. +Standard character class names are: +.PP +.RS +.nf +.ta 3c 6c 9c +alnum digit punct +alpha graph space +blank lower upper +cntrl print xdigit +.fi +.RE +.PP +These stand for the character classes defined in +.IR ctype (3). +A locale may provide others. +A character class may not be used as an endpoint of a range. +.PP +There are two special cases\(dg of bracket expressions: +the bracket expressions `[[:<:]]' and `[[:>:]]' match the null string at +the beginning and end of a word respectively. 
+A word is defined as a sequence of +word characters +which is neither preceded nor followed by +word characters. +A word character is an +.I alnum +character (as defined by +.IR ctype (3)) +or an underscore. +This is an extension, +compatible with but not specified by POSIX 1003.2, +and should be used with +caution in software intended to be portable to other systems. +.PP +In the event that an RE could match more than one substring of a given +string, +the RE matches the one starting earliest in the string. +If the RE could match more than one substring starting at that point, +it matches the longest. +Subexpressions also match the longest possible substrings, subject to +the constraint that the whole match be as long as possible, +with subexpressions starting earlier in the RE taking priority over +ones starting later. +Note that higher-level subexpressions thus take priority over +their lower-level component subexpressions. +.PP +Match lengths are measured in characters, not collating elements. +A null string is considered longer than no match at all. +For example, +`bb*' matches the three middle characters of `abbbc', +`(wee|week)(knights|nights)' matches all ten characters of `weeknights', +when `(.*).*' is matched against `abc' the parenthesized subexpression +matches all three characters, and +when `(a*)*' is matched against `bc' both the whole RE and the parenthesized +subexpression match the null string. +.PP +If case-independent matching is specified, +the effect is much as if all case distinctions had vanished from the +alphabet. +When an alphabetic that exists in multiple cases appears as an +ordinary character outside a bracket expression, it is effectively +transformed into a bracket expression containing both cases, +e.g. `x' becomes `[xX]'. +When it appears inside a bracket expression, all case counterparts +of it are added to the bracket expression, so that (e.g.) `[x]' +becomes `[xX]' and `[^x]' becomes `[^xX]'. 
+.PP +No particular limit is imposed on the length of REs\(dg. +Programs intended to be portable should not employ REs longer +than 256 bytes, +as an implementation can refuse to accept such REs and remain +POSIX-compliant. +.PP +Obsolete (``basic'') regular expressions differ in several respects. +`|', `+', and `?' are ordinary characters and there is no equivalent +for their functionality. +The delimiters for bounds are `\e{' and `\e}', +with `{' and `}' by themselves ordinary characters. +The parentheses for nested subexpressions are `\e(' and `\e)', +with `(' and `)' by themselves ordinary characters. +`^' is an ordinary character except at the beginning of the +RE or\(dg the beginning of a parenthesized subexpression, +`$' is an ordinary character except at the end of the +RE or\(dg the end of a parenthesized subexpression, +and `*' is an ordinary character if it appears at the beginning of the +RE or the beginning of a parenthesized subexpression +(after a possible leading `^'). +Finally, there is one new type of atom, a \fIback reference\fR: +`\e' followed by a non-zero decimal digit \fId\fR +matches the same sequence of characters +matched by the \fId\fRth parenthesized subexpression +(numbering subexpressions by the positions of their opening parentheses, +left to right), +so that (e.g.) `\e([bc]\e)\e1' matches `bb' or `cc' but not `bc'. +.SH SEE ALSO +regex(3) +.PP +POSIX 1003.2, section 2.8 (Regular Expression Notation). +.SH BUGS +Having two kinds of REs is a botch. +.PP +The current 1003.2 spec says that `)' is an ordinary character in +the absence of an unmatched `('; +this was an unintentional result of a wording error, +and change is likely. +Avoid relying on it. +.PP +Back references are a dreadful botch, +posing major problems for efficient implementations. +They are also somewhat vaguely defined +(does +`a\e(\e(b\e)*\e2\e)*d' match `abbbd'?). +Avoid using them. +.PP +1003.2's specification of case-independent matching is vague. 
+The ``one case implies all cases'' definition given above +is current consensus among implementors as to the right interpretation. +.PP +The syntax for word boundaries is incredibly ugly. diff --git a/ext/ereg/regex/regex.dsp b/ext/ereg/regex/regex.dsp new file mode 100644 index 0000000000..e8f1ad4299 --- /dev/null +++ b/ext/ereg/regex/regex.dsp @@ -0,0 +1,106 @@ +# Microsoft Developer Studio Project File - Name="regex" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 5.00 +# ** DO NOT EDIT ** + +# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102 + +CFG=regex - Win32 Debug +!MESSAGE This is not a valid makefile. To build this project using NMAKE, +!MESSAGE use the Export Makefile command and run +!MESSAGE +!MESSAGE NMAKE /f "regex.mak". +!MESSAGE +!MESSAGE You can specify a configuration when running NMAKE +!MESSAGE by defining the macro CFG on the command line. For example: +!MESSAGE +!MESSAGE NMAKE /f "regex.mak" CFG="regex - Win32 Debug" +!MESSAGE +!MESSAGE Possible choices for configuration are: +!MESSAGE +!MESSAGE "regex - Win32 Release" (based on "Win32 (x86) Dynamic-Link Library") +!MESSAGE "regex - Win32 Debug" (based on "Win32 (x86) Dynamic-Link Library") +!MESSAGE + +# Begin Project +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +MTL=midl.exe +RSC=rc.exe + +!IF "$(CFG)" == "regex - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /FD /c +# ADD CPP /nologo /MT /W3 /GX /O2 /I "." 
/D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /FD /c +# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /o NUL /win32 +# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /o NUL /win32 +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386 +# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386 + +!ELSEIF "$(CFG)" == "regex - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /FD /c +# ADD CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /I "." 
/D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /FD /c +# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /o NUL /win32 +# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /o NUL /win32 +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /pdbtype:sept +# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /pdbtype:sept + +!ENDIF + +# Begin Target + +# Name "regex - Win32 Release" +# Name "regex - Win32 Debug" +# Begin Source File + +SOURCE=.\regcomp.c +# End Source File +# Begin Source File + +SOURCE=.\regerror.c +# End Source File +# Begin Source File + +SOURCE=.\regexec.c +# End Source File +# Begin Source File + +SOURCE=.\regfree.c +# End Source File +# End Target +# End Project diff --git a/ext/ereg/regex/regex.dsw b/ext/ereg/regex/regex.dsw new file mode 100644 index 0000000000..7b7df8126c --- /dev/null +++ b/ext/ereg/regex/regex.dsw @@ -0,0 +1,29 @@ +Microsoft Developer Studio Workspace File, Format Version 5.00 +# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE! 
+ +############################################################################### + +Project: "regex"=.\regex.dsp - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ +}}} + +############################################################################### + +Global: + +Package=<5> +{{{ +}}} + +Package=<3> +{{{ +}}} + +############################################################################### + diff --git a/ext/ereg/regex/regex.h b/ext/ereg/regex/regex.h new file mode 100644 index 0000000000..56c63148d7 --- /dev/null +++ b/ext/ereg/regex/regex.h @@ -0,0 +1,85 @@ +#ifndef _HSREGEX_H_ +#define _HSREGEX_H_ +#ifndef _HSREGEX_H +#define _HSREGEX_H /* never again */ +/* ========= begin header generated by ././mkh ========= */ +#ifdef __cplusplus +extern "C" { +#endif + +/* === regex2.h === */ +#ifdef WIN32 +#define API_EXPORT(type) __declspec(dllexport) type __stdcall +#elif defined(__GNUC__) && __GNUC__ >= 4 +#define API_EXPORT(type) __attribute__ ((visibility("default"))) type +#else +#define API_EXPORT(type) type +#endif + +typedef off_t regoff_t; +typedef struct { + int re_magic; + size_t re_nsub; /* number of parenthesized subexpressions */ + const char *re_endp; /* end pointer for REG_PEND */ + struct re_guts *re_g; /* none of your business :-) */ +} regex_t; +typedef struct { + regoff_t rm_so; /* start of match */ + regoff_t rm_eo; /* end of match */ +} regmatch_t; + + +/* === regcomp.c === */ +API_EXPORT(int) regcomp(regex_t *, const char *, int); +#define REG_BASIC 0000 +#define REG_EXTENDED 0001 +#define REG_ICASE 0002 +#define REG_NOSUB 0004 +#define REG_NEWLINE 0010 +#define REG_NOSPEC 0020 +#define REG_PEND 0040 +#define REG_DUMP 0200 + + +/* === regerror.c === */ +#define REG_OKAY 0 +#define REG_NOMATCH 1 +#define REG_BADPAT 2 +#define REG_ECOLLATE 3 +#define REG_ECTYPE 4 +#define REG_EESCAPE 5 +#define REG_ESUBREG 6 +#define REG_EBRACK 7 +#define REG_EPAREN 8 +#define REG_EBRACE 9 +#define REG_BADBR 10 +#define REG_ERANGE 11 +#define 
REG_ESPACE 12 +#define REG_BADRPT 13 +#define REG_EMPTY 14 +#define REG_ASSERT 15 +#define REG_INVARG 16 +#define REG_ATOI 255 /* convert name to number (!) */ +#define REG_ITOA 0400 /* convert number to name (!) */ +API_EXPORT(size_t) regerror(int, const regex_t *, char *, size_t); + + +/* === regexec.c === */ +API_EXPORT(int) regexec(const regex_t *, const char *, size_t, regmatch_t [], int); +#define REG_NOTBOL 00001 +#define REG_NOTEOL 00002 +#define REG_STARTEND 00004 +#define REG_TRACE 00400 /* tracing of execution */ +#define REG_LARGE 01000 /* force large representation */ +#define REG_BACKR 02000 /* force use of backref code */ + + +/* === regfree.c === */ +API_EXPORT(void) regfree(regex_t *); + +#ifdef __cplusplus +} +#endif +/* ========= end header generated by ././mkh ========= */ +#endif +#endif diff --git a/ext/ereg/regex/regex.mak b/ext/ereg/regex/regex.mak new file mode 100644 index 0000000000..b87ded340b --- /dev/null +++ b/ext/ereg/regex/regex.mak @@ -0,0 +1,304 @@ +# Microsoft Developer Studio Generated NMAKE File, Based on regex.dsp +!IF "$(CFG)" == "" +CFG=regex - Win32 Release +!MESSAGE No configuration specified. Defaulting to regex - Win32 Release. +!ENDIF + +!IF "$(CFG)" != "regex - Win32 Release" && "$(CFG)" != "regex - Win32 Debug" +!MESSAGE Invalid configuration "$(CFG)" specified. +!MESSAGE You can specify a configuration when running NMAKE +!MESSAGE by defining the macro CFG on the command line. For example: +!MESSAGE +!MESSAGE NMAKE /f "regex.mak" CFG="regex - Win32 Release" +!MESSAGE +!MESSAGE Possible choices for configuration are: +!MESSAGE +!MESSAGE "regex - Win32 Release" (based on "Win32 (x86) Static Library") +!MESSAGE "regex - Win32 Debug" (based on "Win32 (x86) Static Library") +!MESSAGE +!ERROR An invalid configuration is specified. 
+!ENDIF + +!IF "$(OS)" == "Windows_NT" +NULL= +!ELSE +NULL=nul +!ENDIF + +CPP=cl.exe + +!IF "$(CFG)" == "regex - Win32 Release" + +OUTDIR=.\Release +INTDIR=.\Release +# Begin Custom Macros +OutDir=.\.\Release +# End Custom Macros + +!IF "$(RECURSE)" == "0" + +ALL : "$(OUTDIR)\regex.lib" + +!ELSE + +ALL : "$(OUTDIR)\regex.lib" + +!ENDIF + +CLEAN : + -@erase "$(INTDIR)\regcomp.obj" + -@erase "$(INTDIR)\regerror.obj" + -@erase "$(INTDIR)\regexec.obj" + -@erase "$(INTDIR)\regfree.obj" + -@erase "$(INTDIR)\vc50.idb" + -@erase "$(OUTDIR)\regex.lib" + +"$(OUTDIR)" : + if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)" + +CPP_PROJ=/nologo /MD /W3 /GX /O2 /I "." /D "WIN32" /D "NDEBUG" /D "_WINDOWS"\ + /Fp"$(INTDIR)\regex.pch" /YX /Fo"$(INTDIR)\\" /Fd"$(INTDIR)\\" /FD /c +CPP_OBJS=.\Release/ +CPP_SBRS=. +BSC32=bscmake.exe +BSC32_FLAGS=/nologo /o"$(OUTDIR)\regex.bsc" +BSC32_SBRS= \ + +LIB32=link.exe -lib +LIB32_FLAGS=/nologo /out:"$(OUTDIR)\regex.lib" +LIB32_OBJS= \ + "$(INTDIR)\regcomp.obj" \ + "$(INTDIR)\regerror.obj" \ + "$(INTDIR)\regexec.obj" \ + "$(INTDIR)\regfree.obj" + +"$(OUTDIR)\regex.lib" : "$(OUTDIR)" $(DEF_FILE) $(LIB32_OBJS) + $(LIB32) @<< + $(LIB32_FLAGS) $(DEF_FLAGS) $(LIB32_OBJS) +<< + +!ELSEIF "$(CFG)" == "regex - Win32 Debug" + +OUTDIR=.\Debug +INTDIR=.\Debug +# Begin Custom Macros +OutDir=.\.\Debug +# End Custom Macros + +!IF "$(RECURSE)" == "0" + +ALL : "$(OUTDIR)\regex.lib" "$(OUTDIR)\regex.bsc" + +!ELSE + +ALL : "$(OUTDIR)\regex.lib" "$(OUTDIR)\regex.bsc" + +!ENDIF + +CLEAN : + -@erase "$(INTDIR)\regcomp.obj" + -@erase "$(INTDIR)\regcomp.sbr" + -@erase "$(INTDIR)\regerror.obj" + -@erase "$(INTDIR)\regerror.sbr" + -@erase "$(INTDIR)\regexec.obj" + -@erase "$(INTDIR)\regexec.sbr" + -@erase "$(INTDIR)\regfree.obj" + -@erase "$(INTDIR)\regfree.sbr" + -@erase "$(INTDIR)\vc50.idb" + -@erase "$(OUTDIR)\regex.bsc" + -@erase "$(OUTDIR)\regex.lib" + +"$(OUTDIR)" : + if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)" + +CPP_PROJ=/nologo /MDd /W3 /GX /Z7 /Od /I "." 
/D "WIN32" /D "_DEBUG" /D\ + "_WINDOWS" /FR"$(INTDIR)\\" /Fp"$(INTDIR)\regex.pch" /YX /Fo"$(INTDIR)\\"\ + /Fd"$(INTDIR)\\" /FD /c +CPP_OBJS=.\Debug/ +CPP_SBRS=.\Debug/ +BSC32=bscmake.exe +BSC32_FLAGS=/nologo /o"$(OUTDIR)\regex.bsc" +BSC32_SBRS= \ + "$(INTDIR)\regcomp.sbr" \ + "$(INTDIR)\regerror.sbr" \ + "$(INTDIR)\regexec.sbr" \ + "$(INTDIR)\regfree.sbr" + +"$(OUTDIR)\regex.bsc" : "$(OUTDIR)" $(BSC32_SBRS) + $(BSC32) @<< + $(BSC32_FLAGS) $(BSC32_SBRS) +<< + +LIB32=link.exe -lib +LIB32_FLAGS=/nologo /out:"$(OUTDIR)\regex.lib" +LIB32_OBJS= \ + "$(INTDIR)\regcomp.obj" \ + "$(INTDIR)\regerror.obj" \ + "$(INTDIR)\regexec.obj" \ + "$(INTDIR)\regfree.obj" + +"$(OUTDIR)\regex.lib" : "$(OUTDIR)" $(DEF_FILE) $(LIB32_OBJS) + $(LIB32) @<< + $(LIB32_FLAGS) $(DEF_FLAGS) $(LIB32_OBJS) +<< + +!ENDIF + +.c{$(CPP_OBJS)}.obj:: + $(CPP) @<< + $(CPP_PROJ) $< +<< + +.cpp{$(CPP_OBJS)}.obj:: + $(CPP) @<< + $(CPP_PROJ) $< +<< + +.cxx{$(CPP_OBJS)}.obj:: + $(CPP) @<< + $(CPP_PROJ) $< +<< + +.c{$(CPP_SBRS)}.sbr:: + $(CPP) @<< + $(CPP_PROJ) $< +<< + +.cpp{$(CPP_SBRS)}.sbr:: + $(CPP) @<< + $(CPP_PROJ) $< +<< + +.cxx{$(CPP_SBRS)}.sbr:: + $(CPP) @<< + $(CPP_PROJ) $< +<< + + +!IF "$(CFG)" == "regex - Win32 Release" || "$(CFG)" == "regex - Win32 Debug" +SOURCE=.\regcomp.c + +!IF "$(CFG)" == "regex - Win32 Release" + +DEP_CPP_REGCO=\ + ".\cclass.h"\ + ".\cname.h"\ + ".\regcomp.ih"\ + ".\regex.h"\ + ".\regex2.h"\ + ".\utils.h"\ + + +"$(INTDIR)\regcomp.obj" : $(SOURCE) $(DEP_CPP_REGCO) "$(INTDIR)" + + +!ELSEIF "$(CFG)" == "regex - Win32 Debug" + +DEP_CPP_REGCO=\ + ".\cclass.h"\ + ".\cname.h"\ + ".\regcomp.ih"\ + ".\regex.h"\ + ".\regex2.h"\ + ".\utils.h"\ + {$(INCLUDE)}"sys\types.h"\ + + +"$(INTDIR)\regcomp.obj" "$(INTDIR)\regcomp.sbr" : $(SOURCE) $(DEP_CPP_REGCO)\ + "$(INTDIR)" + + +!ENDIF + +SOURCE=.\regerror.c + +!IF "$(CFG)" == "regex - Win32 Release" + +DEP_CPP_REGER=\ + ".\regerror.ih"\ + ".\regex.h"\ + ".\utils.h"\ + + +"$(INTDIR)\regerror.obj" : $(SOURCE) $(DEP_CPP_REGER) "$(INTDIR)" + + 
+!ELSEIF "$(CFG)" == "regex - Win32 Debug" + +DEP_CPP_REGER=\ + ".\regerror.ih"\ + ".\regex.h"\ + ".\utils.h"\ + {$(INCLUDE)}"sys\types.h"\ + + +"$(INTDIR)\regerror.obj" "$(INTDIR)\regerror.sbr" : $(SOURCE) $(DEP_CPP_REGER)\ + "$(INTDIR)" + + +!ENDIF + +SOURCE=.\regexec.c + +!IF "$(CFG)" == "regex - Win32 Release" + +DEP_CPP_REGEX=\ + ".\engine.c"\ + ".\engine.ih"\ + ".\regex.h"\ + ".\regex2.h"\ + ".\utils.h"\ + + +"$(INTDIR)\regexec.obj" : $(SOURCE) $(DEP_CPP_REGEX) "$(INTDIR)" + + +!ELSEIF "$(CFG)" == "regex - Win32 Debug" + +DEP_CPP_REGEX=\ + ".\engine.c"\ + ".\engine.ih"\ + ".\regex.h"\ + ".\regex2.h"\ + ".\utils.h"\ + {$(INCLUDE)}"sys\types.h"\ + + +"$(INTDIR)\regexec.obj" "$(INTDIR)\regexec.sbr" : $(SOURCE) $(DEP_CPP_REGEX)\ + "$(INTDIR)" + + +!ENDIF + +SOURCE=.\regfree.c + +!IF "$(CFG)" == "regex - Win32 Release" + +DEP_CPP_REGFR=\ + ".\regex.h"\ + ".\regex2.h"\ + ".\utils.h"\ + + +"$(INTDIR)\regfree.obj" : $(SOURCE) $(DEP_CPP_REGFR) "$(INTDIR)" + + +!ELSEIF "$(CFG)" == "regex - Win32 Debug" + +DEP_CPP_REGFR=\ + ".\regex.h"\ + ".\regex2.h"\ + ".\utils.h"\ + {$(INCLUDE)}"sys\types.h"\ + + +"$(INTDIR)\regfree.obj" "$(INTDIR)\regfree.sbr" : $(SOURCE) $(DEP_CPP_REGFR)\ + "$(INTDIR)" + + +!ENDIF + +SOURCE=.\engine.c + +!ENDIF + diff --git a/ext/ereg/regex/regex2.h b/ext/ereg/regex/regex2.h new file mode 100644 index 0000000000..4996f96ecd --- /dev/null +++ b/ext/ereg/regex/regex2.h @@ -0,0 +1,140 @@ +/* + * First, the stuff that ends up in the outside-world include file + = #ifdef WIN32 + = #define API_EXPORT(type) __declspec(dllexport) type __stdcall + = #else + = #define API_EXPORT(type) type + = #endif + = + = typedef off_t regoff_t; + = typedef struct { + = int re_magic; + = size_t re_nsub; // number of parenthesized subexpressions + = const unsigned char *re_endp; // end pointer for REG_PEND + = struct re_guts *re_g; // none of your business :-) + = } regex_t; + = typedef struct { + = regoff_t rm_so; // start of match + = regoff_t rm_eo; // end of match + = 
} regmatch_t; + */ +/* + * internals of regex_t + */ +#define MAGIC1 ((('r'^0200)<<8) | 'e') + +/* + * The internal representation is a *strip*, a sequence of + * operators ending with an endmarker. (Some terminology etc. is a + * historical relic of earlier versions which used multiple strips.) + * Certain oddities in the representation are there to permit running + * the machinery backwards; in particular, any deviation from sequential + * flow must be marked at both its source and its destination. Some + * fine points: + * + * - OPLUS_ and O_PLUS are *inside* the loop they create. + * - OQUEST_ and O_QUEST are *outside* the bypass they create. + * - OCH_ and O_CH are *outside* the multi-way branch they create, while + * OOR1 and OOR2 are respectively the end and the beginning of one of + * the branches. Note that there is an implicit OOR2 following OCH_ + * and an implicit OOR1 preceding O_CH. + * + * In state representations, an operator's bit is on to signify a state + * immediately *preceding* "execution" of that operator. + */ +typedef long sop; /* strip operator */ +typedef long sopno; +#define OPRMASK 0x7c000000 +#define OPDMASK 0x03ffffff +#define OPSHIFT (26) +#define OP(n) ((n)&OPRMASK) +#define OPND(n) ((n)&OPDMASK) +#define SOP(op, opnd) ((op)|(opnd)) +/* operators meaning operand */ +/* (back, fwd are offsets) */ +#define OEND (1<<OPSHIFT) /* endmarker - */ +#define OCHAR (2<<OPSHIFT) /* character unsigned char */ +#define OBOL (3<<OPSHIFT) /* left anchor - */ +#define OEOL (4<<OPSHIFT) /* right anchor - */ +#define OANY (5<<OPSHIFT) /* . - */ +#define OANYOF (6<<OPSHIFT) /* [...] set number */ +#define OBACK_ (7<<OPSHIFT) /* begin \d paren number */ +#define O_BACK (8<<OPSHIFT) /* end \d paren number */ +#define OPLUS_ (9<<OPSHIFT) /* + prefix fwd to suffix */ +#define O_PLUS (10<<OPSHIFT) /* + suffix back to prefix */ +#define OQUEST_ (11<<OPSHIFT) /* ? prefix fwd to suffix */ +#define O_QUEST (12<<OPSHIFT) /* ? 
suffix back to prefix */ +#define OLPAREN (13<<OPSHIFT) /* ( fwd to ) */ +#define ORPAREN (14<<OPSHIFT) /* ) back to ( */ +#define OCH_ (15<<OPSHIFT) /* begin choice fwd to OOR2 */ +#define OOR1 (16<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */ +#define OOR2 (17<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */ +#define O_CH (18<<OPSHIFT) /* end choice back to OOR1 */ +#define OBOW (19<<OPSHIFT) /* begin word - */ +#define OEOW (20<<OPSHIFT) /* end word - */ + +/* + * Structure for [] character-set representation. Character sets are + * done as bit vectors, grouped 8 to a byte vector for compactness. + * The individual set therefore has both a pointer to the byte vector + * and a mask to pick out the relevant bit of each byte. A hash code + * simplifies testing whether two sets could be identical. + * + * This will get trickier for multicharacter collating elements. As + * preliminary hooks for dealing with such things, we also carry along + * a string of multi-character elements, and decide the size of the + * vectors at run time. 
+ */ +typedef struct { + uch *ptr; /* -> uch [csetsize] */ + uch mask; /* bit within array */ + uch hash; /* hash code */ + size_t smultis; + unsigned char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */ +} cset; +/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */ +#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c)) +#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) +#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask) +#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */ +#define MCsub(p, cs, cp) mcsub(p, cs, cp) +#define MCin(p, cs, cp) mcin(p, cs, cp) + +/* stuff for character categories */ +typedef unsigned char cat_t; + +/* + * main compiled-expression structure + */ +struct re_guts { + int magic; +# define MAGIC2 ((('R'^0200)<<8)|'E') + sop *strip; /* malloced area for strip */ + int csetsize; /* number of bits in a cset vector */ + int ncsets; /* number of csets in use */ + cset *sets; /* -> cset [ncsets] */ + uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ + int cflags; /* copy of regcomp() cflags argument */ + sopno nstates; /* = number of sops */ + sopno firststate; /* the initial OEND (normally 0) */ + sopno laststate; /* the final OEND */ + int iflags; /* internal flags */ +# define USEBOL 01 /* used ^ */ +# define USEEOL 02 /* used $ */ +# define BAD 04 /* something wrong */ + int nbol; /* number of ^ used */ + int neol; /* number of $ used */ + int ncategories; /* how many character categories */ + cat_t *categories; /* ->catspace[-UCHAR_MIN] */ + unsigned char *must; /* match must contain this string */ + int mlen; /* length of must */ + size_t nsub; /* copy of re_nsub */ + int backrefs; /* does it use back references? */ + sopno nplus; /* how deep does it nest +s? 
*/ + /* catspace must be last */ + cat_t catspace[1]; /* actually [NC] */ +}; + +/* misc utilities */ +#define OUT (UCHAR_MAX+1) /* a non-character value */ +#define ISWORD(c) (isalnum(c) || (c) == '_') diff --git a/ext/ereg/regex/regexec.c b/ext/ereg/regex/regexec.c new file mode 100644 index 0000000000..c1fdfe0e03 --- /dev/null +++ b/ext/ereg/regex/regexec.c @@ -0,0 +1,138 @@ +/* + * the outer shell of regexec() + * + * This file includes engine.c *twice*, after muchos fiddling with the + * macros that code uses. This lets the same code operate on two different + * representations for state sets. + */ +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <ctype.h> + +#include "regex.h" +#include "utils.h" +#include "regex2.h" + +#define PHP_REGEX_NOPE 0; /* for use in asserts; shuts lint up */ + +/* macros for manipulating states, small version */ +#define states unsigned +#define states1 unsigned /* for later use in regexec() decision */ +#define CLEAR(v) ((v) = 0) +#define SET0(v, n) ((v) &= ~((unsigned)1 << (n))) +#define SET1(v, n) ((v) |= (unsigned)1 << (n)) +#define ISSET(v, n) ((v) & ((unsigned)1 << (n))) +#define ASSIGN(d, s) ((d) = (s)) +#define EQ(a, b) ((a) == (b)) +#define STATEVARS int dummy /* dummy version */ +#define STATESETUP(m, n) /* nothing */ +#define STATETEARDOWN(m) /* nothing */ +#define SETUP(v) ((v) = 0) +#define onestate unsigned +#define INIT(o, n) ((o) = (unsigned)1 << (n)) +#define INC(o) ((o) <<= 1) +#define ISSTATEIN(v, o) ((v) & (o)) +/* some abbreviations; note that some of these know variable names! 
*/ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n)) +#define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n)) +#define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n))) +/* function names */ +#define SNAMES /* engine.c looks after details */ + +#include "engine.c" + +/* now undo things */ +#undef states +#undef CLEAR +#undef SET0 +#undef SET1 +#undef ISSET +#undef ASSIGN +#undef EQ +#undef STATEVARS +#undef STATESETUP +#undef STATETEARDOWN +#undef SETUP +#undef onestate +#undef INIT +#undef INC +#undef ISSTATEIN +#undef FWD +#undef BACK +#undef ISSETBACK +#undef SNAMES + +/* macros for manipulating states, large version */ +#define states unsigned char * +#define CLEAR(v) memset(v, 0, m->g->nstates) +#define SET0(v, n) ((v)[n] = 0) +#define SET1(v, n) ((v)[n] = 1) +#define ISSET(v, n) ((v)[n]) +#define ASSIGN(d, s) memcpy(d, s, m->g->nstates) +#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) +#define STATEVARS int vn; unsigned char *space +#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ + if ((m)->space == NULL) return(REG_ESPACE); \ + (m)->vn = 0; } +#define STATETEARDOWN(m) { free((m)->space); } +#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) +#define onestate int +#define INIT(o, n) ((o) = (n)) +#define INC(o) ((o)++) +#define ISSTATEIN(v, o) ((v)[o]) +/* some abbreviations; note that some of these know variable names! 
*/ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) +#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) +#define ISSETBACK(v, n) ((v)[here - (n)]) +/* function names */ +#define LNAMES /* flag */ + +#include "engine.c" + +/* + - regexec - interface for matching + = API_EXPORT(int) regexec(const regex_t *, const char *, size_t, \ + = regmatch_t [], int); + = #define REG_NOTBOL 00001 + = #define REG_NOTEOL 00002 + = #define REG_STARTEND 00004 + = #define REG_TRACE 00400 // tracing of execution + = #define REG_LARGE 01000 // force large representation + = #define REG_BACKR 02000 // force use of backref code + * + * We put this here so we can exploit knowledge of the state representation + * when choosing which matcher to call. Also, by this point the matchers + * have been prototyped. + */ +API_EXPORT(int) /* 0 success, REG_NOMATCH failure */ +regexec(preg, string, nmatch, pmatch, eflags) +const regex_t *preg; +const char *string; +size_t nmatch; +regmatch_t pmatch[]; +int eflags; +{ + register struct re_guts *g = preg->re_g; +#ifdef REDEBUG +# define GOODFLAGS(f) (f) +#else +# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) +#endif + + if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) + return(REG_BADPAT); + assert(!(g->iflags&BAD)); + if (g->iflags&BAD) /* backstop for no-debug case */ + return(REG_BADPAT); + eflags = GOODFLAGS(eflags); + + if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE)) + return(smatcher(g, (unsigned char *)string, nmatch, pmatch, eflags)); + else + return(lmatcher(g, (unsigned char *)string, nmatch, pmatch, eflags)); +} diff --git a/ext/ereg/regex/regfree.c b/ext/ereg/regex/regfree.c new file mode 100644 index 0000000000..a1de3d4128 --- /dev/null +++ b/ext/ereg/regex/regfree.c @@ -0,0 +1,37 @@ +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> + +#include "regex.h" +#include "utils.h" +#include "regex2.h" + +/* + -
regfree - free everything + = API_EXPORT(void) regfree(regex_t *); + */ +API_EXPORT(void) +regfree(preg) +regex_t *preg; +{ + register struct re_guts *g; + + if (preg->re_magic != MAGIC1) /* oops */ + return; /* nice to complain, but hard */ + + g = preg->re_g; + if (g == NULL || g->magic != MAGIC2) /* oops again */ + return; + preg->re_magic = 0; /* mark it invalid */ + g->magic = 0; /* mark it invalid */ + + if (g->strip != NULL) + free((char *)g->strip); + if (g->sets != NULL) + free((char *)g->sets); + if (g->setbits != NULL) + free((char *)g->setbits); + if (g->must != NULL) + free(g->must); + free((char *)g); +} diff --git a/ext/ereg/regex/split.c b/ext/ereg/regex/split.c new file mode 100644 index 0000000000..188bdb775b --- /dev/null +++ b/ext/ereg/regex/split.c @@ -0,0 +1,316 @@ +#include <stdio.h> +#include <string.h> + +/* + - split - divide a string into fields, like awk split() + = int split(char *string, char *fields[], int nfields, char *sep); + */ +int /* number of fields, including overflow */ +split(string, fields, nfields, sep) +char *string; +char *fields[]; /* list is not NULL-terminated */ +int nfields; /* number of entries available in fields[] */ +char *sep; /* "" white, "c" single char, "ab" [ab]+ */ +{ + register char *p = string; + register char c; /* latest character */ + register char sepc = sep[0]; + register char sepc2; + register int fn; + register char **fp = fields; + register char *sepp; + register int trimtrail; + + /* white space */ + if (sepc == '\0') { + while ((c = *p++) == ' ' || c == '\t') + continue; + p--; + trimtrail = 1; + sep = " \t"; /* note, code below knows this is 2 long */ + sepc = ' '; + } else + trimtrail = 0; + sepc2 = sep[1]; /* now we can safely pick this up */ + + /* catch empties */ + if (*p == '\0') + return(0); + + /* single separator */ + if (sepc2 == '\0') { + fn = nfields; + for (;;) { + *fp++ = p; + fn--; + if (fn == 0) + break; + while ((c = *p++) != sepc) + if (c == '\0') + return(nfields - fn); + 
*(p-1) = '\0'; + } + /* we have overflowed the fields vector -- just count them */ + fn = nfields; + for (;;) { + while ((c = *p++) != sepc) + if (c == '\0') + return(fn); + fn++; + } + /* not reached */ + } + + /* two separators */ + if (sep[2] == '\0') { + fn = nfields; + for (;;) { + *fp++ = p; + fn--; + while ((c = *p++) != sepc && c != sepc2) + if (c == '\0') { + if (trimtrail && **(fp-1) == '\0') + fn++; + return(nfields - fn); + } + if (fn == 0) + break; + *(p-1) = '\0'; + while ((c = *p++) == sepc || c == sepc2) + continue; + p--; + } + /* we have overflowed the fields vector -- just count them */ + fn = nfields; + while (c != '\0') { + while ((c = *p++) == sepc || c == sepc2) + continue; + p--; + fn++; + while ((c = *p++) != '\0' && c != sepc && c != sepc2) + continue; + } + /* might have to trim trailing white space */ + if (trimtrail) { + p--; + while ((c = *--p) == sepc || c == sepc2) + continue; + p++; + if (*p != '\0') { + if (fn == nfields+1) + *p = '\0'; + fn--; + } + } + return(fn); + } + + /* n separators */ + fn = 0; + for (;;) { + if (fn < nfields) + *fp++ = p; + fn++; + for (;;) { + c = *p++; + if (c == '\0') + return(fn); + sepp = sep; + while ((sepc = *sepp++) != '\0' && sepc != c) + continue; + if (sepc != '\0') /* it was a separator */ + break; + } + if (fn < nfields) + *(p-1) = '\0'; + for (;;) { + c = *p++; + sepp = sep; + while ((sepc = *sepp++) != '\0' && sepc != c) + continue; + if (sepc == '\0') /* it wasn't a separator */ + break; + } + p--; + } + + /* not reached */ +} + +#ifdef TEST_SPLIT + + +/* + * test program + * pgm runs regression + * pgm sep splits stdin lines by sep + * pgm str sep splits str by sep + * pgm str sep n splits str by sep n times + */ +int +main(argc, argv) +int argc; +char *argv[]; +{ + char buf[512]; + register int n; +# define MNF 10 + char *fields[MNF]; + + if (argc > 4) + for (n = atoi(argv[3]); n > 0; n--) { + (void) strcpy(buf, argv[1]); + } + else if (argc > 3) + for (n = atoi(argv[3]); n > 0; n--) { + 
(void) strcpy(buf, argv[1]); + (void) split(buf, fields, MNF, argv[2]); + } + else if (argc > 2) + dosplit(argv[1], argv[2]); + else if (argc > 1) + while (fgets(buf, sizeof(buf), stdin) != NULL) { + buf[strlen(buf)-1] = '\0'; /* stomp newline */ + dosplit(buf, argv[1]); + } + else + regress(); + + exit(0); +} + +dosplit(string, seps) +char *string; +char *seps; +{ +# define NF 5 + char *fields[NF]; + register int nf; + + nf = split(string, fields, NF, seps); + print(nf, NF, fields); +} + +print(nf, nfp, fields) +int nf; +int nfp; +char *fields[]; +{ + register int fn; + register int bound; + + bound = (nf > nfp) ? nfp : nf; + printf("%d:\t", nf); + for (fn = 0; fn < bound; fn++) + printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n"); +} + +#define RNF 5 /* some table entries know this */ +struct { + char *str; + char *seps; + int nf; + char *fi[RNF]; +} tests[] = { + "", " ", 0, { "" }, + " ", " ", 2, { "", "" }, + "x", " ", 1, { "x" }, + "xy", " ", 1, { "xy" }, + "x y", " ", 2, { "x", "y" }, + "abc def g ", " ", 5, { "abc", "def", "", "g", "" }, + " a bcd", " ", 4, { "", "", "a", "bcd" }, + "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, + " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, + + "", " _", 0, { "" }, + " ", " _", 2, { "", "" }, + "x", " _", 1, { "x" }, + "x y", " _", 2, { "x", "y" }, + "ab _ cd", " _", 2, { "ab", "cd" }, + " a_b c ", " _", 5, { "", "a", "b", "c", "" }, + "a b c_d e f", " _", 6, { "a", "b", "c", "d", "e f" }, + " a b c d ", " _", 6, { "", "a", "b", "c", "d " }, + + "", " _~", 0, { "" }, + " ", " _~", 2, { "", "" }, + "x", " _~", 1, { "x" }, + "x y", " _~", 2, { "x", "y" }, + "ab _~ cd", " _~", 2, { "ab", "cd" }, + " a_b c~", " _~", 5, { "", "a", "b", "c", "" }, + "a b_c d~e f", " _~", 6, { "a", "b", "c", "d", "e f" }, + "~a b c d ", " _~", 6, { "", "a", "b", "c", "d " }, + + "", " _~-", 0, { "" }, + " ", " _~-", 2, { "", "" }, + "x", " _~-", 1, { "x" }, + "x y", " _~-", 2, { "x", "y" }, + "ab _~- cd", " _~-", 2, { "ab", 
"cd" }, + " a_b c~", " _~-", 5, { "", "a", "b", "c", "" }, + "a b_c-d~e f", " _~-", 6, { "a", "b", "c", "d", "e f" }, + "~a-b c d ", " _~-", 6, { "", "a", "b", "c", "d " }, + + "", " ", 0, { "" }, + " ", " ", 2, { "", "" }, + "x", " ", 1, { "x" }, + "xy", " ", 1, { "xy" }, + "x y", " ", 2, { "x", "y" }, + "abc def g ", " ", 4, { "abc", "def", "g", "" }, + " a bcd", " ", 3, { "", "a", "bcd" }, + "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, + " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, + + "", "", 0, { "" }, + " ", "", 0, { "" }, + "x", "", 1, { "x" }, + "xy", "", 1, { "xy" }, + "x y", "", 2, { "x", "y" }, + "abc def g ", "", 3, { "abc", "def", "g" }, + "\t a bcd", "", 2, { "a", "bcd" }, + " a \tb\t c ", "", 3, { "a", "b", "c" }, + "a b c d e ", "", 5, { "a", "b", "c", "d", "e" }, + "a b\tc d e f", "", 6, { "a", "b", "c", "d", "e f" }, + " a b c d e f ", "", 6, { "a", "b", "c", "d", "e f " }, + + NULL, NULL, 0, { NULL }, +}; + +regress() +{ + char buf[512]; + register int n; + char *fields[RNF+1]; + register int nf; + register int i; + register int printit; + register char *f; + + for (n = 0; tests[n].str != NULL; n++) { + (void) strcpy(buf, tests[n].str); + fields[RNF] = NULL; + nf = split(buf, fields, RNF, tests[n].seps); + printit = 0; + if (nf != tests[n].nf) { + printf("split `%s' by `%s' gave %d fields, not %d\n", + tests[n].str, tests[n].seps, nf, tests[n].nf); + printit = 1; + } else if (fields[RNF] != NULL) { + printf("split() went beyond array end\n"); + printit = 1; + } else { + for (i = 0; i < nf && i < RNF; i++) { + f = fields[i]; + if (f == NULL) + f = "(NULL)"; + if (strcmp(f, tests[n].fi[i]) != 0) { + printf("split `%s' by `%s', field %d is `%s', not `%s'\n", + tests[n].str, tests[n].seps, + i, fields[i], tests[n].fi[i]); + printit = 1; + } + } + } + if (printit) + print(nf, RNF, fields); + } +} +#endif diff --git a/ext/ereg/regex/tests b/ext/ereg/regex/tests new file mode 100644 index 0000000000..c05846177f --- /dev/null +++ 
b/ext/ereg/regex/tests @@ -0,0 +1,475 @@ +# regular expression test set +# Lines are at least three fields, separated by one or more tabs. "" stands +# for an empty field. First field is an RE. Second field is flags. If +# C flag given, regcomp() is expected to fail, and the third field is the +# error name (minus the leading REG_). +# +# Otherwise it is expected to succeed, and the third field is the string to +# try matching it against. If there is no fourth field, the match is +# expected to fail. If there is a fourth field, it is the substring that +# the RE is expected to match. If there is a fifth field, it is a comma- +# separated list of what the subexpressions should match, with - indicating +# no match for that one. In both the fourth and fifth fields, a (sub)field +# starting with @ indicates that the (sub)expression is expected to match +# a null string followed by the stuff after the @; this provides a way to +# test where null strings match. The character `N' in REs and strings +# is newline, `S' is space, `T' is tab, `Z' is NUL. +# +# The full list of flags: +# - placeholder, does nothing +# b RE is a BRE, not an ERE +# & try it as both an ERE and a BRE +# C regcomp() error expected, third field is error name +# i REG_ICASE +# m ("mundane") REG_NOSPEC +# s REG_NOSUB (not really testable) +# n REG_NEWLINE +# ^ REG_NOTBOL +# $ REG_NOTEOL +# # REG_STARTEND (see below) +# p REG_PEND +# +# For REG_STARTEND, the start/end offsets are those of the substring +# enclosed in (). 
+ +# basics +a & a a +abc & abc abc +abc|de - abc abc +a|b|c - abc a + +# parentheses and perversions thereof +a(b)c - abc abc +a\(b\)c b abc abc +a( C EPAREN +a( b a( a( +a\( - a( a( +a\( bC EPAREN +a\(b bC EPAREN +a(b C EPAREN +a(b b a(b a(b +# gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly) +a) - a) a) +) - ) ) +# end gagging (in a just world, those *should* give EPAREN) +a) b a) a) +a\) bC EPAREN +\) bC EPAREN +a()b - ab ab +a\(\)b b ab ab + +# anchoring and REG_NEWLINE +^abc$ & abc abc +a^b - a^b +a^b b a^b a^b +a$b - a$b +a$b b a$b a$b +^ & abc @abc +$ & abc @ +^$ & "" @ +$^ - "" @ +\($\)\(^\) b "" @ +# stop retching, those are legitimate (although disgusting) +^^ - "" @ +$$ - "" @ +b$ & abNc +b$ &n abNc b +^b$ & aNbNc +^b$ &n aNbNc b +^$ &n aNNb @Nb +^$ n abc +^$ n abcN @ +$^ n aNNb @Nb +\($\)\(^\) bn aNNb @Nb +^^ n^ aNNb @Nb +$$ n aNNb @NN +^a ^ a +a$ $ a +^a ^n aNb +^b ^n aNb b +a$ $n bNa +b$ $n bNa b +a*(^b$)c* - b b +a*\(^b$\)c* b b b + +# certain syntax errors and non-errors +| C EMPTY +| b | | +* C BADRPT +* b * * ++ C BADRPT +? C BADRPT +"" &C EMPTY +() - abc @abc +\(\) b abc @abc +a||b C EMPTY +|ab C EMPTY +ab| C EMPTY +(|a)b C EMPTY +(a|)b C EMPTY +(*a) C BADRPT +(+a) C BADRPT +(?a) C BADRPT +({1}a) C BADRPT +\(\{1\}a\) bC BADRPT +(a|*b) C BADRPT +(a|+b) C BADRPT +(a|?b) C BADRPT +(a|{1}b) C BADRPT +^* C BADRPT +^* b * * +^+ C BADRPT +^? 
C BADRPT +^{1} C BADRPT +^\{1\} bC BADRPT + +# metacharacters, backslashes +a.c & abc abc +a[bc]d & abd abd +a\*c & a*c a*c +a\\b & a\b a\b +a\\\*b & a\*b a\*b +a\bc & abc abc +a\ &C EESCAPE +a\\bc & a\bc a\bc +\{ bC BADRPT +a\[b & a[b a[b +a[b &C EBRACK +# trailing $ is a peculiar special case for the BRE code +a$ & a a +a$ & a$ +a\$ & a +a\$ & a$ a$ +a\\$ & a +a\\$ & a$ +a\\$ & a\$ +a\\$ & a\ a\ + +# back references, ugh +a\(b\)\2c bC ESUBREG +a\(b\1\)c bC ESUBREG +a\(b*\)c\1d b abbcbbd abbcbbd bb +a\(b*\)c\1d b abbcbd +a\(b*\)c\1d b abbcbbbd +^\(.\)\1 b abc +a\([bc]\)\1d b abcdabbd abbd b +a\(\([bc]\)\2\)*d b abbccd abbccd +a\(\([bc]\)\2\)*d b abbcbd +# actually, this next one probably ought to fail, but the spec is unclear +a\(\(b\)*\2\)*d b abbbd abbbd +# here is a case that no NFA implementation does right +\(ab*\)[ab]*\1 b ababaaa ababaaa a +# check out normal matching in the presence of back refs +\(a\)\1bcd b aabcd aabcd +\(a\)\1bc*d b aabcd aabcd +\(a\)\1bc*d b aabd aabd +\(a\)\1bc*d b aabcccd aabcccd +\(a\)\1bc*[ce]d b aabcccd aabcccd +^\(a\)\1b\(c\)*cd$ b aabcccd aabcccd + +# ordinary repetitions +ab*c & abc abc +ab+c - abc abc +ab?c - abc abc +a\(*\)b b a*b a*b +a\(**\)b b ab ab +a\(***\)b bC BADRPT +*a b *a *a +**a b a a +***a bC BADRPT + +# the dreaded bounded repetitions +{ & { { +{abc & {abc {abc +{1 C BADRPT +{1} C BADRPT +a{b & a{b a{b +a{1}b - ab ab +a\{1\}b b ab ab +a{1,}b - ab ab +a\{1,\}b b ab ab +a{1,2}b - aab aab +a\{1,2\}b b aab aab +a{1 C EBRACE +a\{1 bC EBRACE +a{1a C EBRACE +a\{1a bC EBRACE +a{1a} C BADBR +a\{1a\} bC BADBR +a{,2} - a{,2} a{,2} +a\{,2\} bC BADBR +a{,} - a{,} a{,} +a\{,\} bC BADBR +a{1,x} C BADBR +a\{1,x\} bC BADBR +a{1,x C EBRACE +a\{1,x bC EBRACE +a{300} C BADBR +a\{300\} bC BADBR +a{1,0} C BADBR +a\{1,0\} bC BADBR +ab{0,0}c - abcac ac +ab\{0,0\}c b abcac ac +ab{0,1}c - abcac abc +ab\{0,1\}c b abcac abc +ab{0,3}c - abbcac abbc +ab\{0,3\}c b abbcac abbc +ab{1,1}c - acabc abc +ab\{1,1\}c b acabc abc +ab{1,3}c - acabc abc 
+ab\{1,3\}c b acabc abc +ab{2,2}c - abcabbc abbc +ab\{2,2\}c b abcabbc abbc +ab{2,4}c - abcabbc abbc +ab\{2,4\}c b abcabbc abbc +((a{1,10}){1,10}){1,10} - a a a,a + +# multiple repetitions +a** &C BADRPT +a++ C BADRPT +a?? C BADRPT +a*+ C BADRPT +a*? C BADRPT +a+* C BADRPT +a+? C BADRPT +a?* C BADRPT +a?+ C BADRPT +a{1}{1} C BADRPT +a*{1} C BADRPT +a+{1} C BADRPT +a?{1} C BADRPT +a{1}* C BADRPT +a{1}+ C BADRPT +a{1}? C BADRPT +a*{b} - a{b} a{b} +a\{1\}\{1\} bC BADRPT +a*\{1\} bC BADRPT +a\{1\}* bC BADRPT + +# brackets, and numerous perversions thereof +a[b]c & abc abc +a[ab]c & abc abc +a[^ab]c & adc adc +a[]b]c & a]c a]c +a[[b]c & a[c a[c +a[-b]c & a-c a-c +a[^]b]c & adc adc +a[^-b]c & adc adc +a[b-]c & a-c a-c +a[b &C EBRACK +a[] &C EBRACK +a[1-3]c & a2c a2c +a[3-1]c &C ERANGE +a[1-3-5]c &C ERANGE +a[[.-.]--]c & a-c a-c +a[1- &C ERANGE +a[[. &C EBRACK +a[[.x &C EBRACK +a[[.x. &C EBRACK +a[[.x.] &C EBRACK +a[[.x.]] & ax ax +a[[.x,.]] &C ECOLLATE +a[[.one.]]b & a1b a1b +a[[.notdef.]]b &C ECOLLATE +a[[.].]]b & a]b a]b +a[[:alpha:]]c & abc abc +a[[:notdef:]]c &C ECTYPE +a[[: &C EBRACK +a[[:alpha &C EBRACK +a[[:alpha:] &C EBRACK +a[[:alpha,:] &C ECTYPE +a[[:]:]]b &C ECTYPE +a[[:-:]]b &C ECTYPE +a[[:alph:]] &C ECTYPE +a[[:alphabet:]] &C ECTYPE +[[:alnum:]]+ - -%@a0X- a0X +[[:alpha:]]+ - -%@aX0- aX +[[:blank:]]+ - aSSTb SST +[[:cntrl:]]+ - aNTb NT +[[:digit:]]+ - a019b 019 +[[:graph:]]+ - Sa%bS a%b +[[:lower:]]+ - AabC ab +[[:print:]]+ - NaSbN aSb +[[:punct:]]+ - S%-&T %-& +[[:space:]]+ - aSNTb SNT +[[:upper:]]+ - aBCd BC +[[:xdigit:]]+ - p0f3Cq 0f3C +a[[=b=]]c & abc abc +a[[= &C EBRACK +a[[=b &C EBRACK +a[[=b= &C EBRACK +a[[=b=] &C EBRACK +a[[=b,=]] &C ECOLLATE +a[[=one=]]b & a1b a1b + +# complexities +a(((b)))c - abc abc +a(b|(c))d - abd abd +a(b*|c)d - abbd abbd +# just gotta have one DFA-buster, of course +a[ab]{20} - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab +# and an inline expansion in case somebody gets tricky 
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab +# and in case somebody just slips in an NFA... +a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) - aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights +# fish for anomalies as the number of states passes 32 +12345678901234567890123456789 - a12345678901234567890123456789b 12345678901234567890123456789 +123456789012345678901234567890 - a123456789012345678901234567890b 123456789012345678901234567890 +1234567890123456789012345678901 - a1234567890123456789012345678901b 1234567890123456789012345678901 +12345678901234567890123456789012 - a12345678901234567890123456789012b 12345678901234567890123456789012 +123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123 +# and one really big one, beyond any plausible word width +1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890 +# fish for problems as brackets go past 8 +[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm +[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo +[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq +[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq + +# subtleties of matching +abc & xabcy abc +a\(b\)?c\1d b acd +aBc i Abc Abc +a[Bc]*d i abBCcd abBCcd +0[[:upper:]]1 &i 0a1 0a1 +0[[:lower:]]1 &i 0A1 0A1 +a[^b]c &i abc +a[^b]c &i aBc +a[^b]c &i adc adc +[a]b[c] - abc abc +[a]b[a] - aba aba +[abc]b[abc] - abc abc +[abc]b[abd] - abd abd +a(b?c)+d - accd accd +(wee|week)(knights|night) - weeknights weeknights +(we|wee|week|frob)(knights|night|day) - weeknights weeknights +a[bc]d - xyzaaabcaababdacd abd +a[ab]c - aaabc abc +abc s abc abc +a* & b @b + +# Let's have some fun -- try to 
match a C comment. +# first the obvious, which looks okay at first glance... +/\*.*\*/ - /*x*/ /*x*/ +# but... +/\*.*\*/ - /*x*/y/*z*/ /*x*/y/*z*/ +# okay, we must not match */ inside; try to do that... +/\*([^*]|\*[^/])*\*/ - /*x*/ /*x*/ +/\*([^*]|\*[^/])*\*/ - /*x*/y/*z*/ /*x*/ +# but... +/\*([^*]|\*[^/])*\*/ - /*x**/y/*z*/ /*x**/y/*z*/ +# and a still fancier version, which does it right (I think)... +/\*([^*]|\*+[^*/])*\*+/ - /*x*/ /*x*/ +/\*([^*]|\*+[^*/])*\*+/ - /*x*/y/*z*/ /*x*/ +/\*([^*]|\*+[^*/])*\*+/ - /*x**/y/*z*/ /*x**/ +/\*([^*]|\*+[^*/])*\*+/ - /*x****/y/*z*/ /*x****/ +/\*([^*]|\*+[^*/])*\*+/ - /*x**x*/y/*z*/ /*x**x*/ +/\*([^*]|\*+[^*/])*\*+/ - /*x***x/y/*z*/ /*x***x/y/*z*/ + +# subexpressions +a(b)(c)d - abcd abcd b,c +a(((b)))c - abc abc b,b,b +a(b|(c))d - abd abd b,- +a(b*|c|e)d - abbd abbd bb +a(b*|c|e)d - acd acd c +a(b*|c|e)d - ad ad @d +a(b?)c - abc abc b +a(b?)c - ac ac @c +a(b+)c - abc abc b +a(b+)c - abbbc abbbc bbb +a(b*)c - ac ac @c +(a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de +# the regression tester only asks for 9 subexpressions +a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j +a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l - abcdefghijkl abcdefghijkl b,c,d,e,f,g,h,i,j,k +a([bc]?)c - abc abc b +a([bc]?)c - ac ac @c +a([bc]+)c - abc abc b +a([bc]+)c - abcc abcc bc +a([bc]+)bc - abcbc abcbc bc +a(bb+|b)b - abb abb b +a(bbb+|bb+|b)b - abb abb b +a(bbb+|bb+|b)b - abbb abbb bb +a(bbb+|bb+|b)bb - abbb abbb b +(.*).* - abcdef abcdef abcdef +(a*)* - bc @b @b + +# do we get the right subexpression when it is used more than once? 
+a(b|c)*d - ad ad - +a(b|c)*d - abcd abcd c +a(b|c)+d - abd abd b +a(b|c)+d - abcd abcd c +a(b|c?)+d - ad ad @d +a(b|c?)+d - abcd abcd @d +a(b|c){0,0}d - ad ad - +a(b|c){0,1}d - ad ad - +a(b|c){0,1}d - abd abd b +a(b|c){0,2}d - ad ad - +a(b|c){0,2}d - abcd abcd c +a(b|c){0,}d - ad ad - +a(b|c){0,}d - abcd abcd c +a(b|c){1,1}d - abd abd b +a(b|c){1,1}d - acd acd c +a(b|c){1,2}d - abd abd b +a(b|c){1,2}d - abcd abcd c +a(b|c){1,}d - abd abd b +a(b|c){1,}d - abcd abcd c +a(b|c){2,2}d - acbd acbd b +a(b|c){2,2}d - abcd abcd c +a(b|c){2,4}d - abcd abcd c +a(b|c){2,4}d - abcbd abcbd b +a(b|c){2,4}d - abcbcd abcbcd c +a(b|c){2,}d - abcd abcd c +a(b|c){2,}d - abcbd abcbd b +a(b+|((c)*))+d - abd abd @d,@d,- +a(b+|((c)*))+d - abcd abcd @d,@d,- + +# check out the STARTEND option +[abc] &# a(b)c b +[abc] &# a(d)c +[abc] &# a(bc)d b +[abc] &# a(dc)d c +. &# a()c +b.*c &# b(bc)c bc +b.* &# b(bc)c bc +.*c &# b(bc)c bc + +# plain strings, with the NOSPEC flag +abc m abc abc +abc m xabcy abc +abc m xyz +a*b m aba*b a*b +a*b m ab +"" mC EMPTY + +# cases involving NULs +aZb & a a +aZb &p a +aZb &p# (aZb) aZb +aZ*b &p# (ab) ab +a.b &# (aZb) aZb +a.* &# (aZb)c aZb + +# word boundaries (ick) +[[:<:]]a & a a +[[:<:]]a & ba +[[:<:]]a & -a a +a[[:>:]] & a a +a[[:>:]] & ab +a[[:>:]] & a- a +[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc +[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc +[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc +[[:<:]]b.c[[:>:]] & a_bxc-byc_d-bzc-q bzc +[[:<:]].x..[[:>:]] & y_xa_-_xb_y-_xc_-axdc _xc_ +[[:<:]]a_b[[:>:]] & x_a_b + +# past problems, and suspected problems +(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1 +abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop +abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv +(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11 
+CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11 +Char \([a-z0-9_]*\)\[.* b Char xyz[k Char xyz[k xyz +a?b - ab ab +-\{0,1\}[0-9]*$ b -5 -5 diff --git a/ext/ereg/regex/utils.h b/ext/ereg/regex/utils.h new file mode 100644 index 0000000000..66ae78437e --- /dev/null +++ b/ext/ereg/regex/utils.h @@ -0,0 +1,23 @@ +/* utility definitions */ + +#ifdef _POSIX2_RE_DUP_MAX +#define DUPMAX _POSIX2_RE_DUP_MAX +#else +#define DUPMAX 255 +#endif +#define INFINITY (DUPMAX + 1) +#define NC (CHAR_MAX - CHAR_MIN + 1) +typedef unsigned char uch; + +/* switch off assertions (if not already off) if no REDEBUG */ +#ifndef REDEBUG +#ifndef NDEBUG +#define NDEBUG /* no assertions please */ +#endif +#endif +#include <assert.h> + +/* for old systems with bcopy() but no memmove() */ +#ifdef USEBCOPY +#define memmove(d, s, c) bcopy(s, d, c) +#endif diff --git a/ext/ereg/tests/001.phpt b/ext/ereg/tests/001.phpt new file mode 100644 index 0000000000..0ba697978c --- /dev/null +++ b/ext/ereg/tests/001.phpt @@ -0,0 +1,8 @@ +--TEST-- +RegReplace test 1 +--FILE-- +<?php $a="abc123"; + echo ereg_replace("123","def",$a)?> +--EXPECTF-- +Deprecated: Function ereg_replace() is deprecated in %s on line %d +abcdef diff --git a/ext/ereg/tests/002.phpt b/ext/ereg/tests/002.phpt new file mode 100644 index 0000000000..6ce12a936a --- /dev/null +++ b/ext/ereg/tests/002.phpt @@ -0,0 +1,8 @@ +--TEST-- +RegReplace test 2 +--FILE-- +<?php $a="abc123"; + echo ereg_replace("123","",$a)?> +--EXPECTF-- +Deprecated: Function ereg_replace() is deprecated in %s on line %d +abc diff --git a/ext/ereg/tests/003.phpt b/ext/ereg/tests/003.phpt new file mode 100644 index 0000000000..5431349fa9 --- /dev/null +++ b/ext/ereg/tests/003.phpt @@ -0,0 +1,9 @@ +--TEST-- +ereg_replace single-quote test +--FILE-- +<?php $a="\\'test"; + echo ereg_replace("\\\\'","'",$a) +?> +--EXPECTF-- +Deprecated: Function ereg_replace() is deprecated in %s on line %d +'test diff --git a/ext/ereg/tests/004.phpt 
b/ext/ereg/tests/004.phpt new file mode 100644 index 0000000000..db16fe1471 --- /dev/null +++ b/ext/ereg/tests/004.phpt @@ -0,0 +1,17 @@ +--TEST-- +simple ereg test +--FILE-- +<?php $a="This is a nice and simple string"; + if (ereg(".*nice and simple.*",$a)) { + echo "ok\n"; + } + if (!ereg(".*doesn't exist.*",$a)) { + echo "ok\n"; + } +?> +--EXPECTF-- +Deprecated: Function ereg() is deprecated in %s on line %d +ok + +Deprecated: Function ereg() is deprecated in %s on line %d +ok diff --git a/ext/ereg/tests/005.phpt b/ext/ereg/tests/005.phpt new file mode 100644 index 0000000000..127ede0c49 --- /dev/null +++ b/ext/ereg/tests/005.phpt @@ -0,0 +1,19 @@ +--TEST-- +Test Regular expression register support in ereg +--FILE-- +<?php $a="This is a nice and simple string"; + echo ereg(".*(is).*(is).*",$a,$registers); + echo "\n"; + echo $registers[0]; + echo "\n"; + echo $registers[1]; + echo "\n"; + echo $registers[2]; + echo "\n"; +?> +--EXPECTF-- +Deprecated: Function ereg() is deprecated in %s on line %d +32 +This is a nice and simple string +is +is diff --git a/ext/ereg/tests/006.phpt b/ext/ereg/tests/006.phpt new file mode 100644 index 0000000000..7df88dd321 --- /dev/null +++ b/ext/ereg/tests/006.phpt @@ -0,0 +1,9 @@ +--TEST-- +Test ereg_replace of start-of-line +--FILE-- +<?php $a="This is a nice and simple string"; + echo ereg_replace("^This","That",$a); +?> +--EXPECTF-- +Deprecated: Function ereg_replace() is deprecated in %s on line %d +That is a nice and simple string diff --git a/ext/ereg/tests/007.phpt b/ext/ereg/tests/007.phpt new file mode 100644 index 0000000000..3bbd6551b6 --- /dev/null +++ b/ext/ereg/tests/007.phpt @@ -0,0 +1,11 @@ +--TEST-- +Test empty result buffer in reg_replace +--FILE-- +<?php + $a="abcd"; + $b=ereg_replace("abcd","",$a); + echo "strlen(\$b)=".strlen($b); +?> +--EXPECTF-- +Deprecated: Function ereg_replace() is deprecated in %s on line %d +strlen($b)=0 diff --git a/ext/ereg/tests/008.phpt b/ext/ereg/tests/008.phpt new file mode 100644 
index 0000000000..ad34ba3978 --- /dev/null +++ b/ext/ereg/tests/008.phpt @@ -0,0 +1,9 @@ +--TEST-- +Test back-references in regular expressions +--FILE-- +<?php + echo ereg_replace("([a-z]*)([-=+|]*)([0-9]+)","\\3 \\1 \\2\n","abc+-|=123"); +?> +--EXPECTF-- +Deprecated: Function ereg_replace() is deprecated in %s on line %d +123 abc +-|= diff --git a/ext/ereg/tests/009.phpt b/ext/ereg/tests/009.phpt new file mode 100644 index 0000000000..513f31a83e --- /dev/null +++ b/ext/ereg/tests/009.phpt @@ -0,0 +1,18 @@ +--TEST-- +Test split() +--FILE-- +<?php + $a=split("[[:space:]]","this is a +test"); + echo count($a) . "\n"; + for ($i = 0; $i < count($a); $i++) { + echo $a[$i] . "\n"; + } +?> +--EXPECTF-- +Deprecated: Function split() is deprecated in %s on line %d +4 +this +is +a +test diff --git a/ext/ereg/tests/010.phpt b/ext/ereg/tests/010.phpt new file mode 100644 index 0000000000..52a3e3bb1c --- /dev/null +++ b/ext/ereg/tests/010.phpt @@ -0,0 +1,8 @@ +--TEST-- +Long back references +--FILE-- +<?php $a="abc122222222223"; + echo ereg_replace("1(2*)3","\\1def\\1",$a)?> +--EXPECTF-- +Deprecated: Function ereg_replace() is deprecated in %s on line %d +abc2222222222def2222222222 diff --git a/ext/ereg/tests/011.phpt b/ext/ereg/tests/011.phpt new file mode 100644 index 0000000000..8bd51b467a --- /dev/null +++ b/ext/ereg/tests/011.phpt @@ -0,0 +1,8 @@ +--TEST-- +\0 back reference +--FILE-- +<?php $a="abc123"; + echo ereg_replace("123","def\\0ghi",$a)?> +--EXPECTF-- +Deprecated: Function ereg_replace() is deprecated in %s on line %d +abcdef123ghi diff --git a/ext/ereg/tests/012.phpt b/ext/ereg/tests/012.phpt new file mode 100644 index 0000000000..d54ae01f12 --- /dev/null +++ b/ext/ereg/tests/012.phpt @@ -0,0 +1,8 @@ +--TEST-- +nonexisting back reference +--FILE-- +<?php $a="abc123"; + echo ereg_replace("123",'def\1ghi',$a)?> +--EXPECTF-- +Deprecated: Function ereg_replace() is deprecated in %s on line %d +abcdef\1ghi diff --git a/ext/ereg/tests/013.phpt 
b/ext/ereg/tests/013.phpt new file mode 100644 index 0000000000..23f6944ff5 --- /dev/null +++ b/ext/ereg/tests/013.phpt @@ -0,0 +1,8 @@ +--TEST-- +escapes in replace string +--FILE-- +<?php $a="abc123"; + echo ereg_replace("123","def\\g\\\\hi\\",$a)?> +--EXPECTF-- +Deprecated: Function ereg_replace() is deprecated in %s on line %d +abcdef\g\\hi\ diff --git a/ext/ereg/tests/014.phpt b/ext/ereg/tests/014.phpt new file mode 100644 index 0000000000..ac68802061 --- /dev/null +++ b/ext/ereg/tests/014.phpt @@ -0,0 +1,8 @@ +--TEST-- +backreferences not replaced recursively +--FILE-- +<?php $a="a\\2bxc"; + echo ereg_replace("a(.*)b(.*)c","\\1",$a)?> +--EXPECTF-- +Deprecated: Function ereg_replace() is deprecated in %s on line %d +\2 diff --git a/ext/ereg/tests/015.phpt b/ext/ereg/tests/015.phpt new file mode 100644 index 0000000000..edb8e709c6 --- /dev/null +++ b/ext/ereg/tests/015.phpt @@ -0,0 +1,7 @@ +--TEST-- +replace empty matches +--FILE-- +<?php echo ereg_replace("^","z","abc123")?> +--EXPECTF-- +Deprecated: Function ereg_replace() is deprecated in %s on line %d +zabc123 diff --git a/ext/ereg/tests/016.phpt b/ext/ereg/tests/016.phpt new file mode 100644 index 0000000000..93dbda7ee5 --- /dev/null +++ b/ext/ereg/tests/016.phpt @@ -0,0 +1,7 @@ +--TEST-- +test backslash handling in regular expressions +--FILE-- +<?php echo ereg_replace('\?',"abc","?123?")?> +--EXPECTF-- +Deprecated: Function ereg_replace() is deprecated in %s on line %d +abc123abc diff --git a/ext/ereg/tests/ereg_basic_001.phpt b/ext/ereg/tests/ereg_basic_001.phpt new file mode 100644 index 0000000000..e9dad37636 --- /dev/null +++ b/ext/ereg/tests/ereg_basic_001.phpt @@ -0,0 +1,153 @@ +--TEST-- +Test ereg() function : basic functionality (with $regs) +--FILE-- +<?php +/* Prototype : proto int ereg(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test a number of simple, valid matches with 
ereg, specifying $regs + */ + +echo "*** Testing ereg() : basic functionality ***\n"; + +include(dirname(__FILE__) . '/regular_expressions.inc'); + +foreach ($expressions as $re) { + list($pattern,$string) = $re; + echo "--> Pattern: '$pattern'; string: '$string'\n"; + var_dump(ereg($pattern, $string, $regs)); + var_dump($regs); +} + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg() : basic functionality *** +--> Pattern: '..(a|b|c)(a|b|c)..'; string: '--- ab ---' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(6) +array(3) { + [0]=> + string(6) "- ab -" + [1]=> + string(1) "a" + [2]=> + string(1) "b" +} +--> Pattern: '()'; string: '' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +array(2) { + [0]=> + bool(false) + [1]=> + bool(false) +} +--> Pattern: '()'; string: 'abcdef' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +array(2) { + [0]=> + bool(false) + [1]=> + bool(false) +} +--> Pattern: '[x]|[^x]'; string: 'abcdef' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +array(1) { + [0]=> + string(1) "a" +} +--> Pattern: '(a{1})(a{1,}) (b{1,3}) (c+) (d?ddd|e)'; string: '--- aaa bbb ccc ddd ---' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(15) +array(6) { + [0]=> + string(15) "aaa bbb ccc ddd" + [1]=> + string(1) "a" + [2]=> + string(2) "aa" + [3]=> + string(3) "bbb" + [4]=> + string(3) "ccc" + [5]=> + string(3) "ddd" +} +--> Pattern: '\\\`\^\.\[\$\(\)\|\*\+\?\{\''; string: '\`^.[$()|*+?{'' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(14) +array(1) { + [0]=> + string(14) "\`^.[$()|*+?{'" +} +--> Pattern: '\a'; string: 'a' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +array(1) { + [0]=> + string(1) "a" +} +--> Pattern: '[0-9][^0-9]'; string: '2a' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(2) +array(1) { + [0]=> + string(2) "2a" +} +--> Pattern: '^[[:alnum:]]{62,62}$'; string: 
'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(62) +array(1) { + [0]=> + string(62) "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +} +--> Pattern: '^[[:digit:]]{5}'; string: '0123456789' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(5) +array(1) { + [0]=> + string(5) "01234" +} +--> Pattern: '[[:digit:]]{5}$'; string: '0123456789' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(5) +array(1) { + [0]=> + string(5) "56789" +} +--> Pattern: '[[:blank:]]{1,10}'; string: ' + ' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(2) +array(1) { + [0]=> + string(2) " " +} +--> Pattern: '[[:print:]]{3}'; string: ' a ' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(3) +array(1) { + [0]=> + string(3) " a " +} +Done diff --git a/ext/ereg/tests/ereg_basic_002.phpt b/ext/ereg/tests/ereg_basic_002.phpt new file mode 100644 index 0000000000..75665fb100 --- /dev/null +++ b/ext/ereg/tests/ereg_basic_002.phpt @@ -0,0 +1,82 @@ +--TEST-- +Test ereg() function : basic functionality (without $regs) +--FILE-- +<?php +/* Prototype : proto int ereg(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test a number of simple, valid matches with ereg, without specifying $regs + */ + +echo "*** Testing ereg() : basic functionality ***\n"; + +include(dirname(__FILE__) . 
'/regular_expressions.inc'); + +foreach ($expressions as $re) { + list($pattern,$string) = $re; + echo "--> Pattern: '$pattern'; string: '$string'\n"; + var_dump(ereg($pattern, $string)); +} + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg() : basic functionality *** +--> Pattern: '..(a|b|c)(a|b|c)..'; string: '--- ab ---' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +--> Pattern: '()'; string: '' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +--> Pattern: '()'; string: 'abcdef' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +--> Pattern: '[x]|[^x]'; string: 'abcdef' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +--> Pattern: '(a{1})(a{1,}) (b{1,3}) (c+) (d?ddd|e)'; string: '--- aaa bbb ccc ddd ---' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +--> Pattern: '\\\`\^\.\[\$\(\)\|\*\+\?\{\''; string: '\`^.[$()|*+?{'' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +--> Pattern: '\a'; string: 'a' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +--> Pattern: '[0-9][^0-9]'; string: '2a' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +--> Pattern: '^[[:alnum:]]{62,62}$'; string: '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +--> Pattern: '^[[:digit:]]{5}'; string: '0123456789' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +--> Pattern: '[[:digit:]]{5}$'; string: '0123456789' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +--> Pattern: '[[:blank:]]{1,10}'; string: ' + ' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +--> Pattern: '[[:print:]]{3}'; string: ' a ' + +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) +Done diff --git a/ext/ereg/tests/ereg_basic_003.phpt b/ext/ereg/tests/ereg_basic_003.phpt new file 
mode 100644 index 0000000000..0cbe0797b9 --- /dev/null +++ b/ext/ereg/tests/ereg_basic_003.phpt @@ -0,0 +1,28 @@ +--TEST-- +Test ereg() function : basic functionality - long RE +--FILE-- +<?php +/* Prototype : proto int ereg(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test a long RE with lots of matches + */ + +var_dump(ereg(str_repeat('(.)', 2048), str_repeat('x', 2048))); +var_dump(ereg(str_repeat('(.)', 2048), str_repeat('x', 2048), $regs)); +var_dump(count($regs)); + +echo "Done"; +?> +--EXPECTF-- +Deprecated: Function ereg() is deprecated in %s on line %d +int(1) + +Deprecated: Function ereg() is deprecated in %s on line %d +int(2048) +int(2049) +Done diff --git a/ext/ereg/tests/ereg_basic_004.phpt b/ext/ereg/tests/ereg_basic_004.phpt new file mode 100644 index 0000000000..20bdf80073 --- /dev/null +++ b/ext/ereg/tests/ereg_basic_004.phpt @@ -0,0 +1,44 @@ +--TEST-- +Test ereg() function : basic functionality - a few non-matches +--FILE-- +<?php +/* Prototype : proto int ereg(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +$regs = 'original'; + +var_dump(ereg('A', 'a', $regs)); +var_dump(ereg('[A-Z]', '0', $regs)); +var_dump(ereg('(a){4}', 'aaa', $regs)); +var_dump(ereg('^a', 'ba', $regs)); +var_dump(ereg('b$', 'ba', $regs)); +var_dump(ereg('[:alpha:]', 'x', $regs)); + +// Ensure $regs is unchanged +var_dump($regs); + +echo "Done"; +?> +--EXPECTF-- +Deprecated: Function ereg() is deprecated in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d +bool(false) + +Deprecated: 
Function ereg() is deprecated in %s on line %d +bool(false) +string(8) "original" +Done diff --git a/ext/ereg/tests/ereg_error_001.phpt b/ext/ereg/tests/ereg_error_001.phpt new file mode 100644 index 0000000000..ccb770897d --- /dev/null +++ b/ext/ereg/tests/ereg_error_001.phpt @@ -0,0 +1,49 @@ +--TEST-- +Test ereg() function : error conditions - wrong number of args +--FILE-- +<?php +/* Prototype : proto int ereg(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test wrong number of args + */ + +echo "*** Testing ereg() : error conditions ***\n"; + + +//Test ereg with one more than the expected number of arguments +echo "\n-- Testing ereg() function with more than expected no. of arguments --\n"; +$pattern = 'string_val'; +$string = 'string_val'; +$registers = array(1, 2); +$extra_arg = 10; +var_dump( ereg($pattern, $string, $registers, $extra_arg) ); + +// Testing ereg with one less than the expected number of arguments +echo "\n-- Testing ereg() function with less than expected no. of arguments --\n"; +$pattern = 'string_val'; +var_dump( ereg($pattern) ); + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg() : error conditions *** + +-- Testing ereg() function with more than expected no. of arguments -- + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg() expects at most 3 parameters, 4 given in %s on line %d +NULL + +-- Testing ereg() function with less than expected no. 
of arguments -- + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg() expects at least 2 parameters, 1 given in %s on line %d +NULL +Done diff --git a/ext/ereg/tests/ereg_error_002.phpt b/ext/ereg/tests/ereg_error_002.phpt new file mode 100644 index 0000000000..707b7317f7 --- /dev/null +++ b/ext/ereg/tests/ereg_error_002.phpt @@ -0,0 +1,118 @@ +--TEST-- +Test ereg() function : error conditions - test bad regular expressions +--FILE-- +<?php +/* Prototype : proto int ereg(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test bad regular expressions + */ + +echo "*** Testing ereg() : error conditions ***\n"; + +$regs = 'original'; + +var_dump(ereg("", "hello")); +var_dump(ereg("c(d", "hello")); +var_dump(ereg("a[b", "hello")); +var_dump(ereg("c(d", "hello")); +var_dump(ereg("*", "hello")); +var_dump(ereg("+", "hello")); +var_dump(ereg("?", "hello")); +var_dump(ereg("(+?*)", "hello", $regs)); +var_dump(ereg("h{256}", "hello")); +var_dump(ereg("h|", "hello")); +var_dump(ereg("h{0}", "hello")); +var_dump(ereg("h{2,1}", "hello")); +var_dump(ereg('[a-c-e]', 'd')); +var_dump(ereg('\\', 'x')); +var_dump(ereg('([9-0])', '1', $regs)); + +//ensure $regs unchanged +var_dump($regs); + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg() : error conditions *** + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg(): REG_EPAREN in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg(): REG_EBRACK in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg(): REG_EPAREN in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg(): REG_BADRPT in %s 
on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg(): REG_BADBR in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg(): REG_BADBR in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg(): REG_ERANGE in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg(): REG_EESCAPE in %s on line %d +bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d + +Warning: ereg(): REG_ERANGE in %s on line %d +bool(false) +string(8) "original" +Done diff --git a/ext/ereg/tests/ereg_replace_basic_001.phpt b/ext/ereg/tests/ereg_replace_basic_001.phpt new file mode 100644 index 0000000000..66b056ed60 --- /dev/null +++ b/ext/ereg/tests/ereg_replace_basic_001.phpt @@ -0,0 +1,86 @@ +--TEST-- +Test ereg_replace() function : basic functionality +--FILE-- +<?php +/* Prototype : proto string ereg_replace(string pattern, string replacement, string string) + * Description: Replace regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test a number of simple, valid matches with ereg_replace + */ + +echo "*** Testing ereg() : basic functionality ***\n"; + +include(dirname(__FILE__) . 
'/regular_expressions.inc'); + +$replacement = '[this is a replacement]'; + +foreach ($expressions as $re) { + list($pattern, $match) = $re; + echo "--> Pattern: '$pattern'; match: '$match'\n"; + var_dump(ereg_replace($pattern, $replacement, $match . ' this contains some matches ' . $match)); +} + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg() : basic functionality *** +--> Pattern: '..(a|b|c)(a|b|c)..'; match: '--- ab ---' + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(82) "--[this is a replacement]-- this contains some matches --[this is a replacement]--" +--> Pattern: '()'; match: '' + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(695) "[this is a replacement] [this is a replacement]t[this is a replacement]h[this is a replacement]i[this is a replacement]s[this is a replacement] [this is a replacement]c[this is a replacement]o[this is a replacement]n[this is a replacement]t[this is a replacement]a[this is a replacement]i[this is a replacement]n[this is a replacement]s[this is a replacement] [this is a replacement]s[this is a replacement]o[this is a replacement]m[this is a replacement]e[this is a replacement] [this is a replacement]m[this is a replacement]a[this is a replacement]t[this is a replacement]c[this is a replacement]h[this is a replacement]e[this is a replacement]s[this is a replacement] [this is a replacement]" +--> Pattern: '()'; match: 'abcdef' + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(983) "[this is a replacement]a[this is a replacement]b[this is a replacement]c[this is a replacement]d[this is a replacement]e[this is a replacement]f[this is a replacement] [this is a replacement]t[this is a replacement]h[this is a replacement]i[this is a replacement]s[this is a replacement] [this is a replacement]c[this is a replacement]o[this is a replacement]n[this is a replacement]t[this is a replacement]a[this is a replacement]i[this is a replacement]n[this is a 
replacement]s[this is a replacement] [this is a replacement]s[this is a replacement]o[this is a replacement]m[this is a replacement]e[this is a replacement] [this is a replacement]m[this is a replacement]a[this is a replacement]t[this is a replacement]c[this is a replacement]h[this is a replacement]e[this is a replacement]s[this is a replacement] [this is a replacement]a[this is a replacement]b[this is a replacement]c[this is a replacement]d[this is a replacement]e[this is a replacement]f[this is a replacement]" +--> Pattern: '[x]|[^x]'; match: 'abcdef' + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(920) "[this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement]" +--> Pattern: '(a{1})(a{1,}) (b{1,3}) (c+) (d?ddd|e)'; match: '--- aaa bbb ccc ddd ---' + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(90) "--- [this is a replacement] --- this contains some matches --- [this is a replacement] ---" +--> Pattern: '\\\`\^\.\[\$\(\)\|\*\+\?\{\''; match: '\`^.[$()|*+?{'' + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(74) "[this is a 
replacement] this contains some matches [this is a replacement]" +--> Pattern: '\a'; match: 'a' + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(118) "[this is a replacement] this cont[this is a replacement]ins some m[this is a replacement]tches [this is a replacement]" +--> Pattern: '[0-9][^0-9]'; match: '2a' + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(74) "[this is a replacement] this contains some matches [this is a replacement]" +--> Pattern: '^[[:alnum:]]{62,62}$'; match: '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(152) "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ this contains some matches 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +--> Pattern: '^[[:digit:]]{5}'; match: '0123456789' + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(66) "[this is a replacement]56789 this contains some matches 0123456789" +--> Pattern: '[[:digit:]]{5}$'; match: '0123456789' + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(66) "0123456789 this contains some matches 01234[this is a replacement]" +--> Pattern: '[[:blank:]]{1,10}'; match: ' + ' + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(163) " +[this is a replacement]this[this is a replacement]contains[this is a replacement]some[this is a replacement]matches[this is a replacement] +[this is a replacement]" +--> Pattern: '[[:print:]]{3}'; match: ' a ' + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(254) "[this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement] " +Done diff --git 
a/ext/ereg/tests/ereg_replace_basic_002.phpt b/ext/ereg/tests/ereg_replace_basic_002.phpt new file mode 100644 index 0000000000..e12612d5ad --- /dev/null +++ b/ext/ereg/tests/ereg_replace_basic_002.phpt @@ -0,0 +1,41 @@ +--TEST-- +Test ereg_replace() function : basic functionality - a few non-matches +--FILE-- +<?php +/* Prototype : proto string ereg_replace(string pattern, string replacement, string string) + * Description: Replace regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +$replacement = 'r'; + +var_dump(ereg_replace('A', $replacement, 'a')); +var_dump(ereg_replace('[A-Z]', $replacement, '0')); +var_dump(ereg_replace('(a){4}', $replacement, 'aaa')); +var_dump(ereg_replace('^a', $replacement, 'ba')); +var_dump(ereg_replace('b$', $replacement, 'ba')); +var_dump(ereg_replace('[:alpha:]', $replacement, 'x')); + + +echo "Done"; +?> +--EXPECTF-- +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(1) "a" + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(1) "0" + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(3) "aaa" + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(2) "ba" + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(2) "ba" + +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(1) "x" +Done diff --git a/ext/ereg/tests/ereg_replace_error_001.phpt b/ext/ereg/tests/ereg_replace_error_001.phpt new file mode 100644 index 0000000000..d49d87f9d8 --- /dev/null +++ b/ext/ereg/tests/ereg_replace_error_001.phpt @@ -0,0 +1,46 @@ +--TEST-- +Test ereg_replace() function : error conditions - wrong number of args +--FILE-- +<?php +/* Prototype : proto string ereg_replace(string pattern, string replacement, string string) + * Description: Replace regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +echo "*** Testing ereg_replace() : error 
conditions ***\n"; + + +//Test ereg_replace with one more than the expected number of arguments +echo "\n-- Testing ereg_replace() function with more than expected no. of arguments --\n"; +$pattern = 'string_val'; +$replacement = 'string_val'; +$string = 'string_val'; +$extra_arg = 10; +var_dump( ereg_replace($pattern, $replacement, $string, $extra_arg) ); + +// Testing ereg_replace with one less than the expected number of arguments +echo "\n-- Testing ereg_replace() function with less than expected no. of arguments --\n"; +$pattern = 'string_val'; +$replacement = 'string_val'; +var_dump( ereg_replace($pattern, $replacement) ); + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg_replace() : error conditions *** + +-- Testing ereg_replace() function with more than expected no. of arguments -- + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace() expects exactly 3 parameters, 4 given in %s on line %d +NULL + +-- Testing ereg_replace() function with less than expected no. 
of arguments -- + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace() expects exactly 3 parameters, 2 given in %s on line %d +NULL +Done diff --git a/ext/ereg/tests/ereg_replace_error_002.phpt b/ext/ereg/tests/ereg_replace_error_002.phpt new file mode 100644 index 0000000000..b161a0dcf2 --- /dev/null +++ b/ext/ereg/tests/ereg_replace_error_002.phpt @@ -0,0 +1,106 @@ +--TEST-- +Test ereg_replace() function : error conditions - bad regular expressions +--FILE-- +<?php +/* Prototype : proto string ereg_replace(string pattern, string replacement, string string) + * Description: Replace regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +echo "*** Testing ereg_replace() : bad REs ***\n"; +var_dump(ereg_replace("", "hello", "some string")); +var_dump(ereg_replace("c(d", "hello", "some string")); +var_dump(ereg_replace("a[b", "hello", "some string")); +var_dump(ereg_replace("c(d", "hello", "some string"));; +var_dump(ereg_replace("*", "hello", "some string")); +var_dump(ereg_replace("+", "hello", "some string")); +var_dump(ereg_replace("?", "hello", "some string")); +var_dump(ereg_replace("(+?*)", "hello", "some string")); +var_dump(ereg_replace("h{256}", "hello", "some string")); +var_dump(ereg_replace("h|", "hello", "some string")); +var_dump(ereg_replace("h{0}", "hello", "some string")); +var_dump(ereg_replace("h{2,1}", "hello", "some string")); +var_dump(ereg_replace('[a-c-e]', 'd', "some string")); +var_dump(ereg_replace('\\', 'x', "some string")); +var_dump(ereg_replace('([9-0])', '1', "some string")); +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg_replace() : bad REs *** + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace(): REG_EPAREN in %s on line %d +bool(false) + +Deprecated: Function ereg_replace() is 
deprecated in %s on line %d + +Warning: ereg_replace(): REG_EBRACK in %s on line %d +bool(false) + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace(): REG_EPAREN in %s on line %d +bool(false) + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace(): REG_BADBR in %s on line %d +bool(false) + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace(): REG_BADBR in %s on line %d +bool(false) + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace(): REG_ERANGE in %s on line %d +bool(false) + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace(): REG_EESCAPE in %s on line %d +bool(false) + +Deprecated: Function ereg_replace() is deprecated in %s on line %d + +Warning: ereg_replace(): REG_ERANGE in %s on line %d +bool(false) +Done diff --git a/ext/ereg/tests/ereg_replace_variation_001.phpt b/ext/ereg/tests/ereg_replace_variation_001.phpt new file mode 100644 index 0000000000..1e16d1c8cb --- /dev/null +++ b/ext/ereg/tests/ereg_replace_variation_001.phpt @@ 
-0,0 +1,205 @@ +--TEST-- +Test ereg_replace() function : usage variations - unexpected type arg 1 +--FILE-- +<?php +/* Prototype : proto string ereg_replace(string pattern, string replacement, string string) + * Description: Replace regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + +echo "*** Testing ereg_replace() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$replacement = 'new'; +$string = 'original'; + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for pattern + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( ereg_replace($value, $replacement, $string) ); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg_replace() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(64) +Error: 8 - Undefined variable: unset_var, %s(67) + +Arg value 0 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value 1 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "original" + +Arg value 12345 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "original" + +Arg value -2345 +Error: 8192 
- Function ereg_replace() is deprecated, %s(74) +string(8) "original" + +Arg value 10.5 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "original" + +Arg value -10.5 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "original" + +Arg value 101234567000 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "original" + +Arg value 1.07654321E-9 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value 0.5 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace(): REG_EMPTY, %s(74) +bool(false) +Error: 8 - Array to string conversion, %sereg_replace_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace(): REG_EMPTY, %s(74) +bool(false) +Error: 8 - Array to string conversion, %sereg_replace_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "original" +Error: 8 - Array to string conversion, %sereg_replace_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "original" +Error: 8 - Array to string conversion, %sereg_replace_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "original" +Error: 8 - Array to string conversion, %sereg_replace_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "original" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value 1 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "original" + +Arg value +Error: 8192 - Function 
ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value 1 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "original" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace(): REG_EMPTY, %s(74) +bool(false) +Error: 4096 - Object of class stdClass could not be converted to string, %s(73) + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 8 - Object of class stdClass could not be converted to int, %s(74) +string(8) "original" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace(): REG_EMPTY, %s(74) +bool(false) +Done diff --git a/ext/ereg/tests/ereg_replace_variation_002.phpt b/ext/ereg/tests/ereg_replace_variation_002.phpt new file mode 100644 index 0000000000..afaece6b22 --- /dev/null +++ b/ext/ereg/tests/ereg_replace_variation_002.phpt @@ -0,0 +1,193 @@ +--TEST-- +Test ereg_replace() function : usage variations - unexpected type arg 2 +--FILE-- +<?php +/* Prototype : proto string ereg_replace(string pattern, string replacement, string string) + * Description: Replace regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + +echo "*** Testing ereg_replace() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$pattern = b'ell'; 
+$string = 'hello!'; + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for replacement + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump(urlencode(ereg_replace($pattern, $value, $string))); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg_replace() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(64) +Error: 8 - Undefined variable: unset_var, %s(67) + +Arg value 0 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value 1 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "h%01o%21" + +Arg value 12345 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(6) "h9o%21" + +Arg value -2345 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "h%D7o%21" + +Arg value 10.5 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "h%0Ao%21" + +Arg value -10.5 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "h%F6o%21" + +Arg value 101234567000 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(%d) "h%so%21" + +Arg value 1.07654321E-9 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value 0.5 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(5) "ho%21" +Error: 8 - Array to string conversion, %sereg_replace_variation_002.php(%d) + +Arg value Array +Error: 
8192 - Function ereg_replace() is deprecated, %s(74) +string(5) "ho%21" +Error: 8 - Array to string conversion, %sereg_replace_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "h%01o%21" +Error: 8 - Array to string conversion, %sereg_replace_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "h%01o%21" +Error: 8 - Array to string conversion, %sereg_replace_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "h%01o%21" +Error: 8 - Array to string conversion, %sereg_replace_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "h%01o%21" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value 1 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "h%01o%21" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value 1 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(8) "h%01o%21" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(5) "ho%21" +Error: 4096 - Object of class stdClass could not be converted to string, %s(73) + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 8 - Object of class stdClass could not be converted to int, %s(74) +string(8) "h%01o%21" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(5) "ho%21" +Done
\ No newline at end of file diff --git a/ext/ereg/tests/ereg_replace_variation_003.phpt b/ext/ereg/tests/ereg_replace_variation_003.phpt new file mode 100644 index 0000000000..b189c4efc6 --- /dev/null +++ b/ext/ereg/tests/ereg_replace_variation_003.phpt @@ -0,0 +1,198 @@ +--TEST-- +Test ereg_replace() function : usage variations - unexpected type arg 3 +--FILE-- +<?php +/* Prototype : proto string ereg_replace(string pattern, string replacement, string string) + * Description: Replace regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + +echo "*** Testing ereg_replace() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$pattern = '1'; +$replacement = 'new value'; + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for string + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( ereg_replace($pattern, $replacement, $value) ); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg_replace() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(64) +Error: 8 - Undefined variable: unset_var, %s(67) + +Arg value 0 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(1) "0" + +Arg value 1 
+Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(9) "new value" + +Arg value 12345 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(13) "new value2345" + +Arg value -2345 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(5) "-2345" + +Arg value 10.5 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(12) "new value0.5" + +Arg value -10.5 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(13) "-new value0.5" + +Arg value 101234567000 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(28) "new value0new value234567000" + +Arg value 1.07654321E-9 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(29) "new value.0765432new valueE-9" + +Arg value 0.5 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(3) "0.5" +Error: 8 - Array to string conversion, %sereg_replace_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace() expects parameter 3 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sereg_replace_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace() expects parameter 3 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sereg_replace_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace() expects parameter 3 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sereg_replace_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace() expects parameter 3 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sereg_replace_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function ereg_replace() is deprecated, %s(74) 
+Error: 2 - ereg_replace() expects parameter 3 to be string, array given, %s(74) +NULL + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(0) "" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(0) "" + +Arg value 1 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(9) "new value" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(0) "" + +Arg value 1 +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(9) "new value" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(0) "" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(0) "" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(0) "" +Error: 4096 - Object of class stdClass could not be converted to string, %s(73) + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +Error: 2 - ereg_replace() expects parameter 3 to be string, object given, %s(74) +NULL + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(0) "" + +Arg value +Error: 8192 - Function ereg_replace() is deprecated, %s(74) +string(0) "" +Done diff --git a/ext/ereg/tests/ereg_variation_001.phpt b/ext/ereg/tests/ereg_variation_001.phpt new file mode 100644 index 0000000000..8e28d89a90 --- /dev/null +++ b/ext/ereg/tests/ereg_variation_001.phpt @@ -0,0 +1,208 @@ +--TEST-- +Test ereg() function : usage variations - unexpected type arg 1 +--FILE-- +<?php +/* Prototype : proto int ereg(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + + +echo "*** Testing ereg() : usage variations ***\n"; + +// Initialise 
function arguments not being substituted (if any) +$string = '1'; +$registers = array(1, 2); + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for pattern + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( ereg($value, $string, $registers) ); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(65) +Error: 8 - Undefined variable: unset_var, %s(68) + +Arg value 0 +Error: 8192 - Function ereg() is deprecated, %s(75) +bool(false) + +Arg value 1 +Error: 8192 - Function ereg() is deprecated, %s(75) +int(1) + +Arg value 12345 +Error: 8192 - Function ereg() is deprecated, %s(75) +bool(false) + +Arg value -2345 +Error: 8192 - Function ereg() is deprecated, %s(75) +bool(false) + +Arg value 10.5 +Error: 8192 - Function ereg() is deprecated, %s(75) +bool(false) + +Arg value -10.5 +Error: 8192 - Function ereg() is deprecated, %s(75) +bool(false) + +Arg value 101234567000 +Error: 8192 - Function ereg() is deprecated, %s(75) +bool(false) + +Arg value 1.07654321E-9 +Error: 8192 - Function ereg() is deprecated, %s(75) +bool(false) + +Arg value 0.5 +Error: 8192 - Function ereg() is deprecated, %s(75) +bool(false) +Error: 8 - Array to string conversion, %sereg_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function ereg() is deprecated, %s(75) +Error: 8 - Array to string conversion, %s(75) +bool(false) 
+Error: 8 - Array to string conversion, %sereg_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function ereg() is deprecated, %s(75) +Error: 8 - Array to string conversion, %s(75) +bool(false) +Error: 8 - Array to string conversion, %sereg_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function ereg() is deprecated, %s(75) +Error: 8 - Array to string conversion, %s(75) +bool(false) +Error: 8 - Array to string conversion, %sereg_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function ereg() is deprecated, %s(75) +Error: 8 - Array to string conversion, %s(75) +bool(false) +Error: 8 - Array to string conversion, %sereg_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function ereg() is deprecated, %s(75) +Error: 8 - Array to string conversion, %s(75) +bool(false) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(75) +Error: 2 - ereg(): REG_EMPTY, %s(75) +bool(false) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(75) +Error: 2 - ereg(): REG_EMPTY, %s(75) +bool(false) + +Arg value 1 +Error: 8192 - Function ereg() is deprecated, %s(75) +int(1) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(75) +Error: 2 - ereg(): REG_EMPTY, %s(75) +bool(false) + +Arg value 1 +Error: 8192 - Function ereg() is deprecated, %s(75) +int(1) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(75) +Error: 2 - ereg(): REG_EMPTY, %s(75) +bool(false) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(75) +Error: 2 - ereg(): REG_EMPTY, %s(75) +bool(false) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(75) +Error: 2 - ereg(): REG_EMPTY, %s(75) +bool(false) +Error: 4096 - Object of class stdClass could not be converted to string, %s(74) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(75) +Error: 4096 - Object of class stdClass could not be converted to string, %s(75) +Error: 8 - Object of class stdClass to string conversion, %s(75) +bool(false) + +Arg value +Error: 8192 - 
Function ereg() is deprecated, %s(75) +Error: 2 - ereg(): REG_EMPTY, %s(75) +bool(false) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(75) +Error: 2 - ereg(): REG_EMPTY, %s(75) +bool(false) +Done diff --git a/ext/ereg/tests/ereg_variation_002.phpt b/ext/ereg/tests/ereg_variation_002.phpt new file mode 100644 index 0000000000..33e0fe1d10 --- /dev/null +++ b/ext/ereg/tests/ereg_variation_002.phpt @@ -0,0 +1,198 @@ +--TEST-- +Test ereg() function : usage variations - unexpected type arg 2 +--FILE-- +<?php +/* Prototype : proto int ereg(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + +echo "*** Testing ereg() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$pattern = '1'; +$registers = array(); + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for string + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( ereg($pattern, $value, $registers) ); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(64) +Error: 8 - Undefined variable: unset_var, %s(67) + 
+Arg value 0 +Error: 8192 - Function ereg() is deprecated, %s(74) +bool(false) + +Arg value 1 +Error: 8192 - Function ereg() is deprecated, %s(74) +int(1) + +Arg value 12345 +Error: 8192 - Function ereg() is deprecated, %s(74) +int(1) + +Arg value -2345 +Error: 8192 - Function ereg() is deprecated, %s(74) +bool(false) + +Arg value 10.5 +Error: 8192 - Function ereg() is deprecated, %s(74) +int(1) + +Arg value -10.5 +Error: 8192 - Function ereg() is deprecated, %s(74) +int(1) + +Arg value 101234567000 +Error: 8192 - Function ereg() is deprecated, %s(74) +int(1) + +Arg value 1.07654321E-9 +Error: 8192 - Function ereg() is deprecated, %s(74) +int(1) + +Arg value 0.5 +Error: 8192 - Function ereg() is deprecated, %s(74) +bool(false) +Error: 8 - Array to string conversion, %sereg_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function ereg() is deprecated, %s(74) +Error: 2 - ereg() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sereg_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function ereg() is deprecated, %s(74) +Error: 2 - ereg() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sereg_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function ereg() is deprecated, %s(74) +Error: 2 - ereg() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sereg_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function ereg() is deprecated, %s(74) +Error: 2 - ereg() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sereg_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function ereg() is deprecated, %s(74) +Error: 2 - ereg() expects parameter 2 to be string, array given, %s(74) +NULL + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(74) +bool(false) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(74) +bool(false) + 
+Arg value 1 +Error: 8192 - Function ereg() is deprecated, %s(74) +int(1) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(74) +bool(false) + +Arg value 1 +Error: 8192 - Function ereg() is deprecated, %s(74) +int(1) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(74) +bool(false) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(74) +bool(false) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(74) +bool(false) +Error: 4096 - Object of class stdClass could not be converted to string, %s(73) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(74) +Error: 2 - ereg() expects parameter 2 to be string, object given, %s(74) +NULL + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(74) +bool(false) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(74) +bool(false) +Done diff --git a/ext/ereg/tests/ereg_variation_003.phpt b/ext/ereg/tests/ereg_variation_003.phpt new file mode 100644 index 0000000000..bf898d908f --- /dev/null +++ b/ext/ereg/tests/ereg_variation_003.phpt @@ -0,0 +1,305 @@ +--TEST-- +Test ereg() function : usage variations - unexpected type for arg 3 +--FILE-- +<?php +/* Prototype : proto int ereg(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + +echo "*** Testing ereg() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$pattern = 'h(.*)lo!'; +$string = 'hello!'; + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // null data + NULL, + null, + + // 
boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // string data + "string", + 'string', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for registers + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( ereg($pattern, $string, $value) ); + var_dump($value); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(61) +Error: 8 - Undefined variable: unset_var, %s(64) + +Arg value 0 +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value 1 +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value 12345 +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value -2345 +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value 10.5 +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value -10.5 +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value 101234567000 +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value 1.07654321E-9 +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value 0.5 +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" 
+ [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value 1 +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value 1 +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value string +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value string +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} +Error: 4096 - Object of class stdClass could not be converted to string, %s(70) + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function ereg() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" 
+ [1]=> + string(2) "el" +} +Done diff --git a/ext/ereg/tests/ereg_variation_004.phpt b/ext/ereg/tests/ereg_variation_004.phpt new file mode 100644 index 0000000000..86f616f86f --- /dev/null +++ b/ext/ereg/tests/ereg_variation_004.phpt @@ -0,0 +1,19 @@ +--TEST-- +Test ereg() function : usage variations - pass non-variable as arg 3, which is pass-by-ref. +--FILE-- +<?php +/* Prototype : proto int ereg(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +var_dump(ereg('l{2}', 'hello', str_repeat('x',1))); +echo "Done"; +?> +--EXPECTF-- +Strict Standards: Only variables should be passed by reference in %s on line %d + +Deprecated: Function ereg() is deprecated in %s on line %d +int(2) +Done diff --git a/ext/ereg/tests/eregi_basic.phpt b/ext/ereg/tests/eregi_basic.phpt new file mode 100644 index 0000000000..cbcfdb6554 --- /dev/null +++ b/ext/ereg/tests/eregi_basic.phpt @@ -0,0 +1,45 @@ +--TEST-- +Test eregi() function : basic functionality - confirm case insensitivity +--FILE-- +<?php +/* Prototype : proto int eregi(string pattern, string string [, array registers]) + * Description: Case-insensitive regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test basic functionality of eregi() + */ + +echo "*** Testing eregi() : basic functionality ***\n"; +$string = <<<END +UPPERCASE WORDS +lowercase words +MIxED CaSe woRdS +END; + +var_dump(eregi('words', $string, $match1)); +var_dump($match1); + +var_dump(eregi('[[:lower:]]+[[:space:]]case', $string, $match2)); //character class lower should just match [a-z] but in case insensitive search matches [a-zA-Z] +var_dump($match2); +echo "Done"; +?> +--EXPECTF-- +*** Testing eregi() : basic functionality *** + +Deprecated: Function eregi() is deprecated in %s on line %d +int(5) +array(1) { + [0]=> + string(5) "WORDS" +} + +Deprecated: Function eregi() is deprecated in %s on 
line %d +int(10) +array(1) { + [0]=> + string(10) "MIxED CaSe" +} +Done diff --git a/ext/ereg/tests/eregi_basic_001.phpt b/ext/ereg/tests/eregi_basic_001.phpt new file mode 100644 index 0000000000..9460b89e11 --- /dev/null +++ b/ext/ereg/tests/eregi_basic_001.phpt @@ -0,0 +1,153 @@ +--TEST-- +Test eregi() function : basic functionality (with $regs) +--FILE-- +<?php +/* Prototype : proto int eregi(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test a number of simple, valid matches with eregi, specifying $regs + */ + +echo "*** Testing eregi() : basic functionality ***\n"; + +include(dirname(__FILE__) . '/regular_expressions.inc'); + +foreach ($expressions as $re) { + list($pattern,$string) = $re; + echo "--> Pattern: '$pattern'; string: '$string'\n"; + var_dump(eregi($pattern, $string, $regs)); + var_dump($regs); +} + +echo "Done"; +?> +--EXPECTF-- +*** Testing eregi() : basic functionality *** +--> Pattern: '..(a|b|c)(a|b|c)..'; string: '--- ab ---' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(6) +array(3) { + [0]=> + string(6) "- ab -" + [1]=> + string(1) "a" + [2]=> + string(1) "b" +} +--> Pattern: '()'; string: '' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +array(2) { + [0]=> + bool(false) + [1]=> + bool(false) +} +--> Pattern: '()'; string: 'abcdef' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +array(2) { + [0]=> + bool(false) + [1]=> + bool(false) +} +--> Pattern: '[x]|[^x]'; string: 'abcdef' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +array(1) { + [0]=> + string(1) "a" +} +--> Pattern: '(a{1})(a{1,}) (b{1,3}) (c+) (d?ddd|e)'; string: '--- aaa bbb ccc ddd ---' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(15) +array(6) { + [0]=> + string(15) "aaa bbb ccc ddd" + [1]=> + string(1) "a" + [2]=> + string(2) "aa" + [3]=> 
+ string(3) "bbb" + [4]=> + string(3) "ccc" + [5]=> + string(3) "ddd" +} +--> Pattern: '\\\`\^\.\[\$\(\)\|\*\+\?\{\''; string: '\`^.[$()|*+?{'' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(14) +array(1) { + [0]=> + string(14) "\`^.[$()|*+?{'" +} +--> Pattern: '\a'; string: 'a' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +array(1) { + [0]=> + string(1) "a" +} +--> Pattern: '[0-9][^0-9]'; string: '2a' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(2) +array(1) { + [0]=> + string(2) "2a" +} +--> Pattern: '^[[:alnum:]]{62,62}$'; string: '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(62) +array(1) { + [0]=> + string(62) "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +} +--> Pattern: '^[[:digit:]]{5}'; string: '0123456789' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(5) +array(1) { + [0]=> + string(5) "01234" +} +--> Pattern: '[[:digit:]]{5}$'; string: '0123456789' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(5) +array(1) { + [0]=> + string(5) "56789" +} +--> Pattern: '[[:blank:]]{1,10}'; string: ' + ' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(2) +array(1) { + [0]=> + string(2) " " +} +--> Pattern: '[[:print:]]{3}'; string: ' a ' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(3) +array(1) { + [0]=> + string(3) " a " +} +Done diff --git a/ext/ereg/tests/eregi_basic_002.phpt b/ext/ereg/tests/eregi_basic_002.phpt new file mode 100644 index 0000000000..f81821ab1b --- /dev/null +++ b/ext/ereg/tests/eregi_basic_002.phpt @@ -0,0 +1,82 @@ +--TEST-- +Test eregi() function : basic functionality (without $regs) +--FILE-- +<?php +/* Prototype : proto int eregi(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + 
+/* + * Test a number of simple, valid matches with eregi, without specifying $regs + */ + +echo "*** Testing eregi() : basic functionality ***\n"; + +include(dirname(__FILE__) . '/regular_expressions.inc'); + +foreach ($expressions as $re) { + list($pattern,$string) = $re; + echo "--> Pattern: '$pattern'; string: '$string'\n"; + var_dump(eregi($pattern, $string)); +} + +echo "Done"; +?> +--EXPECTF-- +*** Testing eregi() : basic functionality *** +--> Pattern: '..(a|b|c)(a|b|c)..'; string: '--- ab ---' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +--> Pattern: '()'; string: '' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +--> Pattern: '()'; string: 'abcdef' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +--> Pattern: '[x]|[^x]'; string: 'abcdef' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +--> Pattern: '(a{1})(a{1,}) (b{1,3}) (c+) (d?ddd|e)'; string: '--- aaa bbb ccc ddd ---' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +--> Pattern: '\\\`\^\.\[\$\(\)\|\*\+\?\{\''; string: '\`^.[$()|*+?{'' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +--> Pattern: '\a'; string: 'a' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +--> Pattern: '[0-9][^0-9]'; string: '2a' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +--> Pattern: '^[[:alnum:]]{62,62}$'; string: '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +--> Pattern: '^[[:digit:]]{5}'; string: '0123456789' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +--> Pattern: '[[:digit:]]{5}$'; string: '0123456789' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +--> Pattern: '[[:blank:]]{1,10}'; string: ' + ' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +--> Pattern: 
'[[:print:]]{3}'; string: ' a ' + +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) +Done diff --git a/ext/ereg/tests/eregi_basic_003.phpt b/ext/ereg/tests/eregi_basic_003.phpt new file mode 100644 index 0000000000..f045ad4280 --- /dev/null +++ b/ext/ereg/tests/eregi_basic_003.phpt @@ -0,0 +1,28 @@ +--TEST-- +Test eregi() function : basic functionality - long RE +--FILE-- +<?php +/* Prototype : proto int eregi(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test a long RE with lots of matches + */ + +var_dump(eregi(str_repeat('(.)', 2048), str_repeat('x', 2048))); +var_dump(eregi(str_repeat('(.)', 2048), str_repeat('x', 2048), $regs)); +var_dump(count($regs)); + +echo "Done"; +?> +--EXPECTF-- +Deprecated: Function eregi() is deprecated in %s on line %d +int(1) + +Deprecated: Function eregi() is deprecated in %s on line %d +int(2048) +int(2049) +Done diff --git a/ext/ereg/tests/eregi_basic_004.phpt b/ext/ereg/tests/eregi_basic_004.phpt new file mode 100644 index 0000000000..78d8f5e547 --- /dev/null +++ b/ext/ereg/tests/eregi_basic_004.phpt @@ -0,0 +1,40 @@ +--TEST-- +Test eregi() function : basic functionality - a few non-matches +--FILE-- +<?php +/* Prototype : proto int eregi(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +$regs = 'original'; + +var_dump(eregi('[A-Z]', '0', $regs)); +var_dump(eregi('(a){4}', 'aaa', $regs)); +var_dump(eregi('^a', 'ba', $regs)); +var_dump(eregi('b$', 'ba', $regs)); +var_dump(eregi('[:alpha:]', 'x', $regs)); + +// Ensure $regs is unchanged +var_dump($regs); + +echo "Done"; +?> +--EXPECTF-- +Deprecated: Function eregi() is deprecated in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in 
%s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d +bool(false) +string(8) "original" +Done diff --git a/ext/ereg/tests/eregi_error_001.phpt b/ext/ereg/tests/eregi_error_001.phpt new file mode 100644 index 0000000000..fed3a725c2 --- /dev/null +++ b/ext/ereg/tests/eregi_error_001.phpt @@ -0,0 +1,49 @@ +--TEST-- +Test eregi() function : error conditions - wrong number of args +--FILE-- +<?php +/* Prototype : proto int eregi(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test wrong number of args + */ + +echo "*** Testing eregi() : error conditions ***\n"; + + +//Test eregi with one more than the expected number of arguments +echo "\n-- Testing eregi() function with more than expected no. of arguments --\n"; +$pattern = 'string_val'; +$string = 'string_val'; +$registers = array(1, 2); +$extra_arg = 10; +var_dump( eregi($pattern, $string, $registers, $extra_arg) ); + +// Testing eregi with one less than the expected number of arguments +echo "\n-- Testing eregi() function with less than expected no. of arguments --\n"; +$pattern = 'string_val'; +var_dump( eregi($pattern) ); + +echo "Done"; +?> +--EXPECTF-- +*** Testing eregi() : error conditions *** + +-- Testing eregi() function with more than expected no. of arguments -- + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi() expects at most 3 parameters, 4 given in %s on line %d +NULL + +-- Testing eregi() function with less than expected no. 
of arguments -- + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi() expects at least 2 parameters, 1 given in %s on line %d +NULL +Done diff --git a/ext/ereg/tests/eregi_error_002.phpt b/ext/ereg/tests/eregi_error_002.phpt new file mode 100644 index 0000000000..2764741d5d --- /dev/null +++ b/ext/ereg/tests/eregi_error_002.phpt @@ -0,0 +1,118 @@ +--TEST-- +Test eregi() function : error conditions - test bad regular expressions +--FILE-- +<?php +/* Prototype : proto int eregi(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test bad regular expressions + */ + +echo "*** Testing eregi() : error conditions ***\n"; + +$regs = 'original'; + +var_dump(eregi("", "hello")); +var_dump(eregi("c(d", "hello")); +var_dump(eregi("a[b", "hello")); +var_dump(eregi("c(d", "hello")); +var_dump(eregi("*", "hello")); +var_dump(eregi("+", "hello")); +var_dump(eregi("?", "hello")); +var_dump(eregi("(+?*)", "hello", $regs)); +var_dump(eregi("h{256}", "hello")); +var_dump(eregi("h|", "hello")); +var_dump(eregi("h{0}", "hello")); +var_dump(eregi("h{2,1}", "hello")); +var_dump(eregi('[a-c-e]', 'd')); +var_dump(eregi('\\', 'x')); +var_dump(eregi('([9-0])', '1', $regs)); + +//ensure $regs unchanged +var_dump($regs); + +echo "Done"; +?> +--EXPECTF-- +*** Testing eregi() : error conditions *** + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi(): REG_EPAREN in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi(): REG_EBRACK in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi(): REG_EPAREN in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d + 
+Warning: eregi(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi(): REG_BADBR in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi(): REG_BADBR in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi(): REG_ERANGE in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi(): REG_EESCAPE in %s on line %d +bool(false) + +Deprecated: Function eregi() is deprecated in %s on line %d + +Warning: eregi(): REG_ERANGE in %s on line %d +bool(false) +string(8) "original" +Done diff --git a/ext/ereg/tests/eregi_replace_basic.phpt b/ext/ereg/tests/eregi_replace_basic.phpt new file mode 100644 index 0000000000..f965c8f03a --- /dev/null +++ b/ext/ereg/tests/eregi_replace_basic.phpt @@ -0,0 +1,38 @@ +--TEST-- +Test eregi_replace() function : basic functionality - confirm case insensitivity +--FILE-- + +<?php +/* Prototype : proto string eregi_replace(string pattern, string replacement, string string) + * Description: Case insensitive replace regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test basic functionality of eregi_replace() + */ + +echo "*** Testing eregi_replace() : basic 
functionality ***\n"; + +$string = 'UPPERCASE WORDS, lowercase words, MIxED CaSe woRdS'; + +echo "String Before...\n"; +var_dump($string); +echo "\nString after...\n"; + +var_dump(eregi_replace('([[:lower:]]+) word', '\\1_character', $string)); + +echo "Done"; +?> + +--EXPECTF-- +*** Testing eregi_replace() : basic functionality *** +String Before... +string(50) "UPPERCASE WORDS, lowercase words, MIxED CaSe woRdS" + +String after... + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(65) "UPPERCASE_characterS, lowercase_characters, MIxED CaSe_characterS" +Done diff --git a/ext/ereg/tests/eregi_replace_basic_001.phpt b/ext/ereg/tests/eregi_replace_basic_001.phpt new file mode 100644 index 0000000000..710e39e2de --- /dev/null +++ b/ext/ereg/tests/eregi_replace_basic_001.phpt @@ -0,0 +1,86 @@ +--TEST-- +Test ereg() function : basic functionality +--FILE-- +<?php +/* Prototype : proto string eregi_replace(string pattern, string replacement, string string) + * Description: Replace regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test a number of simple, valid matches with eregi_replace + */ + +echo "*** Testing ereg() : basic functionality ***\n"; + +include(dirname(__FILE__) . '/regular_expressions.inc'); + +$replacement = '[this is a replacement]'; + +foreach ($expressions as $re) { + list($pattern, $match) = $re; + echo "--> Pattern: '$pattern'; match: '$match'\n"; + var_dump(eregi_replace($pattern, $replacement, $match . ' this contains some matches ' . 
$match)); +} + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg() : basic functionality *** +--> Pattern: '..(a|b|c)(a|b|c)..'; match: '--- ab ---' + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(82) "--[this is a replacement]-- this contains some matches --[this is a replacement]--" +--> Pattern: '()'; match: '' + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(695) "[this is a replacement] [this is a replacement]t[this is a replacement]h[this is a replacement]i[this is a replacement]s[this is a replacement] [this is a replacement]c[this is a replacement]o[this is a replacement]n[this is a replacement]t[this is a replacement]a[this is a replacement]i[this is a replacement]n[this is a replacement]s[this is a replacement] [this is a replacement]s[this is a replacement]o[this is a replacement]m[this is a replacement]e[this is a replacement] [this is a replacement]m[this is a replacement]a[this is a replacement]t[this is a replacement]c[this is a replacement]h[this is a replacement]e[this is a replacement]s[this is a replacement] [this is a replacement]" +--> Pattern: '()'; match: 'abcdef' + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(983) "[this is a replacement]a[this is a replacement]b[this is a replacement]c[this is a replacement]d[this is a replacement]e[this is a replacement]f[this is a replacement] [this is a replacement]t[this is a replacement]h[this is a replacement]i[this is a replacement]s[this is a replacement] [this is a replacement]c[this is a replacement]o[this is a replacement]n[this is a replacement]t[this is a replacement]a[this is a replacement]i[this is a replacement]n[this is a replacement]s[this is a replacement] [this is a replacement]s[this is a replacement]o[this is a replacement]m[this is a replacement]e[this is a replacement] [this is a replacement]m[this is a replacement]a[this is a replacement]t[this is a replacement]c[this is a replacement]h[this 
is a replacement]e[this is a replacement]s[this is a replacement] [this is a replacement]a[this is a replacement]b[this is a replacement]c[this is a replacement]d[this is a replacement]e[this is a replacement]f[this is a replacement]" +--> Pattern: '[x]|[^x]'; match: 'abcdef' + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(920) "[this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement]" +--> Pattern: '(a{1})(a{1,}) (b{1,3}) (c+) (d?ddd|e)'; match: '--- aaa bbb ccc ddd ---' + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(90) "--- [this is a replacement] --- this contains some matches --- [this is a replacement] ---" +--> Pattern: '\\\`\^\.\[\$\(\)\|\*\+\?\{\''; match: '\`^.[$()|*+?{'' + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(74) "[this is a replacement] this contains some matches [this is a replacement]" +--> Pattern: '\a'; match: 'a' + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(118) "[this is a replacement] this cont[this is a replacement]ins some m[this is a replacement]tches [this is a 
replacement]" +--> Pattern: '[0-9][^0-9]'; match: '2a' + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(74) "[this is a replacement] this contains some matches [this is a replacement]" +--> Pattern: '^[[:alnum:]]{62,62}$'; match: '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(152) "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ this contains some matches 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +--> Pattern: '^[[:digit:]]{5}'; match: '0123456789' + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(66) "[this is a replacement]56789 this contains some matches 0123456789" +--> Pattern: '[[:digit:]]{5}$'; match: '0123456789' + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(66) "0123456789 this contains some matches 01234[this is a replacement]" +--> Pattern: '[[:blank:]]{1,10}'; match: ' + ' + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(163) " +[this is a replacement]this[this is a replacement]contains[this is a replacement]some[this is a replacement]matches[this is a replacement] +[this is a replacement]" +--> Pattern: '[[:print:]]{3}'; match: ' a ' + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(254) "[this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement][this is a replacement] " +Done diff --git a/ext/ereg/tests/eregi_replace_basic_002.phpt b/ext/ereg/tests/eregi_replace_basic_002.phpt new file mode 100644 index 0000000000..7683e3323e --- /dev/null +++ b/ext/ereg/tests/eregi_replace_basic_002.phpt @@ -0,0 +1,37 @@ +--TEST-- +Test eregi_replace() function : basic functionality - a few non-matches +--FILE-- +<?php 
+/* Prototype : proto string eregi_replace(string pattern, string replacement, string string) + * Description: Replace regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +$replacement = 'r'; + +var_dump(eregi_replace('[A-Z]', $replacement, '0')); +var_dump(eregi_replace('(a){4}', $replacement, 'aaa')); +var_dump(eregi_replace('^a', $replacement, 'ba')); +var_dump(eregi_replace('b$', $replacement, 'ba')); +var_dump(eregi_replace('[:alpha:]', $replacement, 'x')); + + +echo "Done"; +?> +--EXPECTF-- +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(1) "0" + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(3) "aaa" + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(2) "ba" + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(2) "ba" + +Deprecated: Function eregi_replace() is deprecated in %s on line %d +string(1) "x" +Done diff --git a/ext/ereg/tests/eregi_replace_error_001.phpt b/ext/ereg/tests/eregi_replace_error_001.phpt new file mode 100644 index 0000000000..ff94833555 --- /dev/null +++ b/ext/ereg/tests/eregi_replace_error_001.phpt @@ -0,0 +1,46 @@ +--TEST-- +Test eregi_replace() function : error conditions - wrong number of args +--FILE-- +<?php +/* Prototype : proto string eregi_replace(string pattern, string replacement, string string) + * Description: Replace regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +echo "*** Testing eregi_replace() : error conditions ***\n"; + + +//Test eregi_replace with one more than the expected number of arguments +echo "\n-- Testing eregi_replace() function with more than expected no. 
of arguments --\n"; +$pattern = 'string_val'; +$replacement = 'string_val'; +$string = 'string_val'; +$extra_arg = 10; +var_dump( eregi_replace($pattern, $replacement, $string, $extra_arg) ); + +// Testing eregi_replace with one less than the expected number of arguments +echo "\n-- Testing eregi_replace() function with less than expected no. of arguments --\n"; +$pattern = 'string_val'; +$replacement = 'string_val'; +var_dump( eregi_replace($pattern, $replacement) ); + +echo "Done"; +?> +--EXPECTF-- +*** Testing eregi_replace() : error conditions *** + +-- Testing eregi_replace() function with more than expected no. of arguments -- + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace() expects exactly 3 parameters, 4 given in %s on line %d +NULL + +-- Testing eregi_replace() function with less than expected no. of arguments -- + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace() expects exactly 3 parameters, 2 given in %s on line %d +NULL +Done diff --git a/ext/ereg/tests/eregi_replace_error_002.phpt b/ext/ereg/tests/eregi_replace_error_002.phpt new file mode 100644 index 0000000000..32c58613d2 --- /dev/null +++ b/ext/ereg/tests/eregi_replace_error_002.phpt @@ -0,0 +1,106 @@ +--TEST-- +Test eregi_replace() function : error conditions - bad regular expressions +--FILE-- +<?php +/* Prototype : proto string eregi_replace(string pattern, string replacement, string string) + * Description: Replace regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +echo "*** Testing eregi_replace() : bad REs ***\n"; +var_dump(eregi_replace("", "hello", "some string")); +var_dump(eregi_replace("c(d", "hello", "some string")); +var_dump(eregi_replace("a[b", "hello", "some string")); +var_dump(eregi_replace("c(d", "hello", "some string"));; +var_dump(eregi_replace("*", "hello", "some string")); +var_dump(eregi_replace("+", "hello", "some string")); 
+var_dump(eregi_replace("?", "hello", "some string")); +var_dump(eregi_replace("(+?*)", "hello", "some string")); +var_dump(eregi_replace("h{256}", "hello", "some string")); +var_dump(eregi_replace("h|", "hello", "some string")); +var_dump(eregi_replace("h{0}", "hello", "some string")); +var_dump(eregi_replace("h{2,1}", "hello", "some string")); +var_dump(eregi_replace('[a-c-e]', 'd', "some string")); +var_dump(eregi_replace('\\', 'x', "some string")); +var_dump(eregi_replace('([9-0])', '1', "some string")); +echo "Done"; +?> +--EXPECTF-- +*** Testing eregi_replace() : bad REs *** + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace(): REG_EPAREN in %s on line %d +bool(false) + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace(): REG_EBRACK in %s on line %d +bool(false) + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace(): REG_EPAREN in %s on line %d +bool(false) + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace(): REG_BADBR in %s on line %d +bool(false) + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace(): REG_EMPTY in %s on line %d +bool(false) + 
+Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace(): REG_BADBR in %s on line %d +bool(false) + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace(): REG_ERANGE in %s on line %d +bool(false) + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace(): REG_EESCAPE in %s on line %d +bool(false) + +Deprecated: Function eregi_replace() is deprecated in %s on line %d + +Warning: eregi_replace(): REG_ERANGE in %s on line %d +bool(false) +Done diff --git a/ext/ereg/tests/eregi_replace_variation_001.phpt b/ext/ereg/tests/eregi_replace_variation_001.phpt new file mode 100644 index 0000000000..e3d2b655f5 --- /dev/null +++ b/ext/ereg/tests/eregi_replace_variation_001.phpt @@ -0,0 +1,205 @@ +--TEST-- +Test eregi_replace() function : usage variations - unexpected type arg 1 +--FILE-- +<?php +/* Prototype : proto string eregi_replace(string pattern, string replacement, string string) + * Description: Replace regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + +echo "*** Testing eregi_replace() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$replacement = 'new'; +$string = 'original'; + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + 
null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for pattern + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( eregi_replace($value, $replacement, $string) ); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing eregi_replace() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(64) +Error: 8 - Undefined variable: unset_var, %s(67) + +Arg value 0 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value 1 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "original" + +Arg value 12345 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "original" + +Arg value -2345 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "original" + +Arg value 10.5 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "original" + +Arg value -10.5 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "original" + +Arg value 101234567000 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "original" + +Arg value 1.07654321E-9 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value 0.5 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace(): REG_EMPTY, %s(74) +bool(false) +Error: 8 - Array to string conversion, %seregi_replace_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace(): REG_EMPTY, %s(74) +bool(false) +Error: 8 - Array to string conversion, %seregi_replace_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function eregi_replace() is deprecated, %s(74) 
+string(8) "original" +Error: 8 - Array to string conversion, %seregi_replace_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "original" +Error: 8 - Array to string conversion, %seregi_replace_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "original" +Error: 8 - Array to string conversion, %seregi_replace_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "original" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value 1 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "original" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value 1 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "original" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace(): REG_EMPTY, %s(74) +bool(false) +Error: 4096 - Object of class stdClass could not be converted to string, %s(73) + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 8 - Object of class stdClass could not be converted to int, %s(74) +string(8) "original" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function 
eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace(): REG_EMPTY, %s(74) +bool(false) +Done diff --git a/ext/ereg/tests/eregi_replace_variation_002.phpt b/ext/ereg/tests/eregi_replace_variation_002.phpt new file mode 100644 index 0000000000..46229e0f57 --- /dev/null +++ b/ext/ereg/tests/eregi_replace_variation_002.phpt @@ -0,0 +1,193 @@ +--TEST-- +Test eregi_replace() function : usage variations - unexpected type arg 2 +--FILE-- +<?php +/* Prototype : proto string eregi_replace(string pattern, string replacement, string string) + * Description: Replace regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + +echo "*** Testing eregi_replace() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$pattern = b'ell'; +$string = 'hello!'; + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for replacement + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump(urlencode(eregi_replace($pattern, $value, $string))); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing eregi_replace() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(64) +Error: 8 - Undefined variable: unset_var, %s(67) + +Arg value 0 +Error: 
8192 - Function eregi_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value 1 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "h%01o%21" + +Arg value 12345 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(6) "h9o%21" + +Arg value -2345 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "h%D7o%21" + +Arg value 10.5 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "h%0Ao%21" + +Arg value -10.5 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "h%F6o%21" + +Arg value 101234567000 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(%d) "h%so%21" + +Arg value 1.07654321E-9 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value 0.5 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(5) "ho%21" +Error: 8 - Array to string conversion, %seregi_replace_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(5) "ho%21" +Error: 8 - Array to string conversion, %seregi_replace_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "h%01o%21" +Error: 8 - Array to string conversion, %seregi_replace_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "h%01o%21" +Error: 8 - Array to string conversion, %seregi_replace_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "h%01o%21" +Error: 8 - Array to string conversion, %seregi_replace_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "h%01o%21" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value 
1 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "h%01o%21" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value 1 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(8) "h%01o%21" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(5) "ho%21" +Error: 4096 - Object of class stdClass could not be converted to string, %s(73) + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 8 - Object of class stdClass could not be converted to int, %s(74) +string(8) "h%01o%21" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(5) "ho%21" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(5) "ho%21" +Done
\ No newline at end of file diff --git a/ext/ereg/tests/eregi_replace_variation_003.phpt b/ext/ereg/tests/eregi_replace_variation_003.phpt new file mode 100644 index 0000000000..ae9edba829 --- /dev/null +++ b/ext/ereg/tests/eregi_replace_variation_003.phpt @@ -0,0 +1,198 @@ +--TEST-- +Test eregi_replace() function : usage variations - unexpected type arg 3 +--FILE-- +<?php +/* Prototype : proto string eregi_replace(string pattern, string replacement, string string) + * Description: Replace regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + +echo "*** Testing eregi_replace() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$pattern = '1'; +$replacement = 'new value'; + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for string + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( eregi_replace($pattern, $replacement, $value) ); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing eregi_replace() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(64) +Error: 8 - Undefined variable: unset_var, %s(67) + +Arg value 0 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(1) "0" + +Arg 
value 1 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(9) "new value" + +Arg value 12345 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(13) "new value2345" + +Arg value -2345 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(5) "-2345" + +Arg value 10.5 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(12) "new value0.5" + +Arg value -10.5 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(13) "-new value0.5" + +Arg value 101234567000 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(28) "new value0new value234567000" + +Arg value 1.07654321E-9 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(29) "new value.0765432new valueE-9" + +Arg value 0.5 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(3) "0.5" +Error: 8 - Array to string conversion, %seregi_replace_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace() expects parameter 3 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %seregi_replace_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace() expects parameter 3 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %seregi_replace_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace() expects parameter 3 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %seregi_replace_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace() expects parameter 3 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %seregi_replace_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function 
eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace() expects parameter 3 to be string, array given, %s(74) +NULL + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(0) "" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(0) "" + +Arg value 1 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(9) "new value" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(0) "" + +Arg value 1 +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(9) "new value" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(0) "" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(0) "" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(0) "" +Error: 4096 - Object of class stdClass could not be converted to string, %s(73) + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +Error: 2 - eregi_replace() expects parameter 3 to be string, object given, %s(74) +NULL + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(0) "" + +Arg value +Error: 8192 - Function eregi_replace() is deprecated, %s(74) +string(0) "" +Done diff --git a/ext/ereg/tests/eregi_variation_001.phpt b/ext/ereg/tests/eregi_variation_001.phpt new file mode 100644 index 0000000000..38c2cbaef3 --- /dev/null +++ b/ext/ereg/tests/eregi_variation_001.phpt @@ -0,0 +1,208 @@ +--TEST-- +Test eregi() function : usage variations - unexpected type arg 1 +--FILE-- +<?php +/* Prototype : proto int eregi(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + + +echo "*** 
Testing eregi() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$string = '1'; +$registers = array(1, 2); + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for pattern + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( eregi($value, $string, $registers) ); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing eregi() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(65) +Error: 8 - Undefined variable: unset_var, %s(68) + +Arg value 0 +Error: 8192 - Function eregi() is deprecated, %s(75) +bool(false) + +Arg value 1 +Error: 8192 - Function eregi() is deprecated, %s(75) +int(1) + +Arg value 12345 +Error: 8192 - Function eregi() is deprecated, %s(75) +bool(false) + +Arg value -2345 +Error: 8192 - Function eregi() is deprecated, %s(75) +bool(false) + +Arg value 10.5 +Error: 8192 - Function eregi() is deprecated, %s(75) +bool(false) + +Arg value -10.5 +Error: 8192 - Function eregi() is deprecated, %s(75) +bool(false) + +Arg value 101234567000 +Error: 8192 - Function eregi() is deprecated, %s(75) +bool(false) + +Arg value 1.07654321E-9 +Error: 8192 - Function eregi() is deprecated, %s(75) +bool(false) + +Arg value 0.5 +Error: 8192 - Function eregi() is deprecated, %s(75) +bool(false) +Error: 8 - Array to string conversion, %seregi_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function eregi() is 
deprecated, %s(75) +Error: 8 - Array to string conversion, %s(75) +bool(false) +Error: 8 - Array to string conversion, %seregi_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function eregi() is deprecated, %s(75) +Error: 8 - Array to string conversion, %s(75) +bool(false) +Error: 8 - Array to string conversion, %seregi_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function eregi() is deprecated, %s(75) +Error: 8 - Array to string conversion, %s(75) +bool(false) +Error: 8 - Array to string conversion, %seregi_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function eregi() is deprecated, %s(75) +Error: 8 - Array to string conversion, %s(75) +bool(false) +Error: 8 - Array to string conversion, %seregi_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function eregi() is deprecated, %s(75) +Error: 8 - Array to string conversion, %s(75) +bool(false) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(75) +Error: 2 - eregi(): REG_EMPTY, %s(75) +bool(false) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(75) +Error: 2 - eregi(): REG_EMPTY, %s(75) +bool(false) + +Arg value 1 +Error: 8192 - Function eregi() is deprecated, %s(75) +int(1) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(75) +Error: 2 - eregi(): REG_EMPTY, %s(75) +bool(false) + +Arg value 1 +Error: 8192 - Function eregi() is deprecated, %s(75) +int(1) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(75) +Error: 2 - eregi(): REG_EMPTY, %s(75) +bool(false) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(75) +Error: 2 - eregi(): REG_EMPTY, %s(75) +bool(false) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(75) +Error: 2 - eregi(): REG_EMPTY, %s(75) +bool(false) +Error: 4096 - Object of class stdClass could not be converted to string, %s(74) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(75) +Error: 4096 - Object of class stdClass could not be converted to string, %s(75) +Error: 8 - 
Object of class stdClass to string conversion, %s(75) +bool(false) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(75) +Error: 2 - eregi(): REG_EMPTY, %s(75) +bool(false) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(75) +Error: 2 - eregi(): REG_EMPTY, %s(75) +bool(false) +Done diff --git a/ext/ereg/tests/eregi_variation_002.phpt b/ext/ereg/tests/eregi_variation_002.phpt new file mode 100644 index 0000000000..8e803b55fa --- /dev/null +++ b/ext/ereg/tests/eregi_variation_002.phpt @@ -0,0 +1,198 @@ +--TEST-- +Test eregi() function : usage variations - unexpected type arg 2 +--FILE-- +<?php +/* Prototype : proto int eregi(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + +echo "*** Testing eregi() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$pattern = '1'; +$registers = array(); + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for string + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( eregi($pattern, $value, $registers) ); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing eregi() : usage variations *** 
+Error: 8 - Undefined variable: undefined_var, %s(64) +Error: 8 - Undefined variable: unset_var, %s(67) + +Arg value 0 +Error: 8192 - Function eregi() is deprecated, %s(74) +bool(false) + +Arg value 1 +Error: 8192 - Function eregi() is deprecated, %s(74) +int(1) + +Arg value 12345 +Error: 8192 - Function eregi() is deprecated, %s(74) +int(1) + +Arg value -2345 +Error: 8192 - Function eregi() is deprecated, %s(74) +bool(false) + +Arg value 10.5 +Error: 8192 - Function eregi() is deprecated, %s(74) +int(1) + +Arg value -10.5 +Error: 8192 - Function eregi() is deprecated, %s(74) +int(1) + +Arg value 101234567000 +Error: 8192 - Function eregi() is deprecated, %s(74) +int(1) + +Arg value 1.07654321E-9 +Error: 8192 - Function eregi() is deprecated, %s(74) +int(1) + +Arg value 0.5 +Error: 8192 - Function eregi() is deprecated, %s(74) +bool(false) +Error: 8 - Array to string conversion, %seregi_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function eregi() is deprecated, %s(74) +Error: 2 - eregi() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %seregi_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function eregi() is deprecated, %s(74) +Error: 2 - eregi() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %seregi_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function eregi() is deprecated, %s(74) +Error: 2 - eregi() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %seregi_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function eregi() is deprecated, %s(74) +Error: 2 - eregi() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %seregi_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function eregi() is deprecated, %s(74) +Error: 2 - eregi() expects parameter 2 to be string, array given, %s(74) +NULL + +Arg value +Error: 8192 - 
Function eregi() is deprecated, %s(74) +bool(false) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(74) +bool(false) + +Arg value 1 +Error: 8192 - Function eregi() is deprecated, %s(74) +int(1) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(74) +bool(false) + +Arg value 1 +Error: 8192 - Function eregi() is deprecated, %s(74) +int(1) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(74) +bool(false) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(74) +bool(false) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(74) +bool(false) +Error: 4096 - Object of class stdClass could not be converted to string, %s(73) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(74) +Error: 2 - eregi() expects parameter 2 to be string, object given, %s(74) +NULL + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(74) +bool(false) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(74) +bool(false) +Done diff --git a/ext/ereg/tests/eregi_variation_003.phpt b/ext/ereg/tests/eregi_variation_003.phpt new file mode 100644 index 0000000000..97777e22ff --- /dev/null +++ b/ext/ereg/tests/eregi_variation_003.phpt @@ -0,0 +1,305 @@ +--TEST-- +Test eregi() function : usage variations - unexpected type for arg 3 +--FILE-- +<?php +/* Prototype : proto int eregi(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + +echo "*** Testing eregi() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$pattern = 'h(.*)lo!'; +$string = 'hello!'; + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data 
+ 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // string data + "string", + 'string', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for registers + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( eregi($pattern, $string, $value) ); + var_dump($value); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing eregi() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(61) +Error: 8 - Undefined variable: unset_var, %s(64) + +Arg value 0 +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value 1 +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value 12345 +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value -2345 +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value 10.5 +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value -10.5 +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value 101234567000 +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value 1.07654321E-9 +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" 
+ [1]=> + string(2) "el" +} + +Arg value 0.5 +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value 1 +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value 1 +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value string +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value string +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} +Error: 4096 - Object of class stdClass could not be converted to string, %s(70) + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" 
+ [1]=> + string(2) "el" +} + +Arg value +Error: 8192 - Function eregi() is deprecated, %s(71) +int(6) +array(2) { + [0]=> + string(6) "hello!" + [1]=> + string(2) "el" +} +Done diff --git a/ext/ereg/tests/eregi_variation_004.phpt b/ext/ereg/tests/eregi_variation_004.phpt new file mode 100644 index 0000000000..7378c36d3d --- /dev/null +++ b/ext/ereg/tests/eregi_variation_004.phpt @@ -0,0 +1,19 @@ +--TEST-- +Test eregi() function : usage variations - pass non-variable as arg 3, which is pass-by-ref. +--FILE-- +<?php +/* Prototype : proto int eregi(string pattern, string string [, array registers]) + * Description: Regular expression match + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +var_dump(eregi('l{2}', 'hello', str_repeat('x',1))); +echo "Done"; +?> +--EXPECTF-- +Strict Standards: Only variables should be passed by reference in %s on line %d + +Deprecated: Function eregi() is deprecated in %s on line %d +int(2) +Done diff --git a/ext/ereg/tests/regular_expressions.inc b/ext/ereg/tests/regular_expressions.inc new file mode 100644 index 0000000000..98fae4b112 --- /dev/null +++ b/ext/ereg/tests/regular_expressions.inc @@ -0,0 +1,24 @@ +<?php + +/** + * POSIX regular expressions each coupled with a string that they match, + * based on documentation on http://www.tin.org/bin/man.cgi?section=7&topic=regex . + */ +$expressions = array( + //array(pattern, string to match) + array('..(a|b|c)(a|b|c)..', '--- ab ---'), + array('()', ''), + array('()', 'abcdef'), + array('[x]|[^x]', 'abcdef'), + array('(a{1})(a{1,}) (b{1,3}) (c+) (d?ddd|e)', '--- aaa bbb ccc ddd ---'), + array('\\\\\`\^\.\[\$\(\)\|\*\+\?\{\\\'', '\\`^.[$()|*+?{\''), + array('\\a', 'a'), + array('[0-9][^0-9]', '2a'), + array('^[[:alnum:]]{62,62}$', '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'), + array('^[[:digit:]]{5}', '0123456789'), + array('[[:digit:]]{5}$', '0123456789'), + array('[[:blank:]]{1,10}', "\n \t"), + array('[[:print:]]{3}', " a "), +); + +?>
\ No newline at end of file diff --git a/ext/ereg/tests/split_basic_001.phpt b/ext/ereg/tests/split_basic_001.phpt new file mode 100644 index 0000000000..1c11f71ca8 --- /dev/null +++ b/ext/ereg/tests/split_basic_001.phpt @@ -0,0 +1,155 @@ +--TEST-- +Test split() function : basic functionality - test a number of simple split, specifying a limit +--FILE-- +<?php +/* Prototype : proto array split(string pattern, string string [, int limit]) + * Description: Split string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test a number of simple split, specifying a limit + */ + +echo "*** Testing ereg() : basic functionality ***\n"; + +include(dirname(__FILE__) . '/regular_expressions.inc'); + +foreach ($expressions as $re) { + list($pattern,$string) = $re; + echo "\n--> Pattern: '$pattern'; match: '$string'\n"; + var_dump(split($pattern, $string . ' |1| ' . $string . ' |2| ' . $string, 2)); +} + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg() : basic functionality *** + +--> Pattern: '..(a|b|c)(a|b|c)..'; match: '--- ab ---' + +Deprecated: Function split() is deprecated in %s on line %d +array(2) { + [0]=> + string(2) "--" + [1]=> + string(32) "-- |1| --- ab --- |2| --- ab ---" +} + +--> Pattern: '()'; match: '' + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): Invalid Regular Expression in %s on line %d +bool(false) + +--> Pattern: '()'; match: 'abcdef' + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): Invalid Regular Expression in %s on line %d +bool(false) + +--> Pattern: '[x]|[^x]'; match: 'abcdef' + +Deprecated: Function split() is deprecated in %s on line %d +array(2) { + [0]=> + string(0) "" + [1]=> + string(27) "bcdef |1| abcdef |2| abcdef" +} + +--> Pattern: '(a{1})(a{1,}) (b{1,3}) (c+) (d?ddd|e)'; match: '--- aaa bbb ccc ddd ---' + +Deprecated: Function split() is deprecated in %s on line %d +array(2) { + [0]=> + string(4) "--- " + 
[1]=> + string(60) " --- |1| --- aaa bbb ccc ddd --- |2| --- aaa bbb ccc ddd ---" +} + +--> Pattern: '\\\`\^\.\[\$\(\)\|\*\+\?\{\''; match: '\`^.[$()|*+?{'' + +Deprecated: Function split() is deprecated in %s on line %d +array(2) { + [0]=> + string(0) "" + [1]=> + string(38) " |1| \`^.[$()|*+?{' |2| \`^.[$()|*+?{'" +} + +--> Pattern: '\a'; match: 'a' + +Deprecated: Function split() is deprecated in %s on line %d +array(2) { + [0]=> + string(0) "" + [1]=> + string(12) " |1| a |2| a" +} + +--> Pattern: '[0-9][^0-9]'; match: '2a' + +Deprecated: Function split() is deprecated in %s on line %d +array(2) { + [0]=> + string(0) "" + [1]=> + string(14) " |1| 2a |2| 2a" +} + +--> Pattern: '^[[:alnum:]]{62,62}$'; match: '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + +Deprecated: Function split() is deprecated in %s on line %d +array(1) { + [0]=> + string(196) "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ |1| 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ |2| 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +} + +--> Pattern: '^[[:digit:]]{5}'; match: '0123456789' + +Deprecated: Function split() is deprecated in %s on line %d +array(2) { + [0]=> + string(0) "" + [1]=> + string(35) "56789 |1| 0123456789 |2| 0123456789" +} + +--> Pattern: '[[:digit:]]{5}$'; match: '0123456789' + +Deprecated: Function split() is deprecated in %s on line %d +array(2) { + [0]=> + string(35) "0123456789 |1| 0123456789 |2| 01234" + [1]=> + string(0) "" +} + +--> Pattern: '[[:blank:]]{1,10}'; match: ' + ' + +Deprecated: Function split() is deprecated in %s on line %d +array(2) { + [0]=> + string(1) " +" + [1]=> + string(15) "|1| + |2| + " +} + +--> Pattern: '[[:print:]]{3}'; match: ' a ' + +Deprecated: Function split() is deprecated in %s on line %d +array(2) { + [0]=> + string(0) "" + [1]=> + string(16) " |1| a |2| a " +} +Done diff --git a/ext/ereg/tests/split_basic_002.phpt b/ext/ereg/tests/split_basic_002.phpt new file mode 
100644 index 0000000000..443a6b6b92 --- /dev/null +++ b/ext/ereg/tests/split_basic_002.phpt @@ -0,0 +1,253 @@ +--TEST-- +Test split() function : basic functionality - test a number of simple split, without specifying a limit +--FILE-- +<?php +/* Prototype : proto array split(string pattern, string string [, int limit]) + * Description: Split string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test a number of simple split, without specifying a limit + */ + +echo "*** Testing ereg() : basic functionality ***\n"; + +include(dirname(__FILE__) . '/regular_expressions.inc'); + +foreach ($expressions as $re) { + list($pattern,$string) = $re; + echo "\n--> Pattern: '$pattern'; match: '$string'\n"; + var_dump(split($pattern, $string . ' |1| ' . $string . ' |2| ' . $string)); +} + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg() : basic functionality *** + +--> Pattern: '..(a|b|c)(a|b|c)..'; match: '--- ab ---' + +Deprecated: Function split() is deprecated in %s on line %d +array(4) { + [0]=> + string(2) "--" + [1]=> + string(9) "-- |1| --" + [2]=> + string(9) "-- |2| --" + [3]=> + string(2) "--" +} + +--> Pattern: '()'; match: '' + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): Invalid Regular Expression in %s on line %d +bool(false) + +--> Pattern: '()'; match: 'abcdef' + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): Invalid Regular Expression in %s on line %d +bool(false) + +--> Pattern: '[x]|[^x]'; match: 'abcdef' + +Deprecated: Function split() is deprecated in %s on line %d +array(29) { + [0]=> + string(0) "" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(0) "" + [4]=> + string(0) "" + [5]=> + string(0) "" + [6]=> + string(0) "" + [7]=> + string(0) "" + [8]=> + string(0) "" + [9]=> + string(0) "" + [10]=> + string(0) "" + [11]=> + string(0) "" + [12]=> + string(0) "" + [13]=> + string(0) "" + [14]=> + string(0) "" + 
[15]=> + string(0) "" + [16]=> + string(0) "" + [17]=> + string(0) "" + [18]=> + string(0) "" + [19]=> + string(0) "" + [20]=> + string(0) "" + [21]=> + string(0) "" + [22]=> + string(0) "" + [23]=> + string(0) "" + [24]=> + string(0) "" + [25]=> + string(0) "" + [26]=> + string(0) "" + [27]=> + string(0) "" + [28]=> + string(0) "" +} + +--> Pattern: '(a{1})(a{1,}) (b{1,3}) (c+) (d?ddd|e)'; match: '--- aaa bbb ccc ddd ---' + +Deprecated: Function split() is deprecated in %s on line %d +array(4) { + [0]=> + string(4) "--- " + [1]=> + string(13) " --- |1| --- " + [2]=> + string(13) " --- |2| --- " + [3]=> + string(4) " ---" +} + +--> Pattern: '\\\`\^\.\[\$\(\)\|\*\+\?\{\''; match: '\`^.[$()|*+?{'' + +Deprecated: Function split() is deprecated in %s on line %d +array(4) { + [0]=> + string(0) "" + [1]=> + string(5) " |1| " + [2]=> + string(5) " |2| " + [3]=> + string(0) "" +} + +--> Pattern: '\a'; match: 'a' + +Deprecated: Function split() is deprecated in %s on line %d +array(4) { + [0]=> + string(0) "" + [1]=> + string(5) " |1| " + [2]=> + string(5) " |2| " + [3]=> + string(0) "" +} + +--> Pattern: '[0-9][^0-9]'; match: '2a' + +Deprecated: Function split() is deprecated in %s on line %d +array(6) { + [0]=> + string(0) "" + [1]=> + string(2) " |" + [2]=> + string(1) " " + [3]=> + string(2) " |" + [4]=> + string(1) " " + [5]=> + string(0) "" +} + +--> Pattern: '^[[:alnum:]]{62,62}$'; match: '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + +Deprecated: Function split() is deprecated in %s on line %d +array(1) { + [0]=> + string(196) "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ |1| 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ |2| 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +} + +--> Pattern: '^[[:digit:]]{5}'; match: '0123456789' + +Deprecated: Function split() is deprecated in %s on line %d +array(3) { + [0]=> + string(0) "" + [1]=> + string(0) "" + [2]=> + string(30) " |1| 0123456789 |2| 
0123456789" +} + +--> Pattern: '[[:digit:]]{5}$'; match: '0123456789' + +Deprecated: Function split() is deprecated in %s on line %d +array(2) { + [0]=> + string(35) "0123456789 |1| 0123456789 |2| 01234" + [1]=> + string(0) "" +} + +--> Pattern: '[[:blank:]]{1,10}'; match: ' + ' + +Deprecated: Function split() is deprecated in %s on line %d +array(6) { + [0]=> + string(1) " +" + [1]=> + string(3) "|1|" + [2]=> + string(1) " +" + [3]=> + string(3) "|2|" + [4]=> + string(1) " +" + [5]=> + string(0) "" +} + +--> Pattern: '[[:print:]]{3}'; match: ' a ' + +Deprecated: Function split() is deprecated in %s on line %d +array(7) { + [0]=> + string(0) "" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(0) "" + [4]=> + string(0) "" + [5]=> + string(0) "" + [6]=> + string(1) " " +} +Done diff --git a/ext/ereg/tests/split_basic_003.phpt b/ext/ereg/tests/split_basic_003.phpt new file mode 100644 index 0000000000..8767144f7d --- /dev/null +++ b/ext/ereg/tests/split_basic_003.phpt @@ -0,0 +1,59 @@ +--TEST-- +Test split() function : basic functionality - a few non-matches +--FILE-- +<?php +/* Prototype : proto array split(string pattern, string string [, int limit]) + * Description: split string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +$replacement = 'r'; + +var_dump(split('A', '-- a --')); +var_dump(split('[A-Z]', '-- 0 --')); +var_dump(split('(a){4}', '--- aaa ---')); +var_dump(split('^a', '--- ba ---')); +var_dump(split('b$', '--- ba ---')); +var_dump(split('[:alpha:]', '--- x ---')); + + +echo "Done"; +?> +--EXPECTF-- +Deprecated: Function split() is deprecated in %s on line %d +array(1) { + [0]=> + string(7) "-- a --" +} + +Deprecated: Function split() is deprecated in %s on line %d +array(1) { + [0]=> + string(7) "-- 0 --" +} + +Deprecated: Function split() is deprecated in %s on line %d +array(1) { + [0]=> + string(11) "--- aaa ---" +} + +Deprecated: Function split() is deprecated in %s on line %d 
+array(1) { + [0]=> + string(10) "--- ba ---" +} + +Deprecated: Function split() is deprecated in %s on line %d +array(1) { + [0]=> + string(10) "--- ba ---" +} + +Deprecated: Function split() is deprecated in %s on line %d +array(1) { + [0]=> + string(9) "--- x ---" +} +Done diff --git a/ext/ereg/tests/split_error_001.phpt b/ext/ereg/tests/split_error_001.phpt new file mode 100644 index 0000000000..2575717a22 --- /dev/null +++ b/ext/ereg/tests/split_error_001.phpt @@ -0,0 +1,45 @@ +--TEST-- +Test split() function : error conditions - wrong number of args +--FILE-- +<?php +/* Prototype : proto array split(string pattern, string string [, int limit]) + * Description: Split string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +echo "*** Testing split() : error conditions - wrong number of args ***\n"; + + +//Test split with one more than the expected number of arguments +echo "\n-- Testing split() function with more than expected no. of arguments --\n"; +$pattern = 'string_val'; +$string = 'string_val'; +$limit = 10; +$extra_arg = 10; +var_dump( split($pattern, $string, $limit, $extra_arg) ); + +// Testing split with one less than the expected number of arguments +echo "\n-- Testing split() function with less than expected no. of arguments --\n"; +$pattern = 'string_val'; +var_dump( split($pattern) ); + +echo "Done"; +?> +--EXPECTF-- +*** Testing split() : error conditions - wrong number of args *** + +-- Testing split() function with more than expected no. of arguments -- + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split() expects at most 3 parameters, 4 given in %s on line %d +NULL + +-- Testing split() function with less than expected no. 
of arguments -- + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split() expects at least 2 parameters, 1 given in %s on line %d +NULL +Done diff --git a/ext/ereg/tests/split_error_002.phpt b/ext/ereg/tests/split_error_002.phpt new file mode 100644 index 0000000000..9eaac59e44 --- /dev/null +++ b/ext/ereg/tests/split_error_002.phpt @@ -0,0 +1,118 @@ +--TEST-- +Test split() function : error conditions - test bad regular expressions +--FILE-- +<?php +/* Prototype : proto array split(string pattern, string string [, int limit]) + * Description: Split string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test bad regular expressions + */ + +echo "*** Testing split() : error conditions ***\n"; + +$regs = 'original'; + +var_dump(split("", "hello")); +var_dump(split("c(d", "hello")); +var_dump(split("a[b", "hello")); +var_dump(split("c(d", "hello")); +var_dump(split("*", "hello")); +var_dump(split("+", "hello")); +var_dump(split("?", "hello")); +var_dump(split("(+?*)", "hello", $regs)); +var_dump(split("h{256}", "hello")); +var_dump(split("h|", "hello")); +var_dump(split("h{0}", "hello")); +var_dump(split("h{2,1}", "hello")); +var_dump(split('[a-c-e]', 'd')); +var_dump(split('\\', 'x')); +var_dump(split('([9-0])', '1', $regs)); + +//ensure $regs unchanged +var_dump($regs); + +echo "Done"; +?> +--EXPECTF-- +*** Testing split() : error conditions *** + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): REG_EPAREN in %s on line %d +bool(false) + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): REG_EBRACK in %s on line %d +bool(false) + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): REG_EPAREN in %s on line %d +bool(false) + +Deprecated: Function split() is deprecated in 
%s on line %d + +Warning: split(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split() expects parameter 3 to be long, string given in %s on line %d +NULL + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): REG_BADBR in %s on line %d +bool(false) + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): REG_BADBR in %s on line %d +bool(false) + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): REG_ERANGE in %s on line %d +bool(false) + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split(): REG_EESCAPE in %s on line %d +bool(false) + +Deprecated: Function split() is deprecated in %s on line %d + +Warning: split() expects parameter 3 to be long, string given in %s on line %d +NULL +string(8) "original" +Done diff --git a/ext/ereg/tests/split_variation_001.phpt b/ext/ereg/tests/split_variation_001.phpt new file mode 100644 index 0000000000..9d9fc9d518 --- /dev/null +++ b/ext/ereg/tests/split_variation_001.phpt @@ -0,0 +1,257 @@ +--TEST-- +Test split() function : usage variations - unexpected type for arg 1 +--FILE-- +<?php +/* Prototype : proto array split(string pattern, string string [, int limit]) + * Description: Split string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, 
$linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + +echo "*** Testing split() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$string = '1 a 1 Array 1 c '; +$limit = 5; + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for pattern + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( split($value, $string, $limit) ); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing split() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(64) +Error: 8 - Undefined variable: unset_var, %s(67) + +Arg value 0 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} + +Arg value 1 +Error: 8192 - Function split() is deprecated, %s(74) +array(4) { + [0]=> + string(0) "" + [1]=> + string(3) " a " + [2]=> + string(7) " Array " + [3]=> + string(3) " c " +} + +Arg value 12345 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} + +Arg value -2345 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} + +Arg value 10.5 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} + +Arg value -10.5 +Error: 8192 - Function split() is deprecated, %s(74) 
+array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} + +Arg value 101234567000 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} + +Arg value 1.07654321E-9 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} + +Arg value 0.5 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} +Error: 8 - Array to string conversion, %ssplit_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split() expects parameter 1 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %ssplit_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split() expects parameter 1 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %ssplit_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split() expects parameter 1 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %ssplit_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split() expects parameter 1 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %ssplit_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split() expects parameter 1 to be string, array given, %s(74) +NULL + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split(): REG_EMPTY, %s(74) +bool(false) + +Arg value 1 +Error: 8192 - Function split() is deprecated, %s(74) +array(4) { + [0]=> + string(0) "" + [1]=> + string(3) " a " + [2]=> + string(7) " Array " + [3]=> + string(3) " c " +} + 
+Arg value +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split(): REG_EMPTY, %s(74) +bool(false) + +Arg value 1 +Error: 8192 - Function split() is deprecated, %s(74) +array(4) { + [0]=> + string(0) "" + [1]=> + string(3) " a " + [2]=> + string(7) " Array " + [3]=> + string(3) " c " +} + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split(): REG_EMPTY, %s(74) +bool(false) +Error: 4096 - Object of class stdClass could not be converted to string, %s(73) + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split() expects parameter 1 to be string, object given, %s(74) +NULL + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split(): REG_EMPTY, %s(74) +bool(false) +Done diff --git a/ext/ereg/tests/split_variation_002.phpt b/ext/ereg/tests/split_variation_002.phpt new file mode 100644 index 0000000000..b1ea57c787 --- /dev/null +++ b/ext/ereg/tests/split_variation_002.phpt @@ -0,0 +1,257 @@ +--TEST-- +Test split() function : usage variations - unexpected type for arg 2 +--FILE-- +<?php +/* Prototype : proto array split(string pattern, string string [, int limit]) + * Description: Split string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + +echo "*** Testing split() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$pattern = 'r|j|E'; +$limit 
= 5; + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for string + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( split($pattern, $value, $limit) ); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing split() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(64) +Error: 8 - Undefined variable: unset_var, %s(67) + +Arg value 0 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(1) "0" +} + +Arg value 1 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(1) "1" +} + +Arg value 12345 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(5) "12345" +} + +Arg value -2345 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(5) "-2345" +} + +Arg value 10.5 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(4) "10.5" +} + +Arg value -10.5 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(5) "-10.5" +} + +Arg value 101234567000 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(12) "101234567000" +} + +Arg value 1.07654321E-9 +Error: 8192 - Function split() is deprecated, %s(74) +array(2) { + [0]=> + string(10) "1.07654321" + [1]=> + string(2) "-9" +} + +Arg value 0.5 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + 
string(3) "0.5" +} +Error: 8 - Array to string conversion, %ssplit_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %ssplit_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %ssplit_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %ssplit_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %ssplit_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split() expects parameter 2 to be string, array given, %s(74) +NULL + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} + +Arg value 1 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(1) "1" +} + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} + +Arg value 1 +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(1) "1" +} + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} +Error: 4096 - 
Object of class stdClass could not be converted to string, %s(73) + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +Error: 2 - split() expects parameter 2 to be string, object given, %s(74) +NULL + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} + +Arg value +Error: 8192 - Function split() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} +Done diff --git a/ext/ereg/tests/split_variation_003.phpt b/ext/ereg/tests/split_variation_003.phpt new file mode 100644 index 0000000000..edef9cfeb9 --- /dev/null +++ b/ext/ereg/tests/split_variation_003.phpt @@ -0,0 +1,231 @@ +--TEST-- +Test split() function : usage variations - unexpected type for arg 3 +--FILE-- +<?php +/* Prototype : proto array split(string pattern, string string [, int limit]) + * Description: Split string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + if (error_reporting() != 0) { + // report non-silenced errors + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; + } +} +set_error_handler('test_error_handler'); +echo "*** Testing split() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$pattern = b'[[:space:]]'; +$string = '1 2 3 4 5'; + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // float data + 10.5, + -10.5, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // string data + "string", + 'string', + + // object data + new stdclass(), + + // undefined data + @$undefined_var, + + // unset data + @$unset_var, +); + +// loop through each element of the array for 
limit + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( split($pattern, $string, $value) ); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing split() : usage variations *** + +Arg value 10.5 +Error: 8192 - Function split() is deprecated, %s(73) +array(5) { + [0]=> + string(1) "1" + [1]=> + string(1) "2" + [2]=> + string(1) "3" + [3]=> + string(1) "4" + [4]=> + string(1) "5" +} + +Arg value -10.5 +Error: 8192 - Function split() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value 1.07654321E-9 +Error: 8192 - Function split() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value 0.5 +Error: 8192 - Function split() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} +Error: 8 - Array to string conversion, %ssplit_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function split() is deprecated, %s(73) +Error: 2 - split() expects parameter 3 to be long, array given, %s(73) +NULL +Error: 8 - Array to string conversion, %ssplit_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function split() is deprecated, %s(73) +Error: 2 - split() expects parameter 3 to be long, array given, %s(73) +NULL +Error: 8 - Array to string conversion, %ssplit_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function split() is deprecated, %s(73) +Error: 2 - split() expects parameter 3 to be long, array given, %s(73) +NULL +Error: 8 - Array to string conversion, %ssplit_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function split() is deprecated, %s(73) +Error: 2 - split() expects parameter 3 to be long, array given, %s(73) +NULL +Error: 8 - Array to string conversion, %ssplit_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function split() is deprecated, %s(73) +Error: 2 - split() expects parameter 3 to be long, array given, %s(73) +NULL + +Arg value +Error: 8192 - Function split() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg 
value +Error: 8192 - Function split() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value 1 +Error: 8192 - Function split() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value +Error: 8192 - Function split() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value 1 +Error: 8192 - Function split() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value +Error: 8192 - Function split() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value +Error: 8192 - Function split() is deprecated, %s(73) +Error: 2 - split() expects parameter 3 to be long, string given, %s(73) +NULL + +Arg value +Error: 8192 - Function split() is deprecated, %s(73) +Error: 2 - split() expects parameter 3 to be long, string given, %s(73) +NULL + +Arg value string +Error: 8192 - Function split() is deprecated, %s(73) +Error: 2 - split() expects parameter 3 to be long, string given, %s(73) +NULL + +Arg value string +Error: 8192 - Function split() is deprecated, %s(73) +Error: 2 - split() expects parameter 3 to be long, string given, %s(73) +NULL +Error: 4096 - Object of class stdClass could not be converted to string, %s(72) + +Arg value +Error: 8192 - Function split() is deprecated, %s(73) +Error: 2 - split() expects parameter 3 to be long, object given, %s(73) +NULL + +Arg value +Error: 8192 - Function split() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value +Error: 8192 - Function split() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} +Done
\ No newline at end of file diff --git a/ext/ereg/tests/split_variation_004.phpt b/ext/ereg/tests/split_variation_004.phpt new file mode 100644 index 0000000000..d3d2de84a3 --- /dev/null +++ b/ext/ereg/tests/split_variation_004.phpt @@ -0,0 +1,37 @@ +--TEST-- +Test split() function : usage variations - out-of-range values for limit +--FILE-- +<?php +/* Prototype : proto array split(string pattern, string string [, int limit]) + * Description: Split string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); +echo "*** Testing split() : usage variations ***\n"; + +$pattern = '[[:space:]]'; +$string = '1 2 3 4 5'; +var_dump(split($pattern, $string, 0)); +var_dump(split($pattern, $string, -10)); + + +echo "Done"; +?> +--EXPECTF-- +*** Testing split() : usage variations *** +Error: 8192 - Function split() is deprecated, %s(16) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} +Error: 8192 - Function split() is deprecated, %s(17) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} +Done diff --git a/ext/ereg/tests/spliti_basic_001.phpt b/ext/ereg/tests/spliti_basic_001.phpt new file mode 100644 index 0000000000..743bd66212 --- /dev/null +++ b/ext/ereg/tests/spliti_basic_001.phpt @@ -0,0 +1,155 @@ +--TEST-- +Test spliti() function : basic functionality - test a number of simple spliti, specifying a limit +--FILE-- +<?php +/* Prototype : proto array spliti(string pattern, string string [, int limit]) + * Description: spliti string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test a number of simple spliti, specifying a limit + */ + +echo "*** Testing ereg() : basic functionality ***\n"; + +include(dirname(__FILE__) . 
'/regular_expressions.inc'); + +foreach ($expressions as $re) { + list($pattern,$string) = $re; + echo "\n--> Pattern: '$pattern'; match: '$string'\n"; + var_dump(spliti($pattern, $string . ' |1| ' . $string . ' |2| ' . $string, 2)); +} + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg() : basic functionality *** + +--> Pattern: '..(a|b|c)(a|b|c)..'; match: '--- ab ---' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(2) { + [0]=> + string(2) "--" + [1]=> + string(32) "-- |1| --- ab --- |2| --- ab ---" +} + +--> Pattern: '()'; match: '' + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): Invalid Regular Expression in %s on line %d +bool(false) + +--> Pattern: '()'; match: 'abcdef' + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): Invalid Regular Expression in %s on line %d +bool(false) + +--> Pattern: '[x]|[^x]'; match: 'abcdef' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(2) { + [0]=> + string(0) "" + [1]=> + string(27) "bcdef |1| abcdef |2| abcdef" +} + +--> Pattern: '(a{1})(a{1,}) (b{1,3}) (c+) (d?ddd|e)'; match: '--- aaa bbb ccc ddd ---' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(2) { + [0]=> + string(4) "--- " + [1]=> + string(60) " --- |1| --- aaa bbb ccc ddd --- |2| --- aaa bbb ccc ddd ---" +} + +--> Pattern: '\\\`\^\.\[\$\(\)\|\*\+\?\{\''; match: '\`^.[$()|*+?{'' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(2) { + [0]=> + string(0) "" + [1]=> + string(38) " |1| \`^.[$()|*+?{' |2| \`^.[$()|*+?{'" +} + +--> Pattern: '\a'; match: 'a' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(2) { + [0]=> + string(0) "" + [1]=> + string(12) " |1| a |2| a" +} + +--> Pattern: '[0-9][^0-9]'; match: '2a' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(2) { + [0]=> + string(0) "" + [1]=> + string(14) " |1| 2a |2| 2a" +} + +--> Pattern: '^[[:alnum:]]{62,62}$'; 
match: '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(1) { + [0]=> + string(196) "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ |1| 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ |2| 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +} + +--> Pattern: '^[[:digit:]]{5}'; match: '0123456789' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(2) { + [0]=> + string(0) "" + [1]=> + string(35) "56789 |1| 0123456789 |2| 0123456789" +} + +--> Pattern: '[[:digit:]]{5}$'; match: '0123456789' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(2) { + [0]=> + string(35) "0123456789 |1| 0123456789 |2| 01234" + [1]=> + string(0) "" +} + +--> Pattern: '[[:blank:]]{1,10}'; match: ' + ' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(2) { + [0]=> + string(1) " +" + [1]=> + string(15) "|1| + |2| + " +} + +--> Pattern: '[[:print:]]{3}'; match: ' a ' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(2) { + [0]=> + string(0) "" + [1]=> + string(16) " |1| a |2| a " +} +Done diff --git a/ext/ereg/tests/spliti_basic_002.phpt b/ext/ereg/tests/spliti_basic_002.phpt new file mode 100644 index 0000000000..df61a42cae --- /dev/null +++ b/ext/ereg/tests/spliti_basic_002.phpt @@ -0,0 +1,253 @@ +--TEST-- +Test spliti() function : basic functionality - test a number of simple spliti, without specifying a limit +--FILE-- +<?php +/* Prototype : proto array spliti(string pattern, string string [, int limit]) + * Description: spliti string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test a number of simple spliti, without specifying a limit + */ + +echo "*** Testing ereg() : basic functionality ***\n"; + +include(dirname(__FILE__) . 
'/regular_expressions.inc'); + +foreach ($expressions as $re) { + list($pattern,$string) = $re; + echo "\n--> Pattern: '$pattern'; match: '$string'\n"; + var_dump(spliti($pattern, $string . ' |1| ' . $string . ' |2| ' . $string)); +} + +echo "Done"; +?> +--EXPECTF-- +*** Testing ereg() : basic functionality *** + +--> Pattern: '..(a|b|c)(a|b|c)..'; match: '--- ab ---' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(4) { + [0]=> + string(2) "--" + [1]=> + string(9) "-- |1| --" + [2]=> + string(9) "-- |2| --" + [3]=> + string(2) "--" +} + +--> Pattern: '()'; match: '' + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): Invalid Regular Expression in %s on line %d +bool(false) + +--> Pattern: '()'; match: 'abcdef' + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): Invalid Regular Expression in %s on line %d +bool(false) + +--> Pattern: '[x]|[^x]'; match: 'abcdef' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(29) { + [0]=> + string(0) "" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(0) "" + [4]=> + string(0) "" + [5]=> + string(0) "" + [6]=> + string(0) "" + [7]=> + string(0) "" + [8]=> + string(0) "" + [9]=> + string(0) "" + [10]=> + string(0) "" + [11]=> + string(0) "" + [12]=> + string(0) "" + [13]=> + string(0) "" + [14]=> + string(0) "" + [15]=> + string(0) "" + [16]=> + string(0) "" + [17]=> + string(0) "" + [18]=> + string(0) "" + [19]=> + string(0) "" + [20]=> + string(0) "" + [21]=> + string(0) "" + [22]=> + string(0) "" + [23]=> + string(0) "" + [24]=> + string(0) "" + [25]=> + string(0) "" + [26]=> + string(0) "" + [27]=> + string(0) "" + [28]=> + string(0) "" +} + +--> Pattern: '(a{1})(a{1,}) (b{1,3}) (c+) (d?ddd|e)'; match: '--- aaa bbb ccc ddd ---' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(4) { + [0]=> + string(4) "--- " + [1]=> + string(13) " --- |1| --- " + [2]=> + string(13) " --- |2| --- " + 
[3]=> + string(4) " ---" +} + +--> Pattern: '\\\`\^\.\[\$\(\)\|\*\+\?\{\''; match: '\`^.[$()|*+?{'' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(4) { + [0]=> + string(0) "" + [1]=> + string(5) " |1| " + [2]=> + string(5) " |2| " + [3]=> + string(0) "" +} + +--> Pattern: '\a'; match: 'a' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(4) { + [0]=> + string(0) "" + [1]=> + string(5) " |1| " + [2]=> + string(5) " |2| " + [3]=> + string(0) "" +} + +--> Pattern: '[0-9][^0-9]'; match: '2a' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(6) { + [0]=> + string(0) "" + [1]=> + string(2) " |" + [2]=> + string(1) " " + [3]=> + string(2) " |" + [4]=> + string(1) " " + [5]=> + string(0) "" +} + +--> Pattern: '^[[:alnum:]]{62,62}$'; match: '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(1) { + [0]=> + string(196) "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ |1| 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ |2| 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +} + +--> Pattern: '^[[:digit:]]{5}'; match: '0123456789' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(3) { + [0]=> + string(0) "" + [1]=> + string(0) "" + [2]=> + string(30) " |1| 0123456789 |2| 0123456789" +} + +--> Pattern: '[[:digit:]]{5}$'; match: '0123456789' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(2) { + [0]=> + string(35) "0123456789 |1| 0123456789 |2| 01234" + [1]=> + string(0) "" +} + +--> Pattern: '[[:blank:]]{1,10}'; match: ' + ' + +Deprecated: Function spliti() is deprecated in %s on line %d +array(6) { + [0]=> + string(1) " +" + [1]=> + string(3) "|1|" + [2]=> + string(1) " +" + [3]=> + string(3) "|2|" + [4]=> + string(1) " +" + [5]=> + string(0) "" +} + +--> Pattern: '[[:print:]]{3}'; match: ' a ' + +Deprecated: Function spliti() is deprecated in 
%s on line %d +array(7) { + [0]=> + string(0) "" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(0) "" + [4]=> + string(0) "" + [5]=> + string(0) "" + [6]=> + string(1) " " +} +Done diff --git a/ext/ereg/tests/spliti_basic_003.phpt b/ext/ereg/tests/spliti_basic_003.phpt new file mode 100644 index 0000000000..39bd1dcbf7 --- /dev/null +++ b/ext/ereg/tests/spliti_basic_003.phpt @@ -0,0 +1,52 @@ +--TEST-- +Test spliti() function : basic functionality - a few non-matches +--FILE-- +<?php +/* Prototype : proto array spliti(string pattern, string string [, int limit]) + * Description: spliti string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +$replacement = 'r'; + +var_dump(spliti('[A-Z]', '-- 0 --')); +var_dump(spliti('(a){4}', '--- aaa ---')); +var_dump(spliti('^a', '--- ba ---')); +var_dump(spliti('b$', '--- ba ---')); +var_dump(spliti('[:alpha:]', '--- x ---')); + + +echo "Done"; +?> +--EXPECTF-- +Deprecated: Function spliti() is deprecated in %s on line %d +array(1) { + [0]=> + string(7) "-- 0 --" +} + +Deprecated: Function spliti() is deprecated in %s on line %d +array(1) { + [0]=> + string(11) "--- aaa ---" +} + +Deprecated: Function spliti() is deprecated in %s on line %d +array(1) { + [0]=> + string(10) "--- ba ---" +} + +Deprecated: Function spliti() is deprecated in %s on line %d +array(1) { + [0]=> + string(10) "--- ba ---" +} + +Deprecated: Function spliti() is deprecated in %s on line %d +array(1) { + [0]=> + string(9) "--- x ---" +} +Done diff --git a/ext/ereg/tests/spliti_basic_004.phpt b/ext/ereg/tests/spliti_basic_004.phpt new file mode 100644 index 0000000000..ed07193194 --- /dev/null +++ b/ext/ereg/tests/spliti_basic_004.phpt @@ -0,0 +1,52 @@ +--TEST-- +Test spliti() function : basic functionality - confirm case insensitivity +--FILE-- +<?php +/* Prototype : proto array spliti(string pattern, string string [, int limit]) + * Description: spliti string into array by regular 
expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +$replacement = 'r'; + +var_dump(spliti('[a-z]', '--- A ---')); +var_dump(spliti('[A-Z]', '--- a ---')); +var_dump(spliti('[[:lower:]]', '--- A ---')); +var_dump(spliti('[[:upper:]]', '--- a ---')); + +echo "Done"; +?> +--EXPECTF-- +Deprecated: Function spliti() is deprecated in %s on line %d +array(2) { + [0]=> + string(4) "--- " + [1]=> + string(4) " ---" +} + +Deprecated: Function spliti() is deprecated in %s on line %d +array(2) { + [0]=> + string(4) "--- " + [1]=> + string(4) " ---" +} + +Deprecated: Function spliti() is deprecated in %s on line %d +array(2) { + [0]=> + string(4) "--- " + [1]=> + string(4) " ---" +} + +Deprecated: Function spliti() is deprecated in %s on line %d +array(2) { + [0]=> + string(4) "--- " + [1]=> + string(4) " ---" +} +Done diff --git a/ext/ereg/tests/spliti_error_001.phpt b/ext/ereg/tests/spliti_error_001.phpt new file mode 100644 index 0000000000..cd4e6a99e6 --- /dev/null +++ b/ext/ereg/tests/spliti_error_001.phpt @@ -0,0 +1,45 @@ +--TEST-- +Test spliti() function : error conditions - wrong number of args +--FILE-- +<?php +/* Prototype : proto array spliti(string pattern, string string [, int limit]) + * Description: spliti string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +echo "*** Testing spliti() : error conditions - wrong number of args ***\n"; + + +//Test spliti with one more than the expected number of arguments +echo "\n-- Testing spliti() function with more than expected no. of arguments --\n"; +$pattern = 'string_val'; +$string = 'string_val'; +$limit = 10; +$extra_arg = 10; +var_dump( spliti($pattern, $string, $limit, $extra_arg) ); + +// Testing spliti with one less than the expected number of arguments +echo "\n-- Testing spliti() function with less than expected no. 
of arguments --\n"; +$pattern = 'string_val'; +var_dump( spliti($pattern) ); + +echo "Done"; +?> +--EXPECTF-- +*** Testing spliti() : error conditions - wrong number of args *** + +-- Testing spliti() function with more than expected no. of arguments -- + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti() expects at most 3 parameters, 4 given in %s on line %d +NULL + +-- Testing spliti() function with less than expected no. of arguments -- + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti() expects at least 2 parameters, 1 given in %s on line %d +NULL +Done diff --git a/ext/ereg/tests/spliti_error_002.phpt b/ext/ereg/tests/spliti_error_002.phpt new file mode 100644 index 0000000000..19dd171655 --- /dev/null +++ b/ext/ereg/tests/spliti_error_002.phpt @@ -0,0 +1,118 @@ +--TEST-- +Test spliti() function : error conditions - test bad regular expressions +--FILE-- +<?php +/* Prototype : proto array spliti(string pattern, string string [, int limit]) + * Description: spliti string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +/* + * Test bad regular expressions + */ + +echo "*** Testing spliti() : error conditions ***\n"; + +$regs = 'original'; + +var_dump(spliti("", "hello")); +var_dump(spliti("c(d", "hello")); +var_dump(spliti("a[b", "hello")); +var_dump(spliti("c(d", "hello")); +var_dump(spliti("*", "hello")); +var_dump(spliti("+", "hello")); +var_dump(spliti("?", "hello")); +var_dump(spliti("(+?*)", "hello", $regs)); +var_dump(spliti("h{256}", "hello")); +var_dump(spliti("h|", "hello")); +var_dump(spliti("h{0}", "hello")); +var_dump(spliti("h{2,1}", "hello")); +var_dump(spliti('[a-c-e]', 'd')); +var_dump(spliti('\\', 'x')); +var_dump(spliti('([9-0])', '1', $regs)); + +//ensure $regs unchanged +var_dump($regs); + +echo "Done"; +?> +--EXPECTF-- +*** Testing spliti() : error conditions *** + +Deprecated: Function spliti() is deprecated in %s on line %d 
+ +Warning: spliti(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): REG_EPAREN in %s on line %d +bool(false) + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): REG_EBRACK in %s on line %d +bool(false) + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): REG_EPAREN in %s on line %d +bool(false) + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): REG_BADRPT in %s on line %d +bool(false) + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti() expects parameter 3 to be long, string given in %s on line %d +NULL + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): REG_BADBR in %s on line %d +bool(false) + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): REG_EMPTY in %s on line %d +bool(false) + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): REG_BADBR in %s on line %d +bool(false) + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): REG_ERANGE in %s on line %d +bool(false) + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti(): REG_EESCAPE in %s on line %d +bool(false) + +Deprecated: Function spliti() is deprecated in %s on line %d + +Warning: spliti() expects parameter 3 to be long, string given in %s on line %d +NULL +string(8) "original" +Done diff --git a/ext/ereg/tests/spliti_variation_001.phpt 
b/ext/ereg/tests/spliti_variation_001.phpt new file mode 100644 index 0000000000..e5493b34ad --- /dev/null +++ b/ext/ereg/tests/spliti_variation_001.phpt @@ -0,0 +1,257 @@ +--TEST-- +Test spliti() function : usage variations - unexpected type for arg 1 +--FILE-- +<?php +/* Prototype : proto array spliti(string pattern, string string [, int limit]) + * Description: spliti string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + +echo "*** Testing spliti() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$string = '1 a 1 Array 1 c '; +$limit = 5; + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for pattern + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( spliti($value, $string, $limit) ); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing spliti() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(64) +Error: 8 - Undefined variable: unset_var, %s(67) + +Arg value 0 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} + +Arg value 1 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(4) { + [0]=> + string(0) "" + 
[1]=> + string(3) " a " + [2]=> + string(7) " Array " + [3]=> + string(3) " c " +} + +Arg value 12345 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} + +Arg value -2345 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} + +Arg value 10.5 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} + +Arg value -10.5 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} + +Arg value 101234567000 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} + +Arg value 1.07654321E-9 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} + +Arg value 0.5 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(16) "1 a 1 Array 1 c " +} +Error: 8 - Array to string conversion, %sspliti_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti() expects parameter 1 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sspliti_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti() expects parameter 1 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sspliti_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti() expects parameter 1 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sspliti_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti() expects parameter 1 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sspliti_variation_001.php(%d) + +Arg value Array +Error: 
8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti() expects parameter 1 to be string, array given, %s(74) +NULL + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti(): REG_EMPTY, %s(74) +bool(false) + +Arg value 1 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(4) { + [0]=> + string(0) "" + [1]=> + string(3) " a " + [2]=> + string(7) " Array " + [3]=> + string(3) " c " +} + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti(): REG_EMPTY, %s(74) +bool(false) + +Arg value 1 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(4) { + [0]=> + string(0) "" + [1]=> + string(3) " a " + [2]=> + string(7) " Array " + [3]=> + string(3) " c " +} + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti(): REG_EMPTY, %s(74) +bool(false) +Error: 4096 - Object of class stdClass could not be converted to string, %s(73) + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti() expects parameter 1 to be string, object given, %s(74) +NULL + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti(): REG_EMPTY, %s(74) +bool(false) + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti(): REG_EMPTY, %s(74) +bool(false) +Done diff --git a/ext/ereg/tests/spliti_variation_002.phpt b/ext/ereg/tests/spliti_variation_002.phpt new file mode 100644 index 0000000000..752cafc4f5 --- /dev/null +++ b/ext/ereg/tests/spliti_variation_002.phpt @@ -0,0 +1,257 @@ +--TEST-- +Test spliti() function : usage variations - 
unexpected type for arg 2 +--FILE-- +<?php +/* Prototype : proto array spliti(string pattern, string string [, int limit]) + * Description: spliti string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + +echo "*** Testing spliti() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$pattern = 'r|j|E'; +$limit = 5; + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for string + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( spliti($pattern, $value, $limit) ); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing spliti() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(64) +Error: 8 - Undefined variable: unset_var, %s(67) + +Arg value 0 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(1) "0" +} + +Arg value 1 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(1) "1" +} + +Arg value 12345 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(5) "12345" +} + +Arg value -2345 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(5) "-2345" +} + +Arg value 
10.5 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(4) "10.5" +} + +Arg value -10.5 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(5) "-10.5" +} + +Arg value 101234567000 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(12) "101234567000" +} + +Arg value 1.07654321E-9 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(2) { + [0]=> + string(10) "1.07654321" + [1]=> + string(2) "-9" +} + +Arg value 0.5 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(3) "0.5" +} +Error: 8 - Array to string conversion, %sspliti_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sspliti_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sspliti_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sspliti_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti() expects parameter 2 to be string, array given, %s(74) +NULL +Error: 8 - Array to string conversion, %sspliti_variation_002.php(%d) + +Arg value Array +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti() expects parameter 2 to be string, array given, %s(74) +NULL + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} + +Arg value 1 +Error: 8192 - 
Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(1) "1" +} + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} + +Arg value 1 +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(1) "1" +} + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} +Error: 4096 - Object of class stdClass could not be converted to string, %s(73) + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +Error: 2 - spliti() expects parameter 2 to be string, object given, %s(74) +NULL + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(74) +array(1) { + [0]=> + string(0) "" +} +Done diff --git a/ext/ereg/tests/spliti_variation_003.phpt b/ext/ereg/tests/spliti_variation_003.phpt new file mode 100644 index 0000000000..3cfeaeae1a --- /dev/null +++ b/ext/ereg/tests/spliti_variation_003.phpt @@ -0,0 +1,231 @@ +--TEST-- +Test spliti() function : usage variations - unexpected type for arg 3 +--FILE-- +<?php +/* Prototype : proto array spliti(string pattern, string string [, int limit]) + * Description: spliti string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + if (error_reporting() != 0) { + // report non-silenced errors + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; + } +} +set_error_handler('test_error_handler'); +echo "*** Testing spliti() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) +$pattern = b'[[:space:]]'; 
+$string = '1 2 3 4 5'; + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // float data + 10.5, + -10.5, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // string data + "string", + 'string', + + // object data + new stdclass(), + + // undefined data + @$undefined_var, + + // unset data + @$unset_var, +); + +// loop through each element of the array for limit + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( spliti($pattern, $string, $value) ); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing spliti() : usage variations *** + +Arg value 10.5 +Error: 8192 - Function spliti() is deprecated, %s(73) +array(5) { + [0]=> + string(1) "1" + [1]=> + string(1) "2" + [2]=> + string(1) "3" + [3]=> + string(1) "4" + [4]=> + string(1) "5" +} + +Arg value -10.5 +Error: 8192 - Function spliti() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value 1.07654321E-9 +Error: 8192 - Function spliti() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value 0.5 +Error: 8192 - Function spliti() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} +Error: 8 - Array to string conversion, %sspliti_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function spliti() is deprecated, %s(73) +Error: 2 - spliti() expects parameter 3 to be long, array given, %s(73) +NULL +Error: 8 - Array to string conversion, %sspliti_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function spliti() is deprecated, %s(73) +Error: 2 - spliti() expects parameter 3 to be long, array given, %s(73) +NULL +Error: 8 - Array to string conversion, %sspliti_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function spliti() is 
deprecated, %s(73) +Error: 2 - spliti() expects parameter 3 to be long, array given, %s(73) +NULL +Error: 8 - Array to string conversion, %sspliti_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function spliti() is deprecated, %s(73) +Error: 2 - spliti() expects parameter 3 to be long, array given, %s(73) +NULL +Error: 8 - Array to string conversion, %sspliti_variation_003.php(%d) + +Arg value Array +Error: 8192 - Function spliti() is deprecated, %s(73) +Error: 2 - spliti() expects parameter 3 to be long, array given, %s(73) +NULL + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value 1 +Error: 8192 - Function spliti() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value 1 +Error: 8192 - Function spliti() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(73) +Error: 2 - spliti() expects parameter 3 to be long, string given, %s(73) +NULL + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(73) +Error: 2 - spliti() expects parameter 3 to be long, string given, %s(73) +NULL + +Arg value string +Error: 8192 - Function spliti() is deprecated, %s(73) +Error: 2 - spliti() expects parameter 3 to be long, string given, %s(73) +NULL + +Arg value string +Error: 8192 - Function spliti() is deprecated, %s(73) +Error: 2 - spliti() expects parameter 3 to be long, string given, %s(73) +NULL +Error: 4096 - Object of class stdClass could not be converted to string, %s(72) + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(73) 
+Error: 2 - spliti() expects parameter 3 to be long, object given, %s(73) +NULL + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} + +Arg value +Error: 8192 - Function spliti() is deprecated, %s(73) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} +Done
\ No newline at end of file diff --git a/ext/ereg/tests/spliti_variation_004.phpt b/ext/ereg/tests/spliti_variation_004.phpt new file mode 100644 index 0000000000..d9afa13892 --- /dev/null +++ b/ext/ereg/tests/spliti_variation_004.phpt @@ -0,0 +1,37 @@ +--TEST-- +Test spliti() function : usage variations - out-of-range values for limit +--FILE-- +<?php +/* Prototype : proto array spliti(string pattern, string string [, int limit]) + * Description: spliti string into array by regular expression + * Source code: ext/standard/reg.c + * Alias to functions: + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); +echo "*** Testing spliti() : usage variations ***\n"; + +$pattern = '[[:space:]]'; +$string = '1 2 3 4 5'; +var_dump(spliti($pattern, $string, 0)); +var_dump(spliti($pattern, $string, -10)); + + +echo "Done"; +?> +--EXPECTF-- +*** Testing spliti() : usage variations *** +Error: 8192 - Function spliti() is deprecated, %s(16) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} +Error: 8192 - Function spliti() is deprecated, %s(17) +array(1) { + [0]=> + string(9) "1 2 3 4 5" +} +Done diff --git a/ext/ereg/tests/sql_regcase_basic_001.phpt b/ext/ereg/tests/sql_regcase_basic_001.phpt new file mode 100644 index 0000000000..5a04724b75 --- /dev/null +++ b/ext/ereg/tests/sql_regcase_basic_001.phpt @@ -0,0 +1,27 @@ +--TEST-- +Test sql_regcase() function : basic functionality +--FILE-- +<?php +/* Prototype : proto string sql_regcase(string string) + * Description: Make regular expression for case insensitive match + * Source code: ext/standard/reg.c + * Alias to functions: msql_regcase + */ + +echo "*** Testing sql_regcase() : basic functionality ***\n"; + + +// Initialise all required variables +$string = 'string_Val-0'; + +// Calling sql_regcase() with all possible arguments +var_dump( sql_regcase($string) ); + +echo "Done"; +?> +--EXPECTF-- 
+*** Testing sql_regcase() : basic functionality *** + +Deprecated: Function sql_regcase() is deprecated in %s on line %d +string(39) "[Ss][Tt][Rr][Ii][Nn][Gg]_[Vv][Aa][Ll]-0" +Done diff --git a/ext/ereg/tests/sql_regcase_error_001.phpt b/ext/ereg/tests/sql_regcase_error_001.phpt new file mode 100644 index 0000000000..6ac71f27c1 --- /dev/null +++ b/ext/ereg/tests/sql_regcase_error_001.phpt @@ -0,0 +1,41 @@ +--TEST-- +Test sql_regcase() function : error conditions +--FILE-- +<?php +/* Prototype : proto string sql_regcase(string string) + * Description: Make regular expression for case insensitive match + * Source code: ext/standard/reg.c + * Alias to functions: msql_regcase + */ + +echo "*** Testing sql_regcase() : error conditions ***\n"; + +// Zero arguments +echo "\n-- Testing sql_regcase() function with Zero arguments --\n"; +var_dump( sql_regcase() ); + +//Test sql_regcase with one more than the expected number of arguments +echo "\n-- Testing sql_regcase() function with more than expected no. of arguments --\n"; +$string = 'string_val'; +$extra_arg = 10; +var_dump( sql_regcase($string, $extra_arg) ); + +echo "Done"; +?> +--EXPECTF-- +*** Testing sql_regcase() : error conditions *** + +-- Testing sql_regcase() function with Zero arguments -- + +Deprecated: Function sql_regcase() is deprecated in %s on line %d + +Warning: sql_regcase() expects exactly 1 parameter, 0 given in %s on line %d +NULL + +-- Testing sql_regcase() function with more than expected no. 
of arguments -- + +Deprecated: Function sql_regcase() is deprecated in %s on line %d + +Warning: sql_regcase() expects exactly 1 parameter, 2 given in %s on line %d +NULL +Done diff --git a/ext/ereg/tests/sql_regcase_variation_001.phpt b/ext/ereg/tests/sql_regcase_variation_001.phpt new file mode 100644 index 0000000000..0efbe92622 --- /dev/null +++ b/ext/ereg/tests/sql_regcase_variation_001.phpt @@ -0,0 +1,196 @@ +--TEST-- +Test sql_regcase() function : usage variations - unexpected arg type +--FILE-- +<?php +/* Prototype : proto string sql_regcase(string string) + * Description: Make regular expression for case insensitive match + * Source code: ext/standard/reg.c + * Alias to functions: msql_regcase + */ + +function test_error_handler($err_no, $err_msg, $filename, $linenum, $vars) { + echo "Error: $err_no - $err_msg, $filename($linenum)\n"; +} +set_error_handler('test_error_handler'); + +echo "*** Testing sql_regcase() : usage variations ***\n"; + +// Initialise function arguments not being substituted (if any) + +//get an unset variable +$unset_var = 10; +unset ($unset_var); + +//array of values to iterate over +$values = array( + + // int data + 0, + 1, + 12345, + -2345, + + // float data + 10.5, + -10.5, + 10.1234567e10, + 10.7654321E-10, + .5, + + // array data + array(), + array(0), + array(1), + array(1, 2), + array('color' => 'red', 'item' => 'pen'), + + // null data + NULL, + null, + + // boolean data + true, + false, + TRUE, + FALSE, + + // empty data + "", + '', + + // object data + new stdclass(), + + // undefined data + $undefined_var, + + // unset data + $unset_var, +); + +// loop through each element of the array for string + +foreach($values as $value) { + echo "\nArg value $value \n"; + var_dump( sql_regcase($value) ); +}; + +echo "Done"; +?> +--EXPECTF-- +*** Testing sql_regcase() : usage variations *** +Error: 8 - Undefined variable: undefined_var, %s(62) +Error: 8 - Undefined variable: unset_var, %s(65) + +Arg value 0 +Error: 8192 - Function 
sql_regcase() is deprecated, %s(72) +string(1) "0" + +Arg value 1 +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(1) "1" + +Arg value 12345 +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(5) "12345" + +Arg value -2345 +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(5) "-2345" + +Arg value 10.5 +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(4) "10.5" + +Arg value -10.5 +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(5) "-10.5" + +Arg value 101234567000 +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(12) "101234567000" + +Arg value 1.07654321E-9 +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(16) "1.07654321[Ee]-9" + +Arg value 0.5 +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(3) "0.5" +Error: 8 - Array to string conversion, %ssql_regcase_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +Error: 2 - sql_regcase() expects parameter 1 to be string, array given, %s(72) +NULL +Error: 8 - Array to string conversion, %ssql_regcase_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +Error: 2 - sql_regcase() expects parameter 1 to be string, array given, %s(72) +NULL +Error: 8 - Array to string conversion, %ssql_regcase_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +Error: 2 - sql_regcase() expects parameter 1 to be string, array given, %s(72) +NULL +Error: 8 - Array to string conversion, %ssql_regcase_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +Error: 2 - sql_regcase() expects parameter 1 to be string, array given, %s(72) +NULL +Error: 8 - Array to string conversion, %ssql_regcase_variation_001.php(%d) + +Arg value Array +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +Error: 2 - 
sql_regcase() expects parameter 1 to be string, array given, %s(72) +NULL + +Arg value +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(0) "" + +Arg value +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(0) "" + +Arg value 1 +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(1) "1" + +Arg value +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(0) "" + +Arg value 1 +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(1) "1" + +Arg value +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(0) "" + +Arg value +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(0) "" + +Arg value +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(0) "" +Error: 4096 - Object of class stdClass could not be converted to string, %s(71) + +Arg value +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +Error: 2 - sql_regcase() expects parameter 1 to be string, object given, %s(72) +NULL + +Arg value +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(0) "" + +Arg value +Error: 8192 - Function sql_regcase() is deprecated, %s(72) +string(0) "" +Done diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 965c6be356..212c8d013f 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -204,6 +204,13 @@ static const struct mb_overload_def mb_ovld[] = { {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"}, {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"}, {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"}, +#if HAVE_MBREGEX + {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"}, + {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"}, + {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"}, + {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"}, + {MB_OVERLOAD_REGEX, "split", 
"mb_split", "mb_orig_split"}, +#endif {0, NULL, NULL, NULL} }; /* }}} */ diff --git a/ext/mbstring/tests/mb_split.phpt b/ext/mbstring/tests/mb_split.phpt index 1031cd26d7..c10b0d7b80 100644 --- a/ext/mbstring/tests/mb_split.phpt +++ b/ext/mbstring/tests/mb_split.phpt @@ -3,7 +3,6 @@ mb_split() --SKIPIF-- <?php extension_loaded('mbstring') or die('skip mbstring not available'); -extension_loaded('pcre') or die('skip pcre not available'); function_exists('mb_split') or die("skip mb_split() is not available in this build"); ?> --INI-- @@ -16,7 +15,7 @@ mbstring.func_overload=0 function verify_split( $spliton, $str, $count = 0 ) { $result1 = mb_split( $spliton, $str, $count ); - $result2 = preg_split( "/$spliton/", $str, $count ); + $result2 = split( $spliton, $str, $count ); if ( $result1 == $result2 ) { print "ok\n"; } else { @@ -27,7 +26,7 @@ mbstring.func_overload=0 var_dump( mb_split( b" ", b"a b c d e f g" ) == mb_split( b"[[:space:]]", b"a\nb\tc\nd e f g" ) ); - for ( $i = 1; $i < 5; ++$i ) { + for ( $i = 0; $i < 5; ++$i ) { verify_split( b" ", b"a\tb\tc\td e\tf g", $i ); } @@ -38,12 +37,31 @@ mbstring.func_overload=0 --EXPECTF-- bool(true) + +Deprecated: Function split() is deprecated in %s on line %d ok + +Deprecated: Function split() is deprecated in %s on line %d ok + +Deprecated: Function split() is deprecated in %s on line %d ok + +Deprecated: Function split() is deprecated in %s on line %d ok + +Deprecated: Function split() is deprecated in %s on line %d ok + +Deprecated: Function split() is deprecated in %s on line %d +ok + +Deprecated: Function split() is deprecated in %s on line %d 2-2 + +Deprecated: Function split() is deprecated in %s on line %d 3-3 + +Deprecated: Function split() is deprecated in %s on line %d 4-4 diff --git a/ext/mbstring/tests/overload02.phpt b/ext/mbstring/tests/overload02.phpt index ad007cc066..9b5cecdc40 100644 --- a/ext/mbstring/tests/overload02.phpt +++ b/ext/mbstring/tests/overload02.phpt @@ -24,7 +24,7 @@ $converted_str = 
mb_convert_encoding($str, 'Shift_JIS'); mb_regex_encoding('Shift_JIS'); foreach($ngchars as $c) { $c = mb_convert_encoding($c, 'Shift_JIS'); - $replaced = mb_convert_encoding(str_replace($c, '!!', $converted_str), mb_internal_encoding(), 'Shift_JIS'); + $replaced = mb_convert_encoding(ereg_replace($c, '!!', $converted_str), mb_internal_encoding(), 'Shift_JIS'); var_dump(strpos($replaced, '!!')); } ?> diff --git a/ext/mbstring/tests/php_gr_jp_10830.phpt b/ext/mbstring/tests/php_gr_jp_10830.phpt index f9599d995f..1cbfc2ae1f 100644 --- a/ext/mbstring/tests/php_gr_jp_10830.phpt +++ b/ext/mbstring/tests/php_gr_jp_10830.phpt @@ -10,9 +10,11 @@ function_exists('mb_ereg') or die("skip mb_ereg() is not available in this build $a="aaa\n<>"; var_dump( mb_ereg("^[^><]+$",$a) ); -var_dump( !!preg_match("/^[^><]+$/",$a) ); +var_dump( ereg("^[^><]+$",$a) ); ?> --EXPECTF-- bool(false) + +Deprecated: Function ereg() is deprecated in %s on line %d bool(false) diff --git a/ext/mbstring/tests/php_gr_jp_dev_884-1.phpt b/ext/mbstring/tests/php_gr_jp_dev_884-1.phpt index 4978d0ce57..64fc054aab 100644 --- a/ext/mbstring/tests/php_gr_jp_dev_884-1.phpt +++ b/ext/mbstring/tests/php_gr_jp_dev_884-1.phpt @@ -8,9 +8,12 @@ function_exists('mb_ereg_replace') or die("skip mb_ereg_replace() is not availab --FILE-- <?php set_time_limit(2); +var_dump(ereg_replace(".*", "b", "a")); var_dump(preg_replace("/.*/", "b", "a")); var_dump(mb_ereg_replace(".*", "b", "a")); ?> --EXPECTF-- +Deprecated: Function ereg_replace() is deprecated in %s on line %d +string(2) "bb" string(2) "bb" string(2) "bb" diff --git a/ext/mbstring/tests/php_gr_jp_dev_884-2.phpt b/ext/mbstring/tests/php_gr_jp_dev_884-2.phpt index 2f74dff64b..937b2171b5 100644 --- a/ext/mbstring/tests/php_gr_jp_dev_884-2.phpt +++ b/ext/mbstring/tests/php_gr_jp_dev_884-2.phpt @@ -8,12 +8,16 @@ function_exists('mb_ereg_replace') or die("skip mb_ereg_replace() is not availab --FILE-- <?php var_dump(mb_ereg_replace("C?$", "Z", "ABC")); 
-var_dump(preg_replace("/C?$/", "Z", "ABC")); +var_dump(ereg_replace("C?$", "Z", "ABC")); var_dump(mb_ereg_replace("C*$", "Z", "ABC")); -var_dump(preg_replace("/C*$/", "Z", "ABC")); +var_dump(ereg_replace("C*$", "Z", "ABC")); ?> --EXPECTF-- string(4) "ABZZ" + +Deprecated: Function ereg_replace() is deprecated in %s on line %d string(4) "ABZZ" string(4) "ABZZ" + +Deprecated: Function ereg_replace() is deprecated in %s on line %d string(4) "ABZZ" diff --git a/ext/opcache/zend_accelerator_blacklist.c b/ext/opcache/zend_accelerator_blacklist.c index 41ac16028a..7263ed3c93 100644 --- a/ext/opcache/zend_accelerator_blacklist.c +++ b/ext/opcache/zend_accelerator_blacklist.c @@ -23,6 +23,7 @@ #include "main/fopen_wrappers.h" #include "ZendAccelerator.h" #include "zend_accelerator_blacklist.h" +#include "ext/ereg/php_regex.h" #ifdef ZEND_WIN32 # define REGEX_MODE (REG_EXTENDED|REG_NOSUB|REG_ICASE) @@ -38,12 +39,10 @@ #endif #endif -#include "ext/pcre/php_pcre.h" - #define ZEND_BLACKLIST_BLOCK_SIZE 32 struct _zend_regexp_list { - pcre *re; + regex_t comp_regex; zend_regexp_list *next; }; @@ -66,15 +65,23 @@ void zend_accel_blacklist_init(zend_blacklist *blacklist) blacklist->regexp_list = NULL; } -static void blacklist_report_regexp_error(const char *pcre_error, int pcre_error_offset) +static void blacklist_report_regexp_error(regex_t *comp_regex, int reg_err) { - zend_accel_error(ACCEL_LOG_ERROR, "Blacklist compilation failed (offset: %d), %s\n", pcre_error_offset, pcre_error); + char *errbuf; + int errsize = regerror(reg_err, comp_regex, NULL, 0); + errbuf = malloc(errsize); + if (!errbuf) { + zend_accel_error(ACCEL_LOG_ERROR, "Blacklist compilation: no memory\n"); + return; + } + regerror(reg_err, comp_regex, errbuf, errsize); + zend_accel_error(ACCEL_LOG_ERROR, "Blacklist compilation: %s\n", errbuf); + free(errbuf); } static void zend_accel_blacklist_update_regexp(zend_blacklist *blacklist) { - const char *pcre_error; - int i, pcre_error_offset; + int i, reg_err; 
zend_regexp_list **regexp_list_it, *it; char regexp[12*1024], *p, *end, *c, *backtrack = NULL; @@ -177,8 +184,8 @@ static void zend_accel_blacklist_update_regexp(zend_blacklist *blacklist) } it->next = NULL; - if ((it->re = pcre_compile(regexp, PCRE_NO_AUTO_CAPTURE, &pcre_error, &pcre_error_offset, 0)) == NULL) { - blacklist_report_regexp_error(pcre_error, pcre_error_offset); + if ((reg_err = regcomp(&it->comp_regex, regexp, REGEX_MODE)) != 0) { + blacklist_report_regexp_error(&it->comp_regex, reg_err); } /* prepare for the next iteration */ p = regexp + 2; @@ -205,7 +212,7 @@ void zend_accel_blacklist_shutdown(zend_blacklist *blacklist) if (blacklist->regexp_list) { zend_regexp_list *temp, *it = blacklist->regexp_list; while (it) { - pcre_free(it->re); + regfree(&it->comp_regex); temp = it; it = it->next; free(temp); @@ -342,7 +349,7 @@ zend_bool zend_accel_blacklist_is_blacklisted(zend_blacklist *blacklist, char *v return 0; } while (regexp_list_it != NULL) { - if (pcre_exec(regexp_list_it->re, NULL, verify_path, strlen(verify_path), 0, 0, NULL, 0) >= 0) { + if (regexec(&(regexp_list_it->comp_regex), verify_path, 0, NULL, 0) == 0) { ret = 1; break; } diff --git a/ext/pgsql/pgsql.c b/ext/pgsql/pgsql.c index 1d51b05b9f..e3f64e0ed0 100644 --- a/ext/pgsql/pgsql.c +++ b/ext/pgsql/pgsql.c @@ -36,7 +36,7 @@ #include "php_ini.h" #include "ext/standard/php_standard.h" #include "ext/standard/php_smart_str.h" -#include "ext/pcre/php_pcre.h" +#include "ext/ereg/php_regex.h" #ifdef PHP_WIN32 # include "win32/time.h" #endif @@ -5637,10 +5637,10 @@ static php_pgsql_data_type php_pgsql_get_data_type(const char *type_name, size_t */ static int php_pgsql_convert_match(const char *str, size_t str_len, const char *regex , int icase TSRMLS_DC) { - pcre *re; - const char *err_msg; - int err_offset; - int options = PCRE_NO_AUTO_CAPTURE, res; + regex_t re; + regmatch_t *subs; + int regopt = REG_EXTENDED; + int regerr, ret = SUCCESS; size_t i; /* Check invalid chars for POSIX regex */ @@ 
-5653,27 +5653,31 @@ static int php_pgsql_convert_match(const char *str, size_t str_len, const char * } if (icase) { - options |= PCRE_CASELESS; + regopt |= REG_ICASE; } - if ((re = pcre_compile(regex, options, &err_msg, &err_offset, NULL)) == NULL) { + regerr = regcomp(&re, regex, regopt); + if (regerr) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot compile regex"); + regfree(&re); return FAILURE; } + subs = (regmatch_t *)ecalloc(sizeof(regmatch_t), re.re_nsub+1); - res = pcre_exec(re, NULL, str, str_len, 0, 0, NULL, 0); - pcre_free(re); - - if (res == PCRE_ERROR_NOMATCH) { + regerr = regexec(&re, str, re.re_nsub+1, subs, 0); + if (regerr == REG_NOMATCH) { #ifdef PHP_DEBUG php_error_docref(NULL TSRMLS_CC, E_NOTICE, "'%s' does not match with '%s'", str, regex); #endif - return FAILURE; - } else if (res) { + ret = FAILURE; + } + else if (regerr) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot exec regex"); - return FAILURE; + ret = FAILURE; } - return SUCCESS; + regfree(&re); + efree(subs); + return ret; } /* }}} */ diff --git a/ext/reflection/tests/ReflectionExtension_getClassNames_variation1.phpt b/ext/reflection/tests/ReflectionExtension_getClassNames_variation1.phpt index 91912b9220..cd5dc0b281 100644 --- a/ext/reflection/tests/ReflectionExtension_getClassNames_variation1.phpt +++ b/ext/reflection/tests/ReflectionExtension_getClassNames_variation1.phpt @@ -2,14 +2,10 @@ ReflectionExtension::getClassNames() method on an extension with no classes --CREDITS-- Felix De Vliegher <felix.devliegher@gmail.com> ---SKIPIF-- -<?php -extension_loaded('ctype') or die("skip Requires 'ctype' extension"); -?> --FILE-- <?php -$extension = new ReflectionExtension('ctype'); -var_dump($extension->getClassNames()); +$ereg = new ReflectionExtension('ereg'); +var_dump($ereg->getClassNames()); ?> ==DONE== --EXPECT-- diff --git a/ext/reflection/tests/ReflectionFunction_isDeprecated_basic.phpt b/ext/reflection/tests/ReflectionFunction_isDeprecated_basic.phpt index 
720ac95f4f..4148fada0c 100644 --- a/ext/reflection/tests/ReflectionFunction_isDeprecated_basic.phpt +++ b/ext/reflection/tests/ReflectionFunction_isDeprecated_basic.phpt @@ -3,9 +3,13 @@ ReflectionFunction::isDeprecated --CREDITS-- Stefan Koopmanschap <stefan@phpgg.nl> TestFest PHP|Tek +--SKIPIF-- +<?php +if (!extension_loaded('reflection') || !defined('PHP_VERSION_ID') || PHP_VERSION_ID < 50300) print 'skip'; +?> --FILE-- <?php -$rc = new ReflectionFunction('magic_quotes_runtime'); +$rc = new ReflectionFunction('ereg'); var_dump($rc->isDeprecated()); --EXPECTF-- bool(true) diff --git a/sapi/apache/php_apache_http.h b/sapi/apache/php_apache_http.h index a50306ccd9..a18e5dbac2 100644 --- a/sapi/apache/php_apache_http.h +++ b/sapi/apache/php_apache_http.h @@ -31,6 +31,7 @@ #endif #include "zend.h" +#include "ext/ereg/php_regex.h" #include "php_compat.h" #ifdef HAVE_OPENSSL_EXT diff --git a/sapi/apache_hooks/php_apache_http.h b/sapi/apache_hooks/php_apache_http.h index e381a25cce..23cf7fea69 100644 --- a/sapi/apache_hooks/php_apache_http.h +++ b/sapi/apache_hooks/php_apache_http.h @@ -11,6 +11,7 @@ #include "zend.h" #include "zend_stack.h" +#include "ext/ereg/php_regex.h" #include "httpd.h" #include "http_config.h" |