From faa2ed197535aa44e84f51a86cb6f81b6984ed34 Mon Sep 17 00:00:00 2001 From: foobar Date: Sun, 15 Jul 2007 19:45:50 +0000 Subject: - Removed ereg extension from PHP_4_4 branch (never will be added here) --- ext/ereg/ereg.c | 668 ------------------ ext/ereg/php_ereg.h | 50 -- ext/ereg/php_regex.h | 45 -- ext/ereg/regex/COPYRIGHT | 20 - ext/ereg/regex/README | 32 - ext/ereg/regex/WHATSNEW | 92 --- ext/ereg/regex/cclass.h | 30 - ext/ereg/regex/cname.h | 102 --- ext/ereg/regex/debug.c | 242 ------- ext/ereg/regex/debug.ih | 14 - ext/ereg/regex/engine.c | 1019 -------------------------- ext/ereg/regex/engine.ih | 35 - ext/ereg/regex/main.c | 510 -------------- ext/ereg/regex/main.ih | 19 - ext/ereg/regex/mkh | 76 -- ext/ereg/regex/regcomp.c | 1608 ------------------------------------------ ext/ereg/regex/regcomp.ih | 53 -- ext/ereg/regex/regerror.c | 126 ---- ext/ereg/regex/regerror.ih | 12 - ext/ereg/regex/regex.3 | 502 ------------- ext/ereg/regex/regex.7 | 233 ------ ext/ereg/regex/regex.dsp | 106 --- ext/ereg/regex/regex.dsw | 29 - ext/ereg/regex/regex.h | 83 --- ext/ereg/regex/regex.mak | 304 -------- ext/ereg/regex/regex2.h | 140 ---- ext/ereg/regex/regex_extra.h | 23 - ext/ereg/regex/regexec.c | 138 ---- ext/ereg/regex/regfree.c | 37 - ext/ereg/regex/split.c | 316 --------- ext/ereg/regex/tests | 475 ------------- ext/ereg/regex/utils.h | 25 - ext/ereg/tests/001.phpt | 9 - ext/ereg/tests/002.phpt | 9 - ext/ereg/tests/003.phpt | 10 - ext/ereg/tests/004.phpt | 16 - ext/ereg/tests/005.phpt | 20 - ext/ereg/tests/006.phpt | 10 - ext/ereg/tests/007.phpt | 12 - ext/ereg/tests/008.phpt | 10 - ext/ereg/tests/009.phpt | 18 - ext/ereg/tests/010.phpt | 9 - ext/ereg/tests/011.phpt | 9 - ext/ereg/tests/012.phpt | 9 - ext/ereg/tests/013.phpt | 9 - ext/ereg/tests/014.phpt | 9 - ext/ereg/tests/015.phpt | 8 - ext/ereg/tests/016.phpt | 8 - 48 files changed, 7339 deletions(-) delete mode 100644 ext/ereg/ereg.c delete mode 100644 ext/ereg/php_ereg.h delete mode 100644 ext/ereg/php_regex.h delete mode 100644 ext/ereg/regex/COPYRIGHT delete mode 100644 ext/ereg/regex/README delete mode 100644 ext/ereg/regex/WHATSNEW delete mode 100644 ext/ereg/regex/cclass.h delete mode 100644 ext/ereg/regex/cname.h delete mode 100644 ext/ereg/regex/debug.c delete mode 100644 ext/ereg/regex/debug.ih delete mode 100644 ext/ereg/regex/engine.c delete mode 100644 ext/ereg/regex/engine.ih delete mode 100644 ext/ereg/regex/main.c delete mode 100644 ext/ereg/regex/main.ih delete mode 100644 ext/ereg/regex/mkh delete mode 100644 ext/ereg/regex/regcomp.c delete mode 100644 ext/ereg/regex/regcomp.ih delete mode 100644 ext/ereg/regex/regerror.c delete mode 100644 ext/ereg/regex/regerror.ih delete mode 100644 ext/ereg/regex/regex.3 delete mode 100644 ext/ereg/regex/regex.7 delete mode 100644 ext/ereg/regex/regex.dsp delete mode 100644 ext/ereg/regex/regex.dsw delete mode 100644 ext/ereg/regex/regex.h delete mode 100644 ext/ereg/regex/regex.mak delete mode 100644 ext/ereg/regex/regex2.h delete mode 100644 ext/ereg/regex/regex_extra.h delete mode 100644 ext/ereg/regex/regexec.c delete mode 100644 ext/ereg/regex/regfree.c delete mode 100644 ext/ereg/regex/split.c delete mode 100644 ext/ereg/regex/tests delete mode 100644 ext/ereg/regex/utils.h delete mode 100644 ext/ereg/tests/001.phpt delete mode 100644 ext/ereg/tests/002.phpt delete mode 100644 ext/ereg/tests/003.phpt delete mode 100644 ext/ereg/tests/004.phpt delete mode 100644 ext/ereg/tests/005.phpt delete mode 100644 ext/ereg/tests/006.phpt delete mode 100644 ext/ereg/tests/007.phpt delete mode 100644 ext/ereg/tests/008.phpt delete mode 100644 ext/ereg/tests/009.phpt delete mode 100644 ext/ereg/tests/010.phpt delete mode 100644 ext/ereg/tests/011.phpt delete mode 100644 ext/ereg/tests/012.phpt delete mode 100644 ext/ereg/tests/013.phpt delete mode 100644 ext/ereg/tests/014.phpt delete mode 100644 ext/ereg/tests/015.phpt delete mode 100644 ext/ereg/tests/016.phpt diff --git a/ext/ereg/ereg.c b/ext/ereg/ereg.c deleted file mode 100644 index 6e079c511b..0000000000 --- a/ext/ereg/ereg.c +++ /dev/null @@ -1,668 +0,0 @@ -/* - +----------------------------------------------------------------------+ - | PHP Version 4 | - +----------------------------------------------------------------------+ - | Copyright (c) 1997-2007 The PHP Group | - +----------------------------------------------------------------------+ - | This source file is subject to version 3.01 of the PHP license, | - | that is bundled with this package in the file LICENSE, and is | - | available through the world-wide-web at the following url: | - | http://www.php.net/license/3_01.txt | - | If you did not receive a copy of the PHP license and are unable to | - | obtain it through the world-wide-web, please send a note to | - | license@php.net so we can mail you a copy immediately. | - +----------------------------------------------------------------------+ - | Authors: Rasmus Lerdorf | - | Jim Winstead | - | Jaakko Hyvätti | - +----------------------------------------------------------------------+ - */ -/* $Id$ */ - -#include -#include -#include "php.h" -#include "php_string.h" -#include "reg.h" -#include "ext/standard/info.h" - -ZEND_DECLARE_MODULE_GLOBALS(reg) - -typedef struct { - regex_t preg; - int cflags; -} reg_cache; - -static int reg_magic = 0; - -/* {{{ _php_regcomp - */ -static int _php_regcomp(regex_t *preg, const char *pattern, int cflags) -{ - int r = 0; - int patlen = strlen(pattern); - reg_cache *rc = NULL; - TSRMLS_FETCH(); - - if(zend_hash_find(®(ht_rc), (char *) pattern, patlen+1, (void **) &rc) == SUCCESS - && rc->cflags == cflags) { -#ifdef HAVE_REGEX_T_RE_MAGIC - /* - * We use a saved magic number to see whether cache is corrupted, and if it - * is, we flush it and compile the pattern from scratch. - */ - if (rc->preg.re_magic != reg_magic) { - zend_hash_clean(®(ht_rc)); - } else { - memcpy(preg, &rc->preg, sizeof(*preg)); - return r; - } - } - - r = regcomp(preg, pattern, cflags); - if(!r) { - reg_cache rcp; - - rcp.cflags = cflags; - memcpy(&rcp.preg, preg, sizeof(*preg)); - /* - * Since we don't have access to the actual MAGIC1 definition in the private - * header file, we save the magic value immediately after compilation. Hopefully, - * it's good. - */ - if (!reg_magic) reg_magic = preg->re_magic; - zend_hash_update(®(ht_rc), (char *) pattern, patlen+1, - (void *) &rcp, sizeof(rcp), NULL); - } -#else - memcpy(preg, &rc->preg, sizeof(*preg)); - } else { - r = regcomp(preg, pattern, cflags); - if(!r) { - reg_cache rcp; - - rcp.cflags = cflags; - memcpy(&rcp.preg, preg, sizeof(*preg)); - zend_hash_update(®(ht_rc), (char *) pattern, patlen+1, - (void *) &rcp, sizeof(rcp), NULL); - } - } -#endif - return r; -} -/* }}} */ - -static void _free_reg_cache(reg_cache *rc) -{ - regfree(&rc->preg); -} - -#undef regfree -#define regfree(a); -#undef regcomp -#define regcomp(a, b, c) _php_regcomp(a, b, c) - -static void php_reg_init_globals(zend_reg_globals *reg_globals TSRMLS_DC) -{ - zend_hash_init(®_globals->ht_rc, 0, NULL, (void (*)(void *)) _free_reg_cache, 1); -} - -static void php_reg_destroy_globals(zend_reg_globals *reg_globals TSRMLS_DC) -{ - zend_hash_destroy(®_globals->ht_rc); -} - -PHP_MINIT_FUNCTION(regex) -{ - ZEND_INIT_MODULE_GLOBALS(reg, php_reg_init_globals, php_reg_destroy_globals); - return SUCCESS; -} - -PHP_MSHUTDOWN_FUNCTION(regex) -{ -#ifndef ZTS - php_reg_destroy_globals(®_globals TSRMLS_CC); -#endif - - return SUCCESS; -} - -PHP_MINFO_FUNCTION(regex) -{ -#if HSREGEX - php_info_print_table_row(2, "Regex Library", "Bundled library enabled"); -#else - php_info_print_table_row(2, "Regex Library", "System library enabled"); -#endif -} - - -/* {{{ php_reg_eprint - * php_reg_eprint - convert error number to name - */ -static void php_reg_eprint(int err, regex_t *re) { - char *buf = NULL, *message = NULL; - size_t len; - size_t buf_len; - -#ifdef REG_ITOA - /* get the length of the message */ - buf_len = regerror(REG_ITOA | err, re, NULL, 0); - if (buf_len) { - buf = (char *)safe_emalloc(buf_len, sizeof(char), 0); - if (!buf) return; /* fail silently */ - /* finally, get the error message */ - regerror(REG_ITOA | err, re, buf, buf_len); - } -#else - buf_len = 0; -#endif - len = regerror(err, re, NULL, 0); - if (len) { - TSRMLS_FETCH(); - - message = (char *)safe_emalloc((buf_len + len + 2), sizeof(char), 0); - if (!message) { - return; /* fail silently */ - } - if (buf_len) { - snprintf(message, buf_len, "%s: ", buf); - buf_len += 1; /* so pointer math below works */ - } - /* drop the message into place */ - regerror(err, re, message + buf_len, len); - - php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", message); - } - - STR_FREE(buf); - STR_FREE(message); -} -/* }}} */ - -/* {{{ php_ereg - */ -static void php_ereg(INTERNAL_FUNCTION_PARAMETERS, int icase) -{ - pval **regex, /* Regular expression */ - **findin, /* String to apply expression to */ - **array = NULL; /* Optional register array */ - regex_t re; - regmatch_t *subs; - int err, match_len, string_len; - uint i; - int copts = 0; - off_t start, end; - char *buf = NULL; - char *string = NULL; - int argc = ZEND_NUM_ARGS(); - - if (argc < 2 || argc > 3 || - zend_get_parameters_ex(argc, ®ex, &findin, &array) == FAILURE) { - WRONG_PARAM_COUNT; - } - - if (icase) - copts |= REG_ICASE; - - if (argc == 2) - copts |= REG_NOSUB; - - /* compile the regular expression from the supplied regex */ - if (Z_TYPE_PP(regex) == IS_STRING) { - err = regcomp(&re, Z_STRVAL_PP(regex), REG_EXTENDED | copts); - } else { - /* we convert numbers to integers and treat them as a string */ - if (Z_TYPE_PP(regex) == IS_DOUBLE) - convert_to_long_ex(regex); /* get rid of decimal places */ - convert_to_string_ex(regex); - /* don't bother doing an extended regex with just a number */ - err = regcomp(&re, Z_STRVAL_PP(regex), copts); - } - - if (err) { - php_reg_eprint(err, &re); - RETURN_FALSE; - } - - /* make a copy of the string we're looking in */ - convert_to_string_ex(findin); - string = estrndup(Z_STRVAL_PP(findin), Z_STRLEN_PP(findin)); - - /* allocate storage for (sub-)expression-matches */ - subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1); - - /* actually execute the regular expression */ - err = regexec(&re, string, re.re_nsub+1, subs, 0); - if (err && err != REG_NOMATCH) { - php_reg_eprint(err, &re); - regfree(&re); - efree(subs); - RETURN_FALSE; - } - match_len = 1; - - if (array && err != REG_NOMATCH) { - match_len = (int) (subs[0].rm_eo - subs[0].rm_so); - string_len = Z_STRLEN_PP(findin) + 1; - - buf = emalloc(string_len); - - zval_dtor(*array); /* start with clean array */ - array_init(*array); - - for (i = 0; i <= re.re_nsub; i++) { - start = subs[i].rm_so; - end = subs[i].rm_eo; - if (start != -1 && end > 0 && start < string_len && end < string_len && start < end) { - add_index_stringl(*array, i, string+start, end-start, 1); - } else { - add_index_bool(*array, i, 0); - } - } - efree(buf); - } - - efree(subs); - efree(string); - if (err == REG_NOMATCH) { - RETVAL_FALSE; - } else { - if (match_len == 0) - match_len = 1; - RETVAL_LONG(match_len); - } - regfree(&re); -} -/* }}} */ - -/* {{{ proto int ereg(string pattern, string string [, array registers]) - Regular expression match */ -PHP_FUNCTION(ereg) -{ - php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); -} -/* }}} */ - -/* {{{ proto int eregi(string pattern, string string [, array registers]) - Case-insensitive regular expression match */ -PHP_FUNCTION(eregi) -{ - php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); -} -/* }}} */ - -/* {{{ php_reg_replace - * this is the meat and potatoes of regex replacement! */ -PHPAPI char *php_reg_replace(const char *pattern, const char *replace, const char *string, int icase, int extended) -{ - regex_t re; - regmatch_t *subs; - - char *buf, /* buf is where we build the replaced string */ - *nbuf, /* nbuf is used when we grow the buffer */ - *walkbuf; /* used to walk buf when replacing backrefs */ - const char *walk; /* used to walk replacement string for backrefs */ - int buf_len; - int pos, tmp, string_len, new_l; - int err, copts = 0; - - string_len = strlen(string); - - if (icase) { - copts = REG_ICASE; - } - if (extended) { - copts |= REG_EXTENDED; - } - - err = regcomp(&re, pattern, copts); - if (err) { - php_reg_eprint(err, &re); - return ((char *) -1); - } - - - /* allocate storage for (sub-)expression-matches */ - subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1); - - /* start with a buffer that is twice the size of the stringo - we're doing replacements in */ - buf_len = 2 * string_len + 1; - buf = safe_emalloc(buf_len, sizeof(char), 0); - - err = pos = 0; - buf[0] = '\0'; - while (!err) { - err = regexec(&re, &string[pos], re.re_nsub+1, subs, (pos ? REG_NOTBOL : 0)); - - if (err && err != REG_NOMATCH) { - php_reg_eprint(err, &re); - efree(subs); - efree(buf); - regfree(&re); - return ((char *) -1); - } - - if (!err) { - /* backref replacement is done in two passes: - 1) find out how long the string will be, and allocate buf - 2) copy the part before match, replacement and backrefs to buf - - Jaakko Hyvätti - */ - - new_l = strlen(buf) + subs[0].rm_so; /* part before the match */ - walk = replace; - while (*walk) { - if ('\\' == *walk && isdigit((unsigned char)walk[1]) && ((unsigned char)walk[1]) - '0' <= re.re_nsub) { - if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1) { - new_l += subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so; - } - walk += 2; - } else { - new_l++; - walk++; - } - } - if (new_l + 1 > buf_len) { - buf_len = 1 + buf_len + 2 * new_l; - nbuf = emalloc(buf_len); - strcpy(nbuf, buf); - efree(buf); - buf = nbuf; - } - tmp = strlen(buf); - /* copy the part of the string before the match */ - strncat(buf, &string[pos], subs[0].rm_so); - - /* copy replacement and backrefs */ - walkbuf = &buf[tmp + subs[0].rm_so]; - walk = replace; - while (*walk) { - if ('\\' == *walk && isdigit(walk[1]) && walk[1] - '0' <= re.re_nsub) { - if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1 - /* this next case shouldn't happen. it does. */ - && subs[walk[1] - '0'].rm_so <= subs[walk[1] - '0'].rm_eo) { - - tmp = subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so; - memcpy (walkbuf, &string[pos + subs[walk[1] - '0'].rm_so], tmp); - walkbuf += tmp; - } - walk += 2; - } else { - *walkbuf++ = *walk++; - } - } - *walkbuf = '\0'; - - /* and get ready to keep looking for replacements */ - if (subs[0].rm_so == subs[0].rm_eo) { - if (subs[0].rm_so + pos >= string_len) { - break; - } - new_l = strlen (buf) + 1; - if (new_l + 1 > buf_len) { - buf_len = 1 + buf_len + 2 * new_l; - nbuf = safe_emalloc(buf_len, sizeof(char), 0); - strcpy(nbuf, buf); - efree(buf); - buf = nbuf; - } - pos += subs[0].rm_eo + 1; - buf [new_l-1] = string [pos-1]; - buf [new_l] = '\0'; - } else { - pos += subs[0].rm_eo; - } - } else { /* REG_NOMATCH */ - new_l = strlen(buf) + strlen(&string[pos]); - if (new_l + 1 > buf_len) { - buf_len = new_l + 1; /* now we know exactly how long it is */ - nbuf = safe_emalloc(buf_len, sizeof(char), 0); - strcpy(nbuf, buf); - efree(buf); - buf = nbuf; - } - /* stick that last bit of string on our output */ - strcat(buf, &string[pos]); - } - } - - /* don't want to leak memory .. */ - efree(subs); - regfree(&re); - - /* whew. */ - return (buf); -} -/* }}} */ - -/* {{{ php_ereg_replace - */ -static void php_ereg_replace(INTERNAL_FUNCTION_PARAMETERS, int icase) -{ - pval **arg_pattern, - **arg_replace, - **arg_string; - char *pattern; - char *string; - char *replace; - char *ret; - - if (ZEND_NUM_ARGS() != 3 || - zend_get_parameters_ex(3, &arg_pattern, &arg_replace, &arg_string) == FAILURE) { - WRONG_PARAM_COUNT; - } - - if (Z_TYPE_PP(arg_pattern) == IS_STRING) { - if (Z_STRVAL_PP(arg_pattern) && Z_STRLEN_PP(arg_pattern)) - pattern = estrndup(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern)); - else - pattern = empty_string; - } else { - convert_to_long_ex(arg_pattern); - pattern = emalloc(2); - pattern[0] = (char) Z_LVAL_PP(arg_pattern); - pattern[1] = '\0'; - } - - if (Z_TYPE_PP(arg_replace) == IS_STRING) { - if (Z_STRVAL_PP(arg_replace) && Z_STRLEN_PP(arg_replace)) - replace = estrndup(Z_STRVAL_PP(arg_replace), Z_STRLEN_PP(arg_replace)); - else - replace = empty_string; - } else { - convert_to_long_ex(arg_replace); - replace = emalloc(2); - replace[0] = (char) Z_LVAL_PP(arg_replace); - replace[1] = '\0'; - } - - convert_to_string_ex(arg_string); - if (Z_STRVAL_PP(arg_string) && Z_STRLEN_PP(arg_string)) - string = estrndup(Z_STRVAL_PP(arg_string), Z_STRLEN_PP(arg_string)); - else - string = empty_string; - - /* do the actual work */ - ret = php_reg_replace(pattern, replace, string, icase, 1); - if (ret == (char *) -1) { - RETVAL_FALSE; - } else { - RETVAL_STRING(ret, 1); - STR_FREE(ret); - } - - STR_FREE(string); - STR_FREE(replace); - STR_FREE(pattern); -} -/* }}} */ - -/* {{{ proto string ereg_replace(string pattern, string replacement, string string) - Replace regular expression */ -PHP_FUNCTION(ereg_replace) -{ - php_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); -} -/* }}} */ - -/* {{{ proto string eregi_replace(string pattern, string replacement, string string) - Case insensitive replace regular expression */ -PHP_FUNCTION(eregi_replace) -{ - php_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); -} -/* }}} */ - -/* {{{ php_split - */ -static void php_split(INTERNAL_FUNCTION_PARAMETERS, int icase) -{ - zval **spliton, **str, **arg_count = NULL; - regex_t re; - regmatch_t subs[1]; - char *strp, *endp; - int err, size, count = -1, copts = 0; - int argc = ZEND_NUM_ARGS(); - - if (argc < 2 || argc > 3 || - zend_get_parameters_ex(argc, &spliton, &str, &arg_count) == FAILURE) { - WRONG_PARAM_COUNT; - } - - if (argc > 2) { - convert_to_long_ex(arg_count); - count = Z_LVAL_PP(arg_count); - } - - if (icase) - copts = REG_ICASE; - - convert_to_string_ex(spliton); - convert_to_string_ex(str); - - strp = Z_STRVAL_PP(str); - endp = strp + Z_STRLEN_PP(str); - - err = regcomp(&re, Z_STRVAL_PP(spliton), REG_EXTENDED | copts); - if (err) { - php_reg_eprint(err, &re); - RETURN_FALSE; - } - - array_init(return_value); - - /* churn through str, generating array entries as we go */ - while ((count == -1 || count > 1) && !(err = regexec(&re, strp, 1, subs, 0))) { - if (subs[0].rm_so == 0 && subs[0].rm_eo) { - /* match is at start of string, return empty string */ - add_next_index_stringl(return_value, empty_string, 0, 1); - /* skip ahead the length of the regex match */ - strp += subs[0].rm_eo; - } else if (subs[0].rm_so == 0 && subs[0].rm_eo == 0) { - /* No more matches */ - regfree(&re); - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid Regular Expression to split()"); - zend_hash_destroy(Z_ARRVAL_P(return_value)); - efree(Z_ARRVAL_P(return_value)); - RETURN_FALSE; - } else { - /* On a real match */ - - /* make a copy of the substring */ - size = subs[0].rm_so; - - /* add it to the array */ - add_next_index_stringl(return_value, strp, size, 1); - - /* point at our new starting point */ - strp = strp + subs[0].rm_eo; - } - - /* if we're only looking for a certain number of points, - stop looking once we hit it */ - if (count != -1) { - count--; - } - } - - /* see if we encountered an error */ - if (err && err != REG_NOMATCH) { - php_reg_eprint(err, &re); - regfree(&re); - zend_hash_destroy(Z_ARRVAL_P(return_value)); - efree(Z_ARRVAL_P(return_value)); - RETURN_FALSE; - } - - /* otherwise we just have one last element to add to the array */ - size = endp - strp; - - add_next_index_stringl(return_value, strp, size, 1); - - regfree(&re); -} -/* }}} */ - -/* {{{ proto array split(string pattern, string string [, int limit]) - Split string into array by regular expression */ -PHP_FUNCTION(split) -{ - php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); -} -/* }}} */ - -/* {{{ proto array spliti(string pattern, string string [, int limit]) - Split string into array by regular expression case-insensitive */ - -PHP_FUNCTION(spliti) -{ - php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); -} - -/* }}} */ - -/* {{{ proto string sql_regcase(string string) - Make regular expression for case insensitive match */ -PHPAPI PHP_FUNCTION(sql_regcase) -{ - zval **string; - char *tmp; - unsigned char c; - register int i, j; - - if (ZEND_NUM_ARGS()!=1 || zend_get_parameters_ex(1, &string)==FAILURE) { - WRONG_PARAM_COUNT; - } - convert_to_string_ex(string); - - tmp = safe_emalloc(Z_STRLEN_PP(string), 4, 1); - - for (i = j = 0; i < Z_STRLEN_PP(string); i++) { - c = (unsigned char) Z_STRVAL_PP(string)[i]; - if(isalpha(c)) { - tmp[j++] = '['; - tmp[j++] = toupper(c); - tmp[j++] = tolower(c); - tmp[j++] = ']'; - } else { - tmp[j++] = c; - } - } - tmp[j] = 0; - - RETVAL_STRINGL(tmp, j, 1); - efree(tmp); -} -/* }}} */ - -/* - * Local variables: - * tab-width: 4 - * c-basic-offset: 4 - * End: - * vim600: noet sw=4 ts=4 fdm=marker - * vim<600: noet sw=4 ts=4 - */ diff --git a/ext/ereg/php_ereg.h b/ext/ereg/php_ereg.h deleted file mode 100644 index f0d7ace14e..0000000000 --- a/ext/ereg/php_ereg.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - +----------------------------------------------------------------------+ - | PHP Version 4 | - +----------------------------------------------------------------------+ - | Copyright (c) 1997-2007 The PHP Group | - +----------------------------------------------------------------------+ - | This source file is subject to version 3.01 of the PHP license, | - | that is bundled with this package in the file LICENSE, and is | - | available through the world-wide-web at the following url: | - | http://www.php.net/license/3_01.txt | - | If you did not receive a copy of the PHP license and are unable to | - | obtain it through the world-wide-web, please send a note to | - | license@php.net so we can mail you a copy immediately. | - +----------------------------------------------------------------------+ - | Author: Rasmus Lerdorf | - +----------------------------------------------------------------------+ -*/ - - -/* $Id$ */ - -#ifndef REG_H -#define REG_H - -PHPAPI char *php_reg_replace(const char *pattern, const char *replace, const char *string, int icase, int extended); - -PHP_FUNCTION(ereg); -PHP_FUNCTION(eregi); -PHP_FUNCTION(eregi_replace); -PHP_FUNCTION(ereg_replace); -PHP_FUNCTION(split); -PHP_FUNCTION(spliti); -PHPAPI PHP_FUNCTION(sql_regcase); - -ZEND_BEGIN_MODULE_GLOBALS(reg) - HashTable ht_rc; -ZEND_END_MODULE_GLOBALS(reg) - -PHP_MINIT_FUNCTION(regex); -PHP_MSHUTDOWN_FUNCTION(regex); -PHP_MINFO_FUNCTION(regex); - - -#ifdef ZTS -#define REG(v) TSRMG(reg_globals_id, zend_reg_globals *, v) -#else -#define REG(v) (reg_globals.v) -#endif - -#endif /* REG_H */ diff --git a/ext/ereg/php_regex.h b/ext/ereg/php_regex.h deleted file mode 100644 index c1d1e0c232..0000000000 --- a/ext/ereg/php_regex.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef PHP_REGEX_H -#define PHP_REGEX_H - -/* - * REGEX means: - * 0.. system regex - * 1.. bundled regex - */ - -#if REGEX -/* get aliases */ -#include "regex/regex_extra.h" -#include "regex/regex.h" - -/* get rid of aliases */ -#define PHP_NO_ALIASES -#include "regex/regex_extra.h" -#undef PHP_NO_ALIASES - -#undef _PCREPOSIX_H -#define _PCREPOSIX_H 1 - -#ifndef _REGEX_H -#define _REGEX_H 1 /* this should stop Apache from loading the system version of regex.h */ -#endif -#ifndef _REGEX_H_ -#define _REGEX_H_ 1 -#endif -#ifndef _RX_H -#define _RX_H 1 /* Try defining these for Linux to */ -#endif -#ifndef __REGEXP_LIBRARY_H__ -#define __REGEXP_LIBRARY_H__ 1 /* avoid Apache including regex.h */ -#endif -#ifndef _H_REGEX -#define _H_REGEX 1 /* This one is for AIX */ -#endif -#elif REGEX == 0 -#include -#ifndef _REGEX_H_ -#define _REGEX_H_ 1 -#endif -#endif - -#endif /* PHP_REGEX_H */ diff --git a/ext/ereg/regex/COPYRIGHT b/ext/ereg/regex/COPYRIGHT deleted file mode 100644 index d43362fbfc..0000000000 --- a/ext/ereg/regex/COPYRIGHT +++ /dev/null @@ -1,20 +0,0 @@ -Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved. -This software is not subject to any license of the American Telephone -and Telegraph Company or of the Regents of the University of California. - -Permission is granted to anyone to use this software for any purpose on -any computer system, and to alter it and redistribute it, subject -to the following restrictions: - -1. The author is not responsible for the consequences of use of this - software, no matter how awful, even if they arise from flaws in it. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. Since few users ever read sources, - credits must appear in the documentation. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. Since few users - ever read sources, credits must appear in the documentation. - -4. This notice may not be removed or altered. diff --git a/ext/ereg/regex/README b/ext/ereg/regex/README deleted file mode 100644 index cea9b67b66..0000000000 --- a/ext/ereg/regex/README +++ /dev/null @@ -1,32 +0,0 @@ -alpha3.4 release. -Thu Mar 17 23:17:18 EST 1994 -henry@zoo.toronto.edu - -See WHATSNEW for change listing. - -installation notes: --------- -Read the comments at the beginning of Makefile before running. - -Utils.h contains some things that just might have to be modified on -some systems, as well as a nested include (ugh) of . - -The "fake" directory contains quick-and-dirty fakes for some header -files and routines that old systems may not have. Note also that --DUSEBCOPY will make utils.h substitute bcopy() for memmove(). - -After that, "make r" will build regcomp.o, regexec.o, regfree.o, -and regerror.o (the actual routines), bundle them together into a test -program, and run regression tests on them. No output is good output. - -"make lib" builds just the .o files for the actual routines (when -you're happy with testing and have adjusted CFLAGS for production), -and puts them together into libregex.a. You can pick up either the -library or *.o ("make lib" makes sure there are no other .o files left -around to confuse things). - -Main.c, debug.c, split.c are used for regression testing but are not part -of the RE routines themselves. - -Regex.h goes in /usr/include. All other .h files are internal only. --------- diff --git a/ext/ereg/regex/WHATSNEW b/ext/ereg/regex/WHATSNEW deleted file mode 100644 index 6e82e1dae0..0000000000 --- a/ext/ereg/regex/WHATSNEW +++ /dev/null @@ -1,92 +0,0 @@ -New in alpha3.4: The complex bug alluded to below has been fixed (in a -slightly kludgey temporary way that may hurt efficiency a bit; this is -another "get it out the door for 4.4" release). The tests at the end of -the tests file have accordingly been uncommented. The primary sign of -the bug was that something like a?b matching ab matched b rather than ab. -(The bug was essentially specific to this exact situation, else it would -have shown up earlier.) - -New in alpha3.3: The definition of word boundaries has been altered -slightly, to more closely match the usual programming notion that "_" -is an alphabetic. Stuff used for pre-ANSI systems is now in a subdir, -and the makefile no longer alludes to it in mysterious ways. The -makefile has generally been cleaned up some. Fixes have been made -(again!) so that the regression test will run without -DREDEBUG, at -the cost of weaker checking. A workaround for a bug in some folks' - has been added. And some more things have been added to -tests, including a couple right at the end which are commented out -because the code currently flunks them (complex bug; fix coming). -Plus the usual minor cleanup. - -New in alpha3.2: Assorted bits of cleanup and portability improvement -(the development base is now a BSDI system using GCC instead of an ancient -Sun system, and the newer compiler exposed some glitches). Fix for a -serious bug that affected REs using many [] (including REG_ICASE REs -because of the way they are implemented), *sometimes*, depending on -memory-allocation patterns. The header-file prototypes no longer name -the parameters, avoiding possible name conflicts. The possibility that -some clot has defined CHAR_MIN as (say) `-128' instead of `(-128)' is -now handled gracefully. "uchar" is no longer used as an internal type -name (too many people have the same idea). Still the same old lousy -performance, alas. - -New in alpha3.1: Basically nothing, this release is just a bookkeeping -convenience. Stay tuned. - -New in alpha3.0: Performance is no better, alas, but some fixes have been -made and some functionality has been added. (This is basically the "get -it out the door in time for 4.4" release.) One bug fix: regfree() didn't -free the main internal structure (how embarrassing). It is now possible -to put NULs in either the RE or the target string, using (resp.) a new -REG_PEND flag and the old REG_STARTEND flag. The REG_NOSPEC flag to -regcomp() makes all characters ordinary, so you can match a literal -string easily (this will become more useful when performance improves!). -There are now primitives to match beginnings and ends of words, although -the syntax is disgusting and so is the implementation. The REG_ATOI -debugging interface has changed a bit. And there has been considerable -internal cleanup of various kinds. - -New in alpha2.3: Split change list out of README, and moved flags notes -into Makefile. Macro-ized the name of regex(7) in regex(3), since it has -to change for 4.4BSD. Cleanup work in engine.c, and some new regression -tests to catch tricky cases thereof. - -New in alpha2.2: Out-of-date manpages updated. Regerror() acquires two -small extensions -- REG_ITOA and REG_ATOI -- which avoid debugging kludges -in my own test program and might be useful to others for similar purposes. -The regression test will now compile (and run) without REDEBUG. The -BRE \$ bug is fixed. Most uses of "uchar" are gone; it's all chars now. -Char/uchar parameters are now written int/unsigned, to avoid possible -portability problems with unpromoted parameters. Some unsigned casts have -been introduced to minimize portability problems with shifting into sign -bits. - -New in alpha2.1: Lots of little stuff, cleanup and fixes. The one big -thing is that regex.h is now generated, using mkh, rather than being -supplied in the distribution; due to circularities in dependencies, -you have to build regex.h explicitly by "make h". The two known bugs -have been fixed (and the regression test now checks for them), as has a -problem with assertions not being suppressed in the absence of REDEBUG. -No performance work yet. - -New in alpha2: Backslash-anything is an ordinary character, not an -error (except, of course, for the handful of backslashed metacharacters -in BREs), which should reduce script breakage. The regression test -checks *where* null strings are supposed to match, and has generally -been tightened up somewhat. Small bug fixes in parameter passing (not -harmful, but technically errors) and some other areas. Debugging -invoked by defining REDEBUG rather than not defining NDEBUG. - -New in alpha+3: full prototyping for internal routines, using a little -helper program, mkh, which extracts prototypes given in stylized comments. -More minor cleanup. Buglet fix: it's CHAR_BIT, not CHAR_BITS. Simple -pre-screening of input when a literal string is known to be part of the -RE; this does wonders for performance. - -New in alpha+2: minor bits of cleanup. Notably, the number "32" for the -word width isn't hardwired into regexec.c any more, the public header -file prototypes the functions if __STDC__ is defined, and some small typos -in the manpages have been fixed. - -New in alpha+1: improvements to the manual pages, and an important -extension, the REG_STARTEND option to regexec(). diff --git a/ext/ereg/regex/cclass.h b/ext/ereg/regex/cclass.h deleted file mode 100644 index df41694b04..0000000000 --- a/ext/ereg/regex/cclass.h +++ /dev/null @@ -1,30 +0,0 @@ -/* character-class table */ -static struct cclass { - unsigned char *name; - unsigned char *chars; - unsigned char *multis; -} cclasses[] = { - {"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", ""}, - {"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", - ""}, - {"blank", " \t", ""}, - {"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ -\25\26\27\30\31\32\33\34\35\36\37\177", ""}, - {"digit", "0123456789", ""}, - {"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - ""}, - {"lower", "abcdefghijklmnopqrstuvwxyz", - ""}, - {"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", - ""}, - {"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - ""}, - {"space", "\t\n\v\f\r ", ""}, - {"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", - ""}, - {"xdigit", "0123456789ABCDEFabcdef", - ""}, - {NULL, 0, ""} -}; diff --git a/ext/ereg/regex/cname.h b/ext/ereg/regex/cname.h deleted file mode 100644 index 670b273882..0000000000 --- a/ext/ereg/regex/cname.h +++ /dev/null @@ -1,102 +0,0 @@ -/* character-name table */ -static struct cname { - char *name; - char code; -} cnames[] = { - {"NUL", '\0'}, - {"SOH", '\001'}, - {"STX", '\002'}, - {"ETX", '\003'}, - {"EOT", '\004'}, - {"ENQ", '\005'}, - {"ACK", '\006'}, - {"BEL", '\007'}, - {"alert", '\007'}, - {"BS", '\010'}, - {"backspace", '\b'}, - {"HT", '\011'}, - {"tab", '\t'}, - {"LF", '\012'}, - {"newline", '\n'}, - {"VT", '\013'}, - {"vertical-tab", '\v'}, - {"FF", '\014'}, - {"form-feed", '\f'}, - {"CR", '\015'}, - {"carriage-return", '\r'}, - {"SO", '\016'}, - {"SI", '\017'}, - {"DLE", '\020'}, - {"DC1", '\021'}, - {"DC2", '\022'}, - {"DC3", '\023'}, - {"DC4", '\024'}, - {"NAK", '\025'}, - {"SYN", '\026'}, - {"ETB", '\027'}, - {"CAN", '\030'}, - {"EM", '\031'}, - {"SUB", '\032'}, - {"ESC", '\033'}, - {"IS4", '\034'}, - {"FS", '\034'}, - {"IS3", '\035'}, - {"GS", '\035'}, - {"IS2", '\036'}, - {"RS", '\036'}, - {"IS1", '\037'}, - {"US", '\037'}, - {"space", ' '}, - {"exclamation-mark", '!'}, - {"quotation-mark", '"'}, - {"number-sign", '#'}, - {"dollar-sign", '$'}, - {"percent-sign", '%'}, - {"ampersand", '&'}, - {"apostrophe", '\''}, - {"left-parenthesis", '('}, - {"right-parenthesis", ')'}, - {"asterisk", '*'}, - {"plus-sign", '+'}, - {"comma", ','}, - {"hyphen", '-'}, - {"hyphen-minus", '-'}, - {"period", '.'}, - {"full-stop", '.'}, - {"slash", '/'}, - {"solidus", '/'}, - {"zero", '0'}, - {"one", '1'}, - {"two", '2'}, - {"three", '3'}, - {"four", '4'}, - {"five", '5'}, - {"six", '6'}, - {"seven", '7'}, - {"eight", '8'}, - {"nine", '9'}, - {"colon", ':'}, - {"semicolon", ';'}, - {"less-than-sign", '<'}, - {"equals-sign", '='}, - {"greater-than-sign", '>'}, - {"question-mark", '?'}, - {"commercial-at", '@'}, - {"left-square-bracket", '['}, - {"backslash", '\\'}, - {"reverse-solidus", '\\'}, - {"right-square-bracket", ']'}, - {"circumflex", '^'}, - {"circumflex-accent", '^'}, - {"underscore", '_'}, - {"low-line", '_'}, - {"grave-accent", '`'}, - {"left-brace", '{'}, - {"left-curly-bracket", '{'}, - {"vertical-line", '|'}, - {"right-brace", '}'}, - {"right-curly-bracket", '}'}, - {"tilde", '~'}, - {"DEL", '\177'}, - {NULL, 0}, -}; diff --git a/ext/ereg/regex/debug.c b/ext/ereg/regex/debug.c deleted file mode 100644 index 3db93ef293..0000000000 --- a/ext/ereg/regex/debug.c +++ /dev/null @@ -1,242 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include "utils.h" -#include "regex2.h" -#include "debug.ih" - -/* - - regprint - print a regexp for debugging - == void regprint(regex_t *r, FILE *d); - */ -void -regprint(r, d) -regex_t *r; -FILE *d; -{ - register struct re_guts *g = r->re_g; - register int i; - register int c; - register int last; - int nincat[NC]; - - fprintf(d, "%ld states, %d categories", (long)g->nstates, - g->ncategories); - fprintf(d, ", first %ld last %ld", (long)g->firststate, - (long)g->laststate); - if (g->iflags&USEBOL) - fprintf(d, ", USEBOL"); - if (g->iflags&USEEOL) - fprintf(d, ", USEEOL"); - if (g->iflags&BAD) - fprintf(d, ", BAD"); - if (g->nsub > 0) - fprintf(d, ", nsub=%ld", (long)g->nsub); - if (g->must != NULL) - fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen, - g->must); - if (g->backrefs) - fprintf(d, ", backrefs"); - if (g->nplus > 0) - fprintf(d, ", nplus %ld", (long)g->nplus); - fprintf(d, "\n"); - s_print(g, d); - for (i = 0; i < g->ncategories; i++) { - nincat[i] = 0; - for (c = CHAR_MIN; c <= CHAR_MAX; c++) - if (g->categories[c] == i) - nincat[i]++; - } - fprintf(d, "cc0#%d", nincat[0]); - for (i = 1; i < g->ncategories; i++) - if (nincat[i] == 1) { - for (c = CHAR_MIN; c <= CHAR_MAX; c++) - if (g->categories[c] == i) - break; - fprintf(d, ", %d=%s", i, regchar(c)); - } - fprintf(d, "\n"); - for (i = 1; i < g->ncategories; i++) - if (nincat[i] != 1) { - fprintf(d, "cc%d\t", i); - last = -1; - for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */ - if (c <= CHAR_MAX && g->categories[c] == i) { - if (last < 0) { - fprintf(d, "%s", regchar(c)); - last = c; - } - } else { - if (last >= 0) { - if (last != c-1) - fprintf(d, "-%s", - regchar(c-1)); - last = -1; - } - } - fprintf(d, "\n"); - } -} - -/* - - s_print - print the strip for debugging - == static void s_print(register struct re_guts *g, FILE *d); - */ -static void -s_print(g, d) -register struct re_guts *g; -FILE *d; -{ - register sop *s; - register cset *cs; - register int i; - register int done = 0; - register sop opnd; - register int col = 0; - register int last; - register sopno offset = 2; -# define GAP() { if (offset % 5 == 0) { \ - if (col > 40) { \ - fprintf(d, "\n\t"); \ - col = 0; \ - } else { \ - fprintf(d, " "); \ - col++; \ - } \ - } else \ - col++; \ - offset++; \ - } - - if (OP(g->strip[0]) != OEND) - fprintf(d, "missing initial OEND!\n"); - for (s = &g->strip[1]; !done; s++) { - opnd = OPND(*s); - switch (OP(*s)) { - case OEND: - fprintf(d, "\n"); - done = 1; - break; - case OCHAR: - if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL) - fprintf(d, "\\%c", (unsigned char)opnd); - else - fprintf(d, "%s", regchar((unsigned char)opnd)); - break; - case OBOL: - fprintf(d, "^"); - break; - case OEOL: - fprintf(d, "$"); - break; - case OBOW: - fprintf(d, "\\{"); - break; - case OEOW: - fprintf(d, "\\}"); - break; - case OANY: - fprintf(d, "."); - break; - case OANYOF: - fprintf(d, "[(%ld)", (long)opnd); - cs = &g->sets[opnd]; - last = -1; - for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */ - if (CHIN(cs, i) && i < g->csetsize) { - if (last < 0) { - fprintf(d, "%s", regchar(i)); - last = i; - } - } else { - if (last >= 0) { - if (last != i-1) - fprintf(d, "-%s", - regchar(i-1)); - last = -1; - } - } - fprintf(d, "]"); - break; - case OBACK_: - fprintf(d, "(\\<%ld>", (long)opnd); - break; - case O_BACK: - fprintf(d, "<%ld>\\)", (long)opnd); - break; - case OPLUS_: - fprintf(d, "(+"); - if (OP(*(s+opnd)) != O_PLUS) - fprintf(d, "<%ld>", (long)opnd); - break; - case O_PLUS: - if (OP(*(s-opnd)) != OPLUS_) - fprintf(d, "<%ld>", (long)opnd); - fprintf(d, "+)"); - break; - case OQUEST_: - fprintf(d, "(?"); - if (OP(*(s+opnd)) != O_QUEST) - fprintf(d, "<%ld>", (long)opnd); - break; - case O_QUEST: - if (OP(*(s-opnd)) != OQUEST_) - fprintf(d, "<%ld>", (long)opnd); - fprintf(d, "?)"); - break; - case OLPAREN: - fprintf(d, "((<%ld>", (long)opnd); - break; - case ORPAREN: - fprintf(d, "<%ld>))", (long)opnd); - break; - case OCH_: - fprintf(d, "<"); - if (OP(*(s+opnd)) != OOR2) - fprintf(d, "<%ld>", (long)opnd); - break; - case OOR1: - if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_) - fprintf(d, "<%ld>", (long)opnd); - fprintf(d, "|"); - break; - case OOR2: - fprintf(d, "|"); - if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH) - fprintf(d, "<%ld>", (long)opnd); - break; - case O_CH: - if (OP(*(s-opnd)) != OOR1) - fprintf(d, "<%ld>", (long)opnd); - fprintf(d, ">"); - break; - default: - fprintf(d, "!%ld(%ld)!", OP(*s), opnd); - break; - } - if (!done) - GAP(); - } -} - -/* - - regchar - make a character printable - == static char *regchar(int ch); - */ -static unsigned char * /* -> representation */ -regchar(ch) -int ch; -{ - static unsigned char buf[10]; - - if (isprint(ch) || ch == ' ') - sprintf(buf, "%c", ch); - else - sprintf(buf, "\\%o", ch); - return(buf); -} diff --git a/ext/ereg/regex/debug.ih b/ext/ereg/regex/debug.ih deleted file mode 100644 index 5f40ff7917..0000000000 --- a/ext/ereg/regex/debug.ih +++ /dev/null @@ -1,14 +0,0 @@ -/* ========= begin header generated by ./mkh ========= */ -#ifdef __cplusplus -extern "C" { -#endif - -/* === debug.c === */ -void regprint(regex_t *r, FILE *d); -static void s_print(register struct re_guts *g, FILE *d); -static char *regchar(int ch); - -#ifdef __cplusplus -} -#endif -/* ========= end header generated by ./mkh ========= */ diff --git a/ext/ereg/regex/engine.c b/ext/ereg/regex/engine.c deleted file mode 100644 index c4bdc8079f..0000000000 --- a/ext/ereg/regex/engine.c +++ /dev/null @@ -1,1019 +0,0 @@ -/* - * The matching engine and friends. This file is #included by regexec.c - * after suitable #defines of a variety of macros used herein, so that - * different state representations can be used without duplicating masses - * of code. - */ - -#ifdef SNAMES -#define matcher smatcher -#define fast sfast -#define slow sslow -#define dissect sdissect -#define backref sbackref -#define step sstep -#define print sprint -#define at sat -#define match smat -#endif -#ifdef LNAMES -#define matcher lmatcher -#define fast lfast -#define slow lslow -#define dissect ldissect -#define backref lbackref -#define step lstep -#define print lprint -#define at lat -#define match lmat -#endif - -/* another structure passed up and down to avoid zillions of parameters */ -struct match { - struct re_guts *g; - int eflags; - regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ - unsigned char *offp; /* offsets work from here */ - unsigned char *beginp; /* start of string -- virtual NUL precedes */ - unsigned char *endp; /* end of string -- virtual NUL here */ - unsigned char *coldp; /* can be no match starting before here */ - unsigned char **lastpos; /* [nplus+1] */ - STATEVARS; - states st; /* current states */ - states fresh; /* states for a fresh start */ - states tmp; /* temporary */ - states empty; /* empty set of states */ -}; - -#include "engine.ih" - -#ifdef REDEBUG -#define SP(t, s, c) print(m, t, s, c, stdout) -#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) -#define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } -#else -#define SP(t, s, c) /* nothing */ -#define AT(t, p1, p2, s1, s2) /* nothing */ -#define NOTE(s) /* nothing */ -#endif - -/* - - matcher - the actual matching engine - == static int matcher(register struct re_guts *g, char *string, \ - == size_t nmatch, regmatch_t pmatch[], int eflags); - */ -static int /* 0 success, REG_NOMATCH failure */ -matcher(g, string, nmatch, pmatch, eflags) -register struct re_guts *g; -unsigned char *string; -size_t nmatch; -regmatch_t pmatch[]; -int eflags; -{ - register unsigned char *endp; - register size_t i; - struct match mv; - register struct match *m = &mv; - register unsigned char *dp; - const register sopno gf = g->firststate+1; /* +1 for OEND */ - const register sopno gl = g->laststate; - unsigned char *start; - unsigned char *stop; - - /* simplify the situation where possible */ - if (g->cflags®_NOSUB) - nmatch = 0; - if (eflags®_STARTEND) { - start = string + pmatch[0].rm_so; - stop = string + pmatch[0].rm_eo; - } else { - start = string; - stop = start + strlen(start); - } - if (stop < start) - return(REG_INVARG); - - /* prescreening; this does wonders for this rather slow code */ - if (g->must != NULL) { - for (dp = start; dp < stop; dp++) - if (*dp == g->must[0] && stop - dp >= g->mlen && - memcmp(dp, g->must, (size_t)g->mlen) == 0) - break; - if (dp == stop) /* we didn't find g->must */ - return(REG_NOMATCH); - } - - /* match struct setup */ - m->g = g; - m->eflags = eflags; - m->pmatch = NULL; - m->lastpos = NULL; - m->offp = string; - m->beginp = start; - m->endp = stop; - STATESETUP(m, 4); - SETUP(m->st); - SETUP(m->fresh); - SETUP(m->tmp); - SETUP(m->empty); - CLEAR(m->empty); - - /* this loop does only one repetition except for backrefs */ - for (;;) { - endp = fast(m, start, stop, gf, gl); - if (endp == NULL) { /* a miss */ - STATETEARDOWN(m); - return(REG_NOMATCH); - } - if (nmatch == 0 && !g->backrefs) - break; /* no further info needed */ - - /* where? */ - assert(m->coldp != NULL); - for (;;) { - NOTE("finding start"); - endp = slow(m, m->coldp, stop, gf, gl); - if (endp != NULL) - break; - assert(m->coldp < m->endp); - m->coldp++; - } - if (nmatch == 1 && !g->backrefs) - break; /* no further info needed */ - - /* oh my, he wants the subexpressions... */ - if (m->pmatch == NULL) - m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * - sizeof(regmatch_t)); - if (m->pmatch == NULL) { - STATETEARDOWN(m); - return(REG_ESPACE); - } - for (i = 1; i <= m->g->nsub; i++) - m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; - if (!g->backrefs && !(m->eflags®_BACKR)) { - NOTE("dissecting"); - dp = dissect(m, m->coldp, endp, gf, gl); - } else { - if (g->nplus > 0 && m->lastpos == NULL) - m->lastpos = (unsigned char **)malloc((g->nplus+1) * - sizeof(unsigned char *)); - if (g->nplus > 0 && m->lastpos == NULL) { - free((char *)m->pmatch); - STATETEARDOWN(m); - return(REG_ESPACE); - } - NOTE("backref dissect"); - dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); - } - if (dp != NULL) - break; - - /* uh-oh... we couldn't find a subexpression-level match */ - assert(g->backrefs); /* must be back references doing it */ - assert(g->nplus == 0 || m->lastpos != NULL); - for (;;) { - if (dp != NULL || endp <= m->coldp) - break; /* defeat */ - NOTE("backoff"); - endp = slow(m, m->coldp, endp-1, gf, gl); - if (endp == NULL) - break; /* defeat */ - /* try it on a shorter possibility */ -#ifndef NDEBUG - for (i = 1; i <= m->g->nsub; i++) { - assert(m->pmatch[i].rm_so == -1); - assert(m->pmatch[i].rm_eo == -1); - } -#endif - NOTE("backoff dissect"); - dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); - } - assert(dp == NULL || dp == endp); - if (dp != NULL) /* found a shorter one */ - break; - - /* despite initial appearances, there is no match here */ - NOTE("false alarm"); - start = m->coldp + 1; /* recycle starting later */ - assert(start <= stop); - } - - /* fill in the details if requested */ - if (nmatch > 0) { - pmatch[0].rm_so = m->coldp - m->offp; - pmatch[0].rm_eo = endp - m->offp; - } - if (nmatch > 1) { - assert(m->pmatch != NULL); - for (i = 1; i < nmatch; i++) - if (i <= m->g->nsub) - pmatch[i] = m->pmatch[i]; - else { - pmatch[i].rm_so = -1; - pmatch[i].rm_eo = -1; - } - } - - if (m->pmatch != NULL) - free((char *)m->pmatch); - if (m->lastpos != NULL) - free((char *)m->lastpos); - STATETEARDOWN(m); - return(0); -} - -/* - - dissect - figure out what matched what, no back references - == static unsigned char *dissect(register struct match *m, unsigned char *start, \ - == unsigned char *stop, sopno startst, sopno stopst); - */ -static unsigned char * /* == stop (success) always */ -dissect(m, start, stop, startst, stopst) -register struct match *m; -unsigned char *start; -unsigned char *stop; -sopno startst; -sopno stopst; -{ - register int i; - register sopno ss; /* start sop of current subRE */ - register sopno es; /* end sop of current subRE */ - register unsigned char *sp; /* start of string matched by it */ - register unsigned char *stp; /* string matched by it cannot pass here */ - register unsigned char *rest; /* start of rest of string */ - register unsigned char *tail; /* string unmatched by rest of RE */ - register sopno ssub; /* start sop of subsubRE */ - register sopno esub; /* end sop of subsubRE */ - register unsigned char *ssp; /* start of string matched by subsubRE */ - register unsigned char *sep; /* end of string matched by subsubRE */ - register unsigned char *oldssp; /* previous ssp */ - register unsigned char *dp; - - AT("diss", start, stop, startst, stopst); - sp = start; - for (ss = startst; ss < stopst; ss = es) { - /* identify end of subRE */ - es = ss; - switch (OP(m->g->strip[es])) { - case OPLUS_: - case OQUEST_: - es += OPND(m->g->strip[es]); - break; - case OCH_: - while (OP(m->g->strip[es]) != O_CH) - es += OPND(m->g->strip[es]); - break; - } - es++; - - /* figure out what it matched */ - switch (OP(m->g->strip[ss])) { - case OEND: - assert(nope); - break; - case OCHAR: - sp++; - break; - case OBOL: - case OEOL: - case OBOW: - case OEOW: - break; - case OANY: - case OANYOF: - sp++; - break; - case OBACK_: - case O_BACK: - assert(nope); - break; - /* cases where length of match is hard to find */ - case OQUEST_: - stp = stop; - for (;;) { - /* how long could this one be? */ - rest = slow(m, sp, stp, ss, es); - assert(rest != NULL); /* it did match */ - /* could the rest match the rest? */ - tail = slow(m, rest, stop, es, stopst); - if (tail == stop) - break; /* yes! */ - /* no -- try a shorter match for this one */ - stp = rest - 1; - assert(stp >= sp); /* it did work */ - } - ssub = ss + 1; - esub = es - 1; - /* did innards match? */ - if (slow(m, sp, rest, ssub, esub) != NULL) { - dp = dissect(m, sp, rest, ssub, esub); - assert(dp == rest); - } else /* no */ - assert(sp == rest); - sp = rest; - break; - case OPLUS_: - stp = stop; - for (;;) { - /* how long could this one be? */ - rest = slow(m, sp, stp, ss, es); - assert(rest != NULL); /* it did match */ - /* could the rest match the rest? */ - tail = slow(m, rest, stop, es, stopst); - if (tail == stop) - break; /* yes! */ - /* no -- try a shorter match for this one */ - stp = rest - 1; - assert(stp >= sp); /* it did work */ - } - ssub = ss + 1; - esub = es - 1; - ssp = sp; - oldssp = ssp; - for (;;) { /* find last match of innards */ - sep = slow(m, ssp, rest, ssub, esub); - if (sep == NULL || sep == ssp) - break; /* failed or matched null */ - oldssp = ssp; /* on to next try */ - ssp = sep; - } - if (sep == NULL) { - /* last successful match */ - sep = ssp; - ssp = oldssp; - } - assert(sep == rest); /* must exhaust substring */ - assert(slow(m, ssp, sep, ssub, esub) == rest); - dp = dissect(m, ssp, sep, ssub, esub); - assert(dp == sep); - sp = rest; - break; - case OCH_: - stp = stop; - for (;;) { - /* how long could this one be? */ - rest = slow(m, sp, stp, ss, es); - assert(rest != NULL); /* it did match */ - /* could the rest match the rest? */ - tail = slow(m, rest, stop, es, stopst); - if (tail == stop) - break; /* yes! */ - /* no -- try a shorter match for this one */ - stp = rest - 1; - assert(stp >= sp); /* it did work */ - } - ssub = ss + 1; - esub = ss + OPND(m->g->strip[ss]) - 1; - assert(OP(m->g->strip[esub]) == OOR1); - for (;;) { /* find first matching branch */ - if (slow(m, sp, rest, ssub, esub) == rest) - break; /* it matched all of it */ - /* that one missed, try next one */ - assert(OP(m->g->strip[esub]) == OOR1); - esub++; - assert(OP(m->g->strip[esub]) == OOR2); - ssub = esub + 1; - esub += OPND(m->g->strip[esub]); - if (OP(m->g->strip[esub]) == OOR2) - esub--; - else - assert(OP(m->g->strip[esub]) == O_CH); - } - dp = dissect(m, sp, rest, ssub, esub); - assert(dp == rest); - sp = rest; - break; - case O_PLUS: - case O_QUEST: - case OOR1: - case OOR2: - case O_CH: - assert(nope); - break; - case OLPAREN: - i = OPND(m->g->strip[ss]); - assert(0 < i && i <= m->g->nsub); - m->pmatch[i].rm_so = sp - m->offp; - break; - case ORPAREN: - i = OPND(m->g->strip[ss]); - assert(0 < i && i <= m->g->nsub); - m->pmatch[i].rm_eo = sp - m->offp; - break; - default: /* uh oh */ - assert(nope); - break; - } - } - - assert(sp == stop); - return(sp); -} - -/* - - backref - figure out what matched what, figuring in back references - == static unsigned char *backref(register struct match *m, unsigned char *start, \ - == unsigned char *stop, sopno startst, sopno stopst, sopno lev); - */ -static unsigned char * /* == stop (success) or NULL (failure) */ -backref(m, start, stop, startst, stopst, lev) -register struct match *m; -unsigned char *start; -unsigned char *stop; -sopno startst; -sopno stopst; -sopno lev; /* PLUS nesting level */ -{ - register int i; - register sopno ss; /* start sop of current subRE */ - register unsigned char *sp; /* start of string matched by it */ - register sopno ssub; /* start sop of subsubRE */ - register sopno esub; /* end sop of subsubRE */ - register unsigned char *ssp; /* start of string matched by subsubRE */ - register unsigned char *dp; - register size_t len; - register int hard; - register sop s; - register regoff_t offsave; - register cset *cs; - - AT("back", start, stop, startst, stopst); - sp = start; - - /* get as far as we can with easy stuff */ - hard = 0; - for (ss = startst; !hard && ss < stopst; ss++) - switch (OP(s = m->g->strip[ss])) { - case OCHAR: - if (sp == stop || *sp++ != (unsigned char)OPND(s)) - return(NULL); - break; - case OANY: - if (sp == stop) - return(NULL); - sp++; - break; - case OANYOF: - cs = &m->g->sets[OPND(s)]; - if (sp == stop || !CHIN(cs, *sp++)) - return(NULL); - break; - case OBOL: - if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || - (sp < m->endp && *(sp-1) == '\n' && - (m->g->cflags®_NEWLINE)) ) - { /* yes */ } - else - return(NULL); - break; - case OEOL: - if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || - (sp < m->endp && *sp == '\n' && - (m->g->cflags®_NEWLINE)) ) - { /* yes */ } - else - return(NULL); - break; - case OBOW: - if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || - (sp < m->endp && *(sp-1) == '\n' && - (m->g->cflags®_NEWLINE)) || - (sp > m->beginp && - !ISWORD(*(sp-1))) ) && - (sp < m->endp && ISWORD(*sp)) ) - { /* yes */ } - else - return(NULL); - break; - case OEOW: - if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || - (sp < m->endp && *sp == '\n' && - (m->g->cflags®_NEWLINE)) || - (sp < m->endp && !ISWORD(*sp)) ) && - (sp > m->beginp && ISWORD(*(sp-1))) ) - { /* yes */ } - else - return(NULL); - break; - case O_QUEST: - break; - case OOR1: /* matches null but needs to skip */ - ss++; - s = m->g->strip[ss]; - do { - assert(OP(s) == OOR2); - ss += OPND(s); - } while (OP(s = m->g->strip[ss]) != O_CH); - /* note that the ss++ gets us past the O_CH */ - break; - default: /* have to make a choice */ - hard = 1; - break; - } - if (!hard) { /* that was it! */ - if (sp != stop) - return(NULL); - return(sp); - } - ss--; /* adjust for the for's final increment */ - - /* the hard stuff */ - AT("hard", sp, stop, ss, stopst); - s = m->g->strip[ss]; - switch (OP(s)) { - case OBACK_: /* the vilest depths */ - i = OPND(s); - assert(0 < i && i <= m->g->nsub); - if (m->pmatch[i].rm_eo == -1) - return(NULL); - assert(m->pmatch[i].rm_so != -1); - len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; - assert(stop - m->beginp >= len); - if (sp > stop - len) - return(NULL); /* not enough left to match */ - ssp = m->offp + m->pmatch[i].rm_so; - if (memcmp(sp, ssp, len) != 0) - return(NULL); - while (m->g->strip[ss] != SOP(O_BACK, i)) - ss++; - return(backref(m, sp+len, stop, ss+1, stopst, lev)); - break; - case OQUEST_: /* to null or not */ - dp = backref(m, sp, stop, ss+1, stopst, lev); - if (dp != NULL) - return(dp); /* not */ - return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); - break; - case OPLUS_: - assert(m->lastpos != NULL); - assert(lev+1 <= m->g->nplus); - m->lastpos[lev+1] = sp; - return(backref(m, sp, stop, ss+1, stopst, lev+1)); - break; - case O_PLUS: - if (sp == m->lastpos[lev]) /* last pass matched null */ - return(backref(m, sp, stop, ss+1, stopst, lev-1)); - /* try another pass */ - m->lastpos[lev] = sp; - dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); - if (dp == NULL) - return(backref(m, sp, stop, ss+1, stopst, lev-1)); - else - return(dp); - break; - case OCH_: /* find the right one, if any */ - ssub = ss + 1; - esub = ss + OPND(s) - 1; - assert(OP(m->g->strip[esub]) == OOR1); - for (;;) { /* find first matching branch */ - dp = backref(m, sp, stop, ssub, esub, lev); - if (dp != NULL) - return(dp); - /* that one missed, try next one */ - if (OP(m->g->strip[esub]) == O_CH) - return(NULL); /* there is none */ - esub++; - assert(OP(m->g->strip[esub]) == OOR2); - ssub = esub + 1; - esub += OPND(m->g->strip[esub]); - if (OP(m->g->strip[esub]) == OOR2) - esub--; - else - assert(OP(m->g->strip[esub]) == O_CH); - } - break; - case OLPAREN: /* must undo assignment if rest fails */ - i = OPND(s); - assert(0 < i && i <= m->g->nsub); - offsave = m->pmatch[i].rm_so; - m->pmatch[i].rm_so = sp - m->offp; - dp = backref(m, sp, stop, ss+1, stopst, lev); - if (dp != NULL) - return(dp); - m->pmatch[i].rm_so = offsave; - return(NULL); - break; - case ORPAREN: /* must undo assignment if rest fails */ - i = OPND(s); - assert(0 < i && i <= m->g->nsub); - offsave = m->pmatch[i].rm_eo; - m->pmatch[i].rm_eo = sp - m->offp; - dp = backref(m, sp, stop, ss+1, stopst, lev); - if (dp != NULL) - return(dp); - m->pmatch[i].rm_eo = offsave; - return(NULL); - break; - default: /* uh oh */ - assert(nope); - break; - } - - /* "can't happen" */ - assert(nope); - /* NOTREACHED */ - return((unsigned char *)NULL); /* dummy */ -} - -/* - - fast - step through the string at top speed - == static unsigned char *fast(register struct match *m, unsigned char *start, \ - == unsigned char *stop, sopno startst, sopno stopst); - */ -static unsigned char * /* where tentative match ended, or NULL */ -fast(m, start, stop, startst, stopst) -register struct match *m; -unsigned char *start; -unsigned char *stop; -sopno startst; -sopno stopst; -{ - register states st = m->st; - register states fresh = m->fresh; - register states tmp = m->tmp; - register unsigned char *p = start; - register int c = (start == m->beginp) ? OUT : *(start-1); - register int lastc; /* previous c */ - register int flagch; - register int i; - register unsigned char *coldp; /* last p after which no match was underway */ - - CLEAR(st); - SET1(st, startst); - st = step(m->g, startst, stopst, st, NOTHING, st); - ASSIGN(fresh, st); - SP("start", st, *p); - coldp = NULL; - for (;;) { - /* next character */ - lastc = c; - c = (p == m->endp) ? OUT : *p; - if (EQ(st, fresh)) - coldp = p; - - /* is there an EOL and/or BOL between lastc and c? */ - flagch = '\0'; - i = 0; - if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || - (lastc == OUT && !(m->eflags®_NOTBOL)) ) { - flagch = BOL; - i = m->g->nbol; - } - if ( (c == '\n' && m->g->cflags®_NEWLINE) || - (c == OUT && !(m->eflags®_NOTEOL)) ) { - flagch = (flagch == BOL) ? BOLEOL : EOL; - i += m->g->neol; - } - if (i != 0) { - for (; i > 0; i--) - st = step(m->g, startst, stopst, st, flagch, st); - SP("boleol", st, c); - } - - /* how about a word boundary? */ - if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && - (c != OUT && ISWORD(c)) ) { - flagch = BOW; - } - if ( (lastc != OUT && ISWORD(lastc)) && - (flagch == EOL || (c != OUT && !ISWORD(c))) ) { - flagch = EOW; - } - if (flagch == BOW || flagch == EOW) { - st = step(m->g, startst, stopst, st, flagch, st); - SP("boweow", st, c); - } - - /* are we done? */ - if (ISSET(st, stopst) || p == stop) - break; /* NOTE BREAK OUT */ - - /* no, we must deal with this character */ - ASSIGN(tmp, st); - ASSIGN(st, fresh); - assert(c != OUT); - st = step(m->g, startst, stopst, tmp, c, st); - SP("aft", st, c); - assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); - p++; - } - - assert(coldp != NULL); - m->coldp = coldp; - if (ISSET(st, stopst)) - return(p+1); - else - return(NULL); -} - -/* - - slow - step through the string more deliberately - == static unsigned char *slow(register struct match *m, unsigned char *start, \ - == unsigned char *stop, sopno startst, sopno stopst); - */ -static unsigned char * /* where it ended */ -slow(m, start, stop, startst, stopst) -register struct match *m; -unsigned char *start; -unsigned char *stop; -sopno startst; -sopno stopst; -{ - register states st = m->st; - register states empty = m->empty; - register states tmp = m->tmp; - register unsigned char *p = start; - register int c = (start == m->beginp) ? OUT : *(start-1); - register int lastc; /* previous c */ - register int flagch; - register int i; - register unsigned char *matchp; /* last p at which a match ended */ - - AT("slow", start, stop, startst, stopst); - CLEAR(st); - SET1(st, startst); - SP("sstart", st, *p); - st = step(m->g, startst, stopst, st, NOTHING, st); - matchp = NULL; - for (;;) { - /* next character */ - lastc = c; - c = (p == m->endp) ? OUT : *p; - - /* is there an EOL and/or BOL between lastc and c? */ - flagch = '\0'; - i = 0; - if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || - (lastc == OUT && !(m->eflags®_NOTBOL)) ) { - flagch = BOL; - i = m->g->nbol; - } - if ( (c == '\n' && m->g->cflags®_NEWLINE) || - (c == OUT && !(m->eflags®_NOTEOL)) ) { - flagch = (flagch == BOL) ? BOLEOL : EOL; - i += m->g->neol; - } - if (i != 0) { - for (; i > 0; i--) - st = step(m->g, startst, stopst, st, flagch, st); - SP("sboleol", st, c); - } - - /* how about a word boundary? */ - if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && - (c != OUT && ISWORD(c)) ) { - flagch = BOW; - } - if ( (lastc != OUT && ISWORD(lastc)) && - (flagch == EOL || (c != OUT && !ISWORD(c))) ) { - flagch = EOW; - } - if (flagch == BOW || flagch == EOW) { - st = step(m->g, startst, stopst, st, flagch, st); - SP("sboweow", st, c); - } - - /* are we done? */ - if (ISSET(st, stopst)) - matchp = p; - if (EQ(st, empty) || p == stop) - break; /* NOTE BREAK OUT */ - - /* no, we must deal with this character */ - ASSIGN(tmp, st); - ASSIGN(st, empty); - assert(c != OUT); - st = step(m->g, startst, stopst, tmp, c, st); - SP("saft", st, c); - assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); - p++; - } - - return(matchp); -} - - -/* - - step - map set of states reachable before char to set reachable after - == static states step(register struct re_guts *g, sopno start, sopno stop, \ - == register states bef, int ch, register states aft); - == #define BOL (OUT+1) - == #define EOL (BOL+1) - == #define BOLEOL (BOL+2) - == #define NOTHING (BOL+3) - == #define BOW (BOL+4) - == #define EOW (BOL+5) - == #define CODEMAX (BOL+5) // highest code used - == #define NONCHAR(c) ((c) > UCHAR_MAX) - == #define NNONCHAR (CODEMAX-UCHAR_MAX) - */ -static states -step(g, start, stop, bef, ch, aft) -register struct re_guts *g; -sopno start; /* start state within strip */ -sopno stop; /* state after stop state within strip */ -register states bef; /* states reachable before */ -int ch; /* character or NONCHAR code */ -register states aft; /* states already known reachable after */ -{ - register cset *cs; - register sop s; - register sopno pc; - register onestate here; /* note, macros know this name */ - register sopno look; - register long i; - - for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { - s = g->strip[pc]; - switch (OP(s)) { - case OEND: - assert(pc == stop-1); - break; - case OCHAR: - /* only characters can match */ - assert(!NONCHAR(ch) || ch != (unsigned char)OPND(s)); - if (ch == (unsigned char)OPND(s)) - FWD(aft, bef, 1); - break; - case OBOL: - if (ch == BOL || ch == BOLEOL) - FWD(aft, bef, 1); - break; - case OEOL: - if (ch == EOL || ch == BOLEOL) - FWD(aft, bef, 1); - break; - case OBOW: - if (ch == BOW) - FWD(aft, bef, 1); - break; - case OEOW: - if (ch == EOW) - FWD(aft, bef, 1); - break; - case OANY: - if (!NONCHAR(ch)) - FWD(aft, bef, 1); - break; - case OANYOF: - cs = &g->sets[OPND(s)]; - if (!NONCHAR(ch) && CHIN(cs, ch)) - FWD(aft, bef, 1); - break; - case OBACK_: /* ignored here */ - case O_BACK: - FWD(aft, aft, 1); - break; - case OPLUS_: /* forward, this is just an empty */ - FWD(aft, aft, 1); - break; - case O_PLUS: /* both forward and back */ - FWD(aft, aft, 1); - i = ISSETBACK(aft, OPND(s)); - BACK(aft, aft, OPND(s)); - if (!i && ISSETBACK(aft, OPND(s))) { - /* oho, must reconsider loop body */ - pc -= OPND(s) + 1; - INIT(here, pc); - } - break; - case OQUEST_: /* two branches, both forward */ - FWD(aft, aft, 1); - FWD(aft, aft, OPND(s)); - break; - case O_QUEST: /* just an empty */ - FWD(aft, aft, 1); - break; - case OLPAREN: /* not significant here */ - case ORPAREN: - FWD(aft, aft, 1); - break; - case OCH_: /* mark the first two branches */ - FWD(aft, aft, 1); - assert(OP(g->strip[pc+OPND(s)]) == OOR2); - FWD(aft, aft, OPND(s)); - break; - case OOR1: /* done a branch, find the O_CH */ - if (ISSTATEIN(aft, here)) { - for (look = 1; - OP(s = g->strip[pc+look]) != O_CH; - look += OPND(s)) - assert(OP(s) == OOR2); - FWD(aft, aft, look); - } - break; - case OOR2: /* propagate OCH_'s marking */ - FWD(aft, aft, 1); - if (OP(g->strip[pc+OPND(s)]) != O_CH) { - assert(OP(g->strip[pc+OPND(s)]) == OOR2); - FWD(aft, aft, OPND(s)); - } - break; - case O_CH: /* just empty */ - FWD(aft, aft, 1); - break; - default: /* ooooops... */ - assert(nope); - break; - } - } - - return(aft); -} - -#ifdef REDEBUG -/* - - print - print a set of states - == #ifdef REDEBUG - == static void print(struct match *m, unsigned char *caption, states st, \ - == int ch, FILE *d); - == #endif - */ -static void -print(m, caption, st, ch, d) -struct match *m; -unsigned char *caption; -states st; -int ch; -FILE *d; -{ - register struct re_guts *g = m->g; - register int i; - register int first = 1; - - if (!(m->eflags®_TRACE)) - return; - - fprintf(d, "%s", caption); - if (ch != '\0') - fprintf(d, " %s", pchar(ch)); - for (i = 0; i < g->nstates; i++) - if (ISSET(st, i)) { - fprintf(d, "%s%d", (first) ? "\t" : ", ", i); - first = 0; - } - fprintf(d, "\n"); -} - -/* - - at - print current situation - == #ifdef REDEBUG - == static void at(struct match *m, unsigned char *title, unsigned char *start, unsigned char *stop, \ - == sopno startst, sopno stopst); - == #endif - */ -static void -at(m, title, start, stop, startst, stopst) -struct match *m; -unsigned char *title; -unsigned char *start; -unsigned char *stop; -sopno startst; -sopno stopst; -{ - if (!(m->eflags®_TRACE)) - return; - - printf("%s %s-", title, pchar(*start)); - printf("%s ", pchar(*stop)); - printf("%ld-%ld\n", (long)startst, (long)stopst); -} - -#ifndef PCHARDONE -#define PCHARDONE /* never again */ -/* - - pchar - make a character printable - == #ifdef REDEBUG - == static unsigned char *pchar(int ch); - == #endif - * - * Is this identical to regchar() over in debug.c? Well, yes. But a - * duplicate here avoids having a debugging-capable regexec.o tied to - * a matching debug.o, and this is convenient. It all disappears in - * the non-debug compilation anyway, so it doesn't matter much. - */ -static unsigned char * /* -> representation */ -pchar(ch) -int ch; -{ - static unsigned char pbuf[10]; - - if (isprint(ch) || ch == ' ') - sprintf(pbuf, "%c", ch); - else - sprintf(pbuf, "\\%o", ch); - return(pbuf); -} -#endif -#endif - -#undef matcher -#undef fast -#undef slow -#undef dissect -#undef backref -#undef step -#undef print -#undef at -#undef match diff --git a/ext/ereg/regex/engine.ih b/ext/ereg/regex/engine.ih deleted file mode 100644 index 9a301838bc..0000000000 --- a/ext/ereg/regex/engine.ih +++ /dev/null @@ -1,35 +0,0 @@ -/* ========= begin header generated by ./mkh ========= */ -#ifdef __cplusplus -extern "C" { -#endif - -/* === engine.c === */ -static int matcher(register struct re_guts *g, unsigned char *string, size_t nmatch, regmatch_t pmatch[], int eflags); -static unsigned char *dissect(register struct match *m, unsigned char *start, unsigned char *stop, sopno startst, sopno stopst); -static unsigned char *backref(register struct match *m, unsigned char *start, unsigned char *stop, sopno startst, sopno stopst, sopno lev); -static unsigned char *fast(register struct match *m, unsigned char *start, unsigned char *stop, sopno startst, sopno stopst); -static unsigned char *slow(register struct match *m, unsigned char *start, unsigned char *stop, sopno startst, sopno stopst); -static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft); -#define BOL (OUT+1) -#define EOL (BOL+1) -#define BOLEOL (BOL+2) -#define NOTHING (BOL+3) -#define BOW (BOL+4) -#define EOW (BOL+5) -#define CODEMAX (BOL+5) /* highest code used */ -#define NONCHAR(c) ((c) > UCHAR_MAX) -#define NNONCHAR (CODEMAX-UCHAR_MAX) -#ifdef REDEBUG -static void print(struct match *m, unsigned char *caption, states st, int ch, FILE *d); -#endif -#ifdef REDEBUG -static void at(struct match *m, unsigned char *title, unsigned char *start, unsigned char *stop, sopno startst, sopno stopst); -#endif -#ifdef REDEBUG -static unsigned char *pchar(int ch); -#endif - -#ifdef __cplusplus -} -#endif -/* ========= end header generated by ./mkh ========= */ diff --git a/ext/ereg/regex/main.c b/ext/ereg/regex/main.c deleted file mode 100644 index 657338a2c1..0000000000 --- a/ext/ereg/regex/main.c +++ /dev/null @@ -1,510 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "main.ih" - -char *progname; -int debug = 0; -int line = 0; -int status = 0; - -int copts = REG_EXTENDED; -int eopts = 0; -regoff_t startoff = 0; -regoff_t endoff = 0; - - -extern int split(); -extern void regprint(); - -/* - - main - do the simple case, hand off to regress() for regression - */ -int main(argc, argv) -int argc; -char *argv[]; -{ - regex_t re; -# define NS 10 - regmatch_t subs[NS]; - char erbuf[100]; - int err; - size_t len; - int c; - int errflg = 0; - register int i; - extern int optind; - extern char *optarg; - - progname = argv[0]; - - while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF) - switch (c) { - case 'c': /* compile options */ - copts = options('c', optarg); - break; - case 'e': /* execute options */ - eopts = options('e', optarg); - break; - case 'S': /* start offset */ - startoff = (regoff_t)atoi(optarg); - break; - case 'E': /* end offset */ - endoff = (regoff_t)atoi(optarg); - break; - case 'x': /* Debugging. */ - debug++; - break; - case '?': - default: - errflg++; - break; - } - if (errflg) { - fprintf(stderr, "usage: %s ", progname); - fprintf(stderr, "[-c copt][-C][-d] [re]\n"); - exit(2); - } - - if (optind >= argc) { - regress(stdin); - exit(status); - } - - err = regcomp(&re, argv[optind++], copts); - if (err) { - len = regerror(err, &re, erbuf, sizeof(erbuf)); - fprintf(stderr, "error %s, %d/%d `%s'\n", - eprint(err), len, sizeof(erbuf), erbuf); - exit(status); - } - regprint(&re, stdout); - - if (optind >= argc) { - regfree(&re); - exit(status); - } - - if (eopts®_STARTEND) { - subs[0].rm_so = startoff; - subs[0].rm_eo = strlen(argv[optind]) - endoff; - } - err = regexec(&re, argv[optind], (size_t)NS, subs, eopts); - if (err) { - len = regerror(err, &re, erbuf, sizeof(erbuf)); - fprintf(stderr, "error %s, %d/%d `%s'\n", - eprint(err), len, sizeof(erbuf), erbuf); - exit(status); - } - if (!(copts®_NOSUB)) { - len = (int)(subs[0].rm_eo - subs[0].rm_so); - if (subs[0].rm_so != -1) { - if (len != 0) - printf("match `%.*s'\n", (int)len, - argv[optind] + subs[0].rm_so); - else - printf("match `'@%.1s\n", - argv[optind] + subs[0].rm_so); - } - for (i = 1; i < NS; i++) - if (subs[i].rm_so != -1) - printf("(%d) `%.*s'\n", i, - (int)(subs[i].rm_eo - subs[i].rm_so), - argv[optind] + subs[i].rm_so); - } - exit(status); -} - -/* - - regress - main loop of regression test - == void regress(FILE *in); - */ -void -regress(in) -FILE *in; -{ - char inbuf[1000]; -# define MAXF 10 - char *f[MAXF]; - int nf; - int i; - char erbuf[100]; - size_t ne; - char *badpat = "invalid regular expression"; -# define SHORT 10 - char *bpname = "REG_BADPAT"; - regex_t re; - - while (fgets(inbuf, sizeof(inbuf), in) != NULL) { - line++; - if (inbuf[0] == '#' || inbuf[0] == '\n') - continue; /* NOTE CONTINUE */ - inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */ - if (debug) - fprintf(stdout, "%d:\n", line); - nf = split(inbuf, f, MAXF, "\t\t"); - if (nf < 3) { - fprintf(stderr, "bad input, line %d\n", line); - exit(1); - } - for (i = 0; i < nf; i++) - if (strcmp(f[i], "\"\"") == 0) - f[i] = ""; - if (nf <= 3) - f[3] = NULL; - if (nf <= 4) - f[4] = NULL; - try(f[0], f[1], f[2], f[3], f[4], options('c', f[1])); - if (opt('&', f[1])) /* try with either type of RE */ - try(f[0], f[1], f[2], f[3], f[4], - options('c', f[1]) &~ REG_EXTENDED); - } - - ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); - if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) { - fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", - erbuf, badpat); - status = 1; - } - ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT); - if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' || - ne != strlen(badpat)+1) { - fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n", - erbuf, SHORT-1, badpat); - status = 1; - } - ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); - if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) { - fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n", - erbuf, bpname); - status = 1; - } - re.re_endp = bpname; - ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf)); - if (atoi(erbuf) != (int)REG_BADPAT) { - fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n", - erbuf, (long)REG_BADPAT); - status = 1; - } else if (ne != strlen(erbuf)+1) { - fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", - erbuf, (long)REG_BADPAT); - status = 1; - } -} - -/* - - try - try it, and report on problems - == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); - */ -void -try(f0, f1, f2, f3, f4, opts) -char *f0; -char *f1; -char *f2; -char *f3; -char *f4; -int opts; /* may not match f1 */ -{ - regex_t re; -# define NSUBS 10 - regmatch_t subs[NSUBS]; -# define NSHOULD 15 - char *should[NSHOULD]; - int nshould; - char erbuf[100]; - int err; - int len; - char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE"; - register int i; - char *grump; - char f0copy[1000]; - char f2copy[1000]; - - strcpy(f0copy, f0); - re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL; - fixstr(f0copy); - err = regcomp(&re, f0copy, opts); - if (err != 0 && (!opt('C', f1) || err != efind(f2))) { - /* unexpected error or wrong error */ - len = regerror(err, &re, erbuf, sizeof(erbuf)); - fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n", - line, type, eprint(err), len, - sizeof(erbuf), erbuf); - status = 1; - } else if (err == 0 && opt('C', f1)) { - /* unexpected success */ - fprintf(stderr, "%d: %s should have given REG_%s\n", - line, type, f2); - status = 1; - err = 1; /* so we won't try regexec */ - } - - if (err != 0) { - regfree(&re); - return; - } - - strcpy(f2copy, f2); - fixstr(f2copy); - - if (options('e', f1)®_STARTEND) { - if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL) - fprintf(stderr, "%d: bad STARTEND syntax\n", line); - subs[0].rm_so = strchr(f2, '(') - f2 + 1; - subs[0].rm_eo = strchr(f2, ')') - f2; - } - err = regexec(&re, f2copy, NSUBS, subs, options('e', f1)); - - if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) { - /* unexpected error or wrong error */ - len = regerror(err, &re, erbuf, sizeof(erbuf)); - fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n", - line, type, eprint(err), len, - sizeof(erbuf), erbuf); - status = 1; - } else if (err != 0) { - /* nothing more to check */ - } else if (f3 == NULL) { - /* unexpected success */ - fprintf(stderr, "%d: %s exec should have failed\n", - line, type); - status = 1; - err = 1; /* just on principle */ - } else if (opts®_NOSUB) { - /* nothing more to check */ - } else if ((grump = check(f2, subs[0], f3)) != NULL) { - fprintf(stderr, "%d: %s %s\n", line, type, grump); - status = 1; - err = 1; - } - - if (err != 0 || f4 == NULL) { - regfree(&re); - return; - } - - for (i = 1; i < NSHOULD; i++) - should[i] = NULL; - nshould = split(f4, should+1, NSHOULD-1, ","); - if (nshould == 0) { - nshould = 1; - should[1] = ""; - } - for (i = 1; i < NSUBS; i++) { - grump = check(f2, subs[i], should[i]); - if (grump != NULL) { - fprintf(stderr, "%d: %s $%d %s\n", line, - type, i, grump); - status = 1; - err = 1; - } - } - - regfree(&re); -} - -/* - - options - pick options out of a regression-test string - == int options(int type, char *s); - */ -int -options(type, s) -int type; /* 'c' compile, 'e' exec */ -char *s; -{ - register char *p; - register int o = (type == 'c') ? copts : eopts; - register char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; - - for (p = s; *p != '\0'; p++) - if (strchr(legal, *p) != NULL) - switch (*p) { - case 'b': - o &= ~REG_EXTENDED; - break; - case 'i': - o |= REG_ICASE; - break; - case 's': - o |= REG_NOSUB; - break; - case 'n': - o |= REG_NEWLINE; - break; - case 'm': - o &= ~REG_EXTENDED; - o |= REG_NOSPEC; - break; - case 'p': - o |= REG_PEND; - break; - case '^': - o |= REG_NOTBOL; - break; - case '$': - o |= REG_NOTEOL; - break; - case '#': - o |= REG_STARTEND; - break; - case 't': /* trace */ - o |= REG_TRACE; - break; - case 'l': /* force long representation */ - o |= REG_LARGE; - break; - case 'r': /* force backref use */ - o |= REG_BACKR; - break; - } - return(o); -} - -/* - - opt - is a particular option in a regression string? - == int opt(int c, char *s); - */ -int /* predicate */ -opt(c, s) -int c; -char *s; -{ - return(strchr(s, c) != NULL); -} - -/* - - fixstr - transform magic characters in strings - == void fixstr(register char *p); - */ -void -fixstr(p) -register char *p; -{ - if (p == NULL) - return; - - for (; *p != '\0'; p++) - if (*p == 'N') - *p = '\n'; - else if (*p == 'T') - *p = '\t'; - else if (*p == 'S') - *p = ' '; - else if (*p == 'Z') - *p = '\0'; -} - -/* - - check - check a substring match - == char *check(char *str, regmatch_t sub, char *should); - */ -char * /* NULL or complaint */ -check(str, sub, should) -char *str; -regmatch_t sub; -char *should; -{ - register int len; - register int shlen; - register char *p; - static char grump[500]; - register char *at = NULL; - - if (should != NULL && strcmp(should, "-") == 0) - should = NULL; - if (should != NULL && should[0] == '@') { - at = should + 1; - should = ""; - } - - /* check rm_so and rm_eo for consistency */ - if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) || - (sub.rm_so != -1 && sub.rm_eo == -1) || - (sub.rm_so != -1 && sub.rm_so < 0) || - (sub.rm_eo != -1 && sub.rm_eo < 0) ) { - sprintf(grump, "start %ld end %ld", (long)sub.rm_so, - (long)sub.rm_eo); - return(grump); - } - - /* check for no match */ - if (sub.rm_so == -1 && should == NULL) - return(NULL); - if (sub.rm_so == -1) - return("did not match"); - - /* check for in range */ - if (sub.rm_eo > strlen(str)) { - sprintf(grump, "start %ld end %ld, past end of string", - (long)sub.rm_so, (long)sub.rm_eo); - return(grump); - } - - len = (int)(sub.rm_eo - sub.rm_so); - shlen = (int)strlen(should); - p = str + sub.rm_so; - - /* check for not supposed to match */ - if (should == NULL) { - sprintf(grump, "matched `%.*s'", len, p); - return(grump); - } - - /* check for wrong match */ - if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) { - sprintf(grump, "matched `%.*s' instead", len, p); - return(grump); - } - if (shlen > 0) - return(NULL); - - /* check null match in right place */ - if (at == NULL) - return(NULL); - shlen = strlen(at); - if (shlen == 0) - shlen = 1; /* force check for end-of-string */ - if (strncmp(p, at, shlen) != 0) { - sprintf(grump, "matched null at `%.20s'", p); - return(grump); - } - return(NULL); -} - -/* - - eprint - convert error number to name - == static char *eprint(int err); - */ -static char * -eprint(err) -int err; -{ - static char epbuf[100]; - size_t len; - - len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf)); - assert(len <= sizeof(epbuf)); - return(epbuf); -} - -/* - - efind - convert error name to number - == static int efind(char *name); - */ -static int -efind(name) -char *name; -{ - static char efbuf[100]; - regex_t re; - - sprintf(efbuf, "REG_%s", name); - assert(strlen(efbuf) < sizeof(efbuf)); - re.re_endp = efbuf; - (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf)); - return(atoi(efbuf)); -} diff --git a/ext/ereg/regex/main.ih b/ext/ereg/regex/main.ih deleted file mode 100644 index 5a0118ac44..0000000000 --- a/ext/ereg/regex/main.ih +++ /dev/null @@ -1,19 +0,0 @@ -/* ========= begin header generated by ./mkh ========= */ -#ifdef __cplusplus -extern "C" { -#endif - -/* === main.c === */ -void regress(FILE *in); -void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); -int options(int type, char *s); -int opt(int c, char *s); -void fixstr(register char *p); -char *check(char *str, regmatch_t sub, char *should); -static char *eprint(int err); -static int efind(char *name); - -#ifdef __cplusplus -} -#endif -/* ========= end header generated by ./mkh ========= */ diff --git a/ext/ereg/regex/mkh b/ext/ereg/regex/mkh deleted file mode 100644 index 252b246c7b..0000000000 --- a/ext/ereg/regex/mkh +++ /dev/null @@ -1,76 +0,0 @@ -#! /bin/sh -# mkh - pull headers out of C source -PATH=/bin:/usr/bin ; export PATH - -# egrep pattern to pick out marked lines -egrep='^ =([ ]|$)' - -# Sed program to process marked lines into lines for the header file. -# The markers have already been removed. Two things are done here: removal -# of backslashed newlines, and some fudging of comments. The first is done -# because -o needs to have prototypes on one line to strip them down. -# Getting comments into the output is tricky; we turn C++-style // comments -# into /* */ comments, after altering any existing */'s to avoid trouble. -peel=' /\\$/N - /\\\n[ ]*/s///g - /\/\//s;\*/;* /;g - /\/\//s;//\(.*\);/*\1 */;' - -for a -do - case "$a" in - -o) # old (pre-function-prototype) compiler - # add code to comment out argument lists - peel="$peel - "'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1(/*\2*/);' - shift - ;; - -b) # funny Berkeley __P macro - peel="$peel - "'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1 __P((\2));' - shift - ;; - -s) # compiler doesn't like `static foo();' - # add code to get rid of the `static' - peel="$peel - "'/^static[ ][^\/]*[a-zA-Z0-9_)](.*)/s;static.;;' - shift - ;; - -p) # private declarations - egrep='^ ==([ ]|$)' - shift - ;; - -i) # wrap in #ifndef, argument is name - ifndef="$2" - shift ; shift - ;; - *) break - ;; - esac -done - -if test " $ifndef" != " " -then - echo "#ifndef $ifndef" - echo "#define $ifndef /* never again */" -fi -echo "/* ========= begin header generated by $0 ========= */" -echo '#ifdef __cplusplus' -echo 'extern "C" {' -echo '#endif' -for f -do - echo - echo "/* === $f === */" - egrep "$egrep" $f | sed 's/^ ==*[ ]//;s/^ ==*$//' | sed "$peel" - echo -done -echo '#ifdef __cplusplus' -echo '}' -echo '#endif' -echo "/* ========= end header generated by $0 ========= */" -if test " $ifndef" != " " -then - echo "#endif" -fi -exit 0 diff --git a/ext/ereg/regex/regcomp.c b/ext/ereg/regex/regcomp.c deleted file mode 100644 index ec034d4c4c..0000000000 --- a/ext/ereg/regex/regcomp.c +++ /dev/null @@ -1,1608 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define POSIX_MISTAKE - -#include "utils.h" -#include "regex.h" -#include "regex2.h" - -#include "cclass.h" -#include "cname.h" - -/* - * parse structure, passed up and down to avoid global variables and - * other clumsinesses - */ -struct parse { - unsigned char *next; /* next character in RE */ - unsigned char *end; /* end of string (-> NUL normally) */ - int error; /* has an error been seen? */ - sop *strip; /* malloced strip */ - sopno ssize; /* malloced strip size (allocated) */ - sopno slen; /* malloced strip length (used) */ - int ncsalloc; /* number of csets allocated */ - struct re_guts *g; -# define NPAREN 10 /* we need to remember () 1-9 for back refs */ - sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ - sopno pend[NPAREN]; /* -> ) ([0] unused) */ -}; - -#include "regcomp.ih" - -static unsigned char nuls[10]; /* place to point scanner in event of error */ - -/* - * macros for use with parse structure - * BEWARE: these know that the parse structure is named `p' !!! - */ -#define PEEK() (*p->next) -#define PEEK2() (*(p->next+1)) -#define MORE() (p->next < p->end) -#define MORE2() (p->next+1 < p->end) -#define SEE(c) (MORE() && PEEK() == (c)) -#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) -#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) -#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) -#define NEXT() (p->next++) -#define NEXT2() (p->next += 2) -#define NEXTn(n) (p->next += (n)) -#define GETNEXT() (*p->next++) -#define SETERROR(e) seterr(p, (e)) -#define REQUIRE(co, e) (void) ((co) || SETERROR(e)) -#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) -#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) -#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) -#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) -#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) -#define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) -#define ASTERN(sop, pos) EMIT(sop, HERE()-pos) -#define HERE() (p->slen) -#define THERE() (p->slen - 1) -#define THERETHERE() (p->slen - 2) -#define DROP(n) (p->slen -= (n)) - -#ifndef NDEBUG -static int never = 0; /* for use in asserts; shuts lint up */ -#else -#define never 0 /* some s have bugs too */ -#endif - -/* - - regcomp - interface for parser and compilation - = API_EXPORT(int) regcomp(regex_t *, const char *, int); - = #define REG_BASIC 0000 - = #define REG_EXTENDED 0001 - = #define REG_ICASE 0002 - = #define REG_NOSUB 0004 - = #define REG_NEWLINE 0010 - = #define REG_NOSPEC 0020 - = #define REG_PEND 0040 - = #define REG_DUMP 0200 - */ -API_EXPORT(int) /* 0 success, otherwise REG_something */ -regcomp(preg, pattern, cflags) -regex_t *preg; -const char *pattern; -int cflags; -{ - struct parse pa; - register struct re_guts *g; - register struct parse *p = &pa; - register int i; - register size_t len; -#ifdef REDEBUG -# define GOODFLAGS(f) (f) -#else -# define GOODFLAGS(f) ((f)&~REG_DUMP) -#endif - - cflags = GOODFLAGS(cflags); - if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) - return(REG_INVARG); - - if (cflags®_PEND) { - if (preg->re_endp < pattern) - return(REG_INVARG); - len = preg->re_endp - pattern; - } else - len = strlen((char *)pattern); - - /* do the mallocs early so failure handling is easy */ - g = (struct re_guts *)malloc(sizeof(struct re_guts) + - (NC-1)*sizeof(cat_t)); - if (g == NULL) - return(REG_ESPACE); - p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ - p->strip = (sop *)malloc(p->ssize * sizeof(sop)); - p->slen = 0; - if (p->strip == NULL) { - free((char *)g); - return(REG_ESPACE); - } - - /* set things up */ - p->g = g; - p->next = (unsigned char *)pattern; /* convenience; we do not modify it */ - p->end = p->next + len; - p->error = 0; - p->ncsalloc = 0; - for (i = 0; i < NPAREN; i++) { - p->pbegin[i] = 0; - p->pend[i] = 0; - } - g->csetsize = NC; - g->sets = NULL; - g->setbits = NULL; - g->ncsets = 0; - g->cflags = cflags; - g->iflags = 0; - g->nbol = 0; - g->neol = 0; - g->must = NULL; - g->mlen = 0; - g->nsub = 0; - g->ncategories = 1; /* category 0 is "everything else" */ - g->categories = &g->catspace[0]; - (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); - g->backrefs = 0; - - /* do it */ - EMIT(OEND, 0); - g->firststate = THERE(); - if (cflags®_EXTENDED) - p_ere(p, OUT); - else if (cflags®_NOSPEC) - p_str(p); - else - p_bre(p, OUT, OUT); - EMIT(OEND, 0); - g->laststate = THERE(); - - /* tidy up loose ends and fill things in */ - categorize(p, g); - stripsnug(p, g); - findmust(p, g); - g->nplus = pluscount(p, g); - g->magic = MAGIC2; - preg->re_nsub = g->nsub; - preg->re_g = g; - preg->re_magic = MAGIC1; -#ifndef REDEBUG - /* not debugging, so can't rely on the assert() in regexec() */ - if (g->iflags&BAD) - SETERROR(REG_ASSERT); -#endif - - /* win or lose, we're done */ - if (p->error != 0) /* lose */ - regfree(preg); - return(p->error); -} - -/* - - p_ere - ERE parser top level, concatenation and alternation - == static void p_ere(register struct parse *p, int stop); - */ -static void -p_ere(p, stop) -register struct parse *p; -int stop; /* character this ERE should end at */ -{ - register unsigned char c; - register sopno prevback = 0; - register sopno prevfwd = 0; - register sopno conc; - register int first = 1; /* is this the first alternative? */ - - for (;;) { - /* do a bunch of concatenated expressions */ - conc = HERE(); - while (MORE() && (c = PEEK()) != '|' && c != stop) - p_ere_exp(p); - (void) REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ - - if (!EAT('|')) - break; /* NOTE BREAK OUT */ - - if (first) { - INSERT(OCH_, conc); /* offset is wrong */ - prevfwd = conc; - prevback = conc; - first = 0; - } - ASTERN(OOR1, prevback); - prevback = THERE(); - AHEAD(prevfwd); /* fix previous offset */ - prevfwd = HERE(); - EMIT(OOR2, 0); /* offset is very wrong */ - } - - if (!first) { /* tail-end fixups */ - AHEAD(prevfwd); - ASTERN(O_CH, prevback); - } - - assert(!MORE() || SEE(stop)); -} - -/* - - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op - == static void p_ere_exp(register struct parse *p); - */ -static void -p_ere_exp(p) -register struct parse *p; -{ - register unsigned char c; - register sopno pos; - register int count; - register int count2; - register sopno subno; - int wascaret = 0; - - assert(MORE()); /* caller should have ensured this */ - c = GETNEXT(); - - pos = HERE(); - switch (c) { - case '(': - REQUIRE(MORE(), REG_EPAREN); - p->g->nsub++; - subno = p->g->nsub; - if (subno < NPAREN) - p->pbegin[subno] = HERE(); - EMIT(OLPAREN, subno); - if (!SEE(')')) - p_ere(p, ')'); - if (subno < NPAREN) { - p->pend[subno] = HERE(); - assert(p->pend[subno] != 0); - } - EMIT(ORPAREN, subno); - MUSTEAT(')', REG_EPAREN); - break; -#ifndef POSIX_MISTAKE - case ')': /* happens only if no current unmatched ( */ - /* - * You may ask, why the ifndef? Because I didn't notice - * this until slightly too late for 1003.2, and none of the - * other 1003.2 regular-expression reviewers noticed it at - * all. So an unmatched ) is legal POSIX, at least until - * we can get it fixed. - */ - SETERROR(REG_EPAREN); - break; -#endif - case '^': - EMIT(OBOL, 0); - p->g->iflags |= USEBOL; - p->g->nbol++; - wascaret = 1; - break; - case '$': - EMIT(OEOL, 0); - p->g->iflags |= USEEOL; - p->g->neol++; - break; - case '|': - SETERROR(REG_EMPTY); - break; - case '*': - case '+': - case '?': - SETERROR(REG_BADRPT); - break; - case '.': - if (p->g->cflags®_NEWLINE) - nonnewline(p); - else - EMIT(OANY, 0); - break; - case '[': - p_bracket(p); - break; - case '\\': - REQUIRE(MORE(), REG_EESCAPE); - c = GETNEXT(); - ordinary(p, c); - break; - case '{': /* okay as ordinary except if digit follows */ - REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); - /* FALLTHROUGH */ - default: - ordinary(p, c); - break; - } - - if (!MORE()) - return; - c = PEEK(); - /* we call { a repetition if followed by a digit */ - if (!( c == '*' || c == '+' || c == '?' || - (c == '{' && MORE2() && isdigit(PEEK2())) )) - return; /* no repetition, we're done */ - NEXT(); - - REQUIRE(!wascaret, REG_BADRPT); - switch (c) { - case '*': /* implemented as +? */ - /* this case does not require the (y|) trick, noKLUDGE */ - INSERT(OPLUS_, pos); - ASTERN(O_PLUS, pos); - INSERT(OQUEST_, pos); - ASTERN(O_QUEST, pos); - break; - case '+': - INSERT(OPLUS_, pos); - ASTERN(O_PLUS, pos); - break; - case '?': - /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ - INSERT(OCH_, pos); /* offset slightly wrong */ - ASTERN(OOR1, pos); /* this one's right */ - AHEAD(pos); /* fix the OCH_ */ - EMIT(OOR2, 0); /* offset very wrong... */ - AHEAD(THERE()); /* ...so fix it */ - ASTERN(O_CH, THERETHERE()); - break; - case '{': - count = p_count(p); - if (EAT(',')) { - if (isdigit(PEEK())) { - count2 = p_count(p); - REQUIRE(count <= count2, REG_BADBR); - } else /* single number with comma */ - count2 = INFINITY; - } else /* just a single number */ - count2 = count; - repeat(p, pos, count, count2); - if (!EAT('}')) { /* error heuristics */ - while (MORE() && PEEK() != '}') - NEXT(); - REQUIRE(MORE(), REG_EBRACE); - SETERROR(REG_BADBR); - } - break; - } - - if (!MORE()) - return; - c = PEEK(); - if (!( c == '*' || c == '+' || c == '?' || - (c == '{' && MORE2() && isdigit(PEEK2())) ) ) - return; - SETERROR(REG_BADRPT); -} - -/* - - p_str - string (no metacharacters) "parser" - == static void p_str(register struct parse *p); - */ -static void -p_str(p) -register struct parse *p; -{ - REQUIRE(MORE(), REG_EMPTY); - while (MORE()) - ordinary(p, GETNEXT()); -} - -/* - - p_bre - BRE parser top level, anchoring and concatenation - == static void p_bre(register struct parse *p, register int end1, \ - == register int end2); - * Giving end1 as OUT essentially eliminates the end1/end2 check. - * - * This implementation is a bit of a kludge, in that a trailing $ is first - * taken as an ordinary character and then revised to be an anchor. The - * only undesirable side effect is that '$' gets included as a character - * category in such cases. This is fairly harmless; not worth fixing. - * The amount of lookahead needed to avoid this kludge is excessive. - */ -static void -p_bre(p, end1, end2) -register struct parse *p; -register int end1; /* first terminating character */ -register int end2; /* second terminating character */ -{ - register sopno start = HERE(); - register int first = 1; /* first subexpression? */ - register int wasdollar = 0; - - if (EAT('^')) { - EMIT(OBOL, 0); - p->g->iflags |= USEBOL; - p->g->nbol++; - } - while (MORE() && !SEETWO(end1, end2)) { - wasdollar = p_simp_re(p, first); - first = 0; - } - if (wasdollar) { /* oops, that was a trailing anchor */ - DROP(1); - EMIT(OEOL, 0); - p->g->iflags |= USEEOL; - p->g->neol++; - } - - REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ -} - -/* - - p_simp_re - parse a simple RE, an atom possibly followed by a repetition - == static int p_simp_re(register struct parse *p, int starordinary); - */ -static int /* was the simple RE an unbackslashed $? */ -p_simp_re(p, starordinary) -register struct parse *p; -int starordinary; /* is a leading * an ordinary character? */ -{ - register int c; - register int count; - register int count2; - register sopno pos; - register int i; - register sopno subno; -# define BACKSL (1<g->cflags®_NEWLINE) - nonnewline(p); - else - EMIT(OANY, 0); - break; - case '[': - p_bracket(p); - break; - case BACKSL|'{': - SETERROR(REG_BADRPT); - break; - case BACKSL|'(': - p->g->nsub++; - subno = p->g->nsub; - if (subno < NPAREN) - p->pbegin[subno] = HERE(); - EMIT(OLPAREN, subno); - /* the MORE here is an error heuristic */ - if (MORE() && !SEETWO('\\', ')')) - p_bre(p, '\\', ')'); - if (subno < NPAREN) { - p->pend[subno] = HERE(); - assert(p->pend[subno] != 0); - } - EMIT(ORPAREN, subno); - REQUIRE(EATTWO('\\', ')'), REG_EPAREN); - break; - case BACKSL|')': /* should not get here -- must be user */ - case BACKSL|'}': - SETERROR(REG_EPAREN); - break; - case BACKSL|'1': - case BACKSL|'2': - case BACKSL|'3': - case BACKSL|'4': - case BACKSL|'5': - case BACKSL|'6': - case BACKSL|'7': - case BACKSL|'8': - case BACKSL|'9': - i = (c&~BACKSL) - '0'; - assert(i < NPAREN); - if (p->pend[i] != 0) { - assert(i <= p->g->nsub); - EMIT(OBACK_, i); - assert(p->pbegin[i] != 0); - assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); - assert(OP(p->strip[p->pend[i]]) == ORPAREN); - (void) dupl(p, p->pbegin[i]+1, p->pend[i]); - EMIT(O_BACK, i); - } else - SETERROR(REG_ESUBREG); - p->g->backrefs = 1; - break; - case '*': - REQUIRE(starordinary, REG_BADRPT); - /* FALLTHROUGH */ - default: - ordinary(p, (unsigned char)c); /* takes off BACKSL, if any */ - break; - } - - if (EAT('*')) { /* implemented as +? */ - /* this case does not require the (y|) trick, noKLUDGE */ - INSERT(OPLUS_, pos); - ASTERN(O_PLUS, pos); - INSERT(OQUEST_, pos); - ASTERN(O_QUEST, pos); - } else if (EATTWO('\\', '{')) { - count = p_count(p); - if (EAT(',')) { - if (MORE() && isdigit(PEEK())) { - count2 = p_count(p); - REQUIRE(count <= count2, REG_BADBR); - } else /* single number with comma */ - count2 = INFINITY; - } else /* just a single number */ - count2 = count; - repeat(p, pos, count, count2); - if (!EATTWO('\\', '}')) { /* error heuristics */ - while (MORE() && !SEETWO('\\', '}')) - NEXT(); - REQUIRE(MORE(), REG_EBRACE); - SETERROR(REG_BADBR); - } - } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ - return(1); - - return(0); -} - -/* - - p_count - parse a repetition count - == static int p_count(register struct parse *p); - */ -static int /* the value */ -p_count(p) -register struct parse *p; -{ - register int count = 0; - register int ndigits = 0; - - while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { - count = count*10 + (GETNEXT() - '0'); - ndigits++; - } - - REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); - return(count); -} - -/* - - p_bracket - parse a bracketed character list - == static void p_bracket(register struct parse *p); - * - * Note a significant property of this code: if the allocset() did SETERROR, - * no set operations are done. - */ -static void -p_bracket(p) -register struct parse *p; -{ - register cset *cs = allocset(p); - register int invert = 0; - - /* Dept of Truly Sickening Special-Case Kludges */ - if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { - EMIT(OBOW, 0); - NEXTn(6); - return; - } - if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { - EMIT(OEOW, 0); - NEXTn(6); - return; - } - - if (EAT('^')) - invert++; /* make note to invert set at end */ - if (EAT(']')) - CHadd(cs, ']'); - else if (EAT('-')) - CHadd(cs, '-'); - while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) - p_b_term(p, cs); - if (EAT('-')) - CHadd(cs, '-'); - MUSTEAT(']', REG_EBRACK); - - if (p->error != 0) /* don't mess things up further */ - return; - - if (p->g->cflags®_ICASE) { - register int i; - register int ci; - - for (i = p->g->csetsize - 1; i >= 0; i--) - if (CHIN(cs, i) && isalpha(i)) { - ci = othercase(i); - if (ci != i) - CHadd(cs, ci); - } - if (cs->multis != NULL) - mccase(p, cs); - } - if (invert) { - register int i; - - for (i = p->g->csetsize - 1; i >= 0; i--) - if (CHIN(cs, i)) - CHsub(cs, i); - else - CHadd(cs, i); - if (p->g->cflags®_NEWLINE) - CHsub(cs, '\n'); - if (cs->multis != NULL) - mcinvert(p, cs); - } - - assert(cs->multis == NULL); /* xxx */ - - if (nch(p, cs) == 1) { /* optimize singleton sets */ - ordinary(p, firstch(p, cs)); - freeset(p, cs); - } else - EMIT(OANYOF, freezeset(p, cs)); -} - -/* - - p_b_term - parse one term of a bracketed character list - == static void p_b_term(register struct parse *p, register cset *cs); - */ -static void -p_b_term(p, cs) -register struct parse *p; -register cset *cs; -{ - register unsigned char c; - register unsigned char start, finish; - register int i; - - /* classify what we've got */ - switch ((MORE()) ? PEEK() : '\0') { - case '[': - c = (MORE2()) ? PEEK2() : '\0'; - break; - case '-': - SETERROR(REG_ERANGE); - return; /* NOTE RETURN */ - break; - default: - c = '\0'; - break; - } - - switch (c) { - case ':': /* character class */ - NEXT2(); - REQUIRE(MORE(), REG_EBRACK); - c = PEEK(); - REQUIRE(c != '-' && c != ']', REG_ECTYPE); - p_b_cclass(p, cs); - REQUIRE(MORE(), REG_EBRACK); - REQUIRE(EATTWO(':', ']'), REG_ECTYPE); - break; - case '=': /* equivalence class */ - NEXT2(); - REQUIRE(MORE(), REG_EBRACK); - c = PEEK(); - REQUIRE(c != '-' && c != ']', REG_ECOLLATE); - p_b_eclass(p, cs); - REQUIRE(MORE(), REG_EBRACK); - REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); - break; - default: /* symbol, ordinary character, or range */ -/* xxx revision needed for multichar stuff */ - start = p_b_symbol(p); - if (SEE('-') && MORE2() && PEEK2() != ']') { - /* range */ - NEXT(); - if (EAT('-')) - finish = '-'; - else - finish = p_b_symbol(p); - } else - finish = start; -/* xxx what about signed chars here... */ - REQUIRE(start <= finish, REG_ERANGE); - for (i = start; i <= finish; i++) - CHadd(cs, i); - break; - } -} - -/* - - p_b_cclass - parse a character-class name and deal with it - == static void p_b_cclass(register struct parse *p, register cset *cs); - */ -static void -p_b_cclass(p, cs) -register struct parse *p; -register cset *cs; -{ - register unsigned char *sp = p->next; - register struct cclass *cp; - register size_t len; - register unsigned char *u; - register unsigned char c; - - while (MORE() && isalpha(PEEK())) - NEXT(); - len = p->next - sp; - for (cp = cclasses; cp->name != NULL; cp++) - if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') - break; - if (cp->name == NULL) { - /* oops, didn't find it */ - SETERROR(REG_ECTYPE); - return; - } - - u = cp->chars; - while ((c = *u++) != '\0') - CHadd(cs, c); - for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) - MCadd(p, cs, u); -} - -/* - - p_b_eclass - parse an equivalence-class name and deal with it - == static void p_b_eclass(register struct parse *p, register cset *cs); - * - * This implementation is incomplete. xxx - */ -static void -p_b_eclass(p, cs) -register struct parse *p; -register cset *cs; -{ - register unsigned char c; - - c = p_b_coll_elem(p, '='); - CHadd(cs, c); -} - -/* - - p_b_symbol - parse a character or [..]ed multicharacter collating symbol - == static char p_b_symbol(register struct parse *p); - */ -static unsigned char /* value of symbol */ -p_b_symbol(p) -register struct parse *p; -{ - register unsigned char value; - - REQUIRE(MORE(), REG_EBRACK); - if (!EATTWO('[', '.')) - return(GETNEXT()); - - /* collating symbol */ - value = p_b_coll_elem(p, '.'); - REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); - return(value); -} - -/* - - p_b_coll_elem - parse a collating-element name and look it up - == static char p_b_coll_elem(register struct parse *p, int endc); - */ -static unsigned char /* value of collating element */ -p_b_coll_elem(p, endc) -register struct parse *p; -int endc; /* name ended by endc,']' */ -{ - register unsigned char *sp = p->next; - register struct cname *cp; - register int len; - - while (MORE() && !SEETWO(endc, ']')) - NEXT(); - if (!MORE()) { - SETERROR(REG_EBRACK); - return(0); - } - len = p->next - sp; - for (cp = cnames; cp->name != NULL; cp++) - if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') - return(cp->code); /* known name */ - if (len == 1) - return(*sp); /* single character */ - SETERROR(REG_ECOLLATE); /* neither */ - return(0); -} - -/* - - othercase - return the case counterpart of an alphabetic - == static char othercase(int ch); - */ -static unsigned char /* if no counterpart, return ch */ -othercase(ch) -int ch; -{ - assert(isalpha(ch)); - if (isupper(ch)) - return(tolower(ch)); - else if (islower(ch)) - return(toupper(ch)); - else /* peculiar, but could happen */ - return(ch); -} - -/* - - bothcases - emit a dualcase version of a two-case character - == static void bothcases(register struct parse *p, int ch); - * - * Boy, is this implementation ever a kludge... - */ -static void -bothcases(p, ch) -register struct parse *p; -int ch; -{ - register unsigned char *oldnext = p->next; - register unsigned char *oldend = p->end; - unsigned char bracket[3]; - - assert(othercase(ch) != ch); /* p_bracket() would recurse */ - p->next = bracket; - p->end = bracket+2; - bracket[0] = ch; - bracket[1] = ']'; - bracket[2] = '\0'; - p_bracket(p); - assert(p->next == bracket+2); - p->next = oldnext; - p->end = oldend; -} - -/* - - ordinary - emit an ordinary character - == static void ordinary(register struct parse *p, register int ch); - */ -static void -ordinary(p, ch) -register struct parse *p; -register int ch; -{ - register cat_t *cap = p->g->categories; - - if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch) - bothcases(p, ch); - else { - EMIT(OCHAR, (unsigned char)ch); - if (cap[ch] == 0) - cap[ch] = p->g->ncategories++; - } -} - -/* - - nonnewline - emit REG_NEWLINE version of OANY - == static void nonnewline(register struct parse *p); - * - * Boy, is this implementation ever a kludge... - */ -static void -nonnewline(p) -register struct parse *p; -{ - register unsigned char *oldnext = p->next; - register unsigned char *oldend = p->end; - unsigned char bracket[4]; - - p->next = bracket; - p->end = bracket+3; - bracket[0] = '^'; - bracket[1] = '\n'; - bracket[2] = ']'; - bracket[3] = '\0'; - p_bracket(p); - assert(p->next == bracket+3); - p->next = oldnext; - p->end = oldend; -} - -/* - - repeat - generate code for a bounded repetition, recursively if needed - == static void repeat(register struct parse *p, sopno start, int from, int to); - */ -static void -repeat(p, start, from, to) -register struct parse *p; -sopno start; /* operand from here to end of strip */ -int from; /* repeated from this number */ -int to; /* to this number of times (maybe INFINITY) */ -{ - register sopno finish = HERE(); -# define N 2 -# define INF 3 -# define REP(f, t) ((f)*8 + (t)) -# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) - register sopno copy; - - if (p->error != 0) /* head off possible runaway recursion */ - return; - - assert(from <= to); - - switch (REP(MAP(from), MAP(to))) { - case REP(0, 0): /* must be user doing this */ - DROP(finish-start); /* drop the operand */ - break; - case REP(0, 1): /* as x{1,1}? */ - case REP(0, N): /* as x{1,n}? */ - case REP(0, INF): /* as x{1,}? */ - /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ - INSERT(OCH_, start); /* offset is wrong... */ - repeat(p, start+1, 1, to); - ASTERN(OOR1, start); - AHEAD(start); /* ... fix it */ - EMIT(OOR2, 0); - AHEAD(THERE()); - ASTERN(O_CH, THERETHERE()); - break; - case REP(1, 1): /* trivial case */ - /* done */ - break; - case REP(1, N): /* as x?x{1,n-1} */ - /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ - INSERT(OCH_, start); - ASTERN(OOR1, start); - AHEAD(start); - EMIT(OOR2, 0); /* offset very wrong... */ - AHEAD(THERE()); /* ...so fix it */ - ASTERN(O_CH, THERETHERE()); - copy = dupl(p, start+1, finish+1); - assert(copy == finish+4); - repeat(p, copy, 1, to-1); - break; - case REP(1, INF): /* as x+ */ - INSERT(OPLUS_, start); - ASTERN(O_PLUS, start); - break; - case REP(N, N): /* as xx{m-1,n-1} */ - copy = dupl(p, start, finish); - repeat(p, copy, from-1, to-1); - break; - case REP(N, INF): /* as xx{n-1,INF} */ - copy = dupl(p, start, finish); - repeat(p, copy, from-1, to); - break; - default: /* "can't happen" */ - SETERROR(REG_ASSERT); /* just in case */ - break; - } -} - -/* - - seterr - set an error condition - == static int seterr(register struct parse *p, int e); - */ -static int /* useless but makes type checking happy */ -seterr(p, e) -register struct parse *p; -int e; -{ - if (p->error == 0) /* keep earliest error condition */ - p->error = e; - p->next = nuls; /* try to bring things to a halt */ - p->end = nuls; - return(0); /* make the return value well-defined */ -} - -/* - - allocset - allocate a set of characters for [] - == static cset *allocset(register struct parse *p); - */ -static cset * -allocset(p) -register struct parse *p; -{ - register int no = p->g->ncsets++; - register size_t nc; - register size_t nbytes; - register cset *cs; - register size_t css = (size_t)p->g->csetsize; - register int i; - - if (no >= p->ncsalloc) { /* need another column of space */ - p->ncsalloc += CHAR_BIT; - nc = p->ncsalloc; - assert(nc % CHAR_BIT == 0); - nbytes = nc / CHAR_BIT * css; - if (p->g->sets == NULL) - p->g->sets = (cset *)malloc(nc * sizeof(cset)); - else - p->g->sets = (cset *)realloc((unsigned char *)p->g->sets, - nc * sizeof(cset)); - if (p->g->setbits == NULL) - p->g->setbits = (uch *)malloc(nbytes); - else { - p->g->setbits = (uch *)realloc((unsigned char *)p->g->setbits, - nbytes); - /* xxx this isn't right if setbits is now NULL */ - for (i = 0; i < no; i++) - p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); - } - if (p->g->sets != NULL && p->g->setbits != NULL) - (void) memset((unsigned char *)p->g->setbits + (nbytes - css), - 0, css); - else { - no = 0; - SETERROR(REG_ESPACE); - /* caller's responsibility not to do set ops */ - } - } - - assert(p->g->sets != NULL); /* xxx */ - cs = &p->g->sets[no]; - cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); - cs->mask = 1 << ((no) % CHAR_BIT); - cs->hash = 0; - cs->smultis = 0; - cs->multis = NULL; - - return(cs); -} - -/* - - freeset - free a now-unused set - == static void freeset(register struct parse *p, register cset *cs); - */ -static void -freeset(p, cs) -register struct parse *p; -register cset *cs; -{ - register size_t i; - register cset *top = &p->g->sets[p->g->ncsets]; - register size_t css = (size_t)p->g->csetsize; - - for (i = 0; i < css; i++) - CHsub(cs, i); - if (cs == top-1) /* recover only the easy case */ - p->g->ncsets--; -} - -/* - - freezeset - final processing on a set of characters - == static int freezeset(register struct parse *p, register cset *cs); - * - * The main task here is merging identical sets. This is usually a waste - * of time (although the hash code minimizes the overhead), but can win - * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash - * is done using addition rather than xor -- all ASCII [aA] sets xor to - * the same value! - */ -static int /* set number */ -freezeset(p, cs) -register struct parse *p; -register cset *cs; -{ - register uch h = cs->hash; - register size_t i; - register cset *top = &p->g->sets[p->g->ncsets]; - register cset *cs2; - register size_t css = (size_t)p->g->csetsize; - - /* look for an earlier one which is the same */ - for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) - if (cs2->hash == h && cs2 != cs) { - /* maybe */ - for (i = 0; i < css; i++) - if (!!CHIN(cs2, i) != !!CHIN(cs, i)) - break; /* no */ - if (i == css) - break; /* yes */ - } - - if (cs2 < top) { /* found one */ - freeset(p, cs); - cs = cs2; - } - - return((int)(cs - p->g->sets)); -} - -/* - - firstch - return first character in a set (which must have at least one) - == static int firstch(register struct parse *p, register cset *cs); - */ -static int /* character; there is no "none" value */ -firstch(p, cs) -register struct parse *p; -register cset *cs; -{ - register size_t i; - register size_t css = (size_t)p->g->csetsize; - - for (i = 0; i < css; i++) - if (CHIN(cs, i)) - return((unsigned char)i); - assert(never); - return(0); /* arbitrary */ -} - -/* - - nch - number of characters in a set - == static int nch(register struct parse *p, register cset *cs); - */ -static int -nch(p, cs) -register struct parse *p; -register cset *cs; -{ - register size_t i; - register size_t css = (size_t)p->g->csetsize; - register int n = 0; - - for (i = 0; i < css; i++) - if (CHIN(cs, i)) - n++; - return(n); -} - -/* - - mcadd - add a collating element to a cset - == static void mcadd(register struct parse *p, register cset *cs, \ - == register char *cp); - */ -static void -mcadd(p, cs, cp) -register struct parse *p; -register cset *cs; -register unsigned char *cp; -{ - register size_t oldend = cs->smultis; - - cs->smultis += strlen(cp) + 1; - if (cs->multis == NULL) - cs->multis = malloc(cs->smultis); - else - cs->multis = realloc(cs->multis, cs->smultis); - if (cs->multis == NULL) { - SETERROR(REG_ESPACE); - return; - } - - (void) strcpy(cs->multis + oldend - 1, cp); - cs->multis[cs->smultis - 1] = '\0'; -} - -#if 0 -/* - - mcsub - subtract a collating element from a cset - == static void mcsub(register cset *cs, register unsigned char *cp); - */ -static void -mcsub(cs, cp) -register unsigned cset *cs; -register unsigned char *cp; -{ - register unsigned char *fp = mcfind(cs, cp); - register size_t len = strlen(fp); - - assert(fp != NULL); - (void) memmove(fp, fp + len + 1, - cs->smultis - (fp + len + 1 - cs->multis)); - cs->smultis -= len; - - if (cs->smultis == 0) { - free(cs->multis); - cs->multis = NULL; - return; - } - - cs->multis = realloc(cs->multis, cs->smultis); - assert(cs->multis != NULL); -} - -/* - - mcin - is a collating element in a cset? - == static int mcin(register cset *cs, register unsigned char *cp); - */ -static int -mcin(cs, cp) -register cset *cs; -register unsigned char *cp; -{ - return(mcfind(cs, cp) != NULL); -} - - -/* - - mcfind - find a collating element in a cset - == static unsigned char *mcfind(register cset *cs, register unsigned char *cp); - */ -static unsigned char * -mcfind(cs, cp) -register cset *cs; -register unsigned char *cp; -{ - register unsigned char *p; - - if (cs->multis == NULL) - return(NULL); - for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) - if (strcmp(cp, p) == 0) - return(p); - return(NULL); -} -#endif - -/* - - mcinvert - invert the list of collating elements in a cset - == static void mcinvert(register struct parse *p, register cset *cs); - * - * This would have to know the set of possibilities. Implementation - * is deferred. - */ -static void -mcinvert(p, cs) -register struct parse *p; -register cset *cs; -{ - assert(cs->multis == NULL); /* xxx */ -} - -/* - - mccase - add case counterparts of the list of collating elements in a cset - == static void mccase(register struct parse *p, register cset *cs); - * - * This would have to know the set of possibilities. Implementation - * is deferred. - */ -static void -mccase(p, cs) -register struct parse *p; -register cset *cs; -{ - assert(cs->multis == NULL); /* xxx */ -} - -/* - - isinsets - is this character in any sets? - == static int isinsets(register struct re_guts *g, int c); - */ -static int /* predicate */ -isinsets(g, c) -register struct re_guts *g; -int c; -{ - register uch *col; - register int i; - register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; - register unsigned uc = (unsigned char)c; - - for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) - if (col[uc] != 0) - return(1); - return(0); -} - -/* - - samesets - are these two characters in exactly the same sets? - == static int samesets(register struct re_guts *g, int c1, int c2); - */ -static int /* predicate */ -samesets(g, c1, c2) -register struct re_guts *g; -int c1; -int c2; -{ - register uch *col; - register int i; - register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; - register unsigned uc1 = (unsigned char)c1; - register unsigned uc2 = (unsigned char)c2; - - for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) - if (col[uc1] != col[uc2]) - return(0); - return(1); -} - -/* - - categorize - sort out character categories - == static void categorize(struct parse *p, register struct re_guts *g); - */ -static void -categorize(p, g) -struct parse *p; -register struct re_guts *g; -{ - register cat_t *cats = g->categories; - register int c; - register int c2; - register cat_t cat; - - /* avoid making error situations worse */ - if (p->error != 0) - return; - - for (c = 0; c <= UCHAR_MAX; c++) - if (cats[c] == 0 && isinsets(g, c)) { - cat = g->ncategories++; - cats[c] = cat; - for (c2 = c+1; c2 <= UCHAR_MAX; c2++) - if (cats[c2] == 0 && samesets(g, c, c2)) - cats[c2] = cat; - } -} - -/* - - dupl - emit a duplicate of a bunch of sops - == static sopno dupl(register struct parse *p, sopno start, sopno finish); - */ -static sopno /* start of duplicate */ -dupl(p, start, finish) -register struct parse *p; -sopno start; /* from here */ -sopno finish; /* to this less one */ -{ - register sopno ret = HERE(); - register sopno len = finish - start; - - assert(finish >= start); - if (len == 0) - return(ret); - enlarge(p, p->ssize + len); /* this many unexpected additions */ - assert(p->ssize >= p->slen + len); - (void) memcpy((char *)(p->strip + p->slen), - (char *)(p->strip + start), (size_t)len*sizeof(sop)); - p->slen += len; - return(ret); -} - -/* - - doemit - emit a strip operator - == static void doemit(register struct parse *p, sop op, size_t opnd); - * - * It might seem better to implement this as a macro with a function as - * hard-case backup, but it's just too big and messy unless there are - * some changes to the data structures. Maybe later. - */ -static void -doemit(p, op, opnd) -register struct parse *p; -sop op; -size_t opnd; -{ - /* avoid making error situations worse */ - if (p->error != 0) - return; - - /* deal with oversize operands ("can't happen", more or less) */ - assert(opnd < 1<slen >= p->ssize) - enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ - assert(p->slen < p->ssize); - - /* finally, it's all reduced to the easy case */ - p->strip[p->slen++] = SOP(op, opnd); -} - -/* - - doinsert - insert a sop into the strip - == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); - */ -static void -doinsert(p, op, opnd, pos) -register struct parse *p; -sop op; -size_t opnd; -sopno pos; -{ - register sopno sn; - register sop s; - register int i; - - /* avoid making error situations worse */ - if (p->error != 0) - return; - - sn = HERE(); - EMIT(op, opnd); /* do checks, ensure space */ - assert(HERE() == sn+1); - s = p->strip[sn]; - - /* adjust paren pointers */ - assert(pos > 0); - for (i = 1; i < NPAREN; i++) { - if (p->pbegin[i] >= pos) { - p->pbegin[i]++; - } - if (p->pend[i] >= pos) { - p->pend[i]++; - } - } - - memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], - (HERE()-pos-1)*sizeof(sop)); - p->strip[pos] = s; -} - -/* - - dofwd - complete a forward reference - == static void dofwd(register struct parse *p, sopno pos, sop value); - */ -static void -dofwd(p, pos, value) -register struct parse *p; -register sopno pos; -sop value; -{ - /* avoid making error situations worse */ - if (p->error != 0) - return; - - assert(value < 1<strip[pos] = OP(p->strip[pos]) | value; -} - -/* - - enlarge - enlarge the strip - == static void enlarge(register struct parse *p, sopno size); - */ -static void -enlarge(p, size) -register struct parse *p; -register sopno size; -{ - register sop *sp; - - if (p->ssize >= size) - return; - - sp = (sop *)realloc(p->strip, size*sizeof(sop)); - if (sp == NULL) { - SETERROR(REG_ESPACE); - return; - } - p->strip = sp; - p->ssize = size; -} - -/* - - stripsnug - compact the strip - == static void stripsnug(register struct parse *p, register struct re_guts *g); - */ -static void -stripsnug(p, g) -register struct parse *p; -register struct re_guts *g; -{ - g->nstates = p->slen; - g->strip = (sop *)realloc((unsigned char *)p->strip, p->slen * sizeof(sop)); - if (g->strip == NULL) { - SETERROR(REG_ESPACE); - g->strip = p->strip; - } -} - -/* - - findmust - fill in must and mlen with longest mandatory literal string - == static void findmust(register struct parse *p, register struct re_guts *g); - * - * This algorithm could do fancy things like analyzing the operands of | - * for common subsequences. Someday. This code is simple and finds most - * of the interesting cases. - * - * Note that must and mlen got initialized during setup. - */ -static void -findmust(p, g) -struct parse *p; -register struct re_guts *g; -{ - register sop *scan; - sop *start = NULL; - register sop *newstart = NULL; - register sopno newlen; - register sop s; - register unsigned char *cp; - register sopno i; - - /* avoid making error situations worse */ - if (p->error != 0) - return; - - /* find the longest OCHAR sequence in strip */ - newlen = 0; - scan = g->strip + 1; - do { - s = *scan++; - switch (OP(s)) { - case OCHAR: /* sequence member */ - if (newlen == 0) /* new sequence */ - newstart = scan - 1; - newlen++; - break; - case OPLUS_: /* things that don't break one */ - case OLPAREN: - case ORPAREN: - break; - case OQUEST_: /* things that must be skipped */ - case OCH_: - scan--; - do { - scan += OPND(s); - s = *scan; - /* assert() interferes w debug printouts */ - if (OP(s) != O_QUEST && OP(s) != O_CH && - OP(s) != OOR2) { - g->iflags |= BAD; - return; - } - } while (OP(s) != O_QUEST && OP(s) != O_CH); - /* fallthrough */ - default: /* things that break a sequence */ - if (newlen > g->mlen) { /* ends one */ - start = newstart; - g->mlen = newlen; - } - newlen = 0; - break; - } - } while (OP(s) != OEND); - - if (g->mlen == 0) /* there isn't one */ - return; - - /* turn it into a character string */ - g->must = malloc((size_t)g->mlen + 1); - if (g->must == NULL) { /* argh; just forget it */ - g->mlen = 0; - return; - } - cp = g->must; - scan = start; - for (i = g->mlen; i > 0; i--) { - while (OP(s = *scan++) != OCHAR) - continue; - assert(cp < g->must + g->mlen); - *cp++ = (unsigned char)OPND(s); - } - assert(cp == g->must + g->mlen); - *cp++ = '\0'; /* just on general principles */ -} - -/* - - pluscount - count + nesting - == static sopno pluscount(register struct parse *p, register struct re_guts *g); - */ -static sopno /* nesting depth */ -pluscount(p, g) -struct parse *p; -register struct re_guts *g; -{ - register sop *scan; - register sop s; - register sopno plusnest = 0; - register sopno maxnest = 0; - - if (p->error != 0) - return(0); /* there may not be an OEND */ - - scan = g->strip + 1; - do { - s = *scan++; - switch (OP(s)) { - case OPLUS_: - plusnest++; - break; - case O_PLUS: - if (plusnest > maxnest) - maxnest = plusnest; - plusnest--; - break; - } - } while (OP(s) != OEND); - if (plusnest != 0) - g->iflags |= BAD; - return(maxnest); -} diff --git a/ext/ereg/regex/regcomp.ih b/ext/ereg/regex/regcomp.ih deleted file mode 100644 index c93d32e51d..0000000000 --- a/ext/ereg/regex/regcomp.ih +++ /dev/null @@ -1,53 +0,0 @@ -/* ========= begin header generated by ./mkh ========= */ -#ifdef __cplusplus -extern "C" { -#endif - -/* === regcomp.c === */ -static void p_ere(register struct parse *p, int stop); -static void p_ere_exp(register struct parse *p); -static void p_str(register struct parse *p); -static void p_bre(register struct parse *p, register int end1, register int end2); -static int p_simp_re(register struct parse *p, int starordinary); -static int p_count(register struct parse *p); -static void p_bracket(register struct parse *p); -static void p_b_term(register struct parse *p, register cset *cs); -static void p_b_cclass(register struct parse *p, register cset *cs); -static void p_b_eclass(register struct parse *p, register cset *cs); -static unsigned char p_b_symbol(register struct parse *p); -static unsigned char p_b_coll_elem(register struct parse *p, int endc); -static unsigned char othercase(int ch); -static void bothcases(register struct parse *p, int ch); -static void ordinary(register struct parse *p, register int ch); -static void nonnewline(register struct parse *p); -static void repeat(register struct parse *p, sopno start, int from, int to); -static int seterr(register struct parse *p, int e); -static cset *allocset(register struct parse *p); -static void freeset(register struct parse *p, register cset *cs); -static int freezeset(register struct parse *p, register cset *cs); -static int firstch(register struct parse *p, register cset *cs); -static int nch(register struct parse *p, register cset *cs); -static void mcadd(register struct parse *p, register cset *cs, register unsigned char *cp); -#if 0 -static void mcsub(register cset *cs, register unsigned char *cp); -static int mcin(register cset *cs, register unsigned char *cp); -static unsigned char *mcfind(register cset *cs, register unsigned char *cp); -#endif -static void mcinvert(register struct parse *p, register cset *cs); -static void mccase(register struct parse *p, register cset *cs); -static int isinsets(register struct re_guts *g, int c); -static int samesets(register struct re_guts *g, int c1, int c2); -static void categorize(struct parse *p, register struct re_guts *g); -static sopno dupl(register struct parse *p, sopno start, sopno finish); -static void doemit(register struct parse *p, sop op, size_t opnd); -static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); -static void dofwd(register struct parse *p, sopno pos, sop value); -static void enlarge(register struct parse *p, sopno size); -static void stripsnug(register struct parse *p, register struct re_guts *g); -static void findmust(register struct parse *p, register struct re_guts *g); -static sopno pluscount(register struct parse *p, register struct re_guts *g); - -#ifdef __cplusplus -} -#endif -/* ========= end header generated by ./mkh ========= */ diff --git a/ext/ereg/regex/regerror.c b/ext/ereg/regex/regerror.c deleted file mode 100644 index bf71e3a814..0000000000 --- a/ext/ereg/regex/regerror.c +++ /dev/null @@ -1,126 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include "utils.h" -#include "regerror.ih" - -/* - = #define REG_OKAY 0 - = #define REG_NOMATCH 1 - = #define REG_BADPAT 2 - = #define REG_ECOLLATE 3 - = #define REG_ECTYPE 4 - = #define REG_EESCAPE 5 - = #define REG_ESUBREG 6 - = #define REG_EBRACK 7 - = #define REG_EPAREN 8 - = #define REG_EBRACE 9 - = #define REG_BADBR 10 - = #define REG_ERANGE 11 - = #define REG_ESPACE 12 - = #define REG_BADRPT 13 - = #define REG_EMPTY 14 - = #define REG_ASSERT 15 - = #define REG_INVARG 16 - = #define REG_ATOI 255 // convert name to number (!) - = #define REG_ITOA 0400 // convert number to name (!) - */ -static struct rerr { - int code; - char *name; - char *explain; -} rerrs[] = { - {REG_OKAY, "REG_OKAY", "no errors detected"}, - {REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match"}, - {REG_BADPAT, "REG_BADPAT", "invalid regular expression"}, - {REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element"}, - {REG_ECTYPE, "REG_ECTYPE", "invalid character class"}, - {REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)"}, - {REG_ESUBREG, "REG_ESUBREG", "invalid backreference number"}, - {REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced"}, - {REG_EPAREN, "REG_EPAREN", "parentheses not balanced"}, - {REG_EBRACE, "REG_EBRACE", "braces not balanced"}, - {REG_BADBR, "REG_BADBR", "invalid repetition count(s)"}, - {REG_ERANGE, "REG_ERANGE", "invalid character range"}, - {REG_ESPACE, "REG_ESPACE", "out of memory"}, - {REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid"}, - {REG_EMPTY, "REG_EMPTY", "empty (sub)expression"}, - {REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug"}, - {REG_INVARG, "REG_INVARG", "invalid argument to regex routine"}, - {-1, "", "*** unknown regexp error code ***"}, -}; - -/* - - regerror - the interface to error numbers - = API_EXPORT(size_t) regerror(int, const regex_t *, char *, size_t); - */ -/* ARGSUSED */ -API_EXPORT(size_t) -regerror(errcode, preg, errbuf, errbuf_size) -int errcode; -const regex_t *preg; -char *errbuf; -size_t errbuf_size; -{ - register struct rerr *r; - register size_t len; - register int target = errcode &~ REG_ITOA; - register char *s; - char convbuf[50]; - - if (errcode == REG_ATOI) - s = regatoi(preg, convbuf); - else { - for (r = rerrs; r->code >= 0; r++) - if (r->code == target) - break; - - if (errcode®_ITOA) { - if (r->code >= 0) - (void) strcpy(convbuf, r->name); - else - sprintf(convbuf, "REG_0x%x", target); - assert(strlen(convbuf) < sizeof(convbuf)); - s = convbuf; - } else - s = r->explain; - } - - len = strlen(s) + 1; - if (errbuf_size > 0) { - if (errbuf_size > len) - (void) strcpy(errbuf, s); - else { - (void) strncpy(errbuf, s, errbuf_size-1); - errbuf[errbuf_size-1] = '\0'; - } - } - - return(len); -} - -/* - - regatoi - internal routine to implement REG_ATOI - == static char *regatoi(const regex_t *preg, char *localbuf); - */ -static char * -regatoi(preg, localbuf) -const regex_t *preg; -char *localbuf; -{ - register struct rerr *r; - - for (r = rerrs; r->code >= 0; r++) - if (strcmp(r->name, preg->re_endp) == 0) - break; - if (r->code < 0) - return("0"); - - sprintf(localbuf, "%d", r->code); - return(localbuf); -} diff --git a/ext/ereg/regex/regerror.ih b/ext/ereg/regex/regerror.ih deleted file mode 100644 index 2cb668c24f..0000000000 --- a/ext/ereg/regex/regerror.ih +++ /dev/null @@ -1,12 +0,0 @@ -/* ========= begin header generated by ./mkh ========= */ -#ifdef __cplusplus -extern "C" { -#endif - -/* === regerror.c === */ -static char *regatoi(const regex_t *preg, char *localbuf); - -#ifdef __cplusplus -} -#endif -/* ========= end header generated by ./mkh ========= */ diff --git a/ext/ereg/regex/regex.3 b/ext/ereg/regex/regex.3 deleted file mode 100644 index 100c8a7f71..0000000000 --- a/ext/ereg/regex/regex.3 +++ /dev/null @@ -1,502 +0,0 @@ -.TH REGEX 3 "17 May 1993" -.BY "Henry Spencer" -.de ZR -.\" one other place knows this name: the SEE ALSO section -.IR regex (7) \\$1 -.. -.SH NAME -regcomp, regexec, regerror, regfree \- regular-expression library -.SH SYNOPSIS -.ft B -.\".na -#include -.br -#include -.HP 10 -int regcomp(regex_t\ *preg, const\ char\ *pattern, int\ cflags); -.HP -int\ regexec(const\ regex_t\ *preg, const\ char\ *string, -size_t\ nmatch, regmatch_t\ pmatch[], int\ eflags); -.HP -size_t\ regerror(int\ errcode, const\ regex_t\ *preg, -char\ *errbuf, size_t\ errbuf_size); -.HP -void\ regfree(regex_t\ *preg); -.\".ad -.ft -.SH DESCRIPTION -These routines implement POSIX 1003.2 regular expressions (``RE''s); -see -.ZR . -.I Regcomp -compiles an RE written as a string into an internal form, -.I regexec -matches that internal form against a string and reports results, -.I regerror -transforms error codes from either into human-readable messages, -and -.I regfree -frees any dynamically-allocated storage used by the internal form -of an RE. -.PP -The header -.I -declares two structure types, -.I regex_t -and -.IR regmatch_t , -the former for compiled internal forms and the latter for match reporting. -It also declares the four functions, -a type -.IR regoff_t , -and a number of constants with names starting with ``REG_''. -.PP -.I Regcomp -compiles the regular expression contained in the -.I pattern -string, -subject to the flags in -.IR cflags , -and places the results in the -.I regex_t -structure pointed to by -.IR preg . -.I Cflags -is the bitwise OR of zero or more of the following flags: -.IP REG_EXTENDED \w'REG_EXTENDED'u+2n -Compile modern (``extended'') REs, -rather than the obsolete (``basic'') REs that -are the default. -.IP REG_BASIC -This is a synonym for 0, -provided as a counterpart to REG_EXTENDED to improve readability. -.IP REG_NOSPEC -Compile with recognition of all special characters turned off. -All characters are thus considered ordinary, -so the ``RE'' is a literal string. -This is an extension, -compatible with but not specified by POSIX 1003.2, -and should be used with -caution in software intended to be portable to other systems. -REG_EXTENDED and REG_NOSPEC may not be used -in the same call to -.IR regcomp . -.IP REG_ICASE -Compile for matching that ignores upper/lower case distinctions. -See -.ZR . -.IP REG_NOSUB -Compile for matching that need only report success or failure, -not what was matched. -.IP REG_NEWLINE -Compile for newline-sensitive matching. -By default, newline is a completely ordinary character with no special -meaning in either REs or strings. -With this flag, -`[^' bracket expressions and `.' never match newline, -a `^' anchor matches the null string after any newline in the string -in addition to its normal function, -and the `$' anchor matches the null string before any newline in the -string in addition to its normal function. -.IP REG_PEND -The regular expression ends, -not at the first NUL, -but just before the character pointed to by the -.I re_endp -member of the structure pointed to by -.IR preg . -The -.I re_endp -member is of type -.IR const\ char\ * . -This flag permits inclusion of NULs in the RE; -they are considered ordinary characters. -This is an extension, -compatible with but not specified by POSIX 1003.2, -and should be used with -caution in software intended to be portable to other systems. -.PP -When successful, -.I regcomp -returns 0 and fills in the structure pointed to by -.IR preg . -One member of that structure -(other than -.IR re_endp ) -is publicized: -.IR re_nsub , -of type -.IR size_t , -contains the number of parenthesized subexpressions within the RE -(except that the value of this member is undefined if the -REG_NOSUB flag was used). -If -.I regcomp -fails, it returns a non-zero error code; -see DIAGNOSTICS. -.PP -.I Regexec -matches the compiled RE pointed to by -.I preg -against the -.IR string , -subject to the flags in -.IR eflags , -and reports results using -.IR nmatch , -.IR pmatch , -and the returned value. -The RE must have been compiled by a previous invocation of -.IR regcomp . -The compiled form is not altered during execution of -.IR regexec , -so a single compiled RE can be used simultaneously by multiple threads. -.PP -By default, -the NUL-terminated string pointed to by -.I string -is considered to be the text of an entire line, minus any terminating -newline. -The -.I eflags -argument is the bitwise OR of zero or more of the following flags: -.IP REG_NOTBOL \w'REG_STARTEND'u+2n -The first character of -the string -is not the beginning of a line, so the `^' anchor should not match before it. -This does not affect the behavior of newlines under REG_NEWLINE. -.IP REG_NOTEOL -The NUL terminating -the string -does not end a line, so the `$' anchor should not match before it. -This does not affect the behavior of newlines under REG_NEWLINE. -.IP REG_STARTEND -The string is considered to start at -\fIstring\fR\ + \fIpmatch\fR[0].\fIrm_so\fR -and to have a terminating NUL located at -\fIstring\fR\ + \fIpmatch\fR[0].\fIrm_eo\fR -(there need not actually be a NUL at that location), -regardless of the value of -.IR nmatch . -See below for the definition of -.IR pmatch -and -.IR nmatch . -This is an extension, -compatible with but not specified by POSIX 1003.2, -and should be used with -caution in software intended to be portable to other systems. -Note that a non-zero \fIrm_so\fR does not imply REG_NOTBOL; -REG_STARTEND affects only the location of the string, -not how it is matched. -.PP -See -.ZR -for a discussion of what is matched in situations where an RE or a -portion thereof could match any of several substrings of -.IR string . -.PP -Normally, -.I regexec -returns 0 for success and the non-zero code REG_NOMATCH for failure. -Other non-zero error codes may be returned in exceptional situations; -see DIAGNOSTICS. -.PP -If REG_NOSUB was specified in the compilation of the RE, -or if -.I nmatch -is 0, -.I regexec -ignores the -.I pmatch -argument (but see below for the case where REG_STARTEND is specified). -Otherwise, -.I pmatch -points to an array of -.I nmatch -structures of type -.IR regmatch_t . -Such a structure has at least the members -.I rm_so -and -.IR rm_eo , -both of type -.I regoff_t -(a signed arithmetic type at least as large as an -.I off_t -and a -.IR ssize_t ), -containing respectively the offset of the first character of a substring -and the offset of the first character after the end of the substring. -Offsets are measured from the beginning of the -.I string -argument given to -.IR regexec . -An empty substring is denoted by equal offsets, -both indicating the character following the empty substring. -.PP -The 0th member of the -.I pmatch -array is filled in to indicate what substring of -.I string -was matched by the entire RE. -Remaining members report what substring was matched by parenthesized -subexpressions within the RE; -member -.I i -reports subexpression -.IR i , -with subexpressions counted (starting at 1) by the order of their opening -parentheses in the RE, left to right. -Unused entries in the array\(emcorresponding either to subexpressions that -did not participate in the match at all, or to subexpressions that do not -exist in the RE (that is, \fIi\fR\ > \fIpreg\fR\->\fIre_nsub\fR)\(emhave both -.I rm_so -and -.I rm_eo -set to \-1. -If a subexpression participated in the match several times, -the reported substring is the last one it matched. -(Note, as an example in particular, that when the RE `(b*)+' matches `bbb', -the parenthesized subexpression matches each of the three `b's and then -an infinite number of empty strings following the last `b', -so the reported substring is one of the empties.) -.PP -If REG_STARTEND is specified, -.I pmatch -must point to at least one -.I regmatch_t -(even if -.I nmatch -is 0 or REG_NOSUB was specified), -to hold the input offsets for REG_STARTEND. -Use for output is still entirely controlled by -.IR nmatch ; -if -.I nmatch -is 0 or REG_NOSUB was specified, -the value of -.IR pmatch [0] -will not be changed by a successful -.IR regexec . -.PP -.I Regerror -maps a non-zero -.I errcode -from either -.I regcomp -or -.I regexec -to a human-readable, printable message. -If -.I preg -is non-NULL, -the error code should have arisen from use of -the -.I regex_t -pointed to by -.IR preg , -and if the error code came from -.IR regcomp , -it should have been the result from the most recent -.I regcomp -using that -.IR regex_t . -.RI ( Regerror -may be able to supply a more detailed message using information -from the -.IR regex_t .) -.I Regerror -places the NUL-terminated message into the buffer pointed to by -.IR errbuf , -limiting the length (including the NUL) to at most -.I errbuf_size -bytes. -If the whole message won't fit, -as much of it as will fit before the terminating NUL is supplied. -In any case, -the returned value is the size of buffer needed to hold the whole -message (including terminating NUL). -If -.I errbuf_size -is 0, -.I errbuf -is ignored but the return value is still correct. -.PP -If the -.I errcode -given to -.I regerror -is first ORed with REG_ITOA, -the ``message'' that results is the printable name of the error code, -e.g. ``REG_NOMATCH'', -rather than an explanation thereof. -If -.I errcode -is REG_ATOI, -then -.I preg -shall be non-NULL and the -.I re_endp -member of the structure it points to -must point to the printable name of an error code; -in this case, the result in -.I errbuf -is the decimal digits of -the numeric value of the error code -(0 if the name is not recognized). -REG_ITOA and REG_ATOI are intended primarily as debugging facilities; -they are extensions, -compatible with but not specified by POSIX 1003.2, -and should be used with -caution in software intended to be portable to other systems. -Be warned also that they are considered experimental and changes are possible. -.PP -.I Regfree -frees any dynamically-allocated storage associated with the compiled RE -pointed to by -.IR preg . -The remaining -.I regex_t -is no longer a valid compiled RE -and the effect of supplying it to -.I regexec -or -.I regerror -is undefined. -.PP -None of these functions references global variables except for tables -of constants; -all are safe for use from multiple threads if the arguments are safe. -.SH IMPLEMENTATION CHOICES -There are a number of decisions that 1003.2 leaves up to the implementor, -either by explicitly saying ``undefined'' or by virtue of them being -forbidden by the RE grammar. -This implementation treats them as follows. -.PP -See -.ZR -for a discussion of the definition of case-independent matching. -.PP -There is no particular limit on the length of REs, -except insofar as memory is limited. -Memory usage is approximately linear in RE size, and largely insensitive -to RE complexity, except for bounded repetitions. -See BUGS for one short RE using them -that will run almost any system out of memory. -.PP -A backslashed character other than one specifically given a magic meaning -by 1003.2 (such magic meanings occur only in obsolete [``basic''] REs) -is taken as an ordinary character. -.PP -Any unmatched [ is a REG_EBRACK error. -.PP -Equivalence classes cannot begin or end bracket-expression ranges. -The endpoint of one range cannot begin another. -.PP -RE_DUP_MAX, the limit on repetition counts in bounded repetitions, is 255. -.PP -A repetition operator (?, *, +, or bounds) cannot follow another -repetition operator. -A repetition operator cannot begin an expression or subexpression -or follow `^' or `|'. -.PP -`|' cannot appear first or last in a (sub)expression or after another `|', -i.e. an operand of `|' cannot be an empty subexpression. -An empty parenthesized subexpression, `()', is legal and matches an -empty (sub)string. -An empty string is not a legal RE. -.PP -A `{' followed by a digit is considered the beginning of bounds for a -bounded repetition, which must then follow the syntax for bounds. -A `{' \fInot\fR followed by a digit is considered an ordinary character. -.PP -`^' and `$' beginning and ending subexpressions in obsolete (``basic'') -REs are anchors, not ordinary characters. -.SH SEE ALSO -grep(1), regex(7) -.PP -POSIX 1003.2, sections 2.8 (Regular Expression Notation) -and -B.5 (C Binding for Regular Expression Matching). -.SH DIAGNOSTICS -Non-zero error codes from -.I regcomp -and -.I regexec -include the following: -.PP -.nf -.ta \w'REG_ECOLLATE'u+3n -REG_NOMATCH regexec() failed to match -REG_BADPAT invalid regular expression -REG_ECOLLATE invalid collating element -REG_ECTYPE invalid character class -REG_EESCAPE \e applied to unescapable character -REG_ESUBREG invalid backreference number -REG_EBRACK brackets [ ] not balanced -REG_EPAREN parentheses ( ) not balanced -REG_EBRACE braces { } not balanced -REG_BADBR invalid repetition count(s) in { } -REG_ERANGE invalid character range in [ ] -REG_ESPACE ran out of memory -REG_BADRPT ?, *, or + operand invalid -REG_EMPTY empty (sub)expression -REG_ASSERT ``can't happen''\(emyou found a bug -REG_INVARG invalid argument, e.g. negative-length string -.fi -.SH HISTORY -Written by Henry Spencer at University of Toronto, -henry@zoo.toronto.edu. -.SH BUGS -This is an alpha release with known defects. -Please report problems. -.PP -There is one known functionality bug. -The implementation of internationalization is incomplete: -the locale is always assumed to be the default one of 1003.2, -and only the collating elements etc. of that locale are available. -.PP -The back-reference code is subtle and doubts linger about its correctness -in complex cases. -.PP -.I Regexec -performance is poor. -This will improve with later releases. -.I Nmatch -exceeding 0 is expensive; -.I nmatch -exceeding 1 is worse. -.I Regexec -is largely insensitive to RE complexity \fIexcept\fR that back -references are massively expensive. -RE length does matter; in particular, there is a strong speed bonus -for keeping RE length under about 30 characters, -with most special characters counting roughly double. -.PP -.I Regcomp -implements bounded repetitions by macro expansion, -which is costly in time and space if counts are large -or bounded repetitions are nested. -An RE like, say, -`((((a{1,100}){1,100}){1,100}){1,100}){1,100}' -will (eventually) run almost any existing machine out of swap space. -.PP -There are suspected problems with response to obscure error conditions. -Notably, -certain kinds of internal overflow, -produced only by truly enormous REs or by multiply nested bounded repetitions, -are probably not handled well. -.PP -Due to a mistake in 1003.2, things like `a)b' are legal REs because `)' is -a special character only in the presence of a previous unmatched `('. -This can't be fixed until the spec is fixed. -.PP -The standard's definition of back references is vague. -For example, does -`a\e(\e(b\e)*\e2\e)*d' match `abbbd'? -Until the standard is clarified, -behavior in such cases should not be relied on. -.PP -The implementation of word-boundary matching is a bit of a kludge, -and bugs may lurk in combinations of word-boundary matching and anchoring. diff --git a/ext/ereg/regex/regex.7 b/ext/ereg/regex/regex.7 deleted file mode 100644 index d89012bda1..0000000000 --- a/ext/ereg/regex/regex.7 +++ /dev/null @@ -1,233 +0,0 @@ -.TH REGEX 7 "7 Feb 1994" -.BY "Henry Spencer" -.SH NAME -regex \- POSIX 1003.2 regular expressions -.SH DESCRIPTION -Regular expressions (``RE''s), -as defined in POSIX 1003.2, come in two forms: -modern REs (roughly those of -.IR egrep ; -1003.2 calls these ``extended'' REs) -and obsolete REs (roughly those of -.IR ed ; -1003.2 ``basic'' REs). -Obsolete REs mostly exist for backward compatibility in some old programs; -they will be discussed at the end. -1003.2 leaves some aspects of RE syntax and semantics open; -`\(dg' marks decisions on these aspects that -may not be fully portable to other 1003.2 implementations. -.PP -A (modern) RE is one\(dg or more non-empty\(dg \fIbranches\fR, -separated by `|'. -It matches anything that matches one of the branches. -.PP -A branch is one\(dg or more \fIpieces\fR, concatenated. -It matches a match for the first, followed by a match for the second, etc. -.PP -A piece is an \fIatom\fR possibly followed -by a single\(dg `*', `+', `?', or \fIbound\fR. -An atom followed by `*' matches a sequence of 0 or more matches of the atom. -An atom followed by `+' matches a sequence of 1 or more matches of the atom. -An atom followed by `?' matches a sequence of 0 or 1 matches of the atom. -.PP -A \fIbound\fR is `{' followed by an unsigned decimal integer, -possibly followed by `,' -possibly followed by another unsigned decimal integer, -always followed by `}'. -The integers must lie between 0 and RE_DUP_MAX (255\(dg) inclusive, -and if there are two of them, the first may not exceed the second. -An atom followed by a bound containing one integer \fIi\fR -and no comma matches -a sequence of exactly \fIi\fR matches of the atom. -An atom followed by a bound -containing one integer \fIi\fR and a comma matches -a sequence of \fIi\fR or more matches of the atom. -An atom followed by a bound -containing two integers \fIi\fR and \fIj\fR matches -a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom. -.PP -An atom is a regular expression enclosed in `()' (matching a match for the -regular expression), -an empty set of `()' (matching the null string)\(dg, -a \fIbracket expression\fR (see below), `.' -(matching any single character), `^' (matching the null string at the -beginning of a line), `$' (matching the null string at the -end of a line), a `\e' followed by one of the characters -`^.[$()|*+?{\e' -(matching that character taken as an ordinary character), -a `\e' followed by any other character\(dg -(matching that character taken as an ordinary character, -as if the `\e' had not been present\(dg), -or a single character with no other significance (matching that character). -A `{' followed by a character other than a digit is an ordinary -character, not the beginning of a bound\(dg. -It is illegal to end an RE with `\e'. -.PP -A \fIbracket expression\fR is a list of characters enclosed in `[]'. -It normally matches any single character from the list (but see below). -If the list begins with `^', -it matches any single character -(but see below) \fInot\fR from the rest of the list. -If two characters in the list are separated by `\-', this is shorthand -for the full \fIrange\fR of characters between those two (inclusive) in the -collating sequence, -e.g. `[0-9]' in ASCII matches any decimal digit. -It is illegal\(dg for two ranges to share an -endpoint, e.g. `a-c-e'. -Ranges are very collating-sequence-dependent, -and portable programs should avoid relying on them. -.PP -To include a literal `]' in the list, make it the first character -(following a possible `^'). -To include a literal `\-', make it the first or last character, -or the second endpoint of a range. -To use a literal `\-' as the first endpoint of a range, -enclose it in `[.' and `.]' to make it a collating element (see below). -With the exception of these and some combinations using `[' (see next -paragraphs), all other special characters, including `\e', lose their -special significance within a bracket expression. -.PP -Within a bracket expression, a collating element (a character, -a multi-character sequence that collates as if it were a single character, -or a collating-sequence name for either) -enclosed in `[.' and `.]' stands for the -sequence of characters of that collating element. -The sequence is a single element of the bracket expression's list. -A bracket expression containing a multi-character collating element -can thus match more than one character, -e.g. if the collating sequence includes a `ch' collating element, -then the RE `[[.ch.]]*c' matches the first five characters -of `chchcc'. -.PP -Within a bracket expression, a collating element enclosed in `[=' and -`=]' is an equivalence class, standing for the sequences of characters -of all collating elements equivalent to that one, including itself. -(If there are no other equivalent collating elements, -the treatment is as if the enclosing delimiters were `[.' and `.]'.) -For example, if o and \o'o^' are the members of an equivalence class, -then `[[=o=]]', `[[=\o'o^'=]]', and `[o\o'o^']' are all synonymous. -An equivalence class may not\(dg be an endpoint -of a range. -.PP -Within a bracket expression, the name of a \fIcharacter class\fR enclosed -in `[:' and `:]' stands for the list of all characters belonging to that -class. -Standard character class names are: -.PP -.RS -.nf -.ta 3c 6c 9c -alnum digit punct -alpha graph space -blank lower upper -cntrl print xdigit -.fi -.RE -.PP -These stand for the character classes defined in -.IR ctype (3). -A locale may provide others. -A character class may not be used as an endpoint of a range. -.PP -There are two special cases\(dg of bracket expressions: -the bracket expressions `[[:<:]]' and `[[:>:]]' match the null string at -the beginning and end of a word respectively. -A word is defined as a sequence of -word characters -which is neither preceded nor followed by -word characters. -A word character is an -.I alnum -character (as defined by -.IR ctype (3)) -or an underscore. -This is an extension, -compatible with but not specified by POSIX 1003.2, -and should be used with -caution in software intended to be portable to other systems. -.PP -In the event that an RE could match more than one substring of a given -string, -the RE matches the one starting earliest in the string. -If the RE could match more than one substring starting at that point, -it matches the longest. -Subexpressions also match the longest possible substrings, subject to -the constraint that the whole match be as long as possible, -with subexpressions starting earlier in the RE taking priority over -ones starting later. -Note that higher-level subexpressions thus take priority over -their lower-level component subexpressions. -.PP -Match lengths are measured in characters, not collating elements. -A null string is considered longer than no match at all. -For example, -`bb*' matches the three middle characters of `abbbc', -`(wee|week)(knights|nights)' matches all ten characters of `weeknights', -when `(.*).*' is matched against `abc' the parenthesized subexpression -matches all three characters, and -when `(a*)*' is matched against `bc' both the whole RE and the parenthesized -subexpression match the null string. -.PP -If case-independent matching is specified, -the effect is much as if all case distinctions had vanished from the -alphabet. -When an alphabetic that exists in multiple cases appears as an -ordinary character outside a bracket expression, it is effectively -transformed into a bracket expression containing both cases, -e.g. `x' becomes `[xX]'. -When it appears inside a bracket expression, all case counterparts -of it are added to the bracket expression, so that (e.g.) `[x]' -becomes `[xX]' and `[^x]' becomes `[^xX]'. -.PP -No particular limit is imposed on the length of REs\(dg. -Programs intended to be portable should not employ REs longer -than 256 bytes, -as an implementation can refuse to accept such REs and remain -POSIX-compliant. -.PP -Obsolete (``basic'') regular expressions differ in several respects. -`|', `+', and `?' are ordinary characters and there is no equivalent -for their functionality. -The delimiters for bounds are `\e{' and `\e}', -with `{' and `}' by themselves ordinary characters. -The parentheses for nested subexpressions are `\e(' and `\e)', -with `(' and `)' by themselves ordinary characters. -`^' is an ordinary character except at the beginning of the -RE or\(dg the beginning of a parenthesized subexpression, -`$' is an ordinary character except at the end of the -RE or\(dg the end of a parenthesized subexpression, -and `*' is an ordinary character if it appears at the beginning of the -RE or the beginning of a parenthesized subexpression -(after a possible leading `^'). -Finally, there is one new type of atom, a \fIback reference\fR: -`\e' followed by a non-zero decimal digit \fId\fR -matches the same sequence of characters -matched by the \fId\fRth parenthesized subexpression -(numbering subexpressions by the positions of their opening parentheses, -left to right), -so that (e.g.) `\e([bc]\e)\e1' matches `bb' or `cc' but not `bc'. -.SH SEE ALSO -regex(3) -.PP -POSIX 1003.2, section 2.8 (Regular Expression Notation). -.SH BUGS -Having two kinds of REs is a botch. -.PP -The current 1003.2 spec says that `)' is an ordinary character in -the absence of an unmatched `('; -this was an unintentional result of a wording error, -and change is likely. -Avoid relying on it. -.PP -Back references are a dreadful botch, -posing major problems for efficient implementations. -They are also somewhat vaguely defined -(does -`a\e(\e(b\e)*\e2\e)*d' match `abbbd'?). -Avoid using them. -.PP -1003.2's specification of case-independent matching is vague. -The ``one case implies all cases'' definition given above -is current consensus among implementors as to the right interpretation. -.PP -The syntax for word boundaries is incredibly ugly. diff --git a/ext/ereg/regex/regex.dsp b/ext/ereg/regex/regex.dsp deleted file mode 100644 index e8f1ad4299..0000000000 --- a/ext/ereg/regex/regex.dsp +++ /dev/null @@ -1,106 +0,0 @@ -# Microsoft Developer Studio Project File - Name="regex" - Package Owner=<4> -# Microsoft Developer Studio Generated Build File, Format Version 5.00 -# ** DO NOT EDIT ** - -# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102 - -CFG=regex - Win32 Debug -!MESSAGE This is not a valid makefile. To build this project using NMAKE, -!MESSAGE use the Export Makefile command and run -!MESSAGE -!MESSAGE NMAKE /f "regex.mak". -!MESSAGE -!MESSAGE You can specify a configuration when running NMAKE -!MESSAGE by defining the macro CFG on the command line. For example: -!MESSAGE -!MESSAGE NMAKE /f "regex.mak" CFG="regex - Win32 Debug" -!MESSAGE -!MESSAGE Possible choices for configuration are: -!MESSAGE -!MESSAGE "regex - Win32 Release" (based on "Win32 (x86) Dynamic-Link Library") -!MESSAGE "regex - Win32 Debug" (based on "Win32 (x86) Dynamic-Link Library") -!MESSAGE - -# Begin Project -# PROP Scc_ProjName "" -# PROP Scc_LocalPath "" -CPP=cl.exe -MTL=midl.exe -RSC=rc.exe - -!IF "$(CFG)" == "regex - Win32 Release" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 0 -# PROP BASE Output_Dir "Release" -# PROP BASE Intermediate_Dir "Release" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 0 -# PROP Output_Dir "Release" -# PROP Intermediate_Dir "Release" -# PROP Ignore_Export_Lib 0 -# PROP Target_Dir "" -# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /FD /c -# ADD CPP /nologo /MT /W3 /GX /O2 /I "." /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /FD /c -# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /o NUL /win32 -# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /o NUL /win32 -# ADD BASE RSC /l 0x409 /d "NDEBUG" -# ADD RSC /l 0x409 /d "NDEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386 -# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386 - -!ELSEIF "$(CFG)" == "regex - Win32 Debug" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 1 -# PROP BASE Output_Dir "Debug" -# PROP BASE Intermediate_Dir "Debug" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 1 -# PROP Output_Dir "Debug" -# PROP Intermediate_Dir "Debug" -# PROP Ignore_Export_Lib 0 -# PROP Target_Dir "" -# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /FD /c -# ADD CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /I "." /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /FD /c -# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /o NUL /win32 -# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /o NUL /win32 -# ADD BASE RSC /l 0x409 /d "_DEBUG" -# ADD RSC /l 0x409 /d "_DEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /pdbtype:sept -# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /pdbtype:sept - -!ENDIF - -# Begin Target - -# Name "regex - Win32 Release" -# Name "regex - Win32 Debug" -# Begin Source File - -SOURCE=.\regcomp.c -# End Source File -# Begin Source File - -SOURCE=.\regerror.c -# End Source File -# Begin Source File - -SOURCE=.\regexec.c -# End Source File -# Begin Source File - -SOURCE=.\regfree.c -# End Source File -# End Target -# End Project diff --git a/ext/ereg/regex/regex.dsw b/ext/ereg/regex/regex.dsw deleted file mode 100644 index 7b7df8126c..0000000000 --- a/ext/ereg/regex/regex.dsw +++ /dev/null @@ -1,29 +0,0 @@ -Microsoft Developer Studio Workspace File, Format Version 5.00 -# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE! - -############################################################################### - -Project: "regex"=.\regex.dsp - Package Owner=<4> - -Package=<5> -{{{ -}}} - -Package=<4> -{{{ -}}} - -############################################################################### - -Global: - -Package=<5> -{{{ -}}} - -Package=<3> -{{{ -}}} - -############################################################################### - diff --git a/ext/ereg/regex/regex.h b/ext/ereg/regex/regex.h deleted file mode 100644 index b39c5e178c..0000000000 --- a/ext/ereg/regex/regex.h +++ /dev/null @@ -1,83 +0,0 @@ -#ifndef _HSREGEX_H_ -#define _HSREGEX_H_ -#ifndef _HSREGEX_H -#define _HSREGEX_H /* never again */ -/* ========= begin header generated by ././mkh ========= */ -#ifdef __cplusplus -extern "C" { -#endif - -/* === regex2.h === */ -#ifdef WIN32 -#define API_EXPORT(type) __declspec(dllexport) type __stdcall -#else -#define API_EXPORT(type) type -#endif - -typedef off_t regoff_t; -typedef struct { - int re_magic; - size_t re_nsub; /* number of parenthesized subexpressions */ - const char *re_endp; /* end pointer for REG_PEND */ - struct re_guts *re_g; /* none of your business :-) */ -} regex_t; -typedef struct { - regoff_t rm_so; /* start of match */ - regoff_t rm_eo; /* end of match */ -} regmatch_t; - - -/* === regcomp.c === */ -API_EXPORT(int) regcomp(regex_t *, const char *, int); -#define REG_BASIC 0000 -#define REG_EXTENDED 0001 -#define REG_ICASE 0002 -#define REG_NOSUB 0004 -#define REG_NEWLINE 0010 -#define REG_NOSPEC 0020 -#define REG_PEND 0040 -#define REG_DUMP 0200 - - -/* === regerror.c === */ -#define REG_OKAY 0 -#define REG_NOMATCH 1 -#define REG_BADPAT 2 -#define REG_ECOLLATE 3 -#define REG_ECTYPE 4 -#define REG_EESCAPE 5 -#define REG_ESUBREG 6 -#define REG_EBRACK 7 -#define REG_EPAREN 8 -#define REG_EBRACE 9 -#define REG_BADBR 10 -#define REG_ERANGE 11 -#define REG_ESPACE 12 -#define REG_BADRPT 13 -#define REG_EMPTY 14 -#define REG_ASSERT 15 -#define REG_INVARG 16 -#define REG_ATOI 255 /* convert name to number (!) */ -#define REG_ITOA 0400 /* convert number to name (!) */ -API_EXPORT(size_t) regerror(int, const regex_t *, char *, size_t); - - -/* === regexec.c === */ -API_EXPORT(int) regexec(const regex_t *, const char *, size_t, regmatch_t [], int); -#define REG_NOTBOL 00001 -#define REG_NOTEOL 00002 -#define REG_STARTEND 00004 -#define REG_TRACE 00400 /* tracing of execution */ -#define REG_LARGE 01000 /* force large representation */ -#define REG_BACKR 02000 /* force use of backref code */ - - -/* === regfree.c === */ -API_EXPORT(void) regfree(regex_t *); - -#ifdef __cplusplus -} -#endif -/* ========= end header generated by ././mkh ========= */ -#endif -#endif diff --git a/ext/ereg/regex/regex.mak b/ext/ereg/regex/regex.mak deleted file mode 100644 index b87ded340b..0000000000 --- a/ext/ereg/regex/regex.mak +++ /dev/null @@ -1,304 +0,0 @@ -# Microsoft Developer Studio Generated NMAKE File, Based on regex.dsp -!IF "$(CFG)" == "" -CFG=regex - Win32 Release -!MESSAGE No configuration specified. Defaulting to regex - Win32 Release. -!ENDIF - -!IF "$(CFG)" != "regex - Win32 Release" && "$(CFG)" != "regex - Win32 Debug" -!MESSAGE Invalid configuration "$(CFG)" specified. -!MESSAGE You can specify a configuration when running NMAKE -!MESSAGE by defining the macro CFG on the command line. For example: -!MESSAGE -!MESSAGE NMAKE /f "regex.mak" CFG="regex - Win32 Release" -!MESSAGE -!MESSAGE Possible choices for configuration are: -!MESSAGE -!MESSAGE "regex - Win32 Release" (based on "Win32 (x86) Static Library") -!MESSAGE "regex - Win32 Debug" (based on "Win32 (x86) Static Library") -!MESSAGE -!ERROR An invalid configuration is specified. -!ENDIF - -!IF "$(OS)" == "Windows_NT" -NULL= -!ELSE -NULL=nul -!ENDIF - -CPP=cl.exe - -!IF "$(CFG)" == "regex - Win32 Release" - -OUTDIR=.\Release -INTDIR=.\Release -# Begin Custom Macros -OutDir=.\.\Release -# End Custom Macros - -!IF "$(RECURSE)" == "0" - -ALL : "$(OUTDIR)\regex.lib" - -!ELSE - -ALL : "$(OUTDIR)\regex.lib" - -!ENDIF - -CLEAN : - -@erase "$(INTDIR)\regcomp.obj" - -@erase "$(INTDIR)\regerror.obj" - -@erase "$(INTDIR)\regexec.obj" - -@erase "$(INTDIR)\regfree.obj" - -@erase "$(INTDIR)\vc50.idb" - -@erase "$(OUTDIR)\regex.lib" - -"$(OUTDIR)" : - if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)" - -CPP_PROJ=/nologo /MD /W3 /GX /O2 /I "." /D "WIN32" /D "NDEBUG" /D "_WINDOWS"\ - /Fp"$(INTDIR)\regex.pch" /YX /Fo"$(INTDIR)\\" /Fd"$(INTDIR)\\" /FD /c -CPP_OBJS=.\Release/ -CPP_SBRS=. -BSC32=bscmake.exe -BSC32_FLAGS=/nologo /o"$(OUTDIR)\regex.bsc" -BSC32_SBRS= \ - -LIB32=link.exe -lib -LIB32_FLAGS=/nologo /out:"$(OUTDIR)\regex.lib" -LIB32_OBJS= \ - "$(INTDIR)\regcomp.obj" \ - "$(INTDIR)\regerror.obj" \ - "$(INTDIR)\regexec.obj" \ - "$(INTDIR)\regfree.obj" - -"$(OUTDIR)\regex.lib" : "$(OUTDIR)" $(DEF_FILE) $(LIB32_OBJS) - $(LIB32) @<< - $(LIB32_FLAGS) $(DEF_FLAGS) $(LIB32_OBJS) -<< - -!ELSEIF "$(CFG)" == "regex - Win32 Debug" - -OUTDIR=.\Debug -INTDIR=.\Debug -# Begin Custom Macros -OutDir=.\.\Debug -# End Custom Macros - -!IF "$(RECURSE)" == "0" - -ALL : "$(OUTDIR)\regex.lib" "$(OUTDIR)\regex.bsc" - -!ELSE - -ALL : "$(OUTDIR)\regex.lib" "$(OUTDIR)\regex.bsc" - -!ENDIF - -CLEAN : - -@erase "$(INTDIR)\regcomp.obj" - -@erase "$(INTDIR)\regcomp.sbr" - -@erase "$(INTDIR)\regerror.obj" - -@erase "$(INTDIR)\regerror.sbr" - -@erase "$(INTDIR)\regexec.obj" - -@erase "$(INTDIR)\regexec.sbr" - -@erase "$(INTDIR)\regfree.obj" - -@erase "$(INTDIR)\regfree.sbr" - -@erase "$(INTDIR)\vc50.idb" - -@erase "$(OUTDIR)\regex.bsc" - -@erase "$(OUTDIR)\regex.lib" - -"$(OUTDIR)" : - if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)" - -CPP_PROJ=/nologo /MDd /W3 /GX /Z7 /Od /I "." /D "WIN32" /D "_DEBUG" /D\ - "_WINDOWS" /FR"$(INTDIR)\\" /Fp"$(INTDIR)\regex.pch" /YX /Fo"$(INTDIR)\\"\ - /Fd"$(INTDIR)\\" /FD /c -CPP_OBJS=.\Debug/ -CPP_SBRS=.\Debug/ -BSC32=bscmake.exe -BSC32_FLAGS=/nologo /o"$(OUTDIR)\regex.bsc" -BSC32_SBRS= \ - "$(INTDIR)\regcomp.sbr" \ - "$(INTDIR)\regerror.sbr" \ - "$(INTDIR)\regexec.sbr" \ - "$(INTDIR)\regfree.sbr" - -"$(OUTDIR)\regex.bsc" : "$(OUTDIR)" $(BSC32_SBRS) - $(BSC32) @<< - $(BSC32_FLAGS) $(BSC32_SBRS) -<< - -LIB32=link.exe -lib -LIB32_FLAGS=/nologo /out:"$(OUTDIR)\regex.lib" -LIB32_OBJS= \ - "$(INTDIR)\regcomp.obj" \ - "$(INTDIR)\regerror.obj" \ - "$(INTDIR)\regexec.obj" \ - "$(INTDIR)\regfree.obj" - -"$(OUTDIR)\regex.lib" : "$(OUTDIR)" $(DEF_FILE) $(LIB32_OBJS) - $(LIB32) @<< - $(LIB32_FLAGS) $(DEF_FLAGS) $(LIB32_OBJS) -<< - -!ENDIF - -.c{$(CPP_OBJS)}.obj:: - $(CPP) @<< - $(CPP_PROJ) $< -<< - -.cpp{$(CPP_OBJS)}.obj:: - $(CPP) @<< - $(CPP_PROJ) $< -<< - -.cxx{$(CPP_OBJS)}.obj:: - $(CPP) @<< - $(CPP_PROJ) $< -<< - -.c{$(CPP_SBRS)}.sbr:: - $(CPP) @<< - $(CPP_PROJ) $< -<< - -.cpp{$(CPP_SBRS)}.sbr:: - $(CPP) @<< - $(CPP_PROJ) $< -<< - -.cxx{$(CPP_SBRS)}.sbr:: - $(CPP) @<< - $(CPP_PROJ) $< -<< - - -!IF "$(CFG)" == "regex - Win32 Release" || "$(CFG)" == "regex - Win32 Debug" -SOURCE=.\regcomp.c - -!IF "$(CFG)" == "regex - Win32 Release" - -DEP_CPP_REGCO=\ - ".\cclass.h"\ - ".\cname.h"\ - ".\regcomp.ih"\ - ".\regex.h"\ - ".\regex2.h"\ - ".\utils.h"\ - - -"$(INTDIR)\regcomp.obj" : $(SOURCE) $(DEP_CPP_REGCO) "$(INTDIR)" - - -!ELSEIF "$(CFG)" == "regex - Win32 Debug" - -DEP_CPP_REGCO=\ - ".\cclass.h"\ - ".\cname.h"\ - ".\regcomp.ih"\ - ".\regex.h"\ - ".\regex2.h"\ - ".\utils.h"\ - {$(INCLUDE)}"sys\types.h"\ - - -"$(INTDIR)\regcomp.obj" "$(INTDIR)\regcomp.sbr" : $(SOURCE) $(DEP_CPP_REGCO)\ - "$(INTDIR)" - - -!ENDIF - -SOURCE=.\regerror.c - -!IF "$(CFG)" == "regex - Win32 Release" - -DEP_CPP_REGER=\ - ".\regerror.ih"\ - ".\regex.h"\ - ".\utils.h"\ - - -"$(INTDIR)\regerror.obj" : $(SOURCE) $(DEP_CPP_REGER) "$(INTDIR)" - - -!ELSEIF "$(CFG)" == "regex - Win32 Debug" - -DEP_CPP_REGER=\ - ".\regerror.ih"\ - ".\regex.h"\ - ".\utils.h"\ - {$(INCLUDE)}"sys\types.h"\ - - -"$(INTDIR)\regerror.obj" "$(INTDIR)\regerror.sbr" : $(SOURCE) $(DEP_CPP_REGER)\ - "$(INTDIR)" - - -!ENDIF - -SOURCE=.\regexec.c - -!IF "$(CFG)" == "regex - Win32 Release" - -DEP_CPP_REGEX=\ - ".\engine.c"\ - ".\engine.ih"\ - ".\regex.h"\ - ".\regex2.h"\ - ".\utils.h"\ - - -"$(INTDIR)\regexec.obj" : $(SOURCE) $(DEP_CPP_REGEX) "$(INTDIR)" - - -!ELSEIF "$(CFG)" == "regex - Win32 Debug" - -DEP_CPP_REGEX=\ - ".\engine.c"\ - ".\engine.ih"\ - ".\regex.h"\ - ".\regex2.h"\ - ".\utils.h"\ - {$(INCLUDE)}"sys\types.h"\ - - -"$(INTDIR)\regexec.obj" "$(INTDIR)\regexec.sbr" : $(SOURCE) $(DEP_CPP_REGEX)\ - "$(INTDIR)" - - -!ENDIF - -SOURCE=.\regfree.c - -!IF "$(CFG)" == "regex - Win32 Release" - -DEP_CPP_REGFR=\ - ".\regex.h"\ - ".\regex2.h"\ - ".\utils.h"\ - - -"$(INTDIR)\regfree.obj" : $(SOURCE) $(DEP_CPP_REGFR) "$(INTDIR)" - - -!ELSEIF "$(CFG)" == "regex - Win32 Debug" - -DEP_CPP_REGFR=\ - ".\regex.h"\ - ".\regex2.h"\ - ".\utils.h"\ - {$(INCLUDE)}"sys\types.h"\ - - -"$(INTDIR)\regfree.obj" "$(INTDIR)\regfree.sbr" : $(SOURCE) $(DEP_CPP_REGFR)\ - "$(INTDIR)" - - -!ENDIF - -SOURCE=.\engine.c - -!ENDIF - diff --git a/ext/ereg/regex/regex2.h b/ext/ereg/regex/regex2.h deleted file mode 100644 index 4996f96ecd..0000000000 --- a/ext/ereg/regex/regex2.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * First, the stuff that ends up in the outside-world include file - = #ifdef WIN32 - = #define API_EXPORT(type) __declspec(dllexport) type __stdcall - = #else - = #define API_EXPORT(type) type - = #endif - = - = typedef off_t regoff_t; - = typedef struct { - = int re_magic; - = size_t re_nsub; // number of parenthesized subexpressions - = const unsigned char *re_endp; // end pointer for REG_PEND - = struct re_guts *re_g; // none of your business :-) - = } regex_t; - = typedef struct { - = regoff_t rm_so; // start of match - = regoff_t rm_eo; // end of match - = } regmatch_t; - */ -/* - * internals of regex_t - */ -#define MAGIC1 ((('r'^0200)<<8) | 'e') - -/* - * The internal representation is a *strip*, a sequence of - * operators ending with an endmarker. (Some terminology etc. is a - * historical relic of earlier versions which used multiple strips.) - * Certain oddities in the representation are there to permit running - * the machinery backwards; in particular, any deviation from sequential - * flow must be marked at both its source and its destination. Some - * fine points: - * - * - OPLUS_ and O_PLUS are *inside* the loop they create. - * - OQUEST_ and O_QUEST are *outside* the bypass they create. - * - OCH_ and O_CH are *outside* the multi-way branch they create, while - * OOR1 and OOR2 are respectively the end and the beginning of one of - * the branches. Note that there is an implicit OOR2 following OCH_ - * and an implicit OOR1 preceding O_CH. - * - * In state representations, an operator's bit is on to signify a state - * immediately *preceding* "execution" of that operator. - */ -typedef long sop; /* strip operator */ -typedef long sopno; -#define OPRMASK 0x7c000000 -#define OPDMASK 0x03ffffff -#define OPSHIFT (26) -#define OP(n) ((n)&OPRMASK) -#define OPND(n) ((n)&OPDMASK) -#define SOP(op, opnd) ((op)|(opnd)) -/* operators meaning operand */ -/* (back, fwd are offsets) */ -#define OEND (1< uch [csetsize] */ - uch mask; /* bit within array */ - uch hash; /* hash code */ - size_t smultis; - unsigned char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */ -} cset; -/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */ -#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c)) -#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) -#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask) -#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */ -#define MCsub(p, cs, cp) mcsub(p, cs, cp) -#define MCin(p, cs, cp) mcin(p, cs, cp) - -/* stuff for character categories */ -typedef unsigned char cat_t; - -/* - * main compiled-expression structure - */ -struct re_guts { - int magic; -# define MAGIC2 ((('R'^0200)<<8)|'E') - sop *strip; /* malloced area for strip */ - int csetsize; /* number of bits in a cset vector */ - int ncsets; /* number of csets in use */ - cset *sets; /* -> cset [ncsets] */ - uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ - int cflags; /* copy of regcomp() cflags argument */ - sopno nstates; /* = number of sops */ - sopno firststate; /* the initial OEND (normally 0) */ - sopno laststate; /* the final OEND */ - int iflags; /* internal flags */ -# define USEBOL 01 /* used ^ */ -# define USEEOL 02 /* used $ */ -# define BAD 04 /* something wrong */ - int nbol; /* number of ^ used */ - int neol; /* number of $ used */ - int ncategories; /* how many character categories */ - cat_t *categories; /* ->catspace[-UCHAR_MIN] */ - unsigned char *must; /* match must contain this string */ - int mlen; /* length of must */ - size_t nsub; /* copy of re_nsub */ - int backrefs; /* does it use back references? */ - sopno nplus; /* how deep does it nest +s? */ - /* catspace must be last */ - cat_t catspace[1]; /* actually [NC] */ -}; - -/* misc utilities */ -#define OUT (UCHAR_MAX+1) /* a non-character value */ -#define ISWORD(c) (isalnum(c) || (c) == '_') diff --git a/ext/ereg/regex/regex_extra.h b/ext/ereg/regex/regex_extra.h deleted file mode 100644 index d668f7613f..0000000000 --- a/ext/ereg/regex/regex_extra.h +++ /dev/null @@ -1,23 +0,0 @@ -/* do not frame this - we must be able to include this file multiple times */ - -#undef regexec -#undef regerror -#undef regfree -#undef regcomp - -#if (defined(REGEX) && REGEX == 1) || (!defined(REGEX)) - -#ifndef PHP_WIN32 - -#ifndef PHP_NO_ALIASES - -#define regexec php_regexec -#define regerror php_regerror -#define regfree php_regfree -#define regcomp php_regcomp - -#endif - -#endif - -#endif diff --git a/ext/ereg/regex/regexec.c b/ext/ereg/regex/regexec.c deleted file mode 100644 index c8cb8fdf56..0000000000 --- a/ext/ereg/regex/regexec.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * the outer shell of regexec() - * - * This file includes engine.c *twice*, after muchos fiddling with the - * macros that code uses. This lets the same code operate on two different - * representations for state sets. - */ -#include -#include -#include -#include -#include -#include -#include - -#include "utils.h" -#include "regex2.h" - -static int nope = 0; /* for use in asserts; shuts lint up */ - -/* macros for manipulating states, small version */ -#define states unsigned -#define states1 unsigned /* for later use in regexec() decision */ -#define CLEAR(v) ((v) = 0) -#define SET0(v, n) ((v) &= ~((unsigned)1 << (n))) -#define SET1(v, n) ((v) |= (unsigned)1 << (n)) -#define ISSET(v, n) ((v) & ((unsigned)1 << (n))) -#define ASSIGN(d, s) ((d) = (s)) -#define EQ(a, b) ((a) == (b)) -#define STATEVARS int dummy /* dummy version */ -#define STATESETUP(m, n) /* nothing */ -#define STATETEARDOWN(m) /* nothing */ -#define SETUP(v) ((v) = 0) -#define onestate unsigned -#define INIT(o, n) ((o) = (unsigned)1 << (n)) -#define INC(o) ((o) <<= 1) -#define ISSTATEIN(v, o) ((v) & (o)) -/* some abbreviations; note that some of these know variable names! */ -/* do "if I'm here, I can also be there" etc without branches */ -#define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n)) -#define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n)) -#define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n))) -/* function names */ -#define SNAMES /* engine.c looks after details */ - -#include "engine.c" - -/* now undo things */ -#undef states -#undef CLEAR -#undef SET0 -#undef SET1 -#undef ISSET -#undef ASSIGN -#undef EQ -#undef STATEVARS -#undef STATESETUP -#undef STATETEARDOWN -#undef SETUP -#undef onestate -#undef INIT -#undef INC -#undef ISSTATEIN -#undef FWD -#undef BACK -#undef ISSETBACK -#undef SNAMES - -/* macros for manipulating states, large version */ -#define states unsigned char * -#define CLEAR(v) memset(v, 0, m->g->nstates) -#define SET0(v, n) ((v)[n] = 0) -#define SET1(v, n) ((v)[n] = 1) -#define ISSET(v, n) ((v)[n]) -#define ASSIGN(d, s) memcpy(d, s, m->g->nstates) -#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) -#define STATEVARS int vn; unsigned char *space -#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ - if ((m)->space == NULL) return(REG_ESPACE); \ - (m)->vn = 0; } -#define STATETEARDOWN(m) { free((m)->space); } -#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) -#define onestate int -#define INIT(o, n) ((o) = (n)) -#define INC(o) ((o)++) -#define ISSTATEIN(v, o) ((v)[o]) -/* some abbreviations; note that some of these know variable names! */ -/* do "if I'm here, I can also be there" etc without branches */ -#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) -#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) -#define ISSETBACK(v, n) ((v)[here - (n)]) -/* function names */ -#define LNAMES /* flag */ - -#include "engine.c" - -/* - - regexec - interface for matching - = API_EXPORT(int) regexec(const regex_t *, const char *, size_t, \ - = regmatch_t [], int); - = #define REG_NOTBOL 00001 - = #define REG_NOTEOL 00002 - = #define REG_STARTEND 00004 - = #define REG_TRACE 00400 // tracing of execution - = #define REG_LARGE 01000 // force large representation - = #define REG_BACKR 02000 // force use of backref code - * - * We put this here so we can exploit knowledge of the state representation - * when choosing which matcher to call. Also, by this point the matchers - * have been prototyped. - */ -API_EXPORT(int) /* 0 success, REG_NOMATCH failure */ -regexec(preg, string, nmatch, pmatch, eflags) -const regex_t *preg; -const char *string; -size_t nmatch; -regmatch_t pmatch[]; -int eflags; -{ - register struct re_guts *g = preg->re_g; -#ifdef REDEBUG -# define GOODFLAGS(f) (f) -#else -# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) -#endif - - if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) - return(REG_BADPAT); - assert(!(g->iflags&BAD)); - if (g->iflags&BAD) /* backstop for no-debug case */ - return(REG_BADPAT); - eflags = GOODFLAGS(eflags); - - if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE)) - return(smatcher(g, (unsigned char *)string, nmatch, pmatch, eflags)); - else - return(lmatcher(g, (unsigned char *)string, nmatch, pmatch, eflags)); -} diff --git a/ext/ereg/regex/regfree.c b/ext/ereg/regex/regfree.c deleted file mode 100644 index 9fd618a13b..0000000000 --- a/ext/ereg/regex/regfree.c +++ /dev/null @@ -1,37 +0,0 @@ -#include -#include -#include -#include - -#include "utils.h" -#include "regex2.h" - -/* - - regfree - free everything - = API_EXPORT(void) regfree(regex_t *); - */ -API_EXPORT(void) -regfree(preg) -regex_t *preg; -{ - register struct re_guts *g; - - if (preg->re_magic != MAGIC1) /* oops */ - return; /* nice to complain, but hard */ - - g = preg->re_g; - if (g == NULL || g->magic != MAGIC2) /* oops again */ - return; - preg->re_magic = 0; /* mark it invalid */ - g->magic = 0; /* mark it invalid */ - - if (g->strip != NULL) - free((char *)g->strip); - if (g->sets != NULL) - free((char *)g->sets); - if (g->setbits != NULL) - free((char *)g->setbits); - if (g->must != NULL) - free(g->must); - free((char *)g); -} diff --git a/ext/ereg/regex/split.c b/ext/ereg/regex/split.c deleted file mode 100644 index 188bdb775b..0000000000 --- a/ext/ereg/regex/split.c +++ /dev/null @@ -1,316 +0,0 @@ -#include -#include - -/* - - split - divide a string into fields, like awk split() - = int split(char *string, char *fields[], int nfields, char *sep); - */ -int /* number of fields, including overflow */ -split(string, fields, nfields, sep) -char *string; -char *fields[]; /* list is not NULL-terminated */ -int nfields; /* number of entries available in fields[] */ -char *sep; /* "" white, "c" single char, "ab" [ab]+ */ -{ - register char *p = string; - register char c; /* latest character */ - register char sepc = sep[0]; - register char sepc2; - register int fn; - register char **fp = fields; - register char *sepp; - register int trimtrail; - - /* white space */ - if (sepc == '\0') { - while ((c = *p++) == ' ' || c == '\t') - continue; - p--; - trimtrail = 1; - sep = " \t"; /* note, code below knows this is 2 long */ - sepc = ' '; - } else - trimtrail = 0; - sepc2 = sep[1]; /* now we can safely pick this up */ - - /* catch empties */ - if (*p == '\0') - return(0); - - /* single separator */ - if (sepc2 == '\0') { - fn = nfields; - for (;;) { - *fp++ = p; - fn--; - if (fn == 0) - break; - while ((c = *p++) != sepc) - if (c == '\0') - return(nfields - fn); - *(p-1) = '\0'; - } - /* we have overflowed the fields vector -- just count them */ - fn = nfields; - for (;;) { - while ((c = *p++) != sepc) - if (c == '\0') - return(fn); - fn++; - } - /* not reached */ - } - - /* two separators */ - if (sep[2] == '\0') { - fn = nfields; - for (;;) { - *fp++ = p; - fn--; - while ((c = *p++) != sepc && c != sepc2) - if (c == '\0') { - if (trimtrail && **(fp-1) == '\0') - fn++; - return(nfields - fn); - } - if (fn == 0) - break; - *(p-1) = '\0'; - while ((c = *p++) == sepc || c == sepc2) - continue; - p--; - } - /* we have overflowed the fields vector -- just count them */ - fn = nfields; - while (c != '\0') { - while ((c = *p++) == sepc || c == sepc2) - continue; - p--; - fn++; - while ((c = *p++) != '\0' && c != sepc && c != sepc2) - continue; - } - /* might have to trim trailing white space */ - if (trimtrail) { - p--; - while ((c = *--p) == sepc || c == sepc2) - continue; - p++; - if (*p != '\0') { - if (fn == nfields+1) - *p = '\0'; - fn--; - } - } - return(fn); - } - - /* n separators */ - fn = 0; - for (;;) { - if (fn < nfields) - *fp++ = p; - fn++; - for (;;) { - c = *p++; - if (c == '\0') - return(fn); - sepp = sep; - while ((sepc = *sepp++) != '\0' && sepc != c) - continue; - if (sepc != '\0') /* it was a separator */ - break; - } - if (fn < nfields) - *(p-1) = '\0'; - for (;;) { - c = *p++; - sepp = sep; - while ((sepc = *sepp++) != '\0' && sepc != c) - continue; - if (sepc == '\0') /* it wasn't a separator */ - break; - } - p--; - } - - /* not reached */ -} - -#ifdef TEST_SPLIT - - -/* - * test program - * pgm runs regression - * pgm sep splits stdin lines by sep - * pgm str sep splits str by sep - * pgm str sep n splits str by sep n times - */ -int -main(argc, argv) -int argc; -char *argv[]; -{ - char buf[512]; - register int n; -# define MNF 10 - char *fields[MNF]; - - if (argc > 4) - for (n = atoi(argv[3]); n > 0; n--) { - (void) strcpy(buf, argv[1]); - } - else if (argc > 3) - for (n = atoi(argv[3]); n > 0; n--) { - (void) strcpy(buf, argv[1]); - (void) split(buf, fields, MNF, argv[2]); - } - else if (argc > 2) - dosplit(argv[1], argv[2]); - else if (argc > 1) - while (fgets(buf, sizeof(buf), stdin) != NULL) { - buf[strlen(buf)-1] = '\0'; /* stomp newline */ - dosplit(buf, argv[1]); - } - else - regress(); - - exit(0); -} - -dosplit(string, seps) -char *string; -char *seps; -{ -# define NF 5 - char *fields[NF]; - register int nf; - - nf = split(string, fields, NF, seps); - print(nf, NF, fields); -} - -print(nf, nfp, fields) -int nf; -int nfp; -char *fields[]; -{ - register int fn; - register int bound; - - bound = (nf > nfp) ? nfp : nf; - printf("%d:\t", nf); - for (fn = 0; fn < bound; fn++) - printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n"); -} - -#define RNF 5 /* some table entries know this */ -struct { - char *str; - char *seps; - int nf; - char *fi[RNF]; -} tests[] = { - "", " ", 0, { "" }, - " ", " ", 2, { "", "" }, - "x", " ", 1, { "x" }, - "xy", " ", 1, { "xy" }, - "x y", " ", 2, { "x", "y" }, - "abc def g ", " ", 5, { "abc", "def", "", "g", "" }, - " a bcd", " ", 4, { "", "", "a", "bcd" }, - "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, - " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, - - "", " _", 0, { "" }, - " ", " _", 2, { "", "" }, - "x", " _", 1, { "x" }, - "x y", " _", 2, { "x", "y" }, - "ab _ cd", " _", 2, { "ab", "cd" }, - " a_b c ", " _", 5, { "", "a", "b", "c", "" }, - "a b c_d e f", " _", 6, { "a", "b", "c", "d", "e f" }, - " a b c d ", " _", 6, { "", "a", "b", "c", "d " }, - - "", " _~", 0, { "" }, - " ", " _~", 2, { "", "" }, - "x", " _~", 1, { "x" }, - "x y", " _~", 2, { "x", "y" }, - "ab _~ cd", " _~", 2, { "ab", "cd" }, - " a_b c~", " _~", 5, { "", "a", "b", "c", "" }, - "a b_c d~e f", " _~", 6, { "a", "b", "c", "d", "e f" }, - "~a b c d ", " _~", 6, { "", "a", "b", "c", "d " }, - - "", " _~-", 0, { "" }, - " ", " _~-", 2, { "", "" }, - "x", " _~-", 1, { "x" }, - "x y", " _~-", 2, { "x", "y" }, - "ab _~- cd", " _~-", 2, { "ab", "cd" }, - " a_b c~", " _~-", 5, { "", "a", "b", "c", "" }, - "a b_c-d~e f", " _~-", 6, { "a", "b", "c", "d", "e f" }, - "~a-b c d ", " _~-", 6, { "", "a", "b", "c", "d " }, - - "", " ", 0, { "" }, - " ", " ", 2, { "", "" }, - "x", " ", 1, { "x" }, - "xy", " ", 1, { "xy" }, - "x y", " ", 2, { "x", "y" }, - "abc def g ", " ", 4, { "abc", "def", "g", "" }, - " a bcd", " ", 3, { "", "a", "bcd" }, - "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, - " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, - - "", "", 0, { "" }, - " ", "", 0, { "" }, - "x", "", 1, { "x" }, - "xy", "", 1, { "xy" }, - "x y", "", 2, { "x", "y" }, - "abc def g ", "", 3, { "abc", "def", "g" }, - "\t a bcd", "", 2, { "a", "bcd" }, - " a \tb\t c ", "", 3, { "a", "b", "c" }, - "a b c d e ", "", 5, { "a", "b", "c", "d", "e" }, - "a b\tc d e f", "", 6, { "a", "b", "c", "d", "e f" }, - " a b c d e f ", "", 6, { "a", "b", "c", "d", "e f " }, - - NULL, NULL, 0, { NULL }, -}; - -regress() -{ - char buf[512]; - register int n; - char *fields[RNF+1]; - register int nf; - register int i; - register int printit; - register char *f; - - for (n = 0; tests[n].str != NULL; n++) { - (void) strcpy(buf, tests[n].str); - fields[RNF] = NULL; - nf = split(buf, fields, RNF, tests[n].seps); - printit = 0; - if (nf != tests[n].nf) { - printf("split `%s' by `%s' gave %d fields, not %d\n", - tests[n].str, tests[n].seps, nf, tests[n].nf); - printit = 1; - } else if (fields[RNF] != NULL) { - printf("split() went beyond array end\n"); - printit = 1; - } else { - for (i = 0; i < nf && i < RNF; i++) { - f = fields[i]; - if (f == NULL) - f = "(NULL)"; - if (strcmp(f, tests[n].fi[i]) != 0) { - printf("split `%s' by `%s', field %d is `%s', not `%s'\n", - tests[n].str, tests[n].seps, - i, fields[i], tests[n].fi[i]); - printit = 1; - } - } - } - if (printit) - print(nf, RNF, fields); - } -} -#endif diff --git a/ext/ereg/regex/tests b/ext/ereg/regex/tests deleted file mode 100644 index c05846177f..0000000000 --- a/ext/ereg/regex/tests +++ /dev/null @@ -1,475 +0,0 @@ -# regular expression test set -# Lines are at least three fields, separated by one or more tabs. "" stands -# for an empty field. First field is an RE. Second field is flags. If -# C flag given, regcomp() is expected to fail, and the third field is the -# error name (minus the leading REG_). -# -# Otherwise it is expected to succeed, and the third field is the string to -# try matching it against. If there is no fourth field, the match is -# expected to fail. If there is a fourth field, it is the substring that -# the RE is expected to match. If there is a fifth field, it is a comma- -# separated list of what the subexpressions should match, with - indicating -# no match for that one. In both the fourth and fifth fields, a (sub)field -# starting with @ indicates that the (sub)expression is expected to match -# a null string followed by the stuff after the @; this provides a way to -# test where null strings match. The character `N' in REs and strings -# is newline, `S' is space, `T' is tab, `Z' is NUL. -# -# The full list of flags: -# - placeholder, does nothing -# b RE is a BRE, not an ERE -# & try it as both an ERE and a BRE -# C regcomp() error expected, third field is error name -# i REG_ICASE -# m ("mundane") REG_NOSPEC -# s REG_NOSUB (not really testable) -# n REG_NEWLINE -# ^ REG_NOTBOL -# $ REG_NOTEOL -# # REG_STARTEND (see below) -# p REG_PEND -# -# For REG_STARTEND, the start/end offsets are those of the substring -# enclosed in (). - -# basics -a & a a -abc & abc abc -abc|de - abc abc -a|b|c - abc a - -# parentheses and perversions thereof -a(b)c - abc abc -a\(b\)c b abc abc -a( C EPAREN -a( b a( a( -a\( - a( a( -a\( bC EPAREN -a\(b bC EPAREN -a(b C EPAREN -a(b b a(b a(b -# gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly) -a) - a) a) -) - ) ) -# end gagging (in a just world, those *should* give EPAREN) -a) b a) a) -a\) bC EPAREN -\) bC EPAREN -a()b - ab ab -a\(\)b b ab ab - -# anchoring and REG_NEWLINE -^abc$ & abc abc -a^b - a^b -a^b b a^b a^b -a$b - a$b -a$b b a$b a$b -^ & abc @abc -$ & abc @ -^$ & "" @ -$^ - "" @ -\($\)\(^\) b "" @ -# stop retching, those are legitimate (although disgusting) -^^ - "" @ -$$ - "" @ -b$ & abNc -b$ &n abNc b -^b$ & aNbNc -^b$ &n aNbNc b -^$ &n aNNb @Nb -^$ n abc -^$ n abcN @ -$^ n aNNb @Nb -\($\)\(^\) bn aNNb @Nb -^^ n^ aNNb @Nb -$$ n aNNb @NN -^a ^ a -a$ $ a -^a ^n aNb -^b ^n aNb b -a$ $n bNa -b$ $n bNa b -a*(^b$)c* - b b -a*\(^b$\)c* b b b - -# certain syntax errors and non-errors -| C EMPTY -| b | | -* C BADRPT -* b * * -+ C BADRPT -? C BADRPT -"" &C EMPTY -() - abc @abc -\(\) b abc @abc -a||b C EMPTY -|ab C EMPTY -ab| C EMPTY -(|a)b C EMPTY -(a|)b C EMPTY -(*a) C BADRPT -(+a) C BADRPT -(?a) C BADRPT -({1}a) C BADRPT -\(\{1\}a\) bC BADRPT -(a|*b) C BADRPT -(a|+b) C BADRPT -(a|?b) C BADRPT -(a|{1}b) C BADRPT -^* C BADRPT -^* b * * -^+ C BADRPT -^? C BADRPT -^{1} C BADRPT -^\{1\} bC BADRPT - -# metacharacters, backslashes -a.c & abc abc -a[bc]d & abd abd -a\*c & a*c a*c -a\\b & a\b a\b -a\\\*b & a\*b a\*b -a\bc & abc abc -a\ &C EESCAPE -a\\bc & a\bc a\bc -\{ bC BADRPT -a\[b & a[b a[b -a[b &C EBRACK -# trailing $ is a peculiar special case for the BRE code -a$ & a a -a$ & a$ -a\$ & a -a\$ & a$ a$ -a\\$ & a -a\\$ & a$ -a\\$ & a\$ -a\\$ & a\ a\ - -# back references, ugh -a\(b\)\2c bC ESUBREG -a\(b\1\)c bC ESUBREG -a\(b*\)c\1d b abbcbbd abbcbbd bb -a\(b*\)c\1d b abbcbd -a\(b*\)c\1d b abbcbbbd -^\(.\)\1 b abc -a\([bc]\)\1d b abcdabbd abbd b -a\(\([bc]\)\2\)*d b abbccd abbccd -a\(\([bc]\)\2\)*d b abbcbd -# actually, this next one probably ought to fail, but the spec is unclear -a\(\(b\)*\2\)*d b abbbd abbbd -# here is a case that no NFA implementation does right -\(ab*\)[ab]*\1 b ababaaa ababaaa a -# check out normal matching in the presence of back refs -\(a\)\1bcd b aabcd aabcd -\(a\)\1bc*d b aabcd aabcd -\(a\)\1bc*d b aabd aabd -\(a\)\1bc*d b aabcccd aabcccd -\(a\)\1bc*[ce]d b aabcccd aabcccd -^\(a\)\1b\(c\)*cd$ b aabcccd aabcccd - -# ordinary repetitions -ab*c & abc abc -ab+c - abc abc -ab?c - abc abc -a\(*\)b b a*b a*b -a\(**\)b b ab ab -a\(***\)b bC BADRPT -*a b *a *a -**a b a a -***a bC BADRPT - -# the dreaded bounded repetitions -{ & { { -{abc & {abc {abc -{1 C BADRPT -{1} C BADRPT -a{b & a{b a{b -a{1}b - ab ab -a\{1\}b b ab ab -a{1,}b - ab ab -a\{1,\}b b ab ab -a{1,2}b - aab aab -a\{1,2\}b b aab aab -a{1 C EBRACE -a\{1 bC EBRACE -a{1a C EBRACE -a\{1a bC EBRACE -a{1a} C BADBR -a\{1a\} bC BADBR -a{,2} - a{,2} a{,2} -a\{,2\} bC BADBR -a{,} - a{,} a{,} -a\{,\} bC BADBR -a{1,x} C BADBR -a\{1,x\} bC BADBR -a{1,x C EBRACE -a\{1,x bC EBRACE -a{300} C BADBR -a\{300\} bC BADBR -a{1,0} C BADBR -a\{1,0\} bC BADBR -ab{0,0}c - abcac ac -ab\{0,0\}c b abcac ac -ab{0,1}c - abcac abc -ab\{0,1\}c b abcac abc -ab{0,3}c - abbcac abbc -ab\{0,3\}c b abbcac abbc -ab{1,1}c - acabc abc -ab\{1,1\}c b acabc abc -ab{1,3}c - acabc abc -ab\{1,3\}c b acabc abc -ab{2,2}c - abcabbc abbc -ab\{2,2\}c b abcabbc abbc -ab{2,4}c - abcabbc abbc -ab\{2,4\}c b abcabbc abbc -((a{1,10}){1,10}){1,10} - a a a,a - -# multiple repetitions -a** &C BADRPT -a++ C BADRPT -a?? C BADRPT -a*+ C BADRPT -a*? C BADRPT -a+* C BADRPT -a+? C BADRPT -a?* C BADRPT -a?+ C BADRPT -a{1}{1} C BADRPT -a*{1} C BADRPT -a+{1} C BADRPT -a?{1} C BADRPT -a{1}* C BADRPT -a{1}+ C BADRPT -a{1}? C BADRPT -a*{b} - a{b} a{b} -a\{1\}\{1\} bC BADRPT -a*\{1\} bC BADRPT -a\{1\}* bC BADRPT - -# brackets, and numerous perversions thereof -a[b]c & abc abc -a[ab]c & abc abc -a[^ab]c & adc adc -a[]b]c & a]c a]c -a[[b]c & a[c a[c -a[-b]c & a-c a-c -a[^]b]c & adc adc -a[^-b]c & adc adc -a[b-]c & a-c a-c -a[b &C EBRACK -a[] &C EBRACK -a[1-3]c & a2c a2c -a[3-1]c &C ERANGE -a[1-3-5]c &C ERANGE -a[[.-.]--]c & a-c a-c -a[1- &C ERANGE -a[[. &C EBRACK -a[[.x &C EBRACK -a[[.x. &C EBRACK -a[[.x.] &C EBRACK -a[[.x.]] & ax ax -a[[.x,.]] &C ECOLLATE -a[[.one.]]b & a1b a1b -a[[.notdef.]]b &C ECOLLATE -a[[.].]]b & a]b a]b -a[[:alpha:]]c & abc abc -a[[:notdef:]]c &C ECTYPE -a[[: &C EBRACK -a[[:alpha &C EBRACK -a[[:alpha:] &C EBRACK -a[[:alpha,:] &C ECTYPE -a[[:]:]]b &C ECTYPE -a[[:-:]]b &C ECTYPE -a[[:alph:]] &C ECTYPE -a[[:alphabet:]] &C ECTYPE -[[:alnum:]]+ - -%@a0X- a0X -[[:alpha:]]+ - -%@aX0- aX -[[:blank:]]+ - aSSTb SST -[[:cntrl:]]+ - aNTb NT -[[:digit:]]+ - a019b 019 -[[:graph:]]+ - Sa%bS a%b -[[:lower:]]+ - AabC ab -[[:print:]]+ - NaSbN aSb -[[:punct:]]+ - S%-&T %-& -[[:space:]]+ - aSNTb SNT -[[:upper:]]+ - aBCd BC -[[:xdigit:]]+ - p0f3Cq 0f3C -a[[=b=]]c & abc abc -a[[= &C EBRACK -a[[=b &C EBRACK -a[[=b= &C EBRACK -a[[=b=] &C EBRACK -a[[=b,=]] &C ECOLLATE -a[[=one=]]b & a1b a1b - -# complexities -a(((b)))c - abc abc -a(b|(c))d - abd abd -a(b*|c)d - abbd abbd -# just gotta have one DFA-buster, of course -a[ab]{20} - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab -# and an inline expansion in case somebody gets tricky -a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab -# and in case somebody just slips in an NFA... -a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) - aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights -# fish for anomalies as the number of states passes 32 -12345678901234567890123456789 - a12345678901234567890123456789b 12345678901234567890123456789 -123456789012345678901234567890 - a123456789012345678901234567890b 123456789012345678901234567890 -1234567890123456789012345678901 - a1234567890123456789012345678901b 1234567890123456789012345678901 -12345678901234567890123456789012 - a12345678901234567890123456789012b 12345678901234567890123456789012 -123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123 -# and one really big one, beyond any plausible word width -1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890 -# fish for problems as brackets go past 8 -[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm -[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo -[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq -[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq - -# subtleties of matching -abc & xabcy abc -a\(b\)?c\1d b acd -aBc i Abc Abc -a[Bc]*d i abBCcd abBCcd -0[[:upper:]]1 &i 0a1 0a1 -0[[:lower:]]1 &i 0A1 0A1 -a[^b]c &i abc -a[^b]c &i aBc -a[^b]c &i adc adc -[a]b[c] - abc abc -[a]b[a] - aba aba -[abc]b[abc] - abc abc -[abc]b[abd] - abd abd -a(b?c)+d - accd accd -(wee|week)(knights|night) - weeknights weeknights -(we|wee|week|frob)(knights|night|day) - weeknights weeknights -a[bc]d - xyzaaabcaababdacd abd -a[ab]c - aaabc abc -abc s abc abc -a* & b @b - -# Let's have some fun -- try to match a C comment. -# first the obvious, which looks okay at first glance... -/\*.*\*/ - /*x*/ /*x*/ -# but... -/\*.*\*/ - /*x*/y/*z*/ /*x*/y/*z*/ -# okay, we must not match */ inside; try to do that... -/\*([^*]|\*[^/])*\*/ - /*x*/ /*x*/ -/\*([^*]|\*[^/])*\*/ - /*x*/y/*z*/ /*x*/ -# but... -/\*([^*]|\*[^/])*\*/ - /*x**/y/*z*/ /*x**/y/*z*/ -# and a still fancier version, which does it right (I think)... -/\*([^*]|\*+[^*/])*\*+/ - /*x*/ /*x*/ -/\*([^*]|\*+[^*/])*\*+/ - /*x*/y/*z*/ /*x*/ -/\*([^*]|\*+[^*/])*\*+/ - /*x**/y/*z*/ /*x**/ -/\*([^*]|\*+[^*/])*\*+/ - /*x****/y/*z*/ /*x****/ -/\*([^*]|\*+[^*/])*\*+/ - /*x**x*/y/*z*/ /*x**x*/ -/\*([^*]|\*+[^*/])*\*+/ - /*x***x/y/*z*/ /*x***x/y/*z*/ - -# subexpressions -a(b)(c)d - abcd abcd b,c -a(((b)))c - abc abc b,b,b -a(b|(c))d - abd abd b,- -a(b*|c|e)d - abbd abbd bb -a(b*|c|e)d - acd acd c -a(b*|c|e)d - ad ad @d -a(b?)c - abc abc b -a(b?)c - ac ac @c -a(b+)c - abc abc b -a(b+)c - abbbc abbbc bbb -a(b*)c - ac ac @c -(a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de -# the regression tester only asks for 9 subexpressions -a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j -a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l - abcdefghijkl abcdefghijkl b,c,d,e,f,g,h,i,j,k -a([bc]?)c - abc abc b -a([bc]?)c - ac ac @c -a([bc]+)c - abc abc b -a([bc]+)c - abcc abcc bc -a([bc]+)bc - abcbc abcbc bc -a(bb+|b)b - abb abb b -a(bbb+|bb+|b)b - abb abb b -a(bbb+|bb+|b)b - abbb abbb bb -a(bbb+|bb+|b)bb - abbb abbb b -(.*).* - abcdef abcdef abcdef -(a*)* - bc @b @b - -# do we get the right subexpression when it is used more than once? -a(b|c)*d - ad ad - -a(b|c)*d - abcd abcd c -a(b|c)+d - abd abd b -a(b|c)+d - abcd abcd c -a(b|c?)+d - ad ad @d -a(b|c?)+d - abcd abcd @d -a(b|c){0,0}d - ad ad - -a(b|c){0,1}d - ad ad - -a(b|c){0,1}d - abd abd b -a(b|c){0,2}d - ad ad - -a(b|c){0,2}d - abcd abcd c -a(b|c){0,}d - ad ad - -a(b|c){0,}d - abcd abcd c -a(b|c){1,1}d - abd abd b -a(b|c){1,1}d - acd acd c -a(b|c){1,2}d - abd abd b -a(b|c){1,2}d - abcd abcd c -a(b|c){1,}d - abd abd b -a(b|c){1,}d - abcd abcd c -a(b|c){2,2}d - acbd acbd b -a(b|c){2,2}d - abcd abcd c -a(b|c){2,4}d - abcd abcd c -a(b|c){2,4}d - abcbd abcbd b -a(b|c){2,4}d - abcbcd abcbcd c -a(b|c){2,}d - abcd abcd c -a(b|c){2,}d - abcbd abcbd b -a(b+|((c)*))+d - abd abd @d,@d,- -a(b+|((c)*))+d - abcd abcd @d,@d,- - -# check out the STARTEND option -[abc] &# a(b)c b -[abc] &# a(d)c -[abc] &# a(bc)d b -[abc] &# a(dc)d c -. &# a()c -b.*c &# b(bc)c bc -b.* &# b(bc)c bc -.*c &# b(bc)c bc - -# plain strings, with the NOSPEC flag -abc m abc abc -abc m xabcy abc -abc m xyz -a*b m aba*b a*b -a*b m ab -"" mC EMPTY - -# cases involving NULs -aZb & a a -aZb &p a -aZb &p# (aZb) aZb -aZ*b &p# (ab) ab -a.b &# (aZb) aZb -a.* &# (aZb)c aZb - -# word boundaries (ick) -[[:<:]]a & a a -[[:<:]]a & ba -[[:<:]]a & -a a -a[[:>:]] & a a -a[[:>:]] & ab -a[[:>:]] & a- a -[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc -[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc -[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc -[[:<:]]b.c[[:>:]] & a_bxc-byc_d-bzc-q bzc -[[:<:]].x..[[:>:]] & y_xa_-_xb_y-_xc_-axdc _xc_ -[[:<:]]a_b[[:>:]] & x_a_b - -# past problems, and suspected problems -(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1 -abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop -abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv -(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11 -CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11 -Char \([a-z0-9_]*\)\[.* b Char xyz[k Char xyz[k xyz -a?b - ab ab --\{0,1\}[0-9]*$ b -5 -5 diff --git a/ext/ereg/regex/utils.h b/ext/ereg/regex/utils.h deleted file mode 100644 index cd4a96025f..0000000000 --- a/ext/ereg/regex/utils.h +++ /dev/null @@ -1,25 +0,0 @@ -/* utility definitions */ - -#include "regex_extra.h" - -#ifdef _POSIX2_RE_DUP_MAX -#define DUPMAX _POSIX2_RE_DUP_MAX -#else -#define DUPMAX 255 -#endif -#define INFINITY (DUPMAX + 1) -#define NC (CHAR_MAX - CHAR_MIN + 1) -typedef unsigned char uch; - -/* switch off assertions (if not already off) if no REDEBUG */ -#ifndef REDEBUG -#ifndef NDEBUG -#define NDEBUG /* no assertions please */ -#endif -#endif -#include - -/* for old systems with bcopy() but no memmove() */ -#ifdef USEBCOPY -#define memmove(d, s, c) bcopy(s, d, c) -#endif diff --git a/ext/ereg/tests/001.phpt b/ext/ereg/tests/001.phpt deleted file mode 100644 index f63c252518..0000000000 --- a/ext/ereg/tests/001.phpt +++ /dev/null @@ -1,9 +0,0 @@ ---TEST-- -RegReplace test 1 ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -abcdef diff --git a/ext/ereg/tests/002.phpt b/ext/ereg/tests/002.phpt deleted file mode 100644 index a9b7aaa00a..0000000000 --- a/ext/ereg/tests/002.phpt +++ /dev/null @@ -1,9 +0,0 @@ ---TEST-- -RegReplace test 2 ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -abc diff --git a/ext/ereg/tests/003.phpt b/ext/ereg/tests/003.phpt deleted file mode 100644 index edd9c05969..0000000000 --- a/ext/ereg/tests/003.phpt +++ /dev/null @@ -1,10 +0,0 @@ ---TEST-- -ereg_replace single-quote test ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -'test diff --git a/ext/ereg/tests/004.phpt b/ext/ereg/tests/004.phpt deleted file mode 100644 index 1f60ff4900..0000000000 --- a/ext/ereg/tests/004.phpt +++ /dev/null @@ -1,16 +0,0 @@ ---TEST-- -simple ereg test ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -ok -ok diff --git a/ext/ereg/tests/005.phpt b/ext/ereg/tests/005.phpt deleted file mode 100644 index 78c0a0912a..0000000000 --- a/ext/ereg/tests/005.phpt +++ /dev/null @@ -1,20 +0,0 @@ ---TEST-- -Test Regular expression register support in ereg ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -32 -This is a nice and simple string -is -is diff --git a/ext/ereg/tests/006.phpt b/ext/ereg/tests/006.phpt deleted file mode 100644 index 50b6dbfd3a..0000000000 --- a/ext/ereg/tests/006.phpt +++ /dev/null @@ -1,10 +0,0 @@ ---TEST-- -Test ereg_replace of start-of-line ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -That is a nice and simple string diff --git a/ext/ereg/tests/007.phpt b/ext/ereg/tests/007.phpt deleted file mode 100644 index b2646f842f..0000000000 --- a/ext/ereg/tests/007.phpt +++ /dev/null @@ -1,12 +0,0 @@ ---TEST-- -Test empty result buffer in reg_replace ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -strlen($b)=0 diff --git a/ext/ereg/tests/008.phpt b/ext/ereg/tests/008.phpt deleted file mode 100644 index db61d1ca07..0000000000 --- a/ext/ereg/tests/008.phpt +++ /dev/null @@ -1,10 +0,0 @@ ---TEST-- -Test back-references in regular expressions ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -123 abc +-|= diff --git a/ext/ereg/tests/009.phpt b/ext/ereg/tests/009.phpt deleted file mode 100644 index dba531e901..0000000000 --- a/ext/ereg/tests/009.phpt +++ /dev/null @@ -1,18 +0,0 @@ ---TEST-- -Test split() ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -4 -this -is -a -test diff --git a/ext/ereg/tests/010.phpt b/ext/ereg/tests/010.phpt deleted file mode 100644 index 30d28fd02f..0000000000 --- a/ext/ereg/tests/010.phpt +++ /dev/null @@ -1,9 +0,0 @@ ---TEST-- -Long back references ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -abc2222222222def2222222222 diff --git a/ext/ereg/tests/011.phpt b/ext/ereg/tests/011.phpt deleted file mode 100644 index 4eda774f58..0000000000 --- a/ext/ereg/tests/011.phpt +++ /dev/null @@ -1,9 +0,0 @@ ---TEST-- -\0 back reference ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -abcdef123ghi diff --git a/ext/ereg/tests/012.phpt b/ext/ereg/tests/012.phpt deleted file mode 100644 index d5342c7436..0000000000 --- a/ext/ereg/tests/012.phpt +++ /dev/null @@ -1,9 +0,0 @@ ---TEST-- -nonexisting back reference ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -abcdef\1ghi diff --git a/ext/ereg/tests/013.phpt b/ext/ereg/tests/013.phpt deleted file mode 100644 index ec3329fa7c..0000000000 --- a/ext/ereg/tests/013.phpt +++ /dev/null @@ -1,9 +0,0 @@ ---TEST-- -escapes in replace string ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -abcdef\g\\hi\ diff --git a/ext/ereg/tests/014.phpt b/ext/ereg/tests/014.phpt deleted file mode 100644 index ec4d19ed0e..0000000000 --- a/ext/ereg/tests/014.phpt +++ /dev/null @@ -1,9 +0,0 @@ ---TEST-- -backreferences not replaced recursively ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -\2 diff --git a/ext/ereg/tests/015.phpt b/ext/ereg/tests/015.phpt deleted file mode 100644 index 961a60fa76..0000000000 --- a/ext/ereg/tests/015.phpt +++ /dev/null @@ -1,8 +0,0 @@ ---TEST-- -replace empty matches ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -zabc123 diff --git a/ext/ereg/tests/016.phpt b/ext/ereg/tests/016.phpt deleted file mode 100644 index a24816f182..0000000000 --- a/ext/ereg/tests/016.phpt +++ /dev/null @@ -1,8 +0,0 @@ ---TEST-- -test backslash handling in regular expressions ---POST-- ---GET-- ---FILE-- - ---EXPECT-- -abc123abc -- cgit v1.2.1