diff options
author | Sascha Schumann <sas@php.net> | 1999-11-12 20:27:52 +0000 |
---|---|---|
committer | Sascha Schumann <sas@php.net> | 1999-11-12 20:27:52 +0000 |
commit | 6f572248907b8489b360ed2f030c10a2f5bb2e67 (patch) | |
tree | e5ff355bae61dce747aa699b24bdaabc025e8c45 | |
parent | e20e9786b3a1de2f209df560c43e36eb74af046a (diff) | |
download | php-git-6f572248907b8489b360ed2f030c10a2f5bb2e67.tar.gz |
upgrade regex library to alpha3.8
@- Upgraded regex library to alpha3.8 (Sascha)
-rw-r--r-- | ext/ereg/regex/cclass.h | 44 | ||||
-rw-r--r-- | ext/ereg/regex/cname.h | 192 | ||||
-rw-r--r-- | ext/ereg/regex/engine.c | 10 | ||||
-rw-r--r-- | ext/ereg/regex/regcomp.c | 85 | ||||
-rw-r--r-- | ext/ereg/regex/regcomp.ih | 3 | ||||
-rw-r--r-- | ext/ereg/regex/regerror.c | 44 | ||||
-rw-r--r-- | ext/ereg/regex/regex.h | 1 | ||||
-rw-r--r-- | ext/ereg/regex/regex2.h | 20 | ||||
-rw-r--r-- | ext/ereg/regex/regexec.c | 14 | ||||
-rw-r--r-- | ext/ereg/regex/utils.h | 8 | ||||
-rw-r--r-- | regex/cclass.h | 44 | ||||
-rw-r--r-- | regex/cname.h | 192 | ||||
-rw-r--r-- | regex/engine.c | 10 | ||||
-rw-r--r-- | regex/regcomp.c | 85 | ||||
-rw-r--r-- | regex/regcomp.ih | 3 | ||||
-rw-r--r-- | regex/regerror.c | 44 | ||||
-rw-r--r-- | regex/regex.h | 1 | ||||
-rw-r--r-- | regex/regex2.h | 20 | ||||
-rw-r--r-- | regex/regexec.c | 14 | ||||
-rw-r--r-- | regex/utils.h | 8 |
20 files changed, 484 insertions, 358 deletions
diff --git a/ext/ereg/regex/cclass.h b/ext/ereg/regex/cclass.h index 727cbb9255..0c293028e9 100644 --- a/ext/ereg/regex/cclass.h +++ b/ext/ereg/regex/cclass.h @@ -4,28 +4,28 @@ static struct cclass { char *chars; char *multis; } cclasses[] = { - { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789", "" }, - { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", - "" }, - { "blank", " \t", "" }, - { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ -\25\26\27\30\31\32\33\34\35\36\37\177", "" }, - { "digit", "0123456789", "" }, - { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ + "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789", "", + "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", + "", + "blank", " \t", "", + "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ +\25\26\27\30\31\32\33\34\35\36\37\177", "", + "digit", "0123456789", "", + "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - "" }, - { "lower", "abcdefghijklmnopqrstuvwxyz", - "" }, - { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ + "", + "lower", "abcdefghijklmnopqrstuvwxyz", + "", + "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", - "" }, - { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - "" }, - { "space", "\t\n\v\f\r ", "" }, - { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", - "" }, - { "xdigit", "0123456789ABCDEFabcdef", - "" }, - { NULL, 0, "" } + "", + "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "", + "space", "\t\n\v\f\r ", "", + "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + "", + "xdigit", "0123456789ABCDEFabcdef", + "", + NULL, 0, "" }; diff --git a/ext/ereg/regex/cname.h b/ext/ereg/regex/cname.h index ff116e55e2..02e86e912e 100644 --- a/ext/ereg/regex/cname.h +++ b/ext/ereg/regex/cname.h @@ -3,100 +3,100 @@ static struct cname { char *name; 
char code; } cnames[] = { - { "NUL", '\0' }, - { "SOH", '\001' }, - { "STX", '\002' }, - { "ETX", '\003' }, - { "EOT", '\004' }, - { "ENQ", '\005' }, - { "ACK", '\006' }, - { "BEL", '\007' }, - { "alert", '\007' }, - { "BS", '\010' }, - { "backspace", '\b' }, - { "HT", '\011' }, - { "tab", '\t' }, - { "LF", '\012' }, - { "newline", '\n' }, - { "VT", '\013' }, - { "vertical-tab", '\v' }, - { "FF", '\014' }, - { "form-feed", '\f' }, - { "CR", '\015' }, - { "carriage-return", '\r' }, - { "SO", '\016' }, - { "SI", '\017' }, - { "DLE", '\020' }, - { "DC1", '\021' }, - { "DC2", '\022' }, - { "DC3", '\023' }, - { "DC4", '\024' }, - { "NAK", '\025' }, - { "SYN", '\026' }, - { "ETB", '\027' }, - { "CAN", '\030' }, - { "EM", '\031' }, - { "SUB", '\032' }, - { "ESC", '\033' }, - { "IS4", '\034' }, - { "FS", '\034' }, - { "IS3", '\035' }, - { "GS", '\035' }, - { "IS2", '\036' }, - { "RS", '\036' }, - { "IS1", '\037' }, - { "US", '\037' }, - { "space", ' ' }, - { "exclamation-mark", '!' }, - { "quotation-mark", '"' }, - { "number-sign", '#' }, - { "dollar-sign", '$' }, - { "percent-sign", '%' }, - { "ampersand", '&' }, - { "apostrophe", '\'' }, - { "left-parenthesis", '(' }, - { "right-parenthesis", ')' }, - { "asterisk", '*' }, - { "plus-sign", '+' }, - { "comma", ',' }, - { "hyphen", '-' }, - { "hyphen-minus", '-' }, - { "period", '.' }, - { "full-stop", '.' }, - { "slash", '/' }, - { "solidus", '/' }, - { "zero", '0' }, - { "one", '1' }, - { "two", '2' }, - { "three", '3' }, - { "four", '4' }, - { "five", '5' }, - { "six", '6' }, - { "seven", '7' }, - { "eight", '8' }, - { "nine", '9' }, - { "colon", ':' }, - { "semicolon", ';' }, - { "less-than-sign", '<' }, - { "equals-sign", '=' }, - { "greater-than-sign", '>' }, - { "question-mark", '?' 
}, - { "commercial-at", '@' }, - { "left-square-bracket", '[' }, - { "backslash", '\\' }, - { "reverse-solidus", '\\' }, - { "right-square-bracket", ']' }, - { "circumflex", '^' }, - { "circumflex-accent", '^' }, - { "underscore", '_' }, - { "low-line", '_' }, - { "grave-accent", '`' }, - { "left-brace", '{' }, - { "left-curly-bracket", '{' }, - { "vertical-line", '|' }, - { "right-brace", '}' }, - { "right-curly-bracket", '}' }, - { "tilde", '~' }, - { "DEL", '\177' }, - { NULL, 0 } + "NUL", '\0', + "SOH", '\001', + "STX", '\002', + "ETX", '\003', + "EOT", '\004', + "ENQ", '\005', + "ACK", '\006', + "BEL", '\007', + "alert", '\007', + "BS", '\010', + "backspace", '\b', + "HT", '\011', + "tab", '\t', + "LF", '\012', + "newline", '\n', + "VT", '\013', + "vertical-tab", '\v', + "FF", '\014', + "form-feed", '\f', + "CR", '\015', + "carriage-return", '\r', + "SO", '\016', + "SI", '\017', + "DLE", '\020', + "DC1", '\021', + "DC2", '\022', + "DC3", '\023', + "DC4", '\024', + "NAK", '\025', + "SYN", '\026', + "ETB", '\027', + "CAN", '\030', + "EM", '\031', + "SUB", '\032', + "ESC", '\033', + "IS4", '\034', + "FS", '\034', + "IS3", '\035', + "GS", '\035', + "IS2", '\036', + "RS", '\036', + "IS1", '\037', + "US", '\037', + "space", ' ', + "exclamation-mark", '!', + "quotation-mark", '"', + "number-sign", '#', + "dollar-sign", '$', + "percent-sign", '%', + "ampersand", '&', + "apostrophe", '\'', + "left-parenthesis", '(', + "right-parenthesis", ')', + "asterisk", '*', + "plus-sign", '+', + "comma", ',', + "hyphen", '-', + "hyphen-minus", '-', + "period", '.', + "full-stop", '.', + "slash", '/', + "solidus", '/', + "zero", '0', + "one", '1', + "two", '2', + "three", '3', + "four", '4', + "five", '5', + "six", '6', + "seven", '7', + "eight", '8', + "nine", '9', + "colon", ':', + "semicolon", ';', + "less-than-sign", '<', + "equals-sign", '=', + "greater-than-sign", '>', + "question-mark", '?', + "commercial-at", '@', + "left-square-bracket", '[', + "backslash", '\\', + 
"reverse-solidus", '\\', + "right-square-bracket", ']', + "circumflex", '^', + "circumflex-accent", '^', + "underscore", '_', + "low-line", '_', + "grave-accent", '`', + "left-brace", '{', + "left-curly-bracket", '{', + "vertical-line", '|', + "right-brace", '}', + "right-curly-bracket", '}', + "tilde", '~', + "DEL", '\177', + NULL, 0, }; diff --git a/ext/ereg/regex/engine.c b/ext/ereg/regex/engine.c index 696774e1b3..919fe3f641 100644 --- a/ext/ereg/regex/engine.c +++ b/ext/ereg/regex/engine.c @@ -71,12 +71,12 @@ regmatch_t pmatch[]; int eflags; { register char *endp; - register unsigned int i; + register int i; struct match mv; register struct match *m = &mv; register char *dp; - register const sopno gf = g->firststate+1; /* +1 for OEND */ - register const sopno gl = g->laststate; + const register sopno gf = g->firststate+1; /* +1 for OEND */ + const register sopno gl = g->laststate; char *start; char *stop; @@ -613,7 +613,7 @@ sopno lev; /* PLUS nesting level */ /* "can't happen" */ assert(nope); /* NOTREACHED */ - return( NULL ); + return((char *)NULL); /* dummy */ } /* @@ -822,7 +822,7 @@ register states aft; /* states already known reachable after */ register sopno pc; register onestate here; /* note, macros know this name */ register sopno look; - register int i; + register long i; for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { s = g->strip[pc]; diff --git a/ext/ereg/regex/regcomp.c b/ext/ereg/regex/regcomp.c index b481be0f55..e3f0c1be4e 100644 --- a/ext/ereg/regex/regcomp.c +++ b/ext/ereg/regex/regcomp.c @@ -46,14 +46,14 @@ static char nuls[10]; /* place to point scanner in event of error */ #define MORE2() (p->next+1 < p->end) #define SEE(c) (MORE() && PEEK() == (c)) #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) -#define EAT(c) ((SEE(c)) ? (NEXT1(), 1) : 0) +#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) #define EATTWO(a, b) ((SEETWO(a, b)) ? 
(NEXT2(), 1) : 0) -#define NEXT1() (p->next++) +#define NEXT() (p->next++) #define NEXT2() (p->next += 2) #define NEXTn(n) (p->next += (n)) #define GETNEXT() (*p->next++) #define SETERROR(e) seterr(p, (e)) -#define REQUIRE(co, e) ((void)((co) || SETERROR(e))) +#define REQUIRE(co, e) ((co) || SETERROR(e)) #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) @@ -326,7 +326,7 @@ register struct parse *p; if (!( c == '*' || c == '+' || c == '?' || (c == '{' && MORE2() && isdigit(PEEK2())) )) return; /* no repetition, we're done */ - NEXT1(); + NEXT(); REQUIRE(!wascaret, REG_BADRPT); switch (c) { @@ -363,7 +363,7 @@ register struct parse *p; repeat(p, pos, count, count2); if (!EAT('}')) { /* error heuristics */ while (MORE() && PEEK() != '}') - NEXT1(); + NEXT(); REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } @@ -518,7 +518,7 @@ int starordinary; /* is a leading * an ordinary character? */ REQUIRE(starordinary, REG_BADRPT); /* FALLTHROUGH */ default: - ordinary(p, c &~ BACKSL); + ordinary(p, (char)c); /* takes off BACKSL, if any */ break; } @@ -541,7 +541,7 @@ int starordinary; /* is a leading * an ordinary character? 
*/ repeat(p, pos, count, count2); if (!EATTWO('\\', '}')) { /* error heuristics */ while (MORE() && !SEETWO('\\', '}')) - NEXT1(); + NEXT(); REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } @@ -699,7 +699,7 @@ register cset *cs; start = p_b_symbol(p); if (SEE('-') && MORE2() && PEEK2() != ']') { /* range */ - NEXT1(); + NEXT(); if (EAT('-')) finish = '-'; else @@ -730,7 +730,7 @@ register cset *cs; register char c; while (MORE() && isalpha(PEEK())) - NEXT1(); + NEXT(); len = p->next - sp; for (cp = cclasses; cp->name != NULL; cp++) if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') @@ -799,7 +799,7 @@ int endc; /* name ended by endc,']' */ register int len; while (MORE() && !SEETWO(endc, ']')) - NEXT1(); + NEXT(); if (!MORE()) { SETERROR(REG_EBRACK); return(0); @@ -1056,7 +1056,7 @@ freeset(p, cs) register struct parse *p; register cset *cs; { - register unsigned int i; + register int i; register cset *top = &p->g->sets[p->g->ncsets]; register size_t css = (size_t)p->g->csetsize; @@ -1082,7 +1082,7 @@ register struct parse *p; register cset *cs; { register uch h = cs->hash; - register unsigned int i; + register int i; register cset *top = &p->g->sets[p->g->ncsets]; register cset *cs2; register size_t css = (size_t)p->g->csetsize; @@ -1115,7 +1115,7 @@ firstch(p, cs) register struct parse *p; register cset *cs; { - register unsigned int i; + register int i; register size_t css = (size_t)p->g->csetsize; for (i = 0; i < css; i++) @@ -1134,7 +1134,7 @@ nch(p, cs) register struct parse *p; register cset *cs; { - register unsigned int i; + register int i; register size_t css = (size_t)p->g->csetsize; register int n = 0; @@ -1171,6 +1171,63 @@ register char *cp; cs->multis[cs->smultis - 1] = '\0'; } +/* + - mcsub - subtract a collating element from a cset + == static void mcsub(register cset *cs, register char *cp); + */ +static void +mcsub(cs, cp) +register cset *cs; +register char *cp; +{ + register char *fp = mcfind(cs, cp); + register size_t len = 
strlen(fp); + + assert(fp != NULL); + (void) memmove(fp, fp + len + 1, + cs->smultis - (fp + len + 1 - cs->multis)); + cs->smultis -= len; + + if (cs->smultis == 0) { + free(cs->multis); + cs->multis = NULL; + return; + } + + cs->multis = realloc(cs->multis, cs->smultis); + assert(cs->multis != NULL); +} + +/* + - mcin - is a collating element in a cset? + == static int mcin(register cset *cs, register char *cp); + */ +static int +mcin(cs, cp) +register cset *cs; +register char *cp; +{ + return(mcfind(cs, cp) != NULL); +} + +/* + - mcfind - find a collating element in a cset + == static char *mcfind(register cset *cs, register char *cp); + */ +static char * +mcfind(cs, cp) +register cset *cs; +register char *cp; +{ + register char *p; + + if (cs->multis == NULL) + return(NULL); + for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) + if (strcmp(cp, p) == 0) + return(p); + return(NULL); +} /* - mcinvert - invert the list of collating elements in a cset diff --git a/ext/ereg/regex/regcomp.ih b/ext/ereg/regex/regcomp.ih index f0eeb5f02d..0776e7185c 100644 --- a/ext/ereg/regex/regcomp.ih +++ b/ext/ereg/regex/regcomp.ih @@ -28,6 +28,9 @@ static int freezeset(register struct parse *p, register cset *cs); static int firstch(register struct parse *p, register cset *cs); static int nch(register struct parse *p, register cset *cs); static void mcadd(register struct parse *p, register cset *cs, register char *cp); +static void mcsub(register cset *cs, register char *cp); +static int mcin(register cset *cs, register char *cp); +static char *mcfind(register cset *cs, register char *cp); static void mcinvert(register struct parse *p, register cset *cs); static void mccase(register struct parse *p, register cset *cs); static int isinsets(register struct re_guts *g, int c); diff --git a/ext/ereg/regex/regerror.c b/ext/ereg/regex/regerror.c index 12edd4e164..9158870322 100644 --- a/ext/ereg/regex/regerror.c +++ b/ext/ereg/regex/regerror.c @@ -10,6 +10,7 @@ #include "regerror.ih" /* 
+ = #define REG_OKAY 0 = #define REG_NOMATCH 1 = #define REG_BADPAT 2 = #define REG_ECOLLATE 3 @@ -34,23 +35,24 @@ static struct rerr { char *name; char *explain; } rerrs[] = { - { REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match" }, - { REG_BADPAT, "REG_BADPAT", "invalid regular expression" }, - { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" }, - { REG_ECTYPE, "REG_ECTYPE", "invalid character class" }, - { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" }, - { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" }, - { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" }, - { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" }, - { REG_EBRACE, "REG_EBRACE", "braces not balanced" }, - { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" }, - { REG_ERANGE, "REG_ERANGE", "invalid character range" }, - { REG_ESPACE, "REG_ESPACE", "out of memory" }, - { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" }, - { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" }, - { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" }, - { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" }, - { 0, "", "*** unknown regexp error code ***" } + REG_OKAY, "REG_OKAY", "no errors detected", + REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match", + REG_BADPAT, "REG_BADPAT", "invalid regular expression", + REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element", + REG_ECTYPE, "REG_ECTYPE", "invalid character class", + REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)", + REG_ESUBREG, "REG_ESUBREG", "invalid backreference number", + REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced", + REG_EPAREN, "REG_EPAREN", "parentheses not balanced", + REG_EBRACE, "REG_EBRACE", "braces not balanced", + REG_BADBR, "REG_BADBR", "invalid repetition count(s)", + REG_ERANGE, "REG_ERANGE", "invalid character range", + REG_ESPACE, "REG_ESPACE", "out of memory", + REG_BADRPT, "REG_BADRPT", "repetition-operator operand 
invalid", + REG_EMPTY, "REG_EMPTY", "empty (sub)expression", + REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug", + REG_INVARG, "REG_INVARG", "invalid argument to regex routine", + -1, "", "*** unknown regexp error code ***", }; /* @@ -74,12 +76,12 @@ size_t errbuf_size; if (errcode == REG_ATOI) s = regatoi(preg, convbuf); else { - for (r = rerrs; r->code != 0; r++) + for (r = rerrs; r->code >= 0; r++) if (r->code == target) break; if (errcode&REG_ITOA) { - if (r->code != 0) + if (r->code >= 0) (void) strcpy(convbuf, r->name); else sprintf(convbuf, "REG_0x%x", target); @@ -113,10 +115,10 @@ char *localbuf; { register struct rerr *r; - for (r = rerrs; r->code != 0; r++) + for (r = rerrs; r->code >= 0; r++) if (strcmp(r->name, preg->re_endp) == 0) break; - if (r->code == 0) + if (r->code < 0) return("0"); sprintf(localbuf, "%d", r->code); diff --git a/ext/ereg/regex/regex.h b/ext/ereg/regex/regex.h index 985ad02959..b39c5e178c 100644 --- a/ext/ereg/regex/regex.h +++ b/ext/ereg/regex/regex.h @@ -40,6 +40,7 @@ API_EXPORT(int) regcomp(regex_t *, const char *, int); /* === regerror.c === */ +#define REG_OKAY 0 #define REG_NOMATCH 1 #define REG_BADPAT 2 #define REG_ECOLLATE 3 diff --git a/ext/ereg/regex/regex2.h b/ext/ereg/regex/regex2.h index 3e8c0d65c4..7cd39a59ae 100644 --- a/ext/ereg/regex/regex2.h +++ b/ext/ereg/regex/regex2.h @@ -42,11 +42,11 @@ * In state representations, an operator's bit is on to signify a state * immediately *preceding* "execution" of that operator. 
*/ -typedef unsigned long sop; /* strip operator */ +typedef long sop; /* strip operator */ typedef long sopno; -#define OPRMASK 0xf8000000 -#define OPDMASK 0x07ffffff -#define OPSHIFT ((unsigned)27) +#define OPRMASK 0x7c000000 +#define OPDMASK 0x03ffffff +#define OPSHIFT (26) #define OP(n) ((n)&OPRMASK) #define OPND(n) ((n)&OPDMASK) #define SOP(op, opnd) ((op)|(opnd)) @@ -67,11 +67,11 @@ typedef long sopno; #define OLPAREN (13<<OPSHIFT) /* ( fwd to ) */ #define ORPAREN (14<<OPSHIFT) /* ) back to ( */ #define OCH_ (15<<OPSHIFT) /* begin choice fwd to OOR2 */ -#define OOR1 (16u<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */ -#define OOR2 (17u<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */ -#define O_CH (18u<<OPSHIFT) /* end choice back to OOR1 */ -#define OBOW (19u<<OPSHIFT) /* begin word - */ -#define OEOW (20u<<OPSHIFT) /* end word - */ +#define OOR1 (16<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */ +#define OOR2 (17<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */ +#define O_CH (18<<OPSHIFT) /* end choice back to OOR1 */ +#define OBOW (19<<OPSHIFT) /* begin word - */ +#define OEOW (20<<OPSHIFT) /* end word - */ /* * Structure for [] character-set representation. 
Character sets are @@ -97,6 +97,8 @@ typedef struct { #define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) #define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask) #define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */ +#define MCsub(p, cs, cp) mcsub(p, cs, cp) +#define MCin(p, cs, cp) mcin(p, cs, cp) /* stuff for character categories */ typedef unsigned char cat_t; diff --git a/ext/ereg/regex/regexec.c b/ext/ereg/regex/regexec.c index a552f5a394..7888d02368 100644 --- a/ext/ereg/regex/regexec.c +++ b/ext/ereg/regex/regexec.c @@ -16,24 +16,22 @@ #include "utils.h" #include "regex2.h" -#ifndef NDEBUG static int nope = 0; /* for use in asserts; shuts lint up */ -#endif /* macros for manipulating states, small version */ -#define states long -#define states1 states /* for later use in regexec() decision */ +#define states unsigned +#define states1 unsigned /* for later use in regexec() decision */ #define CLEAR(v) ((v) = 0) -#define SET0(v, n) ((v) &= ~(1 << (n))) -#define SET1(v, n) ((v) |= 1 << (n)) -#define ISSET(v, n) ((v) & (1 << (n))) +#define SET0(v, n) ((v) &= ~((unsigned)1 << (n))) +#define SET1(v, n) ((v) |= (unsigned)1 << (n)) +#define ISSET(v, n) ((v) & ((unsigned)1 << (n))) #define ASSIGN(d, s) ((d) = (s)) #define EQ(a, b) ((a) == (b)) #define STATEVARS int dummy /* dummy version */ #define STATESETUP(m, n) /* nothing */ #define STATETEARDOWN(m) /* nothing */ #define SETUP(v) ((v) = 0) -#define onestate int +#define onestate unsigned #define INIT(o, n) ((o) = (unsigned)1 << (n)) #define INC(o) ((o) <<= 1) #define ISSTATEIN(v, o) ((v) & (o)) diff --git a/ext/ereg/regex/utils.h b/ext/ereg/regex/utils.h index f271f759b1..1a997ac8fc 100644 --- a/ext/ereg/regex/utils.h +++ b/ext/ereg/regex/utils.h @@ -1,9 +1,9 @@ /* utility definitions */ -#ifndef _POSIX2_RE_DUP_MAX -#define _POSIX2_RE_DUP_MAX 255 +#ifdef _POSIX2_RE_DUP_MAX +#define DUPMAX _POSIX2_RE_DUP_MAX +#else +#define DUPMAX 255 #endif - -#define DUPMAX 
_POSIX2_RE_DUP_MAX /* xxx is this right? */ #define INFINITY (DUPMAX + 1) #define NC (CHAR_MAX - CHAR_MIN + 1) typedef unsigned char uch; diff --git a/regex/cclass.h b/regex/cclass.h index 727cbb9255..0c293028e9 100644 --- a/regex/cclass.h +++ b/regex/cclass.h @@ -4,28 +4,28 @@ static struct cclass { char *chars; char *multis; } cclasses[] = { - { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789", "" }, - { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", - "" }, - { "blank", " \t", "" }, - { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ -\25\26\27\30\31\32\33\34\35\36\37\177", "" }, - { "digit", "0123456789", "" }, - { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ + "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789", "", + "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", + "", + "blank", " \t", "", + "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ +\25\26\27\30\31\32\33\34\35\36\37\177", "", + "digit", "0123456789", "", + "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - "" }, - { "lower", "abcdefghijklmnopqrstuvwxyz", - "" }, - { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ + "", + "lower", "abcdefghijklmnopqrstuvwxyz", + "", + "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", - "" }, - { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - "" }, - { "space", "\t\n\v\f\r ", "" }, - { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", - "" }, - { "xdigit", "0123456789ABCDEFabcdef", - "" }, - { NULL, 0, "" } + "", + "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "", + "space", "\t\n\v\f\r ", "", + "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + "", + "xdigit", "0123456789ABCDEFabcdef", + "", + NULL, 0, "" }; diff --git a/regex/cname.h b/regex/cname.h index ff116e55e2..02e86e912e 100644 --- a/regex/cname.h +++ 
b/regex/cname.h @@ -3,100 +3,100 @@ static struct cname { char *name; char code; } cnames[] = { - { "NUL", '\0' }, - { "SOH", '\001' }, - { "STX", '\002' }, - { "ETX", '\003' }, - { "EOT", '\004' }, - { "ENQ", '\005' }, - { "ACK", '\006' }, - { "BEL", '\007' }, - { "alert", '\007' }, - { "BS", '\010' }, - { "backspace", '\b' }, - { "HT", '\011' }, - { "tab", '\t' }, - { "LF", '\012' }, - { "newline", '\n' }, - { "VT", '\013' }, - { "vertical-tab", '\v' }, - { "FF", '\014' }, - { "form-feed", '\f' }, - { "CR", '\015' }, - { "carriage-return", '\r' }, - { "SO", '\016' }, - { "SI", '\017' }, - { "DLE", '\020' }, - { "DC1", '\021' }, - { "DC2", '\022' }, - { "DC3", '\023' }, - { "DC4", '\024' }, - { "NAK", '\025' }, - { "SYN", '\026' }, - { "ETB", '\027' }, - { "CAN", '\030' }, - { "EM", '\031' }, - { "SUB", '\032' }, - { "ESC", '\033' }, - { "IS4", '\034' }, - { "FS", '\034' }, - { "IS3", '\035' }, - { "GS", '\035' }, - { "IS2", '\036' }, - { "RS", '\036' }, - { "IS1", '\037' }, - { "US", '\037' }, - { "space", ' ' }, - { "exclamation-mark", '!' }, - { "quotation-mark", '"' }, - { "number-sign", '#' }, - { "dollar-sign", '$' }, - { "percent-sign", '%' }, - { "ampersand", '&' }, - { "apostrophe", '\'' }, - { "left-parenthesis", '(' }, - { "right-parenthesis", ')' }, - { "asterisk", '*' }, - { "plus-sign", '+' }, - { "comma", ',' }, - { "hyphen", '-' }, - { "hyphen-minus", '-' }, - { "period", '.' }, - { "full-stop", '.' }, - { "slash", '/' }, - { "solidus", '/' }, - { "zero", '0' }, - { "one", '1' }, - { "two", '2' }, - { "three", '3' }, - { "four", '4' }, - { "five", '5' }, - { "six", '6' }, - { "seven", '7' }, - { "eight", '8' }, - { "nine", '9' }, - { "colon", ':' }, - { "semicolon", ';' }, - { "less-than-sign", '<' }, - { "equals-sign", '=' }, - { "greater-than-sign", '>' }, - { "question-mark", '?' 
}, - { "commercial-at", '@' }, - { "left-square-bracket", '[' }, - { "backslash", '\\' }, - { "reverse-solidus", '\\' }, - { "right-square-bracket", ']' }, - { "circumflex", '^' }, - { "circumflex-accent", '^' }, - { "underscore", '_' }, - { "low-line", '_' }, - { "grave-accent", '`' }, - { "left-brace", '{' }, - { "left-curly-bracket", '{' }, - { "vertical-line", '|' }, - { "right-brace", '}' }, - { "right-curly-bracket", '}' }, - { "tilde", '~' }, - { "DEL", '\177' }, - { NULL, 0 } + "NUL", '\0', + "SOH", '\001', + "STX", '\002', + "ETX", '\003', + "EOT", '\004', + "ENQ", '\005', + "ACK", '\006', + "BEL", '\007', + "alert", '\007', + "BS", '\010', + "backspace", '\b', + "HT", '\011', + "tab", '\t', + "LF", '\012', + "newline", '\n', + "VT", '\013', + "vertical-tab", '\v', + "FF", '\014', + "form-feed", '\f', + "CR", '\015', + "carriage-return", '\r', + "SO", '\016', + "SI", '\017', + "DLE", '\020', + "DC1", '\021', + "DC2", '\022', + "DC3", '\023', + "DC4", '\024', + "NAK", '\025', + "SYN", '\026', + "ETB", '\027', + "CAN", '\030', + "EM", '\031', + "SUB", '\032', + "ESC", '\033', + "IS4", '\034', + "FS", '\034', + "IS3", '\035', + "GS", '\035', + "IS2", '\036', + "RS", '\036', + "IS1", '\037', + "US", '\037', + "space", ' ', + "exclamation-mark", '!', + "quotation-mark", '"', + "number-sign", '#', + "dollar-sign", '$', + "percent-sign", '%', + "ampersand", '&', + "apostrophe", '\'', + "left-parenthesis", '(', + "right-parenthesis", ')', + "asterisk", '*', + "plus-sign", '+', + "comma", ',', + "hyphen", '-', + "hyphen-minus", '-', + "period", '.', + "full-stop", '.', + "slash", '/', + "solidus", '/', + "zero", '0', + "one", '1', + "two", '2', + "three", '3', + "four", '4', + "five", '5', + "six", '6', + "seven", '7', + "eight", '8', + "nine", '9', + "colon", ':', + "semicolon", ';', + "less-than-sign", '<', + "equals-sign", '=', + "greater-than-sign", '>', + "question-mark", '?', + "commercial-at", '@', + "left-square-bracket", '[', + "backslash", '\\', + 
"reverse-solidus", '\\', + "right-square-bracket", ']', + "circumflex", '^', + "circumflex-accent", '^', + "underscore", '_', + "low-line", '_', + "grave-accent", '`', + "left-brace", '{', + "left-curly-bracket", '{', + "vertical-line", '|', + "right-brace", '}', + "right-curly-bracket", '}', + "tilde", '~', + "DEL", '\177', + NULL, 0, }; diff --git a/regex/engine.c b/regex/engine.c index 696774e1b3..919fe3f641 100644 --- a/regex/engine.c +++ b/regex/engine.c @@ -71,12 +71,12 @@ regmatch_t pmatch[]; int eflags; { register char *endp; - register unsigned int i; + register int i; struct match mv; register struct match *m = &mv; register char *dp; - register const sopno gf = g->firststate+1; /* +1 for OEND */ - register const sopno gl = g->laststate; + const register sopno gf = g->firststate+1; /* +1 for OEND */ + const register sopno gl = g->laststate; char *start; char *stop; @@ -613,7 +613,7 @@ sopno lev; /* PLUS nesting level */ /* "can't happen" */ assert(nope); /* NOTREACHED */ - return( NULL ); + return((char *)NULL); /* dummy */ } /* @@ -822,7 +822,7 @@ register states aft; /* states already known reachable after */ register sopno pc; register onestate here; /* note, macros know this name */ register sopno look; - register int i; + register long i; for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { s = g->strip[pc]; diff --git a/regex/regcomp.c b/regex/regcomp.c index b481be0f55..e3f0c1be4e 100644 --- a/regex/regcomp.c +++ b/regex/regcomp.c @@ -46,14 +46,14 @@ static char nuls[10]; /* place to point scanner in event of error */ #define MORE2() (p->next+1 < p->end) #define SEE(c) (MORE() && PEEK() == (c)) #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) -#define EAT(c) ((SEE(c)) ? (NEXT1(), 1) : 0) +#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) #define EATTWO(a, b) ((SEETWO(a, b)) ? 
(NEXT2(), 1) : 0) -#define NEXT1() (p->next++) +#define NEXT() (p->next++) #define NEXT2() (p->next += 2) #define NEXTn(n) (p->next += (n)) #define GETNEXT() (*p->next++) #define SETERROR(e) seterr(p, (e)) -#define REQUIRE(co, e) ((void)((co) || SETERROR(e))) +#define REQUIRE(co, e) ((co) || SETERROR(e)) #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) @@ -326,7 +326,7 @@ register struct parse *p; if (!( c == '*' || c == '+' || c == '?' || (c == '{' && MORE2() && isdigit(PEEK2())) )) return; /* no repetition, we're done */ - NEXT1(); + NEXT(); REQUIRE(!wascaret, REG_BADRPT); switch (c) { @@ -363,7 +363,7 @@ register struct parse *p; repeat(p, pos, count, count2); if (!EAT('}')) { /* error heuristics */ while (MORE() && PEEK() != '}') - NEXT1(); + NEXT(); REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } @@ -518,7 +518,7 @@ int starordinary; /* is a leading * an ordinary character? */ REQUIRE(starordinary, REG_BADRPT); /* FALLTHROUGH */ default: - ordinary(p, c &~ BACKSL); + ordinary(p, (char)c); /* takes off BACKSL, if any */ break; } @@ -541,7 +541,7 @@ int starordinary; /* is a leading * an ordinary character? 
*/ repeat(p, pos, count, count2); if (!EATTWO('\\', '}')) { /* error heuristics */ while (MORE() && !SEETWO('\\', '}')) - NEXT1(); + NEXT(); REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } @@ -699,7 +699,7 @@ register cset *cs; start = p_b_symbol(p); if (SEE('-') && MORE2() && PEEK2() != ']') { /* range */ - NEXT1(); + NEXT(); if (EAT('-')) finish = '-'; else @@ -730,7 +730,7 @@ register cset *cs; register char c; while (MORE() && isalpha(PEEK())) - NEXT1(); + NEXT(); len = p->next - sp; for (cp = cclasses; cp->name != NULL; cp++) if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') @@ -799,7 +799,7 @@ int endc; /* name ended by endc,']' */ register int len; while (MORE() && !SEETWO(endc, ']')) - NEXT1(); + NEXT(); if (!MORE()) { SETERROR(REG_EBRACK); return(0); @@ -1056,7 +1056,7 @@ freeset(p, cs) register struct parse *p; register cset *cs; { - register unsigned int i; + register int i; register cset *top = &p->g->sets[p->g->ncsets]; register size_t css = (size_t)p->g->csetsize; @@ -1082,7 +1082,7 @@ register struct parse *p; register cset *cs; { register uch h = cs->hash; - register unsigned int i; + register int i; register cset *top = &p->g->sets[p->g->ncsets]; register cset *cs2; register size_t css = (size_t)p->g->csetsize; @@ -1115,7 +1115,7 @@ firstch(p, cs) register struct parse *p; register cset *cs; { - register unsigned int i; + register int i; register size_t css = (size_t)p->g->csetsize; for (i = 0; i < css; i++) @@ -1134,7 +1134,7 @@ nch(p, cs) register struct parse *p; register cset *cs; { - register unsigned int i; + register int i; register size_t css = (size_t)p->g->csetsize; register int n = 0; @@ -1171,6 +1171,63 @@ register char *cp; cs->multis[cs->smultis - 1] = '\0'; } +/* + - mcsub - subtract a collating element from a cset + == static void mcsub(register cset *cs, register char *cp); + */ +static void +mcsub(cs, cp) +register cset *cs; +register char *cp; +{ + register char *fp = mcfind(cs, cp); + register size_t len = 
strlen(fp); + + assert(fp != NULL); + (void) memmove(fp, fp + len + 1, + cs->smultis - (fp + len + 1 - cs->multis)); + cs->smultis -= len; + + if (cs->smultis == 0) { + free(cs->multis); + cs->multis = NULL; + return; + } + + cs->multis = realloc(cs->multis, cs->smultis); + assert(cs->multis != NULL); +} + +/* + - mcin - is a collating element in a cset? + == static int mcin(register cset *cs, register char *cp); + */ +static int +mcin(cs, cp) +register cset *cs; +register char *cp; +{ + return(mcfind(cs, cp) != NULL); +} + +/* + - mcfind - find a collating element in a cset + == static char *mcfind(register cset *cs, register char *cp); + */ +static char * +mcfind(cs, cp) +register cset *cs; +register char *cp; +{ + register char *p; + + if (cs->multis == NULL) + return(NULL); + for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) + if (strcmp(cp, p) == 0) + return(p); + return(NULL); +} /* - mcinvert - invert the list of collating elements in a cset diff --git a/regex/regcomp.ih b/regex/regcomp.ih index f0eeb5f02d..0776e7185c 100644 --- a/regex/regcomp.ih +++ b/regex/regcomp.ih @@ -28,6 +28,9 @@ static int freezeset(register struct parse *p, register cset *cs); static int firstch(register struct parse *p, register cset *cs); static int nch(register struct parse *p, register cset *cs); static void mcadd(register struct parse *p, register cset *cs, register char *cp); +static void mcsub(register cset *cs, register char *cp); +static int mcin(register cset *cs, register char *cp); +static char *mcfind(register cset *cs, register char *cp); static void mcinvert(register struct parse *p, register cset *cs); static void mccase(register struct parse *p, register cset *cs); static int isinsets(register struct re_guts *g, int c); diff --git a/regex/regerror.c b/regex/regerror.c index 12edd4e164..9158870322 100644 --- a/regex/regerror.c +++ b/regex/regerror.c @@ -10,6 +10,7 @@ #include "regerror.ih" /* + = #define REG_OKAY 0 = #define REG_NOMATCH 1 = #define REG_BADPAT 2 = 
#define REG_ECOLLATE 3 @@ -34,23 +35,24 @@ static struct rerr { char *name; char *explain; } rerrs[] = { - { REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match" }, - { REG_BADPAT, "REG_BADPAT", "invalid regular expression" }, - { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" }, - { REG_ECTYPE, "REG_ECTYPE", "invalid character class" }, - { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" }, - { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" }, - { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" }, - { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" }, - { REG_EBRACE, "REG_EBRACE", "braces not balanced" }, - { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" }, - { REG_ERANGE, "REG_ERANGE", "invalid character range" }, - { REG_ESPACE, "REG_ESPACE", "out of memory" }, - { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" }, - { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" }, - { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" }, - { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" }, - { 0, "", "*** unknown regexp error code ***" } + REG_OKAY, "REG_OKAY", "no errors detected", + REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match", + REG_BADPAT, "REG_BADPAT", "invalid regular expression", + REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element", + REG_ECTYPE, "REG_ECTYPE", "invalid character class", + REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)", + REG_ESUBREG, "REG_ESUBREG", "invalid backreference number", + REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced", + REG_EPAREN, "REG_EPAREN", "parentheses not balanced", + REG_EBRACE, "REG_EBRACE", "braces not balanced", + REG_BADBR, "REG_BADBR", "invalid repetition count(s)", + REG_ERANGE, "REG_ERANGE", "invalid character range", + REG_ESPACE, "REG_ESPACE", "out of memory", + REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid", + REG_EMPTY, "REG_EMPTY", "empty (sub)expression", + REG_ASSERT, 
"REG_ASSERT", "\"can't happen\" -- you found a bug", + REG_INVARG, "REG_INVARG", "invalid argument to regex routine", + -1, "", "*** unknown regexp error code ***", }; /* @@ -74,12 +76,12 @@ size_t errbuf_size; if (errcode == REG_ATOI) s = regatoi(preg, convbuf); else { - for (r = rerrs; r->code != 0; r++) + for (r = rerrs; r->code >= 0; r++) if (r->code == target) break; if (errcode&REG_ITOA) { - if (r->code != 0) + if (r->code >= 0) (void) strcpy(convbuf, r->name); else sprintf(convbuf, "REG_0x%x", target); @@ -113,10 +115,10 @@ char *localbuf; { register struct rerr *r; - for (r = rerrs; r->code != 0; r++) + for (r = rerrs; r->code >= 0; r++) if (strcmp(r->name, preg->re_endp) == 0) break; - if (r->code == 0) + if (r->code < 0) return("0"); sprintf(localbuf, "%d", r->code); diff --git a/regex/regex.h b/regex/regex.h index 985ad02959..b39c5e178c 100644 --- a/regex/regex.h +++ b/regex/regex.h @@ -40,6 +40,7 @@ API_EXPORT(int) regcomp(regex_t *, const char *, int); /* === regerror.c === */ +#define REG_OKAY 0 #define REG_NOMATCH 1 #define REG_BADPAT 2 #define REG_ECOLLATE 3 diff --git a/regex/regex2.h b/regex/regex2.h index 3e8c0d65c4..7cd39a59ae 100644 --- a/regex/regex2.h +++ b/regex/regex2.h @@ -42,11 +42,11 @@ * In state representations, an operator's bit is on to signify a state * immediately *preceding* "execution" of that operator. */ -typedef unsigned long sop; /* strip operator */ +typedef long sop; /* strip operator */ typedef long sopno; -#define OPRMASK 0xf8000000 -#define OPDMASK 0x07ffffff -#define OPSHIFT ((unsigned)27) +#define OPRMASK 0x7c000000 +#define OPDMASK 0x03ffffff +#define OPSHIFT (26) #define OP(n) ((n)&OPRMASK) #define OPND(n) ((n)&OPDMASK) #define SOP(op, opnd) ((op)|(opnd)) @@ -67,11 +67,11 @@ typedef long sopno; #define OLPAREN (13<<OPSHIFT) /* ( fwd to ) */ #define ORPAREN (14<<OPSHIFT) /* ) back to ( */ #define OCH_ (15<<OPSHIFT) /* begin choice fwd to OOR2 */ -#define OOR1 (16u<<OPSHIFT) /* | pt. 
1 back to OOR1 or OCH_ */ -#define OOR2 (17u<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */ -#define O_CH (18u<<OPSHIFT) /* end choice back to OOR1 */ -#define OBOW (19u<<OPSHIFT) /* begin word - */ -#define OEOW (20u<<OPSHIFT) /* end word - */ +#define OOR1 (16<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */ +#define OOR2 (17<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */ +#define O_CH (18<<OPSHIFT) /* end choice back to OOR1 */ +#define OBOW (19<<OPSHIFT) /* begin word - */ +#define OEOW (20<<OPSHIFT) /* end word - */ /* * Structure for [] character-set representation. Character sets are @@ -97,6 +97,8 @@ typedef struct { #define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) #define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask) #define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */ +#define MCsub(p, cs, cp) mcsub(p, cs, cp) +#define MCin(p, cs, cp) mcin(p, cs, cp) /* stuff for character categories */ typedef unsigned char cat_t; diff --git a/regex/regexec.c b/regex/regexec.c index a552f5a394..7888d02368 100644 --- a/regex/regexec.c +++ b/regex/regexec.c @@ -16,24 +16,22 @@ #include "utils.h" #include "regex2.h" -#ifndef NDEBUG static int nope = 0; /* for use in asserts; shuts lint up */ -#endif /* macros for manipulating states, small version */ -#define states long -#define states1 states /* for later use in regexec() decision */ +#define states unsigned +#define states1 unsigned /* for later use in regexec() decision */ #define CLEAR(v) ((v) = 0) -#define SET0(v, n) ((v) &= ~(1 << (n))) -#define SET1(v, n) ((v) |= 1 << (n)) -#define ISSET(v, n) ((v) & (1 << (n))) +#define SET0(v, n) ((v) &= ~((unsigned)1 << (n))) +#define SET1(v, n) ((v) |= (unsigned)1 << (n)) +#define ISSET(v, n) ((v) & ((unsigned)1 << (n))) #define ASSIGN(d, s) ((d) = (s)) #define EQ(a, b) ((a) == (b)) #define STATEVARS int dummy /* dummy version */ #define STATESETUP(m, n) /* nothing */ #define STATETEARDOWN(m) /* nothing */ #define SETUP(v) ((v) = 0) -#define 
onestate int +#define onestate unsigned #define INIT(o, n) ((o) = (unsigned)1 << (n)) #define INC(o) ((o) <<= 1) #define ISSTATEIN(v, o) ((v) & (o)) diff --git a/regex/utils.h b/regex/utils.h index f271f759b1..1a997ac8fc 100644 --- a/regex/utils.h +++ b/regex/utils.h @@ -1,9 +1,9 @@ /* utility definitions */ -#ifndef _POSIX2_RE_DUP_MAX -#define _POSIX2_RE_DUP_MAX 255 +#ifdef _POSIX2_RE_DUP_MAX +#define DUPMAX _POSIX2_RE_DUP_MAX +#else +#define DUPMAX 255 #endif - -#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */ #define INFINITY (DUPMAX + 1) #define NC (CHAR_MAX - CHAR_MIN + 1) typedef unsigned char uch; |