diff options
Diffstat (limited to 'regexp.c')
-rw-r--r-- | regexp.c | 2119 |
1 files changed, 0 insertions, 2119 deletions
diff --git a/regexp.c b/regexp.c deleted file mode 100644 index 5b0e7b21de..0000000000 --- a/regexp.c +++ /dev/null @@ -1,2119 +0,0 @@ -/* NOTE: this is derived from Henry Spencer's regexp code, and should not - * confused with the original package (see point 3 below). Thanks, Henry! - */ - -/* Additional note: this code is very heavily munged from Henry's version - * in places. In some spots I've traded clarity for efficiency, so don't - * blame Henry for some of the lack of readability. - */ - -/* $Header: regexp.c,v 2.0.1.1 88/06/28 16:37:19 root Exp $ - * - * $Log: regexp.c,v $ - * Revision 2.0.1.1 88/06/28 16:37:19 root - * patch1: removed redundant debugging code - * - * Revision 2.0 88/06/05 00:10:45 root - * Baseline version 2.0. - * - */ - -/* - * regcomp and regexec -- regsub and regerror are not used in perl - * - * Copyright (c) 1986 by University of Toronto. - * Written by Henry Spencer. Not derived from licensed software. - * - * Permission is granted to anyone to use this software for any - * purpose on any computer system, and to redistribute it freely, - * subject to the following restrictions: - * - * 1. The author is not responsible for the consequences of use of - * this software, no matter how awful, even if they arise - * from defects in it. - * - * 2. The origin of this software must not be misrepresented, either - * by explicit claim or by omission. - * - * 3. Altered versions must be plainly marked as such, and must not - * be misrepresented as being the original software. - * - * Beware that some of this code is subtly aware of the way operator - * precedence is structured in regular expressions. Serious changes in - * regular-expression syntax might require a total rethink. - */ -#include "EXTERN.h" -#include "perl.h" - -/* - * The "internal use only" fields in regexp.h are present to pass info from - * compile to execute that permits the execute phase to run lots faster on - * simple cases. They are: - * - * regstart str that must begin a match; Nullch if none obvious - * reganch is the match anchored (at beginning-of-line only)? - * regmust string (pointer into program) that match must include, or NULL - * [regmust changed to STR* for bminstr()--law] - * regmlen length of regmust string - * [regmlen not used currently] - * - * Regstart and reganch permit very fast decisions on suitable starting points - * for a match, cutting down the work a lot. Regmust permits fast rejection - * of lines that cannot possibly match. The regmust tests are costly enough - * that regcomp() supplies a regmust only if the r.e. contains something - * potentially expensive (at present, the only such thing detected is * or + - * at the start of the r.e., which can involve a lot of backup). Regmlen is - * supplied because the test in regexec() needs it and regcomp() is computing - * it anyway. - * [regmust is now supplied always. The tests that use regmust have a - * heuristic that disables the test if it usually matches.] - * - * [In fact, we now use regmust in many cases to locate where the search - * starts in the string, so if regback is >= 0, the regmust search is never - * wasted effort. The regback variable says how many characters back from - * where regmust matched is the earliest possible start of the match. - * For instance, /[a-z].foo/ has a regmust of 'foo' and a regback of 2.] - */ - -/* - * Structure for regexp "program". This is essentially a linear encoding - * of a nondeterministic finite-state machine (aka syntax charts or - * "railroad normal form" in parsing technology). Each node is an opcode - * plus a "next" pointer, possibly plus an operand. "Next" pointers of - * all nodes except BRANCH implement concatenation; a "next" pointer with - * a BRANCH on both ends of it is connecting two alternatives. (Here we - * have one of the subtle syntax dependencies: an individual BRANCH (as - * opposed to a collection of them) is never concatenated with anything - * because of operator precedence.) The operand of some types of node is - * a literal string; for others, it is a node leading into a sub-FSM. In - * particular, the operand of a BRANCH node is the first node of the branch. - * (NB this is *not* a tree structure: the tail of the branch connects - * to the thing following the set of BRANCHes.) The opcodes are: - */ - -/* definition number opnd? meaning */ -#define END 0 /* no End of program. */ -#define BOL 1 /* no Match "" at beginning of line. */ -#define EOL 2 /* no Match "" at end of line. */ -#define ANY 3 /* no Match any one character. */ -#define ANYOF 4 /* str Match any character in this string. */ -#define ANYBUT 5 /* str Match any character not in this string. */ -#define BRANCH 6 /* node Match this alternative, or the next... */ -#define BACK 7 /* no Match "", "next" ptr points backward. */ -#define EXACTLY 8 /* str Match this string (preceded by length). */ -#define NOTHING 9 /* no Match empty string. */ -#define STAR 10 /* node Match this (simple) thing 0 or more times. */ -#define PLUS 11 /* node Match this (simple) thing 1 or more times. */ -#define ALNUM 12 /* no Match any alphanumeric character */ -#define NALNUM 13 /* no Match any non-alphanumeric character */ -#define BOUND 14 /* no Match "" at any word boundary */ -#define NBOUND 15 /* no Match "" at any word non-boundary */ -#define SPACE 16 /* no Match any whitespace character */ -#define NSPACE 17 /* no Match any non-whitespace character */ -#define DIGIT 18 /* no Match any numeric character */ -#define NDIGIT 19 /* no Match any non-numeric character */ -#define REF 20 /* no Match some already matched string */ -#define OPEN 30 /* no Mark this point in input as start of #n. */ - /* OPEN+1 is number 1, etc. */ -#define CLOSE 40 /* no Analogous to OPEN. */ - -/* - * Opcode notes: - * - * BRANCH The set of branches constituting a single choice are hooked - * together with their "next" pointers, since precedence prevents - * anything being concatenated to any individual branch. The - * "next" pointer of the last BRANCH in a choice points to the - * thing following the whole choice. This is also where the - * final "next" pointer of each individual branch points; each - * branch starts with the operand node of a BRANCH node. - * - * BACK Normal "next" pointers all implicitly point forward; BACK - * exists to make loop structures possible. - * - * STAR,PLUS '?', and complex '*' and '+', are implemented as circular - * BRANCH structures using BACK. Simple cases (one character - * per match) are implemented with STAR and PLUS for speed - * and to minimize recursive plunges. - * - * OPEN,CLOSE ...are numbered at compile time. - */ - -/* The following have no fixed length. */ -char varies[] = {BRANCH,BACK,STAR,PLUS,REF,0}; - -/* The following always have a length of 1. */ -char simple[] = {ANY,ANYOF,ANYBUT,ALNUM,NALNUM,SPACE,NSPACE,DIGIT,NDIGIT,0}; - -/* - * A node is one char of opcode followed by two chars of "next" pointer. - * "Next" pointers are stored as two 8-bit pieces, high order first. The - * value is a positive offset from the opcode of the node containing it. - * An operand, if any, simply follows the node. (Note that much of the - * code generation knows about this implicit relationship.) - * - * Using two bytes for the "next" pointer is vast overkill for most things, - * but allows patterns to get big without disasters. - * - * [If ALIGN is defined, the "next" pointer is always aligned on an even - * boundary, and reads the offset directly as a short. Also, there is no - * special test to reverse the sign of BACK pointers since the offset is - * stored negative.] - */ - -#ifndef STATIC -#define STATIC static -#endif - -#define ALIGN -#define FASTANY -#ifdef DEBUG -#undef DEBUG -#endif -#ifdef DEBUGGING -#define DEBUG -#endif - -#ifdef DEBUG -int regnarrate = 0; -void regdump(); -STATIC char *regprop(); -#endif - - -#define OP(p) (*(p)) - -#ifdef ALIGN -#define NEXT(p) (*(short*)(p+1)) -#else -#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) -#endif - -#define OPERAND(p) ((p) + 3) - -#ifdef ALIGN -#define NEXTOPER(p) ((p) + 4) -#else -#define NEXTOPER(p) ((p) + 3) -#endif - -#define MAGIC 0234 - -/* - * Utility definitions. - */ -#ifndef CHARBITS -#define UCHARAT(p) ((int)*(unsigned char *)(p)) -#else -#define UCHARAT(p) ((int)*(p)&CHARBITS) -#endif - -#define FAIL(m) fatal("/%s/: %s",regprecomp,m) -#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') -#define META "^$.[()|?+*\\" - -/* - * Flags to be passed up and down. - */ -#define HASWIDTH 01 /* Known never to match null string. */ -#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */ -#define SPSTART 04 /* Starts with * or +. */ -#define WORST 0 /* Worst case. */ - -/* - * Global work variables for regcomp(). - */ -static char *regprecomp; /* uncompiled string. */ -static char *regparse; /* Input-scan pointer. */ -static int regnpar; /* () count. */ -static char regdummy; -static char *regcode; /* Code-emit pointer; ®dummy = don't. */ -static long regsize; /* Code size. */ -static int regfold; - -/* - * Forward declarations for regcomp()'s friends. - */ -STATIC char *reg(); -STATIC char *regbranch(); -STATIC char *regpiece(); -STATIC char *regatom(); -STATIC char *regclass(); -STATIC char *regchar(); -STATIC char *regnode(); -STATIC char *regnext(); -STATIC void regc(); -STATIC void reginsert(); -STATIC void regtail(); -STATIC void regoptail(); -#ifndef STRCSPN -STATIC int strcspn(); -#endif - -/* - - regcomp - compile a regular expression into internal code - * - * We can't allocate space until we know how big the compiled form will be, - * but we can't compile it (and thus know how big it is) until we've got a - * place to put the code. So we cheat: we compile it twice, once with code - * generation turned off and size counting turned on, and once "for real". - * This also means that we don't allocate space until we are sure that the - * thing really will compile successfully, and we never have to move the - * code and thus invalidate pointers into it. (Note that it has to be in - * one piece because free() must be able to free it all.) [NB: not true in perl] - * - * Beware that the optimization-preparation code in here knows about some - * of the structure of the compiled regexp. [I'll say.] - */ -regexp * -regcomp(exp,fold,rare) -char *exp; -int fold; -int rare; -{ - register regexp *r; - register char *scan; - register STR *longest; - register int len; - register char *first; - int flags; - int back; - int curback; - extern char *safemalloc(); - extern char *savestr(); - - if (exp == NULL) - fatal("NULL regexp argument"); - - /* First pass: determine size, legality. */ - regfold = fold; - regparse = exp; - regprecomp = savestr(exp); - regnpar = 1; - regsize = 0L; - regcode = ®dummy; - regc(MAGIC); - if (reg(0, &flags) == NULL) { - safefree(regprecomp); - return(NULL); - } - - /* Small enough for pointer-storage convention? */ - if (regsize >= 32767L) /* Probably could be 65535L. */ - FAIL("regexp too big"); - - /* Allocate space. */ - r = (regexp *)safemalloc(sizeof(regexp) + (unsigned)regsize); - if (r == NULL) - FAIL("regexp out of space"); - - /* Second pass: emit code. */ - r->precomp = regprecomp; - r->subbase = NULL; - regparse = exp; - regnpar = 1; - regcode = r->program; - regc(MAGIC); - if (reg(0, &flags) == NULL) - return(NULL); - - /* Dig out information for optimizations. */ - r->regstart = Nullstr; /* Worst-case defaults. */ - r->reganch = 0; - r->regmust = Nullstr; - r->regback = -1; - r->regstclass = Nullch; - scan = r->program+1; /* First BRANCH. */ - if (!fold && OP(regnext(scan)) == END) {/* Only one top-level choice. */ - scan = NEXTOPER(scan); - - first = scan; - while ((OP(first) > OPEN && OP(first) < CLOSE) || - (OP(first) == BRANCH && OP(regnext(first)) != BRANCH) || - (OP(first) == PLUS) ) - first = NEXTOPER(first); - - /* Starting-point info. */ - if (OP(first) == EXACTLY) - r->regstart = str_make(OPERAND(first)+1); - else if ((exp = index(simple,OP(first))) && exp > simple) - r->regstclass = first; - else if (OP(first) == BOUND || OP(first) == NBOUND) - r->regstclass = first; - else if (OP(first) == BOL) - r->reganch++; - -#ifdef DEBUGGING - if (debug & 512) - fprintf(stderr,"first %d next %d offset %d\n", - OP(first), OP(NEXTOPER(first)), first - scan); -#endif - /* - * If there's something expensive in the r.e., find the - * longest literal string that must appear and make it the - * regmust. Resolve ties in favor of later strings, since - * the regstart check works with the beginning of the r.e. - * and avoiding duplication strengthens checking. Not a - * strong reason, but sufficient in the absence of others. - * [Now we resolve ties in favor of the earlier string if - * it happens that curback has been invalidated, since the - * earlier string may buy us something the later one won't.] - */ - longest = str_new(10); - len = 0; - curback = 0; - while (scan != NULL) { - if (OP(scan) == BRANCH) { - if (OP(regnext(scan)) == BRANCH) { - curback = -30000; - while (OP(scan) == BRANCH) - scan = regnext(scan); - } - else /* single branch is ok */ - scan = NEXTOPER(scan); - } - if (OP(scan) == EXACTLY) { - if (curback - back == len) { - str_cat(longest, OPERAND(scan)+1); - len += *OPERAND(scan); - curback += *OPERAND(scan); - } - else if (*OPERAND(scan) >= len + (curback >= 0)) { - str_set(longest, OPERAND(scan)+1); - len = *OPERAND(scan); - back = curback; - curback += len; - } - else - curback += *OPERAND(scan); - } - else if (index(varies,OP(scan))) - curback = -30000; - else if (index(simple,OP(scan))) - curback++; - scan = regnext(scan); - } - if (len) { - r->regmust = longest; - if (back < 0) - back = -1; - r->regback = back; - if (len > !(sawstudy)) - fbmcompile(r->regmust); - *(long*)&r->regmust->str_nval = 100; - } - else - str_free(longest); - } - - r->do_folding = fold; - r->nparens = regnpar - 1; -#ifdef DEBUG - if (debug & 512) - regdump(r); -#endif - return(r); -} - -/* - - reg - regular expression, i.e. main body or parenthesized thing - * - * Caller must absorb opening parenthesis. - * - * Combining parenthesis handling with the base level of regular expression - * is a trifle forced, but the need to tie the tails of the branches to what - * follows makes it hard to avoid. - */ -static char * -reg(paren, flagp) -int paren; /* Parenthesized? */ -int *flagp; -{ - register char *ret; - register char *br; - register char *ender; - register int parno; - int flags; - - *flagp = HASWIDTH; /* Tentatively. */ - - /* Make an OPEN node, if parenthesized. */ - if (paren) { - if (regnpar >= NSUBEXP) - FAIL("too many () in regexp"); - parno = regnpar; - regnpar++; - ret = regnode(OPEN+parno); - } else - ret = NULL; - - /* Pick up the branches, linking them together. */ - br = regbranch(&flags); - if (br == NULL) - return(NULL); - if (ret != NULL) - regtail(ret, br); /* OPEN -> first. */ - else - ret = br; - if (!(flags&HASWIDTH)) - *flagp &= ~HASWIDTH; - *flagp |= flags&SPSTART; - while (*regparse == '|') { - regparse++; - br = regbranch(&flags); - if (br == NULL) - return(NULL); - regtail(ret, br); /* BRANCH -> BRANCH. */ - if (!(flags&HASWIDTH)) - *flagp &= ~HASWIDTH; - *flagp |= flags&SPSTART; - } - - /* Make a closing node, and hook it on the end. */ - ender = regnode((paren) ? CLOSE+parno : END); - regtail(ret, ender); - - /* Hook the tails of the branches to the closing node. */ - for (br = ret; br != NULL; br = regnext(br)) - regoptail(br, ender); - - /* Check for proper termination. */ - if (paren && *regparse++ != ')') { - FAIL("unmatched () in regexp"); - } else if (!paren && *regparse != '\0') { - if (*regparse == ')') { - FAIL("unmatched () in regexp"); - } else - FAIL("junk on end of regexp"); /* "Can't happen". */ - /* NOTREACHED */ - } - - return(ret); -} - -/* - - regbranch - one alternative of an | operator - * - * Implements the concatenation operator. - */ -static char * -regbranch(flagp) -int *flagp; -{ - register char *ret; - register char *chain; - register char *latest; - int flags; - - *flagp = WORST; /* Tentatively. */ - - ret = regnode(BRANCH); - chain = NULL; - while (*regparse != '\0' && *regparse != '|' && *regparse != ')') { - latest = regpiece(&flags); - if (latest == NULL) - return(NULL); - *flagp |= flags&HASWIDTH; - if (chain == NULL) /* First piece. */ - *flagp |= flags&SPSTART; - else - regtail(chain, latest); - chain = latest; - } - if (chain == NULL) /* Loop ran zero times. */ - (void) regnode(NOTHING); - - return(ret); -} - -/* - - regpiece - something followed by possible [*+?] - * - * Note that the branching code sequences used for ? and the general cases - * of * and + are somewhat optimized: they use the same NOTHING node as - * both the endmarker for their branch list and the body of the last branch. - * It might seem that this node could be dispensed with entirely, but the - * endmarker role is not redundant. - */ -static char * -regpiece(flagp) -int *flagp; -{ - register char *ret; - register char op; - register char *next; - int flags; - - ret = regatom(&flags); - if (ret == NULL) - return(NULL); - - op = *regparse; - if (!ISMULT(op)) { - *flagp = flags; - return(ret); - } - - if (!(flags&HASWIDTH) && op != '?') - FAIL("regexp *+ operand could be empty"); - *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); - - if (op == '*' && (flags&SIMPLE)) - reginsert(STAR, ret); - else if (op == '*') { - /* Emit x* as (x&|), where & means "self". */ - reginsert(BRANCH, ret); /* Either x */ - regoptail(ret, regnode(BACK)); /* and loop */ - regoptail(ret, ret); /* back */ - regtail(ret, regnode(BRANCH)); /* or */ - regtail(ret, regnode(NOTHING)); /* null. */ - } else if (op == '+' && (flags&SIMPLE)) - reginsert(PLUS, ret); - else if (op == '+') { - /* Emit x+ as x(&|), where & means "self". */ - next = regnode(BRANCH); /* Either */ - regtail(ret, next); - regtail(regnode(BACK), ret); /* loop back */ - regtail(next, regnode(BRANCH)); /* or */ - regtail(ret, regnode(NOTHING)); /* null. */ - } else if (op == '?') { - /* Emit x? as (x|) */ - reginsert(BRANCH, ret); /* Either x */ - regtail(ret, regnode(BRANCH)); /* or */ - next = regnode(NOTHING); /* null. */ - regtail(ret, next); - regoptail(ret, next); - } - regparse++; - if (ISMULT(*regparse)) - FAIL("nested *?+ in regexp"); - - return(ret); -} - -static int foo; - -/* - - regatom - the lowest level - * - * Optimization: gobbles an entire sequence of ordinary characters so that - * it can turn them into a single node, which is smaller to store and - * faster to run. Backslashed characters are exceptions, each becoming a - * separate node; the code is simpler that way and it's not worth fixing. - * - * [Yes, it is worth fixing, some scripts can run twice the speed.] - */ -static char * -regatom(flagp) -int *flagp; -{ - register char *ret; - int flags; - - *flagp = WORST; /* Tentatively. */ - - switch (*regparse++) { - case '^': - ret = regnode(BOL); - break; - case '$': - ret = regnode(EOL); - break; - case '.': - ret = regnode(ANY); - *flagp |= HASWIDTH|SIMPLE; - break; - case '[': - ret = regclass(); - *flagp |= HASWIDTH|SIMPLE; - break; - case '(': - ret = reg(1, &flags); - if (ret == NULL) - return(NULL); - *flagp |= flags&(HASWIDTH|SPSTART); - break; - case '\0': - case '|': - case ')': - FAIL("internal urp in regexp"); /* Supposed to be caught earlier. */ - break; - case '?': - case '+': - case '*': - FAIL("?+* follows nothing in regexp"); - break; - case '\\': - switch (*regparse) { - case '\0': - FAIL("trailing \\ in regexp"); - case 'w': - ret = regnode(ALNUM); - *flagp |= HASWIDTH|SIMPLE; - regparse++; - break; - case 'W': - ret = regnode(NALNUM); - *flagp |= HASWIDTH|SIMPLE; - regparse++; - break; - case 'b': - ret = regnode(BOUND); - *flagp |= SIMPLE; - regparse++; - break; - case 'B': - ret = regnode(NBOUND); - *flagp |= SIMPLE; - regparse++; - break; - case 's': - ret = regnode(SPACE); - *flagp |= HASWIDTH|SIMPLE; - regparse++; - break; - case 'S': - ret = regnode(NSPACE); - *flagp |= HASWIDTH|SIMPLE; - regparse++; - break; - case 'd': - ret = regnode(DIGIT); - *flagp |= HASWIDTH|SIMPLE; - regparse++; - break; - case 'D': - ret = regnode(NDIGIT); - *flagp |= HASWIDTH|SIMPLE; - regparse++; - break; - case 'n': - case 'r': - case 't': - case 'f': - goto defchar; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - if (isdigit(regparse[1])) - goto defchar; - else { - ret = regnode(REF + *regparse++ - '0'); - *flagp |= SIMPLE; - } - break; - default: - goto defchar; - } - break; - default: { - register int len; - register char ender; - register char *p; - char *oldp; - int foo; - - defchar: - ret = regnode(EXACTLY); - regc(0); /* save spot for len */ - for (len=0, p=regparse-1; len < 127 && *p; len++) { - oldp = p; - switch (*p) { - case '^': - case '$': - case '.': - case '[': - case '(': - case ')': - case '|': - goto loopdone; - case '\\': - switch (*++p) { - case '\0': - FAIL("trailing \\ in regexp"); - case 'w': - case 'W': - case 'b': - case 'B': - case 's': - case 'S': - case 'd': - case 'D': - --p; - goto loopdone; - case 'n': - ender = '\n'; - p++; - break; - case 'r': - ender = '\r'; - p++; - break; - case 't': - ender = '\t'; - p++; - break; - case 'f': - ender = '\f'; - p++; - break; - case '0': case '1': case '2': case '3':case '4': - case '5': case '6': case '7': case '8':case '9': - if (isdigit(p[1])) { - foo = *p++ - '0'; - foo <<= 3; - foo += *p - '0'; - if (isdigit(p[1])) - foo = (foo<<3) + *++p - '0'; - ender = foo; - p++; - } - else { - --p; - goto loopdone; - } - break; - default: - ender = *p++; - break; - } - break; - default: - ender = *p++; - break; - } - if (regfold && isupper(ender)) - ender = tolower(ender); - if (ISMULT(*p)) { /* Back off on ?+*. */ - if (len) - p = oldp; - else { - len++; - regc(ender); - } - break; - } - regc(ender); - } - loopdone: - regparse = p; - if (len <= 0) - FAIL("internal disaster in regexp"); - *flagp |= HASWIDTH; - if (len == 1) - *flagp |= SIMPLE; - *OPERAND(ret) = len; - regc('\0'); - } - break; - } - - return(ret); -} - -#ifdef FASTANY -static void -regset(bits,def,c) -char *bits; -int def; -register int c; -{ - if (regcode == ®dummy) - return; - if (def) - bits[c >> 3] &= ~(1 << (c & 7)); - else - bits[c >> 3] |= (1 << (c & 7)); -} - -static char * -regclass() -{ - register char *bits; - register int class; - register int lastclass; - register int range = 0; - register char *ret; - register int def; - - if (*regparse == '^') { /* Complement of range. */ - ret = regnode(ANYBUT); - regparse++; - def = 0; - } else { - ret = regnode(ANYOF); - def = 255; - } - bits = regcode; - for (class = 0; class < 32; class++) - regc(def); - if (*regparse == ']' || *regparse == '-') - regset(bits,def,lastclass = *regparse++); - while (*regparse != '\0' && *regparse != ']') { - class = UCHARAT(regparse++); - if (class == '\\') { - class = UCHARAT(regparse++); - switch (class) { - case 'w': - for (class = 'a'; class <= 'z'; class++) - regset(bits,def,class); - for (class = 'A'; class <= 'Z'; class++) - regset(bits,def,class); - for (class = '0'; class <= '9'; class++) - regset(bits,def,class); - regset(bits,def,'_'); - lastclass = 1234; - continue; - case 's': - regset(bits,def,' '); - regset(bits,def,'\t'); - regset(bits,def,'\r'); - regset(bits,def,'\f'); - regset(bits,def,'\n'); - lastclass = 1234; - continue; - case 'd': - for (class = '0'; class <= '9'; class++) - regset(bits,def,class); - lastclass = 1234; - continue; - case 'n': - class = '\n'; - break; - case 'r': - class = '\r'; - break; - case 't': - class = '\t'; - break; - case 'f': - class = '\f'; - break; - case 'b': - class = '\b'; - break; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - class -= '0'; - if (isdigit(*regparse)) { - class <<= 3; - class += *regparse++ - '0'; - } - if (isdigit(*regparse)) { - class <<= 3; - class += *regparse++ - '0'; - } - break; - } - } - if (!range && class == '-' && *regparse && *regparse != ']') { - range = 1; - continue; - } - if (range) { - if (lastclass > class) - FAIL("invalid [] range in regexp"); - } - else - lastclass = class - 1; - range = 0; - for (lastclass++; lastclass <= class; lastclass++) { - regset(bits,def,lastclass); - if (regfold && isupper(lastclass)) - regset(bits,def,tolower(lastclass)); - } - lastclass = class; - } - if (*regparse != ']') - FAIL("unmatched [] in regexp"); - regset(bits,0,0); /* always bomb out on null */ - regparse++; - return ret; -} - -#else /* !FASTANY */ -static char * -regclass() -{ - register int class; - register int lastclass; - register int range = 0; - register char *ret; - - if (*regparse == '^') { /* Complement of range. */ - ret = regnode(ANYBUT); - regparse++; - } else - ret = regnode(ANYOF); - if (*regparse == ']' || *regparse == '-') - regc(lastclass = *regparse++); - while (*regparse != '\0' && *regparse != ']') { - class = UCHARAT(regparse++); - if (class == '\\') { - class = UCHARAT(regparse++); - switch (class) { - case 'w': - for (class = 'a'; class <= 'z'; class++) - regc(class); - for (class = 'A'; class <= 'Z'; class++) - regc(class); - for (class = '0'; class <= '9'; class++) - regc(class); - regc('_'); - lastclass = 1234; - continue; - case 's': - regc(' '); - regc('\t'); - regc('\r'); - regc('\f'); - regc('\n'); - lastclass = 1234; - continue; - case 'd': - for (class = '0'; class <= '9'; class++) - regc(class); - lastclass = 1234; - continue; - case 'n': - class = '\n'; - break; - case 'r': - class = '\r'; - break; - case 't': - class = '\t'; - break; - case 'f': - class = '\f'; - break; - case 'b': - class = '\b'; - break; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - class -= '0'; - if (isdigit(*regparse)) { - class <<= 3; - class += *regparse++ - '0'; - } - if (isdigit(*regparse)) { - class <<= 3; - class += *regparse++ - '0'; - } - break; - } - } - if (!range && class == '-' && *regparse && *regparse != ']') { - range = 1; - continue; - } - if (range) { - if (lastclass > class) - FAIL("invalid [] range in regexp"); - } - else - lastclass = class - 1; - range = 0; - for (lastclass++; lastclass <= class; lastclass++) { - regc(lastclass); - if (regfold && isupper(lastclass)) - regc(tolower(lastclass)); - } - lastclass = class; - } - regc('\0'); - if (*regparse != ']') - FAIL("unmatched [] in regexp"); - regparse++; - return ret; -} -#endif /* NOTDEF */ - -static char * -regchar(ch,flagp) -int ch; -int *flagp; -{ - char *ret; - - ret = regnode(EXACTLY); - regc(1); - regc(ch); - regc('\0'); - regparse++; - *flagp |= HASWIDTH|SIMPLE; - return ret; -} - -/* - - regnode - emit a node - */ -static char * /* Location. */ -regnode(op) -char op; -{ - register char *ret; - register char *ptr; - - ret = regcode; - if (ret == ®dummy) { -#ifdef ALIGN - if (!(regsize & 1)) - regsize++; -#endif - regsize += 3; - return(ret); - } - -#ifdef ALIGN - if (!((long)ret & 1)) - *ret++ = 127; -#endif - ptr = ret; - *ptr++ = op; - *ptr++ = '\0'; /* Null "next" pointer. */ - *ptr++ = '\0'; - regcode = ptr; - - return(ret); -} - -/* - - regc - emit (if appropriate) a byte of code - */ -static void -regc(b) -char b; -{ - if (regcode != ®dummy) - *regcode++ = b; - else - regsize++; -} - -/* - - reginsert - insert an operator in front of already-emitted operand - * - * Means relocating the operand. - */ -static void -reginsert(op, opnd) -char op; -char *opnd; -{ - register char *src; - register char *dst; - register char *place; - - if (regcode == ®dummy) { -#ifdef ALIGN - regsize += 4; -#else - regsize += 3; -#endif - return; - } - - src = regcode; -#ifdef ALIGN - regcode += 4; -#else - regcode += 3; -#endif - dst = regcode; - while (src > opnd) - *--dst = *--src; - - place = opnd; /* Op node, where operand used to be. */ - *place++ = op; - *place++ = '\0'; - *place++ = '\0'; -} - -/* - - regtail - set the next-pointer at the end of a node chain - */ -static void -regtail(p, val) -char *p; -char *val; -{ - register char *scan; - register char *temp; - register int offset; - - if (p == ®dummy) - return; - - /* Find last node. */ - scan = p; - for (;;) { - temp = regnext(scan); - if (temp == NULL) - break; - scan = temp; - } - -#ifdef ALIGN - offset = val - scan; - *(short*)(scan+1) = offset; -#else - if (OP(scan) == BACK) - offset = scan - val; - else - offset = val - scan; - *(scan+1) = (offset>>8)&0377; - *(scan+2) = offset&0377; -#endif -} - -/* - - regoptail - regtail on operand of first argument; nop if operandless - */ -static void -regoptail(p, val) -char *p; -char *val; -{ - /* "Operandless" and "op != BRANCH" are synonymous in practice. */ - if (p == NULL || p == ®dummy || OP(p) != BRANCH) - return; - regtail(NEXTOPER(p), val); -} - -/* - * regexec and friends - */ - -/* - * Global work variables for regexec(). - */ -static char *reginput; /* String-input pointer. */ -static char *regbol; /* Beginning of input, for ^ check. */ -static char **regstartp; /* Pointer to startp array. */ -static char **regendp; /* Ditto for endp. */ -static char *reglastparen; /* Similarly for lastparen. */ -static char *regtill; - -static char *regmystartp[10]; /* For remembering backreferences. */ -static char *regmyendp[10]; - -/* - * Forwards. - */ -STATIC int regtry(); -STATIC int regmatch(); -STATIC int regrepeat(); - -extern char sawampersand; -extern int multiline; - -/* - - regexec - match a regexp against a string - */ -int -regexec(prog, stringarg, strend, beginning, minend, screamer) -register regexp *prog; -char *stringarg; -char *strend; /* pointer to null at end of string */ -int beginning; /* is ^ valid at the beginning of stringarg? */ -int minend; /* end of match must be at least minend after stringarg */ -STR *screamer; -{ - register char *s; - extern char *index(); - register int tmp, i; - register char *string = stringarg; - register char *c; - extern char *savestr(); - - /* Be paranoid... */ - if (prog == NULL || string == NULL) { - fatal("NULL regexp parameter"); - return(0); - } - - regprecomp = prog->precomp; - /* Check validity of program. */ - if (UCHARAT(prog->program) != MAGIC) { - FAIL("corrupted regexp program"); - } - - if (prog->do_folding) { - i = strend - string; - string = savestr(string); - strend = string + i; - for (s = string; *s; s++) - if (isupper(*s)) - *s = tolower(*s); - } - - /* If there is a "must appear" string, look for it. */ - s = string; - if (prog->regmust != Nullstr) { - if (beginning && screamer) { - if (screamfirst[prog->regmust->str_rare] >= 0) - s = screaminstr(screamer,prog->regmust); - else - s = Nullch; - } - else - s = fbminstr(s,strend,prog->regmust); - if (!s) { - ++*(long*)&prog->regmust->str_nval; /* hooray */ - goto phooey; /* not present */ - } - else if (prog->regback >= 0) { - s -= prog->regback; - if (s < string) - s = string; - } - else if (--*(long*)&prog->regmust->str_nval < 0) { /* boo */ - str_free(prog->regmust); - prog->regmust = Nullstr; /* disable regmust */ - s = string; - } - else - s = string; - } - - /* Mark beginning of line for ^ . */ - if (beginning) - regbol = string; - else - regbol = NULL; - - /* see how far we have to get to not match where we matched before */ - regtill = string+minend; - - /* Simplest case: anchored match need be tried only once. */ - /* [unless multiline is set] */ - if (prog->reganch) { - if (regtry(prog, string)) - goto got_it; - else if (multiline) { - /* for multiline we only have to try after newlines */ - if (s > string) - s--; - while ((s = index(s, '\n')) != NULL) { - if (*++s && regtry(prog, s)) - goto got_it; - } - } - goto phooey; - } - - /* Messy cases: unanchored match. */ - if (prog->regstart) { - /* We know what string it must start with. */ - if (prog->regstart->str_pok == 3) { - while ((s = fbminstr(s, strend, prog->regstart)) != NULL) { - if (regtry(prog, s)) - goto got_it; - s++; - } - } - else { - c = prog->regstart->str_ptr; - while ((s = instr(s, c)) != NULL) { - if (regtry(prog, s)) - goto got_it; - s++; - } - } - } - else if (c = prog->regstclass) { - /* We know what class it must start with. */ - switch (OP(c)) { - case ANYOF: case ANYBUT: - c = OPERAND(c); - while (i = *s) { - if (!(c[i >> 3] & (1 << (i&7)))) - if (regtry(prog, s)) - goto got_it; - s++; - } - break; - case BOUND: - tmp = 0; - while (i = *s) { - if (tmp != (isalpha(i) || isdigit(i) || i == '_')) { - tmp = !tmp; - if (regtry(prog, s)) - goto got_it; - } - s++; - } - if (tmp && regtry(prog,s)) - goto got_it; - break; - case NBOUND: - tmp = 0; - while (i = *s) { - if (tmp != (isalpha(i) || isdigit(i) || i == '_')) - tmp = !tmp; - else if (regtry(prog, s)) - goto got_it; - s++; - } - if (!tmp && regtry(prog,s)) - goto got_it; - break; - case ALNUM: - while (i = *s) { - if (isalpha(i) || isdigit(i) || i == '_') - if (regtry(prog, s)) - goto got_it; - s++; - } - break; - case NALNUM: - while (i = *s) { - if (!isalpha(i) && !isdigit(i) && i != '_') - if (regtry(prog, s)) - goto got_it; - s++; - } - break; - case SPACE: - while (i = *s) { - if (isspace(i)) - if (regtry(prog, s)) - goto got_it; - s++; - } - break; - case NSPACE: - while (i = *s) { - if (!isspace(i)) - if (regtry(prog, s)) - goto got_it; - s++; - } - break; - case DIGIT: - while (i = *s) { - if (isdigit(i)) - if (regtry(prog, s)) - goto got_it; - s++; - } - break; - case NDIGIT: - while (i = *s) { - if (!isdigit(i)) - if (regtry(prog, s)) - goto got_it; - s++; - } - break; - } - } - else - /* We don't know much -- general case. */ - do { - if (regtry(prog, s)) - goto got_it; - } while (*s++ != '\0'); - - /* Failure. */ - goto phooey; - - got_it: - if (prog->nparens || sawampersand || prog->do_folding) { - s = savestr(stringarg); /* so $digit will always work */ - if (prog->subbase) - safefree(prog->subbase); - prog->subbase = s; - tmp = prog->subbase - string; - for (i = 0; i <= prog->nparens; i++) { - if (prog->endp[i]) { - prog->startp[i] += tmp; - prog->endp[i] += tmp; - } - } - if (prog->do_folding) { - safefree(string); - } - } - return(1); - - phooey: - if (prog->do_folding) { - safefree(string); - } - return(0); -} - -/* - - regtry - try match at specific point - */ -static int /* 0 failure, 1 success */ -regtry(prog, string) -regexp *prog; -char *string; -{ - register int i; - register char **sp; - register char **ep; - - reginput = string; - regstartp = prog->startp; - regendp = prog->endp; - reglastparen = &prog->lastparen; - - sp = prog->startp; - ep = prog->endp; - if (prog->nparens) { - for (i = NSUBEXP; i > 0; i--) { - *sp++ = NULL; - *ep++ = NULL; - } - } - if (regmatch(prog->program + 1) && reginput >= regtill) { - prog->startp[0] = string; - prog->endp[0] = reginput; - return(1); - } else - return(0); -} - -/* - - regmatch - main matching routine - * - * Conceptually the strategy is simple: check to see whether the current - * node matches, call self recursively to see whether the rest matches, - * and then act accordingly. In practice we make some effort to avoid - * recursion, in particular by going through "ordinary" nodes (that don't - * need to know whether the rest of the match failed) by a loop instead of - * by recursion. - */ -/* [lwall] I've hoisted the register declarations to the outer block in order to - * maybe save a little bit of pushing and popping on the stack. It also takes - * advantage of machines that use a register save mask on subroutine entry. - */ -static int /* 0 failure, 1 success */ -regmatch(prog) -char *prog; -{ - register char *scan; /* Current node. */ - char *next; /* Next node. */ - extern char *index(); - register int nextchar; - register int n; /* no or next */ - register int ln; /* len or last */ - register char *s; /* operand or save */ - register char *locinput = reginput; - - nextchar = *reginput; - scan = prog; -#ifdef DEBUG - if (scan != NULL && regnarrate) - fprintf(stderr, "%s(\n", regprop(scan)); -#endif - while (scan != NULL) { -#ifdef DEBUG - if (regnarrate) - fprintf(stderr, "%s...\n", regprop(scan)); -#endif - -#ifdef ALIGN - next = scan + NEXT(scan); - if (next == scan) - next = NULL; -#else - next = regnext(scan); -#endif - - switch (OP(scan)) { - case BOL: - if (locinput == regbol || - (nextchar && locinput[-1] == '\n') ) { - regtill--; - break; - } - return(0); - case EOL: - if (nextchar != '\0' && nextchar != '\n') - return(0); - regtill--; - break; - case ANY: - if (nextchar == '\0' || nextchar == '\n') - return(0); - nextchar = *++locinput; - break; - case EXACTLY: - s = OPERAND(scan); - ln = *s++; - /* Inline the first character, for speed. */ - if (*s != nextchar) - return(0); - if (ln > 1 && strncmp(s, locinput, ln) != 0) - return(0); - locinput += ln; - nextchar = *locinput; - break; - case ANYOF: - case ANYBUT: - s = OPERAND(scan); - if (nextchar < 0) - nextchar = UCHARAT(locinput); - if (s[nextchar >> 3] & (1 << (nextchar&7))) - return(0); - nextchar = *++locinput; - break; -#ifdef NOTDEF - case ANYOF: - if (nextchar == '\0' || index(OPERAND(scan), nextchar) == NULL) - return(0); - nextchar = *++locinput; - break; - case ANYBUT: - if (nextchar == '\0' || index(OPERAND(scan), nextchar) != NULL) - return(0); - nextchar = *++locinput; - break; -#endif - case ALNUM: - if (!nextchar) - return(0); - if (!isalpha(nextchar) && !isdigit(nextchar) && - nextchar != '_') - return(0); - nextchar = *++locinput; - break; - case NALNUM: - if (!nextchar) - return(0); - if (isalpha(nextchar) || isdigit(nextchar) || - nextchar == '_') - return(0); - nextchar = *++locinput; - break; - case NBOUND: - case BOUND: - if (locinput == regbol) /* was last char in word? */ - ln = 0; - else - ln = (isalpha(locinput[-1]) || - isdigit(locinput[-1]) || - locinput[-1] == '_' ); - n = (isalpha(nextchar) || isdigit(nextchar) || - nextchar == '_' ); /* is next char in word? */ - if ((ln == n) == (OP(scan) == BOUND)) - return(0); - break; - case SPACE: - if (!nextchar) - return(0); - if (!isspace(nextchar)) - return(0); - nextchar = *++locinput; - break; - case NSPACE: - if (!nextchar) - return(0); - if (isspace(nextchar)) - return(0); - nextchar = *++locinput; - break; - case DIGIT: - if (!isdigit(nextchar)) - return(0); - nextchar = *++locinput; - break; - case NDIGIT: - if (!nextchar) - return(0); - if (isdigit(nextchar)) - return(0); - nextchar = *++locinput; - break; - case REF: - case REF+1: - case REF+2: - case REF+3: - case REF+4: - case REF+5: - case REF+6: - case REF+7: - case REF+8: - case REF+9: - n = OP(scan) - REF; - s = regmystartp[n]; - if (!s) - return(0); - if (!regmyendp[n]) - return(0); - if (s == regmyendp[n]) - break; - /* Inline the first character, for speed. */ - if (*s != nextchar) - return(0); - ln = regmyendp[n] - s; - if (ln > 1 && strncmp(s, locinput, ln) != 0) - return(0); - locinput += ln; - nextchar = *locinput; - break; - - case NOTHING: - break; - case BACK: - break; - case OPEN+1: - case OPEN+2: - case OPEN+3: - case OPEN+4: - case OPEN+5: - case OPEN+6: - case OPEN+7: - case OPEN+8: - case OPEN+9: - n = OP(scan) - OPEN; - reginput = locinput; - - regmystartp[n] = locinput; /* for REF */ - if (regmatch(next)) { - /* - * Don't set startp if some later - * invocation of the same parentheses - * already has. - */ - if (regstartp[n] == NULL) - regstartp[n] = locinput; - return(1); - } else - return(0); - /* NOTREACHED */ - case CLOSE+1: - case CLOSE+2: - case CLOSE+3: - case CLOSE+4: - case CLOSE+5: - case CLOSE+6: - case CLOSE+7: - case CLOSE+8: - case CLOSE+9: { - n = OP(scan) - CLOSE; - reginput = locinput; - - regmyendp[n] = locinput; /* for REF */ - if (regmatch(next)) { - /* - * Don't set endp if some later - * invocation of the same parentheses - * already has. - */ - if (regendp[n] == NULL) { - regendp[n] = locinput; - *reglastparen = n; - } - return(1); - } else - return(0); - } - break; - case BRANCH: { - if (OP(next) != BRANCH) /* No choice. */ - next = NEXTOPER(scan); /* Avoid recursion. */ - else { - do { - reginput = locinput; - if (regmatch(NEXTOPER(scan))) - return(1); -#ifdef ALIGN - if (n = NEXT(scan)) - scan += n; - else - scan = NULL; -#else - scan = regnext(scan); -#endif - } while (scan != NULL && OP(scan) == BRANCH); - return(0); - /* NOTREACHED */ - } - } - break; - case STAR: - case PLUS: - /* - * Lookahead to avoid useless match attempts - * when we know what character comes next. - */ - if (OP(next) == EXACTLY) - nextchar = *(OPERAND(next)+1); - else - nextchar = '\0'; - ln = (OP(scan) == STAR) ? 0 : 1; - reginput = locinput; - n = regrepeat(NEXTOPER(scan)); - while (n >= ln) { - /* If it could work, try it. */ - if (nextchar == '\0' || *reginput == nextchar) - if (regmatch(next)) - return(1); - /* Couldn't or didn't -- back up. */ - n--; - reginput = locinput + n; - } - return(0); - case END: - reginput = locinput; /* put where regtry can find it */ - return(1); /* Success! */ - default: - printf("%x %d\n",scan,scan[1]); - FAIL("regexp memory corruption"); - } - - scan = next; - } - - /* - * We get here only if there's trouble -- normally "case END" is - * the terminating point. - */ - FAIL("corrupted regexp pointers"); - /*NOTREACHED*/ -} - -/* - - regrepeat - repeatedly match something simple, report how many - */ -/* - * [This routine now assumes that it will only match on things of length 1. - * That was true before, but now we assume scan - reginput is the count, - * rather than incrementing count on every character.] - */ -static int -regrepeat(p) -char *p; -{ - register char *scan; - register char *opnd; - register int c; - - scan = reginput; - opnd = OPERAND(p); - switch (OP(p)) { - case ANY: - while (*scan && *scan != '\n') - scan++; - break; - case EXACTLY: /* length of string is 1 */ - opnd++; - while (*opnd == *scan) - scan++; - break; -#ifdef FASTANY - case ANYOF: - case ANYBUT: - c = UCHARAT(scan); - while (!(opnd[c >> 3] & (1 << (c & 7)))) { - scan++; - c = UCHARAT(scan); - } - break; -#else - case ANYOF: - while (*scan != '\0' && index(opnd, *scan) != NULL) - scan++; - break; - case ANYBUT: - while (*scan != '\0' && index(opnd, *scan) == NULL) - scan++; - break; -#endif /* FASTANY */ - case ALNUM: - while (*scan && (isalpha(*scan) || isdigit(*scan) || - *scan == '_')) - scan++; - break; - case NALNUM: - while (*scan && (!isalpha(*scan) && !isdigit(*scan) && - *scan != '_')) - scan++; - break; - case SPACE: - while (*scan && isspace(*scan)) - scan++; - break; - case NSPACE: - while (*scan && !isspace(*scan)) - scan++; - break; - case DIGIT: - while (*scan && isdigit(*scan)) - scan++; - break; - case NDIGIT: - while (*scan && !isdigit(*scan)) - scan++; - break; - default: /* Oh dear. Called inappropriately. */ - FAIL("internal regexp foulup"); - /* NOTREACHED */ - } - - c = scan - reginput; - reginput = scan; - - return(c); -} - -/* - - regnext - dig the "next" pointer out of a node - * - * [Note, when ALIGN is defined there are two places in regmatch() that bypass - * this code for speed.] - */ -static char * -regnext(p) -register char *p; -{ - register int offset; - - if (p == ®dummy) - return(NULL); - - offset = NEXT(p); - if (offset == 0) - return(NULL); - -#ifdef ALIGN - return(p+offset); -#else - if (OP(p) == BACK) - return(p-offset); - else - return(p+offset); -#endif -} - -#ifdef DEBUG - -STATIC char *regprop(); - -/* - - regdump - dump a regexp onto stdout in vaguely comprehensible form - */ -void -regdump(r) -regexp *r; -{ - register char *s; - register char op = EXACTLY; /* Arbitrary non-END op. */ - register char *next; - extern char *index(); - - - s = r->program + 1; - while (op != END) { /* While that wasn't END last time... */ -#ifdef ALIGN - if (!((long)s & 1)) - s++; -#endif - op = OP(s); - printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ - next = regnext(s); - if (next == NULL) /* Next ptr. */ - printf("(0)"); - else - printf("(%d)", (s-r->program)+(next-s)); - s += 3; - if (op == ANYOF || op == ANYBUT) { - s += 32; - } - if (op == EXACTLY) { - /* Literal string, where present. */ - s++; - while (*s != '\0') { - putchar(*s); - s++; - } - s++; - } - putchar('\n'); - } - - /* Header fields of interest. */ - if (r->regstart) - printf("start `%s' ", r->regstart->str_ptr); - if (r->regstclass) - printf("stclass `%s' ", regprop(OP(r->regstclass))); - if (r->reganch) - printf("anchored "); - if (r->regmust != NULL) - printf("must have \"%s\" back %d ", r->regmust->str_ptr, - r->regback); - printf("\n"); -} - -/* - - regprop - printable representation of opcode - */ -static char * -regprop(op) -char *op; -{ - register char *p; - static char buf[50]; - - (void) strcpy(buf, ":"); - - switch (OP(op)) { - case BOL: - p = "BOL"; - break; - case EOL: - p = "EOL"; - break; - case ANY: - p = "ANY"; - break; - case ANYOF: - p = "ANYOF"; - break; - case ANYBUT: - p = "ANYBUT"; - break; - case BRANCH: - p = "BRANCH"; - break; - case EXACTLY: - p = "EXACTLY"; - break; - case NOTHING: - p = "NOTHING"; - break; - case BACK: - p = "BACK"; - break; - case END: - p = "END"; - break; - case ALNUM: - p = "ALNUM"; - break; - case NALNUM: - p = "NALNUM"; - break; - case BOUND: - p = "BOUND"; - break; - case NBOUND: - p = "NBOUND"; - break; - case SPACE: - p = "SPACE"; - break; - case NSPACE: - p = "NSPACE"; - break; - case DIGIT: - p = "DIGIT"; - break; - case NDIGIT: - p = "NDIGIT"; - break; - case REF: - case REF+1: - case REF+2: - case REF+3: - case REF+4: - case REF+5: - case REF+6: - case REF+7: - case REF+8: - case REF+9: - sprintf(buf+strlen(buf), "REF%d", OP(op)-REF); - p = NULL; - break; - case OPEN+1: - case OPEN+2: - case OPEN+3: - case OPEN+4: - case OPEN+5: - case OPEN+6: - case OPEN+7: - case OPEN+8: - case OPEN+9: - sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN); - p = NULL; - break; - case CLOSE+1: - case CLOSE+2: - case CLOSE+3: - case CLOSE+4: - case CLOSE+5: - case CLOSE+6: - case CLOSE+7: - case CLOSE+8: - case CLOSE+9: - sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE); - p = NULL; - break; - case STAR: - p = "STAR"; - break; - case PLUS: - p = "PLUS"; - break; - default: - FAIL("corrupted regexp opcode"); - } - if (p != NULL) - (void) strcat(buf, p); - return(buf); -} -#endif - -#ifdef NOTDEF -/* - * The following is provided for those people who do not have strcspn() in - * their C libraries. They should get off their butts and do something - * about it; at least one public-domain implementation of those (highly - * useful) string routines has been published on Usenet. - */ -#ifndef STRCSPN -/* - * strcspn - find length of initial segment of s1 consisting entirely - * of characters not from s2 - */ - -static int -strcspn(s1, s2) -char *s1; -char *s2; -{ - register char *scan1; - register char *scan2; - register int count; - - count = 0; - for (scan1 = s1; *scan1 != '\0'; scan1++) { - for (scan2 = s2; *scan2 != '\0';) /* ++ moved down. */ - if (*scan1 == *scan2++) - return(count); - count++; - } - return(count); -} -#endif -#endif /* NOTDEF */ - -regfree(r) -struct regexp *r; -{ - if (r->precomp) - safefree(r->precomp); - if (r->subbase) - safefree(r->subbase); - if (r->regmust) - str_free(r->regmust); - if (r->regstart) - str_free(r->regstart); - safefree((char*)r); -} |