summaryrefslogtreecommitdiff
path: root/search.c
diff options
context:
space:
mode:
authorLarry Wall <larry@wall.org>1988-06-05 00:00:00 +0000
committerLarry Wall <larry@wall.org>1988-06-05 00:00:00 +0000
commit378cc40b38293ffc7298c6a7ed3cd740ad79be52 (patch)
tree87bedf9adc5c88847a2e2d85963df5f94435aaf5 /search.c
parenta4de7c03d0bdc29d9d3a18abad4ac2628182ed7b (diff)
downloadperl-2.0.tar.gz
perl 2.0 (no announcement message available)perl-2.0
Some of the enhancements from Perl1 included: * New regexp routines derived from Henry Spencer's. o Support for /(foo|bar)/. o Support for /(foo)*/ and /(foo)+/. o \s for whitespace, \S for non-, \d for digit, \D nondigit * Local variables in blocks, subroutines and evals. * Recursive subroutine calls are now supported. * Array values may now be interpolated into lists: unlink 'foo', 'bar', @trashcan, 'tmp'; * File globbing. * Use of <> in array contexts returns the whole file or glob list. * New iterator for normal arrays, foreach, that allows both read and write. * Ability to open pipe to a forked off script for secure pipes in setuid scripts. * File inclusion via do 'foo.pl'; * More file tests, including -t to see if, for instance, stdin is a terminal. File tests now behave in a more correct manner. You can do file tests on filehandles as well as filenames. The special filetests -T and -B test a file to see if it's text or binary. * An eof can now be used on each file of the <> input for such purposes as resetting the line numbers or appending to each file of an inplace edit. * Assignments can now function as lvalues, so you can say things like ($HOST = $host) =~ tr/a-z/A-Z/; ($obj = $src) =~ s/\.c$/.o/; * You can now do certain file operations with a variable which holds the name of a filehandle, e.g. open(++$incl,$includefilename); $foo = <$incl>; * Warnings are now available (with -w) on use of uninitialized variables and on identifiers that are mentioned only once, and on reference to various undefined things. * There is now a wait operator. * There is now a sort operator. * The manual is now not lying when it says that perl is generally faster than sed. I hope.
Diffstat (limited to 'search.c')
-rw-r--r--search.c754
1 files changed, 0 insertions, 754 deletions
diff --git a/search.c b/search.c
deleted file mode 100644
index 3a15e29b6f..0000000000
--- a/search.c
+++ /dev/null
@@ -1,754 +0,0 @@
-/* $Header: search.c,v 1.0.1.2 88/01/28 10:30:46 root Exp $
- *
- * $Log: search.c,v $
- * Revision 1.0.1.2 88/01/28 10:30:46 root
- * patch8: uncommented free_compex for use with eval operator.
- *
- * Revision 1.0.1.1 88/01/24 03:55:05 root
- * patch 2: made depend on perl.h.
- *
- * Revision 1.0 87/12/18 13:05:59 root
- * Initial revision
- *
- */
-
-/* string search routines */
-
-#include "EXTERN.h"
-#include "handy.h"
-#include "util.h"
-#include "INTERN.h"
-#include "search.h"
-#include "EXTERN.h"
-#include "perl.h"
-
-#define VERBOSE
-#define FLUSH
-#define MEM_SIZE int
-
-#ifndef BITSPERBYTE
-#define BITSPERBYTE 8
-#endif
-
-#define BMAPSIZ (127 / BITSPERBYTE + 1)
-
-#define CHAR 0 /* a normal character */
-#define ANY 1 /* . matches anything except newline */
-#define CCL 2 /* [..] character class */
-#define NCCL 3 /* [^..]negated character class */
-#define BEG 4 /* ^ beginning of a line */
-#define END 5 /* $ end of a line */
-#define LPAR 6 /* ( begin sub-match */
-#define RPAR 7 /* ) end sub-match */
-#define REF 8 /* \N backreference to the Nth submatch */
-#define WORD 9 /* \w matches alphanumeric character */
-#define NWORD 10 /* \W matches non-alphanumeric character */
-#define WBOUND 11 /* \b matches word boundary */
-#define NWBOUND 12 /* \B matches non-boundary */
-#define FINIS 13 /* the end of the pattern */
-
-#define CODEMASK 15
-
-/* Quantifiers: */
-
-#define MINZERO 16 /* minimum is 0, not 1 */
-#define MAXINF 32 /* maximum is infinity, not 1 */
-
-#define ASCSIZ 0200
-typedef char TRANSTABLE[ASCSIZ];
-
-static TRANSTABLE trans = {
-0000,0001,0002,0003,0004,0005,0006,0007,
-0010,0011,0012,0013,0014,0015,0016,0017,
-0020,0021,0022,0023,0024,0025,0026,0027,
-0030,0031,0032,0033,0034,0035,0036,0037,
-0040,0041,0042,0043,0044,0045,0046,0047,
-0050,0051,0052,0053,0054,0055,0056,0057,
-0060,0061,0062,0063,0064,0065,0066,0067,
-0070,0071,0072,0073,0074,0075,0076,0077,
-0100,0101,0102,0103,0104,0105,0106,0107,
-0110,0111,0112,0113,0114,0115,0116,0117,
-0120,0121,0122,0123,0124,0125,0126,0127,
-0130,0131,0132,0133,0134,0135,0136,0137,
-0140,0141,0142,0143,0144,0145,0146,0147,
-0150,0151,0152,0153,0154,0155,0156,0157,
-0160,0161,0162,0163,0164,0165,0166,0167,
-0170,0171,0172,0173,0174,0175,0176,0177,
-};
-static bool folding = FALSE;
-
-static int err;
-#define NOERR 0
-#define BEGFAIL 1
-#define FATAL 2
-
-static char *FirstCharacter;
-static char *matchend;
-static char *matchtill;
-
-void
-search_init()
-{
-#ifdef UNDEF
- register int i;
-
- for (i = 0; i < ASCSIZ; i++)
- trans[i] = i;
-#else
- ;
-#endif
-}
-
-void
-init_compex(compex)
-register COMPEX *compex;
-{
- /* the following must start off zeroed */
-
- compex->precomp = Nullch;
- compex->complen = 0;
- compex->subbase = Nullch;
-}
-
-void
-free_compex(compex)
-register COMPEX *compex;
-{
- if (compex->complen) {
- safefree(compex->compbuf);
- compex->complen = 0;
- }
- if (compex->subbase) {
- safefree(compex->subbase);
- compex->subbase = Nullch;
- }
-}
-
-static char *gbr_str = Nullch;
-static int gbr_siz = 0;
-
-char *
-getparen(compex,n)
-register COMPEX *compex;
-int n;
-{
- int length = compex->subend[n] - compex->subbeg[n];
-
- if (!n &&
- (!compex->numsubs || n > compex->numsubs || !compex->subend[n] || length<0))
- return "";
- growstr(&gbr_str, &gbr_siz, length+1);
- safecpy(gbr_str, compex->subbeg[n], length+1);
- return gbr_str;
-}
-
-void
-case_fold(which)
-int which;
-{
- register int i;
-
- if (which != folding) {
- if (which) {
- for (i = 'A'; i <= 'Z'; i++)
- trans[i] = tolower(i);
- }
- else {
- for (i = 'A'; i <= 'Z'; i++)
- trans[i] = i;
- }
- folding = which;
- }
-}
-
-/* Compile the regular expression into internal form */
-
-char *
-compile(compex, sp, regex, fold)
-register COMPEX *compex;
-register char *sp;
-int regex;
-int fold;
-{
- register int c;
- register char *cp;
- char *lastcp;
- char paren[MAXSUB],
- *parenp;
- char **alt = compex->alternatives;
- char *retmes = "Badly formed search string";
-
- case_fold(compex->do_folding = fold);
- if (compex->precomp)
- safefree(compex->precomp);
- compex->precomp = savestr(sp);
- if (!compex->complen) {
- compex->compbuf = safemalloc(84);
- compex->complen = 80;
- }
- cp = compex->compbuf; /* point at compiled buffer */
- *alt++ = cp; /* first alternative starts here */
- parenp = paren; /* first paren goes here */
- if (*sp == 0) { /* nothing to compile? */
-#ifdef NOTDEF
- if (*cp == 0) /* nothing there yet? */
- return "Null search string";
-#endif
- if (*cp)
- return Nullch; /* just keep old expression */
- }
- compex->numsubs = 0; /* no parens yet */
- lastcp = 0;
- for (;;) {
- if (cp - compex->compbuf >= compex->complen) {
- char *ocompbuf = compex->compbuf;
-
- grow_comp(compex);
- if (ocompbuf != compex->compbuf) { /* adjust pointers? */
- char **tmpalt;
-
- cp = compex->compbuf + (cp - ocompbuf);
- if (lastcp)
- lastcp = compex->compbuf + (lastcp - ocompbuf);
- for (tmpalt = compex->alternatives; tmpalt < alt; tmpalt++)
- if (*tmpalt)
- *tmpalt = compex->compbuf + (*tmpalt - ocompbuf);
- }
- }
- c = *sp++; /* get next char of pattern */
- if (c == 0) { /* end of pattern? */
- if (parenp != paren) { /* balanced parentheses? */
-#ifdef VERBOSE
- retmes = "Missing right parenthesis";
-#endif
- goto badcomp;
- }
- *cp++ = FINIS; /* append a stopper */
- *alt++ = 0; /* terminate alternative list */
- /*
- compex->complen = cp - compex->compbuf + 1;
- compex->compbuf = saferealloc(compex->compbuf,compex->complen+4); */
- return Nullch; /* return success */
- }
- if (c != '*' && c != '?' && c != '+')
- lastcp = cp;
- if (!regex) { /* just a normal search string? */
- *cp++ = CHAR; /* everything is a normal char */
- *cp++ = trans[c];
- }
- else /* it is a regular expression */
- switch (c) {
-
- default:
- normal_char:
- *cp++ = CHAR;
- *cp++ = trans[c];
- continue;
-
- case '.':
- *cp++ = ANY;
- continue;
-
- case '[': { /* character class */
- register int i;
-
- if (cp - compex->compbuf >= compex->complen - BMAPSIZ) {
- char *ocompbuf = compex->compbuf;
-
- grow_comp(compex); /* reserve bitmap */
- if (ocompbuf != compex->compbuf) {/* adjust pointers? */
- char **tmpalt;
-
- cp = compex->compbuf + (cp - ocompbuf);
- if (lastcp)
- lastcp = compex->compbuf + (lastcp - ocompbuf);
- for (tmpalt = compex->alternatives; tmpalt < alt;
- tmpalt++)
- if (*tmpalt)
- *tmpalt =
- compex->compbuf + (*tmpalt - ocompbuf);
- }
- }
- for (i = BMAPSIZ; i; --i)
- cp[i] = 0;
-
- if ((c = *sp++) == '^') {
- c = *sp++;
- *cp++ = NCCL; /* negated */
- }
- else
- *cp++ = CCL; /* normal */
-
- i = 0; /* remember oldchar */
- do {
- if (c == '\0') {
-#ifdef VERBOSE
- retmes = "Missing ]";
-#endif
- goto badcomp;
- }
- if (c == '\\' && *sp) {
- switch (*sp) {
- default:
- c = *sp++;
- break;
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- c = *sp++ - '0';
- if (index("01234567",*sp)) {
- c <<= 3;
- c += *sp++ - '0';
- }
- if (index("01234567",*sp)) {
- c <<= 3;
- c += *sp++ - '0';
- }
- break;
- case 'b':
- c = '\b';
- sp++;
- break;
- case 'n':
- c = '\n';
- sp++;
- break;
- case 'r':
- c = '\r';
- sp++;
- break;
- case 'f':
- c = '\f';
- sp++;
- break;
- case 't':
- c = '\t';
- sp++;
- break;
- }
- }
- if (*sp == '-' && *(++sp))
- i = *sp++;
- else
- i = c;
- while (c <= i) {
- cp[c / BITSPERBYTE] |= 1 << (c % BITSPERBYTE);
- if (fold && isalpha(c))
- cp[(c ^ 32) / BITSPERBYTE] |=
- 1 << ((c ^ 32) % BITSPERBYTE);
- /* set the other bit too */
- c++;
- }
- } while ((c = *sp++) != ']');
- if (cp[-1] == NCCL)
- cp[0] |= 1;
- cp += BMAPSIZ;
- continue;
- }
-
- case '^':
- if (cp != compex->compbuf && cp[-1] != FINIS)
- goto normal_char;
- *cp++ = BEG;
- continue;
-
- case '$':
- if (isdigit(*sp)) {
- *cp++ = REF;
- *cp++ = *sp - '0';
- break;
- }
- if (*sp && *sp != '|')
- goto normal_char;
- *cp++ = END;
- continue;
-
- case '*': case '?': case '+':
- if (lastcp == 0 ||
- (*lastcp & (MINZERO|MAXINF)) ||
- *lastcp == LPAR ||
- *lastcp == RPAR ||
- *lastcp == BEG ||
- *lastcp == END ||
- *lastcp == WBOUND ||
- *lastcp == NWBOUND )
- goto normal_char;
- if (c != '+')
- *lastcp |= MINZERO;
- if (c != '?')
- *lastcp |= MAXINF;
- continue;
-
- case '(':
- if (compex->numsubs >= MAXSUB) {
-#ifdef VERBOSE
- retmes = "Too many parens";
-#endif
- goto badcomp;
- }
- *parenp++ = ++compex->numsubs;
- *cp++ = LPAR;
- *cp++ = compex->numsubs;
- break;
- case ')':
- if (parenp <= paren) {
-#ifdef VERBOSE
- retmes = "Unmatched right paren";
-#endif
- goto badcomp;
- }
- *cp++ = RPAR;
- *cp++ = *--parenp;
- break;
- case '|':
- if (parenp>paren) {
-#ifdef VERBOSE
- retmes = "No | in subpattern"; /* Sigh! */
-#endif
- goto badcomp;
- }
- *cp++ = FINIS;
- if (alt - compex->alternatives >= MAXALT) {
-#ifdef VERBOSE
- retmes = "Too many alternatives";
-#endif
- goto badcomp;
- }
- *alt++ = cp;
- break;
- case '\\': /* backslashed thingie */
- switch (c = *sp++) {
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- *cp++ = REF;
- *cp++ = c - '0';
- break;
- case 'w':
- *cp++ = WORD;
- break;
- case 'W':
- *cp++ = NWORD;
- break;
- case 'b':
- *cp++ = WBOUND;
- break;
- case 'B':
- *cp++ = NWBOUND;
- break;
- default:
- *cp++ = CHAR;
- if (c == '\0')
- goto badcomp;
- switch (c) {
- case 'n':
- c = '\n';
- break;
- case 'r':
- c = '\r';
- break;
- case 'f':
- c = '\f';
- break;
- case 't':
- c = '\t';
- break;
- }
- *cp++ = c;
- break;
- }
- break;
- }
- }
-badcomp:
- compex->compbuf[0] = 0;
- compex->numsubs = 0;
- return retmes;
-}
-
-void
-grow_comp(compex)
-register COMPEX *compex;
-{
- compex->complen += 80;
- compex->compbuf = saferealloc(compex->compbuf, (MEM_SIZE)compex->complen + 4);
-}
-
-char *
-execute(compex, addr, beginning, minend)
-register COMPEX *compex;
-char *addr;
-bool beginning;
-int minend;
-{
- register char *p1 = addr;
- register char *trt = trans;
- register int c;
- register int scr;
- register int c2;
-
- if (addr == Nullch)
- return Nullch;
- if (compex->numsubs) { /* any submatches? */
- for (c = 0; c <= compex->numsubs; c++)
- compex->subbeg[c] = compex->subend[c] = Nullch;
- }
- case_fold(compex->do_folding); /* make sure table is correct */
- if (beginning)
- FirstCharacter = p1; /* for ^ tests */
- else {
- if (multiline || compex->alternatives[1] || compex->compbuf[0] != BEG)
- FirstCharacter = Nullch;
- else
- return Nullch; /* can't match */
- }
- matchend = Nullch;
- matchtill = addr + minend;
- err = 0;
- if (compex->compbuf[0] == CHAR && !compex->alternatives[1]) {
- if (compex->do_folding) {
- c = compex->compbuf[1]; /* fast check for first character */
- do {
- if (trt[*p1] == c && try(compex, p1, compex->compbuf))
- goto got_it;
- } while (*p1++ && !err);
- }
- else {
- c = compex->compbuf[1]; /* faster check for first character */
- if (compex->compbuf[2] == CHAR)
- c2 = compex->compbuf[3];
- else
- c2 = 0;
- do {
- false_alarm:
- while (scr = *p1++, scr && scr != c) ;
- if (!scr)
- break;
- if (c2 && *p1 != c2) /* and maybe even second character */
- goto false_alarm;
- if (try(compex, p1, compex->compbuf+2)) {
- p1--;
- goto got_it;
- }
- } while (!err);
- }
- return Nullch;
- }
- else { /* normal algorithm */
- do {
- register char **alt = compex->alternatives;
- while (*alt) {
- if (try(compex, p1, *alt++))
- goto got_it;
- }
- } while (*p1++ && err < FATAL);
- return Nullch;
- }
-
-got_it:
- if (compex->numsubs) { /* any parens? */
- trt = savestr(addr); /* in case addr is not static */
- if (compex->subbase)
- safefree(compex->subbase); /* (may be freeing addr!) */
- compex->subbase = trt;
- scr = compex->subbase - addr;
- p1 += scr;
- matchend += scr;
- for (c = 0; c <= compex->numsubs; c++) {
- if (compex->subend[c]) {
- compex->subbeg[c] += scr;
- compex->subend[c] += scr;
- }
- }
- }
- compex->subend[0] = matchend;
- compex->subbeg[0] = p1;
- return p1;
-}
-
-bool
-try(compex, sp, cp)
-COMPEX *compex;
-register char *cp;
-register char *sp;
-{
- register char *basesp;
- register char *trt = trans;
- register int i;
- register int backlen;
- register int code;
-
- while (*sp || (*cp & MAXINF) || *cp == BEG || *cp == RPAR ||
- *cp == WBOUND || *cp == NWBOUND) {
- switch ((code = *cp++) & CODEMASK) {
-
- case CHAR:
- basesp = sp;
- i = *cp++;
- if (code & MAXINF)
- while (*sp && trt[*sp] == i) sp++;
- else
- if (*sp && trt[*sp] == i) sp++;
- backlen = 1;
- goto backoff;
-
- backoff:
- while (sp > basesp) {
- if (try(compex, sp, cp))
- goto right;
- sp -= backlen;
- }
- if (code & MINZERO)
- continue;
- goto wrong;
-
- case ANY:
- basesp = sp;
- if (code & MAXINF)
- while (*sp && *sp != '\n') sp++;
- else
- if (*sp && *sp != '\n') sp++;
- backlen = 1;
- goto backoff;
-
- case CCL:
- basesp = sp;
- if (code & MAXINF)
- while (*sp && cclass(cp, *sp, 1)) sp++;
- else
- if (*sp && cclass(cp, *sp, 1)) sp++;
- cp += BMAPSIZ;
- backlen = 1;
- goto backoff;
-
- case NCCL:
- basesp = sp;
- if (code & MAXINF)
- while (*sp && cclass(cp, *sp, 0)) sp++;
- else
- if (*sp && cclass(cp, *sp, 0)) sp++;
- cp += BMAPSIZ;
- backlen = 1;
- goto backoff;
-
- case END:
- if (!*sp || *sp == '\n') {
- matchtill--;
- continue;
- }
- goto wrong;
-
- case BEG:
- if (sp == FirstCharacter || (
- *sp && sp[-1] == '\n') ) {
- matchtill--;
- continue;
- }
- if (!multiline) /* no point in advancing more */
- err = BEGFAIL;
- goto wrong;
-
- case WORD:
- basesp = sp;
- if (code & MAXINF)
- while (*sp && isalnum(*sp)) sp++;
- else
- if (*sp && isalnum(*sp)) sp++;
- backlen = 1;
- goto backoff;
-
- case NWORD:
- basesp = sp;
- if (code & MAXINF)
- while (*sp && !isalnum(*sp)) sp++;
- else
- if (*sp && !isalnum(*sp)) sp++;
- backlen = 1;
- goto backoff;
-
- case WBOUND:
- if ((sp == FirstCharacter || !isalnum(sp[-1])) !=
- (!*sp || !isalnum(*sp)) )
- continue;
- goto wrong;
-
- case NWBOUND:
- if ((sp == FirstCharacter || !isalnum(sp[-1])) ==
- (!*sp || !isalnum(*sp)))
- continue;
- goto wrong;
-
- case FINIS:
- goto right;
-
- case LPAR:
- compex->subbeg[*cp++] = sp;
- continue;
-
- case RPAR:
- i = *cp++;
- compex->subend[i] = sp;
- compex->lastparen = i;
- continue;
-
- case REF:
- if (compex->subend[i = *cp++] == 0) {
- fputs("Bad subpattern reference\n",stdout) FLUSH;
- err = FATAL;
- goto wrong;
- }
- basesp = sp;
- backlen = compex->subend[i] - compex->subbeg[i];
- if (code & MAXINF)
- while (*sp && subpat(compex, i, sp)) sp += backlen;
- else
- if (*sp && subpat(compex, i, sp)) sp += backlen;
- goto backoff;
-
- default:
- fputs("Botched pattern compilation\n",stdout) FLUSH;
- err = FATAL;
- return -1;
- }
- }
- if (*cp == FINIS || *cp == END) {
-right:
- if (matchend == Nullch || sp > matchend)
- matchend = sp;
- return matchend >= matchtill;
- }
-wrong:
- matchend = Nullch;
- return FALSE;
-}
-
-bool
-subpat(compex, i, sp)
-register COMPEX *compex;
-register int i;
-register char *sp;
-{
- register char *bp;
-
- bp = compex->subbeg[i];
- while (*sp && *bp == *sp) {
- bp++;
- sp++;
- if (bp >= compex->subend[i])
- return TRUE;
- }
- return FALSE;
-}
-
-bool
-cclass(set, c, af)
-register char *set;
-register int c;
-{
- c &= 0177;
-#if BITSPERBYTE == 8
- if (set[c >> 3] & 1 << (c & 7))
-#else
- if (set[c / BITSPERBYTE] & 1 << (c % BITSPERBYTE))
-#endif
- return af;
- return !af;
-}