summary | refs | log | tree | commit | diff
path: root/regexec.c
diff options
context:
space:
mode:
author: Larry Wall <lwall@scalpel.netlabs.com>, 1995-11-21 10:01:00 +1200
committer: Larry <lwall@scalpel.netlabs.com>, 1995-11-21 10:01:00 +1200
commit: 4633a7c4bad06b471d9310620b7fe8ddd158cccd (patch)
tree: 37ebeb26a64f123784fd8fac6243b124767243b0 /regexec.c
parent: 8e07c86ebc651fe92eb7e3b25f801f57cfb8dd6f (diff)
download: perl-4633a7c4bad06b471d9310620b7fe8ddd158cccd.tar.gz
5.002 beta 1
If you're adventurous, have a look at

    ftp://ftp.sems.com/pub/outgoing/perl5.0/perl5.002beta1.tar.gz

Many thanks to Andy for doing the integration.

Obviously, if you consult the bugs database, you'll note there are still plenty of buglets that need fixing, and several enhancements that I've intended to put in still haven't made it in (Hi, Tim and Ilya). But I think it'll be pretty stable. And you can start to fiddle around with prototypes (which are, of course, still totally undocumented).

Packrats, don't worry too much about readvertising this widely. Nowadays we're on a T1 here, so our bandwidth is okay.

Have the appropriate amount of jollity.

Larry
Diffstat (limited to 'regexec.c')
-rw-r--r--  regexec.c  146
1 files changed, 92 insertions, 54 deletions
diff --git a/regexec.c b/regexec.c
index c2cf06ef2c..6a29d7f032 100644
--- a/regexec.c
+++ b/regexec.c
@@ -171,6 +171,7 @@ I32 safebase; /* no need to remember string in subbase */
CURCUR cc;
cc.cur = 0;
+ cc.oldcc = 0;
regcc = &cc;
#ifdef DEBUGGING
@@ -576,14 +577,26 @@ char *prog;
register char *s; /* operand or save */
register char *locinput = reginput;
int minmod = 0;
+#ifdef DEBUGGING
+ static int regindent = 0;
+ regindent++;
+#endif
nextchar = *locinput;
scan = prog;
while (scan != NULL) {
#ifdef DEBUGGING
- if (regnarrate)
- fprintf(stderr, "%2d%-8.8s\t<%.10s>\n",
+#define sayYES goto yes
+#define sayNO goto no
+#define saySAME(x) if (x) goto yes; else goto no
+ if (regnarrate) {
+ fprintf(stderr, "%*s%2d%-8.8s\t<%.10s>\n", regindent*2, "",
scan - regprogram, regprop(scan), locinput);
+ }
+#else
+#define sayYES return 1
+#define sayNO return 0
+#define saySAME(x) return x
#endif
#ifdef REGALIGN
@@ -603,7 +616,7 @@ char *prog;
/* regtill = regbol; */
break;
}
- return 0;
+ sayNO;
case MBOL:
if (locinput == regbol
? regprev == '\n'
@@ -611,15 +624,15 @@ char *prog;
{
break;
}
- return 0;
+ sayNO;
case SBOL:
if (locinput == regbol && regprev == '\n')
break;
- return 0;
+ sayNO;
case GBOL:
if (locinput == regbol)
break;
- return 0;
+ sayNO;
case EOL:
if (multiline)
goto meol;
@@ -628,23 +641,23 @@ char *prog;
case MEOL:
meol:
if ((nextchar || locinput < regeol) && nextchar != '\n')
- return 0;
+ sayNO;
break;
case SEOL:
seol:
if ((nextchar || locinput < regeol) && nextchar != '\n')
- return 0;
+ sayNO;
if (regeol - locinput > 1)
- return 0;
+ sayNO;
break;
case SANY:
if (!nextchar && locinput >= regeol)
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case ANY:
if (!nextchar && locinput >= regeol || nextchar == '\n')
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case EXACTLY:
@@ -652,11 +665,11 @@ char *prog;
ln = *s++;
/* Inline the first character, for speed. */
if (*s != nextchar)
- return 0;
+ sayNO;
if (regeol - locinput < ln)
- return 0;
+ sayNO;
if (ln > 1 && bcmp(s, locinput, ln) != 0)
- return 0;
+ sayNO;
locinput += ln;
nextchar = *locinput;
break;
@@ -665,23 +678,23 @@ char *prog;
if (nextchar < 0)
nextchar = UCHARAT(locinput);
if (s[nextchar >> 3] & (1 << (nextchar&7)))
- return 0;
+ sayNO;
if (!nextchar && locinput >= regeol)
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case ALNUM:
if (!nextchar)
- return 0;
+ sayNO;
if (!isALNUM(nextchar))
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case NALNUM:
if (!nextchar && locinput >= regeol)
- return 0;
+ sayNO;
if (isALNUM(nextchar))
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case NBOUND:
@@ -692,51 +705,51 @@ char *prog;
ln = isALNUM(locinput[-1]);
n = isALNUM(nextchar); /* is next char in word? */
if ((ln == n) == (OP(scan) == BOUND))
- return 0;
+ sayNO;
break;
case SPACE:
if (!nextchar && locinput >= regeol)
- return 0;
+ sayNO;
if (!isSPACE(nextchar))
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case NSPACE:
if (!nextchar)
- return 0;
+ sayNO;
if (isSPACE(nextchar))
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case DIGIT:
if (!isDIGIT(nextchar))
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case NDIGIT:
if (!nextchar && locinput >= regeol)
- return 0;
+ sayNO;
if (isDIGIT(nextchar))
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case REF:
n = ARG1(scan); /* which paren pair */
s = regstartp[n];
if (!s)
- return 0;
+ sayNO;
if (!regendp[n])
- return 0;
+ sayNO;
if (s == regendp[n])
break;
/* Inline the first character, for speed. */
if (*s != nextchar)
- return 0;
+ sayNO;
ln = regendp[n] - s;
if (locinput + ln > regeol)
- return 0;
+ sayNO;
if (ln > 1 && bcmp(s, locinput, ln) != 0)
- return 0;
+ sayNO;
locinput += ln;
nextchar = *locinput;
break;
@@ -774,7 +787,7 @@ char *prog;
n = regmatch(PREVOPER(next)); /* start on the WHILEM */
regcpblow(cp);
regcc = cc.oldcc;
- return n;
+ saySAME(n);
}
/* NOT REACHED */
case WHILEM: {
@@ -788,19 +801,25 @@ char *prog;
*/
CURCUR* cc = regcc;
- n = cc->cur + 1;
+ n = cc->cur + 1; /* how many we know we matched */
reginput = locinput;
+#ifdef DEBUGGING
+ if (regnarrate)
+ fprintf(stderr, "%*s %d %lx\n", regindent*2, "",
+ n, (long)cc);
+#endif
+
/* If degenerate scan matches "", assume scan done. */
if (locinput == cc->lastloc) {
regcc = cc->oldcc;
ln = regcc->cur;
if (regmatch(cc->next))
- return TRUE;
+ sayYES;
regcc->cur = ln;
regcc = cc;
- return FALSE;
+ sayNO;
}
/* First just match a string of min scans. */
@@ -808,7 +827,10 @@ char *prog;
if (n < cc->min) {
cc->cur = n;
cc->lastloc = locinput;
- return regmatch(cc->scan);
+ if (regmatch(cc->scan))
+ sayYES;
+ cc->cur = n - 1;
+ sayNO;
}
/* Prefer next over scan for minimal matching. */
@@ -817,18 +839,21 @@ char *prog;
regcc = cc->oldcc;
ln = regcc->cur;
if (regmatch(cc->next))
- return TRUE; /* All done. */
+ sayYES; /* All done. */
regcc->cur = ln;
regcc = cc;
if (n >= cc->max) /* Maximum greed exceeded? */
- return FALSE;
+ sayNO;
/* Try scanning more and see if it helps. */
reginput = locinput;
cc->cur = n;
cc->lastloc = locinput;
- return regmatch(cc->scan);
+ if (regmatch(cc->scan))
+ sayYES;
+ cc->cur = n - 1;
+ sayNO;
}
/* Prefer scan over next for maximal matching. */
@@ -838,7 +863,7 @@ char *prog;
cc->cur = n;
cc->lastloc = locinput;
if (regmatch(cc->scan))
- return TRUE;
+ sayYES;
regcppop(); /* Restore some previous $<digit>s? */
reginput = locinput;
}
@@ -847,10 +872,11 @@ char *prog;
regcc = cc->oldcc;
ln = regcc->cur;
if (regmatch(cc->next))
- return TRUE;
+ sayYES;
regcc->cur = ln;
regcc = cc;
- return FALSE;
+ cc->cur = n - 1;
+ sayNO;
}
/* NOT REACHED */
case BRANCH: {
@@ -861,7 +887,7 @@ char *prog;
do {
reginput = locinput;
if (regmatch(NEXTOPER(scan)))
- return 1;
+ sayYES;
for (n = *reglastparen; n > lastparen; n--)
regendp[n] = 0;
*reglastparen = n;
@@ -876,7 +902,7 @@ char *prog;
scan = regnext(scan);
#endif
} while (scan != NULL && OP(scan) == BRANCH);
- return 0;
+ sayNO;
/* NOTREACHED */
}
}
@@ -911,12 +937,12 @@ char *prog;
if (minmod) {
minmod = 0;
if (ln && regrepeat(scan, ln) < ln)
- return 0;
+ sayNO;
while (n >= ln || (n == 32767 && ln > 0)) { /* ln overflow ? */
/* If it could work, try it. */
if (nextchar == -1000 || *reginput == nextchar)
if (regmatch(next))
- return 1;
+ sayYES;
/* Couldn't or didn't -- back up. */
reginput = locinput + ln;
if (regrepeat(scan, 1)) {
@@ -924,7 +950,7 @@ char *prog;
reginput = locinput + ln;
}
else
- return 0;
+ sayNO;
}
}
else {
@@ -936,28 +962,28 @@ char *prog;
/* If it could work, try it. */
if (nextchar == -1000 || *reginput == nextchar)
if (regmatch(next))
- return 1;
+ sayYES;
/* Couldn't or didn't -- back up. */
n--;
reginput = locinput + n;
}
}
- return 0;
+ sayNO;
case SUCCEED:
case END:
reginput = locinput; /* put where regtry can find it */
- return 1; /* Success! */
+ sayYES; /* Success! */
case IFMATCH:
reginput = locinput;
scan = NEXTOPER(scan);
if (!regmatch(scan))
- return 0;
+ sayNO;
break;
case UNLESSM:
reginput = locinput;
scan = NEXTOPER(scan);
if (regmatch(scan))
- return 0;
+ sayNO;
break;
default:
fprintf(stderr, "%x %d\n",(unsigned)scan,scan[1]);
@@ -972,6 +998,18 @@ char *prog;
*/
FAIL("corrupted regexp pointers");
/*NOTREACHED*/
+ sayNO;
+
+yes:
+#ifdef DEBUGGING
+ regindent--;
+#endif
+ return 1;
+
+no:
+#ifdef DEBUGGING
+ regindent--;
+#endif
return 0;
}