summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
authorIlya Zakharevich <ilya@math.berkeley.edu>2000-11-19 17:30:26 -0500
committerJarkko Hietaniemi <jhi@iki.fi>2000-11-20 04:06:08 +0000
commit02db2b7b4dd14eb0184a5e518cff4cef3723243b (patch)
treebf8644977ef7f00aca93f52c958c596bb6ca68e0 /regexec.c
parent2b3f4ecd4597e6a2094b9dab9e0f5a8234dd5ac9 (diff)
downloadperl-02db2b7b4dd14eb0184a5e518cff4cef3723243b.tar.gz
The first step in removing recursion from the REx engine
Message-ID: <20001119223026.A5165@monk.mps.ohio-state.edu> p4raw-id: //depot/perl@7760
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c193
1 files changed, 142 insertions, 51 deletions
diff --git a/regexec.c b/regexec.c
index c0d0097fa4..be10dc90bf 100644
--- a/regexec.c
+++ b/regexec.c
@@ -145,14 +145,14 @@ S_regcppush(pTHX_ I32 parenfloor)
}
/* These are needed since we do not localize EVAL nodes: */
-# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
+# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
" Setting an EVAL scope, savestack=%"IVdf"\n", \
- (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
+ (IV)PL_savestack_ix)); cp = PL_savestack_ix
-# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
+# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
PerlIO_printf(Perl_debug_log, \
" Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
- (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
+ (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
STATIC char *
S_regcppop(pTHX)
@@ -219,7 +219,7 @@ typedef struct re_cc_state
regexp *re;
} re_cc_state;
-#define regcpblow(cp) LEAVE_SCOPE(cp)
+#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
#define TRYPAREN(paren, n, input) { \
if (paren) { \
@@ -1706,6 +1706,9 @@ S_regtry(pTHX_ regexp *prog, char *startpos)
register I32 *ep;
CHECKPOINT lastcp;
+#ifdef DEBUGGING
+ PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
+#endif
if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
MAGIC *mg;
@@ -1786,15 +1789,45 @@ S_regtry(pTHX_ regexp *prog, char *startpos)
*++ep = -1;
}
}
- REGCP_SET;
+ REGCP_SET(lastcp);
if (regmatch(prog->program + 1)) {
prog->endp[0] = PL_reginput - PL_bostr;
return 1;
}
- REGCP_UNWIND;
+ REGCP_UNWIND(lastcp);
return 0;
}
+#define RE_UNWIND_BRANCH 1
+#define RE_UNWIND_BRANCHJ 2
+
+union re_unwind_t;
+
+typedef struct { /* XX: makes sense to enlarge it... */
+ I32 type;
+ I32 prev;
+ CHECKPOINT lastcp;
+} re_unwind_generic_t;
+
+typedef struct {
+ I32 type;
+ I32 prev;
+ CHECKPOINT lastcp;
+ I32 lastparen;
+ regnode *next;
+ char *locinput;
+ I32 nextchr;
+#ifdef DEBUGGING
+ int regindent;
+#endif
+} re_unwind_branch_t;
+
+typedef union re_unwind_t {
+ I32 type;
+ re_unwind_generic_t generic;
+ re_unwind_branch_t branch;
+} re_unwind_t;
+
/*
- regmatch - main matching routine
*
@@ -1824,6 +1857,9 @@ S_regmatch(pTHX_ regnode *prog)
register char *locinput = PL_reginput;
register I32 c1, c2, paren; /* case fold search, parenth */
int minmod = 0, sw = 0, logical = 0;
+ I32 unwind = 0;
+ I32 firstcp = PL_savestack_ix;
+
#ifdef DEBUGGING
PL_regindent++;
#endif
@@ -1833,7 +1869,7 @@ S_regmatch(pTHX_ regnode *prog)
scan = prog;
while (scan != NULL) {
#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
-#ifdef DEBUGGING
+#if 1
# define sayYES goto yes
# define sayNO goto no
# define sayYES_FINAL goto yes_final
@@ -2439,7 +2475,7 @@ S_regmatch(pTHX_ regnode *prog)
PL_regcc = 0;
cp = regcppush(0); /* Save *all* the positions. */
- REGCP_SET;
+ REGCP_SET(lastcp);
cache_re(re);
state.ss = PL_savestack_ix;
*PL_reglastparen = 0;
@@ -2469,7 +2505,7 @@ S_regmatch(pTHX_ regnode *prog)
sayYES;
}
ReREFCNT_dec(re);
- REGCP_UNWIND;
+ REGCP_UNWIND(lastcp);
regcppop();
PL_reg_call_cc = state.prev;
PL_regcc = state.cc;
@@ -2730,12 +2766,12 @@ S_regmatch(pTHX_ regnode *prog)
if (PL_regcc)
ln = PL_regcc->cur;
cp = regcppush(cc->parenfloor);
- REGCP_SET;
+ REGCP_SET(lastcp);
if (regmatch(cc->next)) {
regcpblow(cp);
sayYES; /* All done. */
}
- REGCP_UNWIND;
+ REGCP_UNWIND(lastcp);
regcppop();
if (PL_regcc)
PL_regcc->cur = ln;
@@ -2762,12 +2798,12 @@ S_regmatch(pTHX_ regnode *prog)
cc->cur = n;
cc->lastloc = locinput;
cp = regcppush(cc->parenfloor);
- REGCP_SET;
+ REGCP_SET(lastcp);
if (regmatch(cc->scan)) {
regcpblow(cp);
sayYES;
}
- REGCP_UNWIND;
+ REGCP_UNWIND(lastcp);
regcppop();
cc->cur = n - 1;
cc->lastloc = lastloc;
@@ -2780,12 +2816,12 @@ S_regmatch(pTHX_ regnode *prog)
cp = regcppush(cc->parenfloor);
cc->cur = n;
cc->lastloc = locinput;
- REGCP_SET;
+ REGCP_SET(lastcp);
if (regmatch(cc->scan)) {
regcpblow(cp);
sayYES;
}
- REGCP_UNWIND;
+ REGCP_UNWIND(lastcp);
regcppop(); /* Restore some previous $<digit>s? */
PL_reginput = locinput;
DEBUG_r(
@@ -2831,30 +2867,30 @@ S_regmatch(pTHX_ regnode *prog)
if (OP(next) != c1) /* No choice. */
next = inner; /* Avoid recursion. */
else {
- int lastparen = *PL_reglastparen;
+ I32 lastparen = *PL_reglastparen;
+ I32 unwind1;
+ re_unwind_branch_t *uw;
+
+ /* Put unwinding data on stack */
+ unwind1 = SSNEWt(1,re_unwind_branch_t);
+ uw = SSPTRt(unwind1,re_unwind_branch_t);
+ uw->prev = unwind;
+ unwind = unwind1;
+ uw->type = ((c1 == BRANCH)
+ ? RE_UNWIND_BRANCH
+ : RE_UNWIND_BRANCHJ);
+ uw->lastparen = lastparen;
+ uw->next = next;
+ uw->locinput = locinput;
+ uw->nextchr = nextchr;
+#ifdef DEBUGGING
+ uw->regindent = ++PL_regindent;
+#endif
- REGCP_SET;
- do {
- PL_reginput = locinput;
- if (regmatch(inner))
- sayYES;
- REGCP_UNWIND;
- for (n = *PL_reglastparen; n > lastparen; n--)
- PL_regendp[n] = -1;
- *PL_reglastparen = n;
- scan = next;
- /*SUPPRESS 560*/
- if ((n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next))))
- next += n;
- else
- next = NULL;
- inner = NEXTOPER(scan);
- if (c1 == BRANCHJ) {
- inner = NEXTOPER(inner);
- }
- } while (scan != NULL && OP(scan) == c1);
- sayNO;
- /* NOTREACHED */
+ REGCP_SET(uw->lastcp);
+
+ /* Now go into the first branch */
+ next = inner;
}
}
break;
@@ -2904,7 +2940,7 @@ S_regmatch(pTHX_ regnode *prog)
}
else
c1 = c2 = -1000;
- REGCP_SET;
+ REGCP_SET(lastcp);
/* This may be improved if l == 0. */
while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
/* If it could work, try it. */
@@ -2923,7 +2959,7 @@ S_regmatch(pTHX_ regnode *prog)
}
if (regmatch(next))
sayYES;
- REGCP_UNWIND;
+ REGCP_UNWIND(lastcp);
}
/* Couldn't or didn't -- move forward. */
PL_reginput = locinput;
@@ -2963,7 +2999,7 @@ S_regmatch(pTHX_ regnode *prog)
else
c1 = c2 = -1000;
}
- REGCP_SET;
+ REGCP_SET(lastcp);
while (n >= ln) {
/* If it could work, try it. */
if (c1 == -1000 ||
@@ -2985,7 +3021,7 @@ S_regmatch(pTHX_ regnode *prog)
}
if (regmatch(next))
sayYES;
- REGCP_UNWIND;
+ REGCP_UNWIND(lastcp);
}
/* Couldn't or didn't -- back up. */
n--;
@@ -3046,7 +3082,7 @@ S_regmatch(pTHX_ regnode *prog)
if (ln && regrepeat(scan, ln) < ln)
sayNO;
locinput = PL_reginput;
- REGCP_SET;
+ REGCP_SET(lastcp);
if (c1 != -1000) {
char *e = locinput + n - ln; /* Should not check after this */
char *old = locinput;
@@ -3076,7 +3112,7 @@ S_regmatch(pTHX_ regnode *prog)
/* PL_reginput == locinput now */
TRYPAREN(paren, ln, locinput);
PL_reginput = locinput; /* Could be reset... */
- REGCP_UNWIND;
+ REGCP_UNWIND(lastcp);
/* Couldn't or didn't -- move forward. */
old = locinput++;
}
@@ -3089,7 +3125,7 @@ S_regmatch(pTHX_ regnode *prog)
UCHARAT(PL_reginput) == c2)
{
TRYPAREN(paren, n, PL_reginput);
- REGCP_UNWIND;
+ REGCP_UNWIND(lastcp);
}
/* Couldn't or didn't -- move forward. */
PL_reginput = locinput;
@@ -3114,7 +3150,7 @@ S_regmatch(pTHX_ regnode *prog)
if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
ln--;
}
- REGCP_SET;
+ REGCP_SET(lastcp);
if (paren) {
while (n >= ln) {
/* If it could work, try it. */
@@ -3123,7 +3159,7 @@ S_regmatch(pTHX_ regnode *prog)
UCHARAT(PL_reginput) == c2)
{
TRYPAREN(paren, n, PL_reginput);
- REGCP_UNWIND;
+ REGCP_UNWIND(lastcp);
}
/* Couldn't or didn't -- back up. */
n--;
@@ -3138,7 +3174,7 @@ S_regmatch(pTHX_ regnode *prog)
UCHARAT(PL_reginput) == c2)
{
TRYPAREN(paren, n, PL_reginput);
- REGCP_UNWIND;
+ REGCP_UNWIND(lastcp);
}
/* Couldn't or didn't -- back up. */
n--;
@@ -3156,7 +3192,7 @@ S_regmatch(pTHX_ regnode *prog)
CHECKPOINT cp, lastcp;
cp = regcppush(0); /* Save *all* the positions. */
- REGCP_SET;
+ REGCP_SET(lastcp);
regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
the caller. */
PL_reginput = locinput; /* Make position available to
@@ -3169,7 +3205,7 @@ S_regmatch(pTHX_ regnode *prog)
regcpblow(cp);
sayYES;
}
- REGCP_UNWIND;
+ REGCP_UNWIND(lastcp);
regcppop();
PL_reg_call_cc = cur_call_cc;
PL_regcc = cctmp;
@@ -3276,6 +3312,7 @@ S_regmatch(pTHX_ regnode *prog)
PTR2UV(scan), OP(scan));
Perl_croak(aTHX_ "regexp memory corruption");
}
+ reenter:
scan = next;
}
@@ -3301,6 +3338,11 @@ yes:
#ifdef DEBUGGING
PL_regindent--;
#endif
+
+#if 0 /* Breaks $^R */
+ if (unwind)
+ regcpblow(firstcp);
+#endif
return 1;
no:
@@ -3312,6 +3354,55 @@ no:
goto do_no;
no_final:
do_no:
+ if (unwind) {
+ re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
+
+ switch (uw->type) {
+ case RE_UNWIND_BRANCH:
+ case RE_UNWIND_BRANCHJ:
+ {
+ re_unwind_branch_t *uwb = &(uw->branch);
+ I32 lastparen = uwb->lastparen;
+
+ REGCP_UNWIND(uwb->lastcp);
+ for (n = *PL_reglastparen; n > lastparen; n--)
+ PL_regendp[n] = -1;
+ *PL_reglastparen = n;
+ scan = next = uwb->next;
+ if ( !scan ||
+ OP(scan) != (uwb->type == RE_UNWIND_BRANCH
+ ? BRANCH : BRANCHJ) ) { /* Failure */
+ unwind = uwb->prev;
+#ifdef DEBUGGING
+ PL_regindent--;
+#endif
+ goto do_no;
+ }
+ /* Have more choice yet. Reuse the same uwb. */
+ /*SUPPRESS 560*/
+ if ((n = (uwb->type == RE_UNWIND_BRANCH
+ ? NEXT_OFF(next) : ARG(next))))
+ next += n;
+ else
+ next = NULL; /* XXXX Needn't unwinding in this case... */
+ uwb->next = next;
+ next = NEXTOPER(scan);
+ if (uwb->type == RE_UNWIND_BRANCHJ)
+ next = NEXTOPER(next);
+ locinput = uwb->locinput;
+ nextchr = uwb->nextchr;
+#ifdef DEBUGGING
+ PL_regindent = uwb->regindent;
+#endif
+
+ goto reenter;
+ }
+ /* NOT REACHED */
+ default:
+ Perl_croak(aTHX_ "regexp unwind memory corruption");
+ }
+ /* NOT REACHED */
+ }
#ifdef DEBUGGING
PL_regindent--;
#endif