summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--regexec.c275
-rw-r--r--regexp.h38
-rw-r--r--sv.c2
3 files changed, 181 insertions, 134 deletions
diff --git a/regexec.c b/regexec.c
index 61a011550a..71eab5b1a4 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1659,9 +1659,6 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
/* see how far we have to get to not match where we matched before */
PL_regtill = startpos+minend;
- /* We start without call_cc context. */
- PL_reg_call_cc = 0;
-
/* If there is a "must appear" string, look for it. */
s = startpos;
@@ -2307,6 +2304,44 @@ S_push_slab(pTHX)
goto start_recurse; \
resume_point_##where:
+
+/* push a new regex state. Set newst to point to it */
+
+#define PUSH_STATE(newst, resume) \
+ depth++; \
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH STATE(%d)\n", depth)); \
+ st->scan = scan; \
+ st->next = next; \
+ st->n = n; \
+ st->locinput = locinput; \
+ st->resume_state = resume; \
+ newst = st+1; \
+ if (newst > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1])) \
+ newst = S_push_slab(aTHX); \
+ PL_regmatch_state = newst; \
+ newst->cc = 0; \
+ newst->minmod = 0; \
+ newst->sw = 0; \
+ newst->logical = 0; \
+ newst->unwind = 0; \
+ locinput = PL_reginput; \
+ nextchr = UCHARAT(locinput);
+
+#define POP_STATE \
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \
+ depth--; \
+ st--; \
+ if (st < &PL_regmatch_slab->states[0]) { \
+ PL_regmatch_slab = PL_regmatch_slab->prev; \
+ st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]; \
+ } \
+ PL_regmatch_state = st; \
+ scan = st->scan; \
+ next = st->next; \
+ n = st->n; \
+ locinput = st->locinput; \
+ nextchr = UCHARAT(locinput);
+
/*
- regmatch - main matching routine
*
@@ -2437,6 +2472,8 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
bool result; /* return value of S_regmatch */
regnode *inner; /* Next node in internal branch. */
int depth = 0; /* depth of recursion */
+ regmatch_state *newst; /* when pushing a state, this is the new one */
+ regmatch_state *cur_eval = NULL; /* most recent (??{}) state */
#ifdef DEBUGGING
SV *re_debug_flags = NULL;
@@ -3290,9 +3327,6 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
}
if (st->logical == 2) { /* Postponed subexpression: /(??{...})/ */
regexp *re;
- re_cc_state state;
- int toggleutf;
-
{
/* extract RE object from returned value; compiling if
* necessary */
@@ -3329,6 +3363,9 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
PL_regsize = osize;
}
}
+
+ /* run the pattern returned from (??{...}) */
+
DEBUG_EXECUTE_r(
PerlIO_printf(Perl_debug_log,
"Entering embedded \"%s%.60s%s%s\"\n",
@@ -3337,62 +3374,35 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
PL_colors[1],
(strlen(re->precomp) > 60 ? "..." : ""))
);
- state.node = next;
- state.prev = PL_reg_call_cc;
- state.cc = st->cc;
- state.re = PL_reg_re;
- st->cc = 0;
-
st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
REGCP_SET(st->u.eval.lastcp);
- PL_reg_re = re;
- state.ss = PL_savestack_ix;
*PL_reglastparen = 0;
*PL_reglastcloseparen = 0;
- PL_reg_call_cc = &state;
PL_reginput = locinput;
- toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
- ((re->reganch & ROPT_UTF8) != 0);
- if (toggleutf) PL_reg_flags ^= RF_utf8;
-
- /* XXXX This is too dramatic a measure... */
- PL_reg_maxiter = 0;
-
- /* XXX the only recursion left in regmatch() */
- if (regmatch(re, re->program + 1)) {
- /* Even though we succeeded, we need to restore
- global variables, since we may be wrapped inside
- SUSPEND, thus the match may be not finished yet. */
-
- /* XXXX Do this only if SUSPENDed? */
- PL_reg_call_cc = state.prev;
- st->cc = state.cc;
- PL_reg_re = state.re;
-
- if (toggleutf) PL_reg_flags ^= RF_utf8;
-
- /* XXXX This is too dramatic a measure... */
- PL_reg_maxiter = 0;
-
- /* These are needed even if not SUSPEND. */
- ReREFCNT_dec(re);
- regcpblow(st->u.eval.cp);
- sayYES;
- }
- ReREFCNT_dec(re);
- REGCP_UNWIND(st->u.eval.lastcp);
- regcppop(rex);
- PL_reg_call_cc = state.prev;
- st->cc = state.cc;
- PL_reg_re = state.re;
- if (toggleutf) PL_reg_flags ^= RF_utf8;
/* XXXX This is too dramatic a measure... */
PL_reg_maxiter = 0;
st->logical = 0;
- sayNO;
+ st->u.eval.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
+ ((re->reganch & ROPT_UTF8) != 0);
+ if (st->u.eval.toggleutf) PL_reg_flags ^= RF_utf8;
+ st->u.eval.prev_rex = rex;
+ assert(rex == PL_reg_re); /* XXX */
+ rex = re;
+ PL_reg_re = rex; /* XXX */
+
+ st->u.eval.prev_eval = cur_eval;
+ st->u.eval.prev_slab = PL_regmatch_slab;
+ st->u.eval.depth = depth;
+ cur_eval = st;
+ PUSH_STATE(newst, resume_EVAL);
+ st = newst;
+
+ /* now continue from first node in postoned RE */
+ next = re->program + 1;
+ break;
/* NOTREACHED */
}
/* /(?(?{...})X|Y)/ */
@@ -4234,49 +4244,49 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
sayNO;
break;
case END:
- if (PL_reg_call_cc) {
- st->u.end.cur_call_cc = PL_reg_call_cc;
- st->u.end.end_re = PL_reg_re;
+ if (cur_eval) {
+ /* we have successfully completed the execution of a
+ * postponed re. Pop all states back to the last EVAL
+ * then continue with the node following the (??{...})
+ */
+
+ /* this simulates a POP_STATE, except that it pops several
+ * levels, and doesn't restore locinput */
- /* Save *all* the positions. */
- st->u.end.cp = regcppush(0);
- REGCP_SET(st->u.end.lastcp);
+ st = cur_eval;
+ PL_regmatch_slab = st->u.eval.prev_slab;
+ cur_eval = st->u.eval.prev_eval;
+ depth = st->u.eval.depth;
- /* Restore parens of the caller. */
+ PL_regmatch_state = st;
+ scan = st->scan;
+ next = st->next;
+ n = st->n;
+
+ if (st->u.eval.toggleutf)
+ PL_reg_flags ^= RF_utf8;
+ ReREFCNT_dec(rex);
+ rex = st->u.eval.prev_rex;
+ PL_reg_re = rex; /* XXX */
+ /* XXXX This is too dramatic a measure... */
+ PL_reg_maxiter = 0;
+
+ /* Restore parens of the caller without popping the
+ * savestack */
{
I32 tmp = PL_savestack_ix;
- PL_savestack_ix = PL_reg_call_cc->ss;
+ PL_savestack_ix = st->u.eval.lastcp;
regcppop(rex);
PL_savestack_ix = tmp;
}
- /* Make position available to the callcc. */
- PL_reginput = locinput;
- PL_reg_re = PL_reg_call_cc->re;
- st->u.end.savecc = st->cc;
- st->cc = PL_reg_call_cc->cc;
- PL_reg_call_cc = PL_reg_call_cc->prev;
- REGMATCH(st->u.end.cur_call_cc->node, END);
- /*** all unsaved local vars undefined at this point */
- if (result) {
- PL_reg_call_cc = st->u.end.cur_call_cc;
- regcpblow(st->u.end.cp);
- sayYES;
- }
- REGCP_UNWIND(st->u.end.lastcp);
- regcppop(rex);
- PL_reg_call_cc = st->u.end.cur_call_cc;
- st->cc = st->u.end.savecc;
- PL_reg_re = st->u.end.end_re;
+ PL_reginput = locinput;
+ /* resume at node following the (??{...}) */
+ break;
- DEBUG_EXECUTE_r(
- PerlIO_printf(Perl_debug_log,
- "%*s continuation failed...\n",
- REPORT_CODE_OFF+PL_regindent*2, "")
- );
- sayNO_SILENT;
}
+
if (locinput < PL_regtill) {
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
"%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
@@ -4378,7 +4388,6 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
oldst->next = next;
oldst->n = n;
oldst->locinput = locinput;
- oldst->reg_call_cc = PL_reg_call_cc;
st->cc = oldst->cc;
locinput = PL_reginput;
@@ -4393,6 +4402,8 @@ S_regmatch(pTHX_ regexp *rex, regnode *prog)
}
}
+
+
/*
* We get here only if there's trouble -- normally "case END" is
* the terminating point.
@@ -4417,7 +4428,56 @@ yes:
#endif
result = 1;
- goto exit_level;
+ /* XXX this is duplicate(ish) code to that in the do_no section.
+ * eventually a yes should just pop the whole stack */
+ if (depth) {
+ /* restore previous state and re-enter */
+ POP_STATE;
+
+ switch (st->resume_state) {
+ case resume_TRIE1:
+ goto resume_point_TRIE1;
+ case resume_TRIE2:
+ goto resume_point_TRIE2;
+ case resume_EVAL:
+ break;
+ case resume_CURLYX:
+ goto resume_point_CURLYX;
+ case resume_WHILEM1:
+ goto resume_point_WHILEM1;
+ case resume_WHILEM2:
+ goto resume_point_WHILEM2;
+ case resume_WHILEM3:
+ goto resume_point_WHILEM3;
+ case resume_WHILEM4:
+ goto resume_point_WHILEM4;
+ case resume_WHILEM5:
+ goto resume_point_WHILEM5;
+ case resume_WHILEM6:
+ goto resume_point_WHILEM6;
+ case resume_CURLYM1:
+ goto resume_point_CURLYM1;
+ case resume_CURLYM2:
+ goto resume_point_CURLYM2;
+ case resume_CURLYM3:
+ goto resume_point_CURLYM3;
+ case resume_CURLYM4:
+ goto resume_point_CURLYM4;
+ case resume_IFMATCH:
+ goto resume_point_IFMATCH;
+ case resume_PLUS1:
+ goto resume_point_PLUS1;
+ case resume_PLUS2:
+ goto resume_point_PLUS2;
+ case resume_PLUS3:
+ goto resume_point_PLUS3;
+ case resume_PLUS4:
+ goto resume_point_PLUS4;
+ default:
+ Perl_croak(aTHX_ "regexp resume memory corruption");
+ }
+ }
+ goto final_exit;
no:
DEBUG_EXECUTE_r(
@@ -4476,33 +4536,38 @@ do_no:
}
/* NOTREACHED */
}
+
#ifdef DEBUGGING
PL_regindent--;
#endif
result = 0;
-exit_level:
-
- if (depth--) {
- /* restore previous state and re-enter */
- st--;
- if (st < &PL_regmatch_slab->states[0]) {
- PL_regmatch_slab = PL_regmatch_slab->prev;
- st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1];
- }
- PL_regmatch_state = st;
-
- PL_reg_call_cc = st->reg_call_cc;
- scan = st->scan;
- next = st->next;
- n = st->n;
- locinput = st->locinput;
- nextchr = UCHARAT(locinput);
+ if (depth) {
+ /* there's a previous state to backtrack to */
+ POP_STATE;
switch (st->resume_state) {
case resume_TRIE1:
goto resume_point_TRIE1;
case resume_TRIE2:
goto resume_point_TRIE2;
+ case resume_EVAL:
+ /* we have failed an (??{...}). Restore state to the outer re
+ * then re-throw the failure */
+ if (st->u.eval.toggleutf)
+ PL_reg_flags ^= RF_utf8;
+ ReREFCNT_dec(rex);
+ rex = st->u.eval.prev_rex;
+ PL_reg_re = rex; /* XXX */
+ cur_eval = st->u.eval.prev_eval;
+
+ /* XXXX This is too dramatic a measure... */
+ PL_reg_maxiter = 0;
+
+ PL_reginput = locinput;
+ REGCP_UNWIND(st->u.eval.lastcp);
+ regcppop(rex);
+ goto do_no;
+
case resume_CURLYX:
goto resume_point_CURLYX;
case resume_WHILEM1:
@@ -4535,13 +4600,13 @@ exit_level:
goto resume_point_PLUS3;
case resume_PLUS4:
goto resume_point_PLUS4;
- case resume_END:
- goto resume_point_END;
default:
Perl_croak(aTHX_ "regexp resume memory corruption");
}
- /* NOTREACHED */
}
+
+final_exit:
+
/* restore original high-water mark */
PL_regmatch_slab = orig_slab;
PL_regmatch_state = orig_state;
diff --git a/regexp.h b/regexp.h
index 57f8d5ded2..bb02d22cae 100644
--- a/regexp.h
+++ b/regexp.h
@@ -159,19 +159,10 @@ typedef struct _reg_trie_accepted reg_trie_accepted;
typedef I32 CHECKPOINT;
-typedef struct re_cc_state
-{
- I32 ss;
- regnode *node;
- struct re_cc_state *prev;
- struct regmatch_state *cc; /* state corresponding to the current curly */
- regexp *re;
-} re_cc_state;
-
-
typedef enum {
resume_TRIE1,
resume_TRIE2,
+ resume_EVAL,
resume_CURLYX,
resume_WHILEM1,
resume_WHILEM2,
@@ -187,8 +178,7 @@ typedef enum {
resume_PLUS1,
resume_PLUS2,
resume_PLUS3,
- resume_PLUS4,
- resume_END
+ resume_PLUS4
} regmatch_resume_states;
@@ -200,7 +190,7 @@ typedef struct regmatch_state {
regmatch_resume_states resume_state; /* where to jump to on return */
regnode *scan; /* Current node. */
regnode *next; /* Next node. */
- bool minmod; /* the next "{n.m}" is a "{n,m}?" */
+ bool minmod; /* the next "{n,m}" is a "{n,m}?" */
bool sw; /* the condition value in (?(cond)a|b) */
int logical;
I32 unwind; /* savestack index of current unwind block */
@@ -219,8 +209,14 @@ typedef struct regmatch_state {
} trie;
struct {
- CHECKPOINT cp; /* remember current savestack indexes */
- CHECKPOINT lastcp;
+ regexp *prev_rex;
+ int toggleutf;
+ CHECKPOINT cp; /* remember current savestack indexes */
+ CHECKPOINT lastcp;
+ struct regmatch_state *prev_eval; /* save cur_eval */
+ struct regmatch_slab *prev_slab;
+ int depth;
+
} eval;
struct {
@@ -263,17 +259,7 @@ typedef struct regmatch_state {
char *old;
int count;
} plus; /* and CURLYN/CURLY/STAR */
-
- struct {
- CHECKPOINT cp; /* remember current savestack indexes */
- CHECKPOINT lastcp;
- struct regmatch_state *savecc;
- re_cc_state *cur_call_cc;
- regexp *end_re;
- } end;
} u;
-
- re_cc_state *reg_call_cc; /* saved value of PL_reg_call_cc */
} regmatch_state;
/* how many regmatch_state structs to allocate as a single slab.
@@ -303,7 +289,6 @@ typedef struct regmatch_slab {
#define PL_reg_eval_set PL_reg_state.re_state_reg_eval_set
#define PL_regnarrate PL_reg_state.re_state_regnarrate
#define PL_regindent PL_reg_state.re_state_regindent
-#define PL_reg_call_cc PL_reg_state.re_state_reg_call_cc
#define PL_reg_re PL_reg_state.re_state_reg_re
#define PL_reg_ganch PL_reg_state.re_state_reg_ganch
#define PL_reg_sv PL_reg_state.re_state_reg_sv
@@ -338,7 +323,6 @@ struct re_save_state {
I32 re_state_reg_eval_set; /* from regexec.c */
I32 re_state_regnarrate; /* from regexec.c */
int re_state_regindent; /* from regexec.c */
- struct re_cc_state *re_state_reg_call_cc; /* from regexec.c */
regexp *re_state_reg_re; /* from regexec.c */
char *re_state_reg_ganch; /* from regexec.c */
SV *re_state_reg_sv; /* from regexec.c */
diff --git a/sv.c b/sv.c
index cf42029ef4..d6135eddac 100644
--- a/sv.c
+++ b/sv.c
@@ -10657,8 +10657,6 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param)
/* I assume that it only ever "worked" because no-one called
(pseudo)fork while the regexp engine had re-entered itself.
*/
- new_state->re_state_reg_call_cc
- = any_dup(old_state->re_state_reg_call_cc, proto_perl);
new_state->re_state_reg_re
= any_dup(old_state->re_state_reg_re, proto_perl);
new_state->re_state_reg_ganch