summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
authorIlya Zakharevich <ilya@math.berkeley.edu>1999-09-08 22:49:49 -0400
committerJarkko Hietaniemi <jhi@iki.fi>1999-09-09 07:26:53 +0000
commit2ab053812751d8a65a6b8eda6829e97c0876564b (patch)
tree7f532e0d73c87d82316beaf00f100ce1cf5e658c /regexec.c
parent26b44a0af0ea323b1f08d11d9890b96d19caeab9 (diff)
downloadperl-2ab053812751d8a65a6b8eda6829e97c0876564b.tar.gz
Clear up PL_regcc issues.
To: perl5-porters@perl.org (Mailing list Perl5) Subject: [PATCH 5.005_60] Another regexec.c unobfuscation Message-Id: <199909090649.CAA26119@monk.mps.ohio-state.edu> p4raw-id: //depot/cfgperl@4109
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c103
1 files changed, 89 insertions, 14 deletions
diff --git a/regexec.c b/regexec.c
index 3fb1826e19..54f5e22253 100644
--- a/regexec.c
+++ b/regexec.c
@@ -642,7 +642,6 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
register I32 tmp;
I32 minlen; /* must match at least this many chars */
I32 dontbother = 0; /* how many characters not to try at end */
- CURCUR cc;
I32 start_shift = 0; /* Offset of the start to find
constant substr. */ /* CC */
I32 end_shift = 0; /* Same for the end. */ /* CC */
@@ -650,9 +649,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
char *scream_olds;
SV* oreplsv = GvSV(PL_replgv);
- cc.cur = 0;
- cc.oldcc = 0;
- PL_regcc = &cc;
+ PL_regcc = 0;
cache_re(prog);
#ifdef DEBUGGING
@@ -2109,7 +2106,6 @@ S_regmatch(pTHX_ regnode *prog)
regexp *re;
MAGIC *mg = Null(MAGIC*);
re_cc_state state;
- CURCUR cctmp;
CHECKPOINT cp, lastcp;
if(SvROK(ret) || SvRMAGICAL(ret)) {
@@ -2152,9 +2148,7 @@ S_regmatch(pTHX_ regnode *prog)
state.cc = PL_regcc;
state.re = PL_reg_re;
- cctmp.cur = 0;
- cctmp.oldcc = 0;
- PL_regcc = &cctmp;
+ PL_regcc = 0;
cp = regcppush(0); /* Save *all* the positions. */
REGCP_SET;
@@ -2227,6 +2221,81 @@ S_regmatch(pTHX_ regnode *prog)
case LOGICAL:
logical = scan->flags;
break;
+/*******************************************************************
+ PL_regcc contains infoblock about the innermost (...)* loop, and
+ a pointer to the next outer infoblock.
+
+ Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
+
+ 1) After matching Y, the regnode for CURLYX is processed;
+
+ 2) This regnode creates infoblock on the stack, and calls
+ regmatch() recursively with the starting point at WHILEM node;
+
+ 3) Each hit of WHILEM node tries to match A and Z (in the order
+ depending on the current iteration, min/max of {min,max} and
+ greediness). The locations of the nodes for "A" and "Z" are read
+ from the infoblock, as are the number of times "A" has already
+ matched and the greediness.
+
+ 4) After A matches, the same WHILEM node is hit again.
+
+ 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
+ of the same pair. Thus when WHILEM tries to match Z, it temporarily
+ resets PL_regcc to the enclosing infoblock (->oldcc), since this Y(A)*Z
+ can be a part of some other loop, as in (Y(A)*Z)*. If Z matches, the
+ automaton will hit the WHILEM node of the external loop.
+
+ Currently present infoblocks form a tree with a stem formed by PL_regcc
+ and whatever it mentions via ->oldcc, and additional attached trees
+ corresponding to temporarily unset infoblocks as in "5" above.
+
+ In the following picture infoblocks for outer loop of
+ (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
+ is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
+ infoblocks are drawn below the "reset" infoblock.
+
+ In fact in the picture below we do not show failed matches for Z and T
+ by WHILEM blocks. [We illustrate minimal matches, since for them it is
+ more obvious *why* one needs to *temporarily* unset infoblocks.]
+
+ Matched REx position InfoBlocks Comment
+ (Y(A)*?Z)*?T x
+ Y(A)*?Z)*?T x <- O
+ Y (A)*?Z)*?T x <- O
+ Y A)*?Z)*?T x <- O <- I
+ YA )*?Z)*?T x <- O <- I
+ YA A)*?Z)*?T x <- O <- I
+ YAA )*?Z)*?T x <- O <- I
+ YAA Z)*?T x <- O # Temporarily unset I
+ I
+
+ YAAZ Y(A)*?Z)*?T x <- O
+ I
+
+ YAAZY (A)*?Z)*?T x <- O
+ I
+
+ YAAZY A)*?Z)*?T x <- O <- I
+ I
+
+ YAAZYA )*?Z)*?T x <- O <- I
+ I
+
+ YAAZYA Z)*?T x <- O # Temporarily unset I
+ I,I
+
+ YAAZYAZ )*?T x <- O
+ I,I
+
+ YAAZYAZ T x # Temporarily unset O
+ O
+ I,I
+
+ YAAZYAZT x
+ O
+ I,I
+ *******************************************************************/
case CURLYX: {
CURCUR cc;
CHECKPOINT cp = PL_savestack_ix;
@@ -2279,7 +2348,8 @@ S_regmatch(pTHX_ regnode *prog)
if (locinput == cc->lastloc && n >= cc->min) {
PL_regcc = cc->oldcc;
- ln = PL_regcc->cur;
+ if (PL_regcc)
+ ln = PL_regcc->cur;
DEBUG_r(
PerlIO_printf(Perl_debug_log,
"%*s empty match detected, try continuation...\n",
@@ -2292,7 +2362,8 @@ S_regmatch(pTHX_ regnode *prog)
"%*s failed...\n",
REPORT_CODE_OFF+PL_regindent*2, "")
);
- PL_regcc->cur = ln;
+ if (PL_regcc)
+ PL_regcc->cur = ln;
PL_regcc = cc;
sayNO;
}
@@ -2363,7 +2434,8 @@ S_regmatch(pTHX_ regnode *prog)
if (cc->minmod) {
PL_regcc = cc->oldcc;
- ln = PL_regcc->cur;
+ if (PL_regcc)
+ ln = PL_regcc->cur;
cp = regcppush(cc->parenfloor);
REGCP_SET;
if (regmatch(cc->next)) {
@@ -2372,7 +2444,8 @@ S_regmatch(pTHX_ regnode *prog)
}
REGCP_UNWIND;
regcppop();
- PL_regcc->cur = ln;
+ if (PL_regcc)
+ PL_regcc->cur = ln;
PL_regcc = cc;
if (n >= cc->max) { /* Maximum greed exceeded? */
@@ -2443,14 +2516,16 @@ S_regmatch(pTHX_ regnode *prog)
/* Failed deeper matches of scan, so see if this one works. */
PL_regcc = cc->oldcc;
- ln = PL_regcc->cur;
+ if (PL_regcc)
+ ln = PL_regcc->cur;
if (regmatch(cc->next))
sayYES;
DEBUG_r(
PerlIO_printf(Perl_debug_log, "%*s failed...\n",
REPORT_CODE_OFF+PL_regindent*2, "")
);
- PL_regcc->cur = ln;
+ if (PL_regcc)
+ PL_regcc->cur = ln;
PL_regcc = cc;
cc->cur = n - 1;
cc->lastloc = lastloc;