summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Mitchell <davem@fdisolutions.com>2006-04-02 22:37:57 +0000
committerDave Mitchell <davem@fdisolutions.com>2006-04-02 22:37:57 +0000
commita03745377e2ca3b237c839d07c73106bdf88ac3e (patch)
tree48e3ea13545d5d1109a9846063bbe92ced22e466
parentc9da69fb2cff080fa286bf64ac1d12f81bab3bf6 (diff)
downloadperl-a03745377e2ca3b237c839d07c73106bdf88ac3e.tar.gz
subsume CURCUR (current curly) struct within the main regmatch_state
p4raw-id: //depot/perl@27691
-rw-r--r--regexec.c137
-rw-r--r--regexp.h36
2 files changed, 83 insertions, 90 deletions
diff --git a/regexec.c b/regexec.c
index 9cb15b8ce4..ee8503be44 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1017,7 +1017,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32
U8 *sm = (U8 *) m;
U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
- const U32 uniflags = UTF8_ALLOW_DEFAULT;
+ const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
@@ -1064,7 +1064,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32
UV c, f;
U8 tmpbuf [UTF8_MAXBYTES+1];
STRLEN len, foldlen;
- const U32 uniflags = UTF8_ALLOW_DEFAULT;
+ const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
if (c1 == c2) {
/* Upper and lower of 1st char are equal -
* probably not a "letter". */
@@ -1166,7 +1166,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32
tmp = '\n';
else {
U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
- tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
+ tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
}
tmp = ((OP(c) == BOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
@@ -1208,7 +1208,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32
tmp = '\n';
else {
U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
- tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
+ tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
}
tmp = ((OP(c) == NBOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
@@ -2430,7 +2430,7 @@ S_regmatch(pTHX_ regnode *prog)
{
dVAR;
register const bool do_utf8 = PL_reg_match_utf8;
- const U32 uniflags = UTF8_ALLOW_DEFAULT;
+ const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
regmatch_slab *orig_slab;
regmatch_state *orig_state;
@@ -3046,7 +3046,7 @@ S_regmatch(pTHX_ regnode *prog)
else {
const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
- st->ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
+ st->ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
}
if (OP(scan) == BOUND || OP(scan) == NBOUND) {
st->ln = isALNUM_uni(st->ln);
@@ -3439,20 +3439,21 @@ S_regmatch(pTHX_ regnode *prog)
st->logical = scan->flags;
break;
/*******************************************************************
- cc contains infoblock about the innermost (...)* loop, and
- a pointer to the next outer infoblock.
+ cc points to the regmatch_state associated with the most recent CURLYX.
+ This struct contains info about the innermost (...)* loop (an
+ "infoblock"), and a pointer to the next outer cc.
Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
1) After matching Y, regnode for CURLYX is processed;
- 2) This regnode mallocs an infoblock, and calls regmatch() recursively
+ 2) This regnode populates cc, and calls regmatch() recursively
with the starting point at WHILEM node;
3) Each hit of WHILEM node tries to match A and Z (in the order
depending on the current iteration, min/max of {min,max} and
greediness). The information about where are nodes for "A"
- and "Z" is read from the infoblock, as is info on how many times "A"
+ and "Z" is read from cc, as is info on how many times "A"
was already matched, and greediness.
4) After A matches, the same WHILEM node is hit again.
@@ -3463,7 +3464,7 @@ S_regmatch(pTHX_ regnode *prog)
as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
of the external loop.
- Currently present infoblocks form a tree with a stem formed by PL_curcc
+ Currently present infoblocks form a tree with a stem formed by st->cc
and whatever it mentions via ->next, and additional attached trees
corresponding to temporarily unset infoblocks as in "5" above.
@@ -3518,34 +3519,32 @@ S_regmatch(pTHX_ regnode *prog)
/* No need to save/restore up to this paren */
I32 parenfloor = scan->flags;
- {
- CURCUR *newcc;
- Newx(newcc, 1, CURCUR);
- st->curlyx.savecc = st->cc;
- newcc->oldcc = st->cc;
- st->cc = newcc;
- }
- st->curlyx.cp = PL_savestack_ix;
if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
next += ARG(next);
/* XXXX Probably it is better to teach regpush to support
parenfloor > PL_regsize... */
if (parenfloor > (I32)*PL_reglastparen)
parenfloor = *PL_reglastparen; /* Pessimization... */
- st->cc->parenfloor = parenfloor;
- st->cc->cur = -1;
- st->cc->min = ARG1(scan);
- st->cc->max = ARG2(scan);
- st->cc->scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
- st->cc->next = next;
- st->cc->minmod = st->minmod;
- st->cc->lastloc = 0;
+
+ st->curlyx.cp = PL_savestack_ix;
+ st->curlyx.outercc = st->cc;
+ st->cc = st;
+ /* these fields contain the state of the current curly.
+ * they are accessed by subsequent WHILEMs;
+ * cur and lastloc are also updated by WHILEM */
+ st->curlyx.parenfloor = parenfloor;
+ st->curlyx.cur = -1; /* this will be updated by WHILEM */
+ st->curlyx.min = ARG1(scan);
+ st->curlyx.max = ARG2(scan);
+ st->curlyx.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
+ st->curlyx.lastloc = 0;
+ /* st->next and st->minmod are also read by WHILEM */
+
PL_reginput = locinput;
REGMATCH(PREVOPER(next), CURLYX); /* start on the WHILEM */
/*** all unsaved local vars undefined at this point */
regcpblow(st->curlyx.cp);
- Safefree(st->cc);
- st->cc = st->curlyx.savecc;
+ st->cc = st->curlyx.outercc;
saySAME(result);
}
/* NOTREACHED */
@@ -3559,28 +3558,28 @@ S_regmatch(pTHX_ regnode *prog)
* that we can try again after backing off.
*/
- st->whilem.lastloc = st->cc->lastloc; /* Detection of 0-len. */
+ st->whilem.lastloc = st->cc->curlyx.lastloc; /* Detection of 0-len. */
st->whilem.cache_offset = 0;
st->whilem.cache_bit = 0;
- n = st->cc->cur + 1; /* how many we know we matched */
+ n = st->cc->curlyx.cur + 1; /* how many we know we matched */
PL_reginput = locinput;
DEBUG_EXECUTE_r(
PerlIO_printf(Perl_debug_log,
"%*s %ld out of %ld..%ld cc=%"UVxf"\n",
REPORT_CODE_OFF+PL_regindent*2, "",
- (long)n, (long)st->cc->min,
- (long)st->cc->max, PTR2UV(st->cc))
+ (long)n, (long)st->cc->curlyx.min,
+ (long)st->cc->curlyx.max, PTR2UV(st->cc))
);
/* If degenerate scan matches "", assume scan done. */
- if (locinput == st->cc->lastloc && n >= st->cc->min) {
+ if (locinput == st->cc->curlyx.lastloc && n >= st->cc->curlyx.min) {
st->whilem.savecc = st->cc;
- st->cc = st->cc->oldcc;
+ st->cc = st->cc->curlyx.outercc;
if (st->cc)
- st->ln = st->cc->cur;
+ st->ln = st->cc->curlyx.cur;
DEBUG_EXECUTE_r(
PerlIO_printf(Perl_debug_log,
"%*s empty match detected, try continuation...\n",
@@ -3591,22 +3590,22 @@ S_regmatch(pTHX_ regnode *prog)
st->cc = st->whilem.savecc;
if (result)
sayYES;
- if (st->cc->oldcc)
- st->cc->oldcc->cur = st->ln;
+ if (st->cc->curlyx.outercc)
+ st->cc->curlyx.outercc->curlyx.cur = st->ln;
sayNO;
}
/* First just match a string of min scans. */
- if (n < st->cc->min) {
- st->cc->cur = n;
- st->cc->lastloc = locinput;
- REGMATCH(st->cc->scan, WHILEM2);
+ if (n < st->cc->curlyx.min) {
+ st->cc->curlyx.cur = n;
+ st->cc->curlyx.lastloc = locinput;
+ REGMATCH(st->cc->curlyx.scan, WHILEM2);
/*** all unsaved local vars undefined at this point */
if (result)
sayYES;
- st->cc->cur = n - 1;
- st->cc->lastloc = st->whilem.lastloc;
+ st->cc->curlyx.cur = n - 1;
+ st->cc->curlyx.lastloc = st->whilem.lastloc;
sayNO;
}
@@ -3665,10 +3664,10 @@ S_regmatch(pTHX_ regnode *prog)
if (st->cc->minmod) {
st->whilem.savecc = st->cc;
- st->cc = st->cc->oldcc;
+ st->cc = st->cc->curlyx.outercc;
if (st->cc)
- st->ln = st->cc->cur;
- st->whilem.cp = regcppush(st->whilem.savecc->parenfloor);
+ st->ln = st->cc->curlyx.cur;
+ st->whilem.cp = regcppush(st->whilem.savecc->curlyx.parenfloor);
REGCP_SET(st->whilem.lastcp);
REGMATCH(st->whilem.savecc->next, WHILEM3);
/*** all unsaved local vars undefined at this point */
@@ -3679,10 +3678,10 @@ S_regmatch(pTHX_ regnode *prog)
}
REGCP_UNWIND(st->whilem.lastcp);
regcppop();
- if (st->cc->oldcc)
- st->cc->oldcc->cur = st->ln;
+ if (st->cc->curlyx.outercc)
+ st->cc->curlyx.outercc->curlyx.cur = st->ln;
- if (n >= st->cc->max) { /* Maximum greed exceeded? */
+ if (n >= st->cc->curlyx.max) { /* Maximum greed exceeded? */
if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
&& !(PL_reg_flags & RF_warned)) {
PL_reg_flags |= RF_warned;
@@ -3700,11 +3699,11 @@ S_regmatch(pTHX_ regnode *prog)
);
/* Try scanning more and see if it helps. */
PL_reginput = locinput;
- st->cc->cur = n;
- st->cc->lastloc = locinput;
- st->whilem.cp = regcppush(st->cc->parenfloor);
+ st->cc->curlyx.cur = n;
+ st->cc->curlyx.lastloc = locinput;
+ st->whilem.cp = regcppush(st->cc->curlyx.parenfloor);
REGCP_SET(st->whilem.lastcp);
- REGMATCH(st->cc->scan, WHILEM4);
+ REGMATCH(st->cc->curlyx.scan, WHILEM4);
/*** all unsaved local vars undefined at this point */
if (result) {
regcpblow(st->whilem.cp);
@@ -3712,19 +3711,19 @@ S_regmatch(pTHX_ regnode *prog)
}
REGCP_UNWIND(st->whilem.lastcp);
regcppop();
- st->cc->cur = n - 1;
- st->cc->lastloc = st->whilem.lastloc;
+ st->cc->curlyx.cur = n - 1;
+ st->cc->curlyx.lastloc = st->whilem.lastloc;
CACHEsayNO;
}
/* Prefer scan over next for maximal matching. */
- if (n < st->cc->max) { /* More greed allowed? */
- st->whilem.cp = regcppush(st->cc->parenfloor);
- st->cc->cur = n;
- st->cc->lastloc = locinput;
+ if (n < st->cc->curlyx.max) { /* More greed allowed? */
+ st->whilem.cp = regcppush(st->cc->curlyx.parenfloor);
+ st->cc->curlyx.cur = n;
+ st->cc->curlyx.lastloc = locinput;
REGCP_SET(st->whilem.lastcp);
- REGMATCH(st->cc->scan, WHILEM5);
+ REGMATCH(st->cc->curlyx.scan, WHILEM5);
/*** all unsaved local vars undefined at this point */
if (result) {
regcpblow(st->whilem.cp);
@@ -3749,18 +3748,18 @@ S_regmatch(pTHX_ regnode *prog)
/* Failed deeper matches of scan, so see if this one works. */
st->whilem.savecc = st->cc;
- st->cc = st->cc->oldcc;
+ st->cc = st->cc->curlyx.outercc;
if (st->cc)
- st->ln = st->cc->cur;
+ st->ln = st->cc->curlyx.cur;
REGMATCH(st->whilem.savecc->next, WHILEM6);
/*** all unsaved local vars undefined at this point */
st->cc = st->whilem.savecc;
if (result)
CACHEsayYES;
- if (st->cc->oldcc)
- st->cc->oldcc->cur = st->ln;
- st->cc->cur = n - 1;
- st->cc->lastloc = st->whilem.lastloc;
+ if (st->cc->curlyx.outercc)
+ st->cc->curlyx.outercc->curlyx.cur = st->ln;
+ st->cc->curlyx.cur = n - 1;
+ st->cc->curlyx.lastloc = st->whilem.lastloc;
CACHEsayNO;
}
/* NOTREACHED */
@@ -4887,8 +4886,8 @@ S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp
if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
- (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
- /* see [perl #37836] for UTF8_ALLOW_ANYUV */
+ ckWARN(WARN_UTF8) ? UTF8_CHECK_ONLY :
+ UTF8_ALLOW_ANYUV|UTF8_CHECK_ONLY);
if (len == (STRLEN)-1)
Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
}
diff --git a/regexp.h b/regexp.h
index b291c2d25f..b3634d846c 100644
--- a/regexp.h
+++ b/regexp.h
@@ -159,31 +159,16 @@ typedef struct _reg_trie_accepted reg_trie_accepted;
typedef I32 CHECKPOINT;
-/* Current curly descriptor */
-typedef struct curcur CURCUR;
-struct curcur {
- int parenfloor; /* how far back to strip paren data */
- int cur; /* how many instances of scan we've matched */
- int min; /* the minimal number of scans to match */
- int max; /* the maximal number of scans to match */
- int minmod; /* whether to work our way up or down */
- regnode * scan; /* the thing to match */
- regnode * next; /* what has to match after it */
- char * lastloc; /* where we started matching this scan */
- CURCUR * oldcc; /* current curly before we started this one */
-};
-
typedef struct re_cc_state
{
I32 ss;
regnode *node;
struct re_cc_state *prev;
- CURCUR *cc;
+ struct regmatch_state *cc; /* state corresponding to the current curly */
regexp *re;
} re_cc_state;
-
typedef enum {
resume_TRIE1,
resume_TRIE2,
@@ -207,7 +192,7 @@ typedef enum {
} regmatch_resume_states;
-typedef struct {
+typedef struct regmatch_state {
/* these vars contain state that needs to be maintained
* across the main while loop ... */
@@ -219,7 +204,7 @@ typedef struct {
bool sw; /* the condition value in (?(cond)a|b) */
int logical;
I32 unwind; /* savestack index of current unwind block */
- CURCUR *cc; /* current innermost curly struct */
+ struct regmatch_state *cc; /* current innermost curly state */
char *locinput;
/* ... while the rest of these are local to an individual branch */
@@ -240,13 +225,22 @@ typedef struct {
struct {
CHECKPOINT cp; /* remember current savestack indexes */
- CURCUR *savecc;
+ struct regmatch_state *outercc; /* outer CURLYX state if any */
+
+ /* these contain the current curly state, and are accessed
+ * by subsequent WHILEMs */
+ int parenfloor;/* how far back to strip paren data */
+ int cur; /* how many instances of scan we've matched */
+ int min; /* the minimal number of scans to match */
+ int max; /* the maximal number of scans to match */
+ regnode * scan; /* the thing to match */
+ char * lastloc;/* where we started matching this scan */
} curlyx;
struct {
CHECKPOINT cp; /* remember current savestack indexes */
CHECKPOINT lastcp;
- CURCUR *savecc;
+ struct regmatch_state *savecc;
char *lastloc; /* Detection of 0-len. */
I32 cache_offset;
I32 cache_bit;
@@ -273,7 +267,7 @@ typedef struct {
struct {
CHECKPOINT cp; /* remember current savestack indexes */
CHECKPOINT lastcp;
- CURCUR *savecc;
+ struct regmatch_state *savecc;
re_cc_state *cur_call_cc;
regexp *end_re;
} end;