summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>1999-11-13 19:50:24 +0000
committerJarkko Hietaniemi <jhi@iki.fi>1999-11-13 19:50:24 +0000
commitb2a3d41024cc23941660c6f6cac48502a4284422 (patch)
tree68fe7d6c04fd45051dc2fb49ab731abfde2d47fc /regexec.c
parent374d691e102e2975cdf0ae9698b9f6e2ad73d91c (diff)
downloadperl-b2a3d41024cc23941660c6f6cac48502a4284422.tar.gz
Change #4576 accidentally leaked also parts of
Ilya's patch that won't apply cleanly anymore. p4raw-id: //depot/cfgperl@4577
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c461
1 files changed, 64 insertions, 397 deletions
diff --git a/regexec.c b/regexec.c
index e3f0cb4273..fa891c86f8 100644
--- a/regexec.c
+++ b/regexec.c
@@ -254,9 +254,6 @@ S_cache_re(pTHX_ regexp *prog)
PL_reg_re = prog;
}
-static char *find_byclass(regexp * prog, regnode *c, char *s, char *strend,
- char *startpos, I32 norun);
-
/*
* Need to implement the following flags for reg_anch:
*
@@ -278,13 +275,6 @@ static char *find_byclass(regexp * prog, regnode *c, char *s, char *strend,
/* XXXX We assume that strpos is strbeg unless sv. */
-/* XXXX Some places assume that there is a fixed substring.
- An update may be needed if optimizer marks as "INTUITable"
- RExen without fixed substrings. Similarly, it is assumed that
- lengths of all the strings are no more than minlen, thus they
- cannot come from lookahead.
- (Or minlen should take into account lookahead.) */
-
/* A failure to find a constant substring means that there is no need to make
an expensive call to REx engine, thus we celebrate a failure. Similarly,
finding a substring too deep into the string means that less calls to
@@ -295,14 +285,10 @@ static char *find_byclass(regexp * prog, regnode *c, char *s, char *strend,
b) Fixed substring;
c) Whether we are anchored (beginning-of-line or \G);
d) First node (of those at offset 0) which may distingush positions;
- We use a)b)d) and multiline-part of c), and try to find a position in the
+ We use 'a', 'b', multiline-part of 'c', and try to find a position in the
string which does not contradict any of them.
*/
-/* Most of decisions we do here should have been done at compile time.
- The nodes of the REx which we used for the search should have been
- deleted from the finite automaton. */
-
char *
Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
char *strend, U32 flags, re_scream_pos_data *data)
@@ -315,8 +301,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
char *t;
I32 ml_anch;
char *tmp;
- register char *other_last = Nullch; /* other substr checked before this */
- char *check_at; /* check substr found at this pos */
+ register char *other_last = Nullch;
#ifdef DEBUGGING
char *i_strpos = strpos;
#endif
@@ -447,8 +432,6 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
if (!s)
goto fail_finish;
- check_at = s;
-
/* Finish the diagnostic message */
DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
@@ -464,7 +447,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
/* Take into account the "other" substring. */
/* XXXX May be hopelessly wrong for UTF... */
if (!other_last)
- other_last = strpos;
+ other_last = strpos - 1;
if (check == prog->float_substr) {
do_other_anchored:
{
@@ -482,8 +465,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
else
t = strpos;
t += prog->anchored_offset;
- if (t < other_last) /* These positions already checked */
- t = other_last;
+ if (t <= other_last)
+ t = other_last + 1;
PL_bostr = tmp;
last2 = last1 = strend - prog->minlen;
if (last < last1)
@@ -512,7 +495,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
", trying floating at offset %ld...\n",
(long)(s1 + 1 - i_strpos)));
PL_regeol = strend; /* Used in HOP() */
- other_last = last1 + prog->anchored_offset + 1;
+ other_last = last1 + prog->anchored_offset;
s = HOPc(last, 1);
goto restart;
}
@@ -520,7 +503,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
(long)(s - i_strpos)));
t = s - prog->anchored_offset;
- other_last = s + 1;
+ other_last = s - 1;
s = s1;
if (t == strpos)
goto try_at_start;
@@ -537,8 +520,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
if (last - t > prog->float_max_offset)
last = t + prog->float_max_offset;
s = t + prog->float_min_offset;
- if (s < other_last)
- s = other_last;
+ if (s <= other_last)
+ s = other_last + 1;
/* XXXX It is not documented what units *_offsets are in. Assume bytes. */
/* fbm_instr() takes into account exact value of end-of-str
if the check is SvTAIL(ed). Since false positives are OK,
@@ -563,7 +546,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
DEBUG_r(PerlIO_printf(Perl_debug_log,
", trying anchored starting at offset %ld...\n",
(long)(s1 + 1 - i_strpos)));
- other_last = last + 1;
+ other_last = last;
PL_regeol = strend; /* Used in HOP() */
s = HOPc(t, 1);
goto restart;
@@ -571,7 +554,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
else {
DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
(long)(s - i_strpos)));
- other_last = s + 1;
+ other_last = s - 1;
s = s1;
if (t == strpos)
goto try_at_start;
@@ -669,72 +652,6 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
s = strpos;
}
- /* Last resort... */
- /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
- if (prog->regstclass) {
- /* minlen == 0 is possible if regstclass is \b or \B,
- and the fixed substr is ''$.
- Since minlen is already taken into account, s+1 is before strend;
- accidentally, minlen >= 1 guaranties no false positives at s + 1
- even for \b or \B. But (minlen? 1 : 0) below assumes that
- regstclass does not come from lookahead... */
- /* If regstclass takes bytelength more than 1: If charlength==1, OK.
- This leaves EXACTF only, which is dealt with in find_byclass(). */
- char *endpos = (prog->anchored_substr || ml_anch)
- ? s + (prog->minlen? 1 : 0)
- : (prog->float_substr ? check_at - start_shift + 1
- : strend) ;
- char *startpos = sv ? strend - SvCUR(sv) : s;
-
- t = s;
- s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
- if (!s) {
-#ifdef DEBUGGING
- char *what;
-#endif
- if (endpos == strend) {
- DEBUG_r( PerlIO_printf(Perl_debug_log,
- "Could not match STCLASS...\n") );
- goto fail;
- }
- /* Contradict one of substrings */
- if (prog->anchored_substr) {
- DEBUG_r( PerlIO_printf(Perl_debug_log,
- "This position contradicts STCLASS...\n") );
- if (prog->anchored_substr == check) {
- DEBUG_r( what = "anchored" );
- hop_and_restart:
- PL_regeol = strend; /* Used in HOP() */
- s = HOPc(t, 1);
- DEBUG_r( PerlIO_printf(Perl_debug_log,
- "trying %s substr starting at offset %ld...\n",
- what, (long)(s + start_shift - i_strpos)) );
- goto restart;
- }
- /* Have both, check is floating */
- if (t + start_shift >= check_at) /* Contradicts floating=check */
- goto retry_floating_check;
- /* Recheck anchored substring, but not floating... */
- s = check_at;
- DEBUG_r( PerlIO_printf(Perl_debug_log,
- "trying anchored substr starting at offset %ld...\n",
- (long)(other_last - i_strpos)) );
- goto do_other_anchored;
- }
- /* Check is floating subtring. */
- retry_floating_check:
- t = check_at - start_shift;
- DEBUG_r( what = "floating" );
- goto hop_and_restart;
- }
- DEBUG_r( if (t != s)
- PerlIO_printf(Perl_debug_log,
- "By STCLASS: moving %ld --> %ld\n",
- (long)(t - i_strpos), (long)(s - i_strpos));
- else
- PerlIO_printf(Perl_debug_log,
- "Does not contradict STCLASS...\n") );
- }
DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
return s;
@@ -1066,7 +983,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
&& (ln == 1 || (OP(c) == EXACTF
? ibcmp(s, m, ln)
: ibcmp_locale(s, m, ln)))
- && (norun || regtry(prog, s)) )
+ && regtry(prog, s) )
goto got_it;
s++;
}
@@ -1076,7 +993,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
&& (ln == 1 || (OP(c) == EXACTF
? ibcmp(s, m, ln)
: ibcmp_locale(s, m, ln)))
- && (norun || regtry(prog, s)) )
+ && regtry(prog, s) )
goto got_it;
s++;
}
@@ -1086,24 +1003,32 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case BOUND:
- tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
+ if (minlen) {
+ dontbother++;
+ strend -= 1;
+ }
+ tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
while (s < strend) {
if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
tmp = !tmp;
- if ((norun || regtry(prog, s)))
+ if (regtry(prog, s))
goto got_it;
}
s++;
}
- if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
+ if ((minlen || tmp) && regtry(prog,s))
goto got_it;
break;
case BOUNDLUTF8:
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case BOUNDUTF8:
- tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
+ if (minlen) {
+ dontbother++;
+ strend = reghop_c(strend, -1);
+ }
+ tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
while (s < strend) {
if (tmp == !(OP(c) == BOUND ?
@@ -1111,54 +1036,60 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
isALNUM_LC_utf8((U8*)s)))
{
tmp = !tmp;
- if ((norun || regtry(prog, s)))
+ if (regtry(prog, s))
goto got_it;
}
s += UTF8SKIP(s);
}
- if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
+ if ((minlen || tmp) && regtry(prog,s))
goto got_it;
break;
case NBOUNDL:
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case NBOUND:
- tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
+ if (minlen) {
+ dontbother++;
+ strend -= 1;
+ }
+ tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
while (s < strend) {
if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
tmp = !tmp;
- else if ((norun || regtry(prog, s)))
+ else if (regtry(prog, s))
goto got_it;
s++;
}
- if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
+ if ((minlen || !tmp) && regtry(prog,s))
goto got_it;
break;
case NBOUNDLUTF8:
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case NBOUNDUTF8:
- if (prog->minlen)
+ if (minlen) {
+ dontbother++;
strend = reghop_c(strend, -1);
- tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
+ }
+ tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
while (s < strend) {
if (tmp == !(OP(c) == NBOUND ?
swash_fetch(PL_utf8_alnum, (U8*)s) :
isALNUM_LC_utf8((U8*)s)))
tmp = !tmp;
- else if ((norun || regtry(prog, s)))
+ else if (regtry(prog, s))
goto got_it;
s += UTF8SKIP(s);
}
- if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
+ if ((minlen || !tmp) && regtry(prog,s))
goto got_it;
break;
case ALNUM:
while (s < strend) {
if (isALNUM(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1171,7 +1102,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case ALNUMUTF8:
while (s < strend) {
if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1185,7 +1116,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (isALNUM_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1199,7 +1130,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (isALNUM_LC_utf8((U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1212,7 +1143,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case NALNUM:
while (s < strend) {
if (!isALNUM(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1225,7 +1156,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case NALNUMUTF8:
while (s < strend) {
if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1239,7 +1170,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (!isALNUM_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1253,7 +1184,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (!isALNUM_LC_utf8((U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1266,7 +1197,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case SPACE:
while (s < strend) {
if (isSPACE(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1279,7 +1210,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case SPACEUTF8:
while (s < strend) {
if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1293,7 +1224,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (isSPACE_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1307,7 +1238,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1320,7 +1251,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case NSPACE:
while (s < strend) {
if (!isSPACE(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1333,7 +1264,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case NSPACEUTF8:
while (s < strend) {
if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1347,7 +1278,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (!isSPACE_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1361,7 +1292,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1374,7 +1305,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case DIGIT:
while (s < strend) {
if (isDIGIT(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1387,7 +1318,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case DIGITUTF8:
while (s < strend) {
if (swash_fetch(PL_utf8_digit,(U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1401,7 +1332,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (isDIGIT_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1415,7 +1346,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (isDIGIT_LC_utf8((U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1428,7 +1359,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case NDIGIT:
while (s < strend) {
if (!isDIGIT(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1441,7 +1372,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case NDIGITUTF8:
while (s < strend) {
if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1455,7 +1386,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (!isDIGIT_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1469,7 +1400,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (!isDIGIT_LC_utf8((U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && regtry(prog, s))
goto got_it;
else
tmp = doevery;
@@ -1483,270 +1414,6 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
break;
}
- return 0;
- got_it:
- return s;
-}
-
-/*
- - regexec_flags - match a regexp against a string
- */
-I32
-Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
- char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
-/* strend: pointer to null at end of string */
-/* strbeg: real beginning of string */
-/* minend: end of match must be >=minend after stringarg. */
-/* data: May be used for some additional optimizations. */
-/* nosave: For optimizations. */
-{
- dTHR;
- register char *s;
- register regnode *c;
- register char *startpos = stringarg;
- register I32 tmp;
- I32 minlen; /* must match at least this many chars */
- I32 dontbother = 0; /* how many characters not to try at end */
- I32 start_shift = 0; /* Offset of the start to find
- constant substr. */ /* CC */
- I32 end_shift = 0; /* Same for the end. */ /* CC */
- I32 scream_pos = -1; /* Internal iterator of scream. */
- char *scream_olds;
- SV* oreplsv = GvSV(PL_replgv);
-
- PL_regcc = 0;
-
- cache_re(prog);
-#ifdef DEBUGGING
- PL_regnarrate = PL_debug & 512;
-#endif
-
- /* Be paranoid... */
- if (prog == NULL || startpos == NULL) {
- Perl_croak(aTHX_ "NULL regexp parameter");
- return 0;
- }
-
- minlen = prog->minlen;
- if (strend - startpos < minlen) goto phooey;
-
- if (startpos == strbeg) /* is ^ valid at stringarg? */
- PL_regprev = '\n';
- else {
- PL_regprev = (U32)stringarg[-1];
- if (!PL_multiline && PL_regprev == '\n')
- PL_regprev = '\0'; /* force ^ to NOT match */
- }
-
- /* Check validity of program. */
- if (UCHARAT(prog->program) != REG_MAGIC) {
- Perl_croak(aTHX_ "corrupted regexp program");
- }
-
- PL_reg_flags = 0;
- PL_reg_eval_set = 0;
- PL_reg_maxiter = 0;
-
- if (prog->reganch & ROPT_UTF8)
- PL_reg_flags |= RF_utf8;
-
- /* Mark beginning of line for ^ and lookbehind. */
- PL_regbol = startpos;
- PL_bostr = strbeg;
- PL_reg_sv = sv;
-
- /* Mark end of line for $ (and such) */
- PL_regeol = strend;
-
- /* see how far we have to get to not match where we matched before */
- PL_regtill = startpos+minend;
-
- /* We start without call_cc context. */
- PL_reg_call_cc = 0;
-
- /* If there is a "must appear" string, look for it. */
- s = startpos;
-
- if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
- MAGIC *mg;
-
- if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
- PL_reg_ganch = startpos;
- else if (sv && SvTYPE(sv) >= SVt_PVMG
- && SvMAGIC(sv)
- && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
- PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
- if (prog->reganch & ROPT_ANCH_GPOS) {
- if (s > PL_reg_ganch)
- goto phooey;
- s = PL_reg_ganch;
- }
- }
- else /* pos() not defined */
- PL_reg_ganch = strbeg;
- }
-
- if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
- re_scream_pos_data d;
-
- d.scream_olds = &scream_olds;
- d.scream_pos = &scream_pos;
- s = re_intuit_start(prog, sv, s, strend, flags, &d);
- if (!s)
- goto phooey; /* not present */
- }
-
- DEBUG_r( if (!PL_colorset) reginitcolors() );
- DEBUG_r(PerlIO_printf(Perl_debug_log,
- "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
- PL_colors[4],PL_colors[5],PL_colors[0],
- prog->precomp,
- PL_colors[1],
- (strlen(prog->precomp) > 60 ? "..." : ""),
- PL_colors[0],
- (strend - startpos > 60 ? 60 : strend - startpos),
- startpos, PL_colors[1],
- (strend - startpos > 60 ? "..." : ""))
- );
-
- /* Simplest case: anchored match need be tried only once. */
- /* [unless only anchor is BOL and multiline is set] */
- if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
- if (s == startpos && regtry(prog, startpos))
- goto got_it;
- else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
- || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
- {
- char *end;
-
- if (minlen)
- dontbother = minlen - 1;
- end = HOPc(strend, -dontbother) - 1;
- /* for multiline we only have to try after newlines */
- if (prog->check_substr) {
- if (s == startpos)
- goto after_try;
- while (1) {
- if (regtry(prog, s))
- goto got_it;
- after_try:
- if (s >= end)
- goto phooey;
- s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
- if (!s)
- goto phooey;
- }
- } else {
- if (s > startpos)
- s--;
- while (s < end) {
- if (*s++ == '\n') { /* don't need PL_utf8skip here */
- if (regtry(prog, s))
- goto got_it;
- }
- }
- }
- }
- goto phooey;
- } else if (prog->reganch & ROPT_ANCH_GPOS) {
- if (regtry(prog, PL_reg_ganch))
- goto got_it;
- goto phooey;
- }
-
- /* Messy cases: unanchored match. */
- if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
- /* we have /x+whatever/ */
- /* it must be a one character string (XXXX Except UTF?) */
- char ch = SvPVX(prog->anchored_substr)[0];
- if (UTF) {
- while (s < strend) {
- if (*s == ch) {
- if (regtry(prog, s)) goto got_it;
- s += UTF8SKIP(s);
- while (s < strend && *s == ch)
- s += UTF8SKIP(s);
- }
- s += UTF8SKIP(s);
- }
- }
- else {
- while (s < strend) {
- if (*s == ch) {
- if (regtry(prog, s)) goto got_it;
- s++;
- while (s < strend && *s == ch)
- s++;
- }
- s++;
- }
- }
- }
- /*SUPPRESS 560*/
- else if (prog->anchored_substr != Nullsv
- || (prog->float_substr != Nullsv
- && prog->float_max_offset < strend - s)) {
- SV *must = prog->anchored_substr
- ? prog->anchored_substr : prog->float_substr;
- I32 back_max =
- prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
- I32 back_min =
- prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
- I32 delta = back_max - back_min;
- char *last = HOPc(strend, /* Cannot start after this */
- -(I32)(CHR_SVLEN(must)
- - (SvTAIL(must) != 0) + back_min));
- char *last1; /* Last position checked before */
-
- if (s > PL_bostr)
- last1 = HOPc(s, -1);
- else
- last1 = s - 1; /* bogus */
-
- /* XXXX check_substr already used to find `s', can optimize if
- check_substr==must. */
- scream_pos = -1;
- dontbother = end_shift;
- strend = HOPc(strend, -dontbother);
- while ( (s <= last) &&
- ((flags & REXEC_SCREAM)
- ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
- end_shift, &scream_pos, 0))
- : (s = fbm_instr((unsigned char*)HOP(s, back_min),
- (unsigned char*)strend, must,
- PL_multiline ? FBMrf_MULTILINE : 0))) ) {
- if (HOPc(s, -back_max) > last1) {
- last1 = HOPc(s, -back_min);
- s = HOPc(s, -back_max);
- }
- else {
- char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
-
- last1 = HOPc(s, -back_min);
- s = t;
- }
- if (UTF) {
- while (s <= last1) {
- if (regtry(prog, s))
- goto got_it;
- s += UTF8SKIP(s);
- }
- }
- else {
- while (s <= last1) {
- if (regtry(prog, s))
- goto got_it;
- s++;
- }
- }
- }
- goto phooey;
- }
- else if (c = prog->regstclass) {
- if (minlen) /* don't bother with what can't match */
- strend = HOPc(strend, -(minlen - 1));
- if (find_byclass(prog, c, s, strend, startpos, 0))
- goto got_it;
}
else {
dontbother = 0;