summary | refs | log | tree | commit | diff
path: root/regexec.c
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2001-02-18 22:11:20 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2001-02-18 22:11:20 +0000
commit 3baa4c62cda542368be1e7e1f7af8bd8257c2ff4 (patch)
tree 36c534fdd1b4be5046f1c67dcdf0be0722f4dc97 /regexec.c
parent 5da9da9e9f46681684e0c487fd55df8db6f9de67 (diff)
download perl-3baa4c62cda542368be1e7e1f7af8bd8257c2ff4.tar.gz
Misapplied regex optimizations when \C is present.
Fixes 20001230.002. What still remains broken is that submatches containing \C get their UTF8 flag turned on because their parent SV has it on. This will result in malformed UTF8 if a \C happens to match a non-ASCII byte.
p4raw-id: //depot/perl@8836
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 27
1 file changed, 9 insertions, 18 deletions
diff --git a/regexec.c b/regexec.c
index c70d1b1655..5d9e8ac8fa 100644
--- a/regexec.c
+++ b/regexec.c
@@ -398,7 +398,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
goto fail;
}
- if (prog->check_offset_min == prog->check_offset_max) {
+ if (prog->check_offset_min == prog->check_offset_max &&
+ !(prog->reganch & ROPT_SANY_SEEN)) {
/* Substring at constant offset from beg-of-str... */
I32 slen;
@@ -474,6 +475,10 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
if (data)
*data->scream_olds = s;
}
+ else if (prog->reganch & ROPT_SANY_SEEN)
+ s = fbm_instr((U8*)(s + start_shift),
+ (U8*)(strend - end_shift),
+ check, PL_multiline ? FBMrf_MULTILINE : 0);
else
s = fbm_instr(HOP3(s, start_shift, strend),
HOP3(strend, -end_shift, strbeg),
@@ -1407,7 +1412,8 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
minlen = prog->minlen;
if (do_utf8) {
- if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
+ if (!(prog->reganch & ROPT_SANY_SEEN))
+ if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
}
else {
if (strend - startpos < minlen) goto phooey;
@@ -2075,13 +2081,6 @@ S_regmatch(pTHX_ regnode *prog)
sayNO;
break;
case SANY:
- if (do_utf8) {
- locinput += PL_utf8skip[nextchr];
- if (locinput > PL_regeol)
- sayNO;
- nextchr = UCHARAT(locinput);
- break;
- }
if (!nextchr && locinput >= PL_regeol)
sayNO;
nextchr = UCHARAT(++locinput);
@@ -3563,15 +3562,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
}
break;
case SANY:
- if (do_utf8) {
- loceol = PL_regeol;
- while (hardcount < max && scan < loceol) {
- scan += UTF8SKIP(scan);
- hardcount++;
- }
- } else {
- scan = loceol;
- }
+ scan = loceol;
break;
case EXACT: /* length of string is 1 */
c = (U8)*STRING(p);