summary | refs | log | tree | commit | diff
path: root/regexec.c
diff options
context:
space:
mode:
author: Daniel P. Berrange <dan@berrange.com> 2001-08-03 12:39:33 +0100
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-08-04 17:28:55 +0000
commit: f33976b4825a1f900bb28e78ad0509286ad2ffe5 (patch)
tree: 58019b92ac58588a5e773e4361cffd43ab20167b /regexec.c
parent: 577e12cc4ababd31ea5d99718f699d57deab1b2c (diff)
download: perl-f33976b4825a1f900bb28e78ad0509286ad2ffe5.tar.gz
Decouple SANY into SANY and CANY: the new SANY is /./s (match any
single character, including newline), and the new CANY is \C (match
a single byte). The problem was reported, and the test case supplied, in:

Subject: UTF-8 bugs in string length & single line regex matches
Message-ID: <20010803113932.A19318@berrange.com>

p4raw-id: //depot/perl@11575
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 35
1 files changed, 29 insertions, 6 deletions
diff --git a/regexec.c b/regexec.c
index 6fd4c0fa10..9ce5e6878a 100644
--- a/regexec.c
+++ b/regexec.c
@@ -490,7 +490,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
if (data)
*data->scream_olds = s;
}
- else if (prog->reganch & ROPT_SANY_SEEN)
+ else if (prog->reganch & ROPT_CANY_SEEN)
s = fbm_instr((U8*)(s + start_shift),
(U8*)(strend - end_shift),
check, PL_multiline ? FBMrf_MULTILINE : 0);
@@ -776,7 +776,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
PL_regdata = prog->data;
PL_bostr = startpos;
}
- s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
+ s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
if (!s) {
#ifdef DEBUGGING
char *what = 0;
@@ -895,6 +895,15 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
s += do_utf8 ? UTF8SKIP(s) : 1;
}
break;
+ case CANY:
+ while (s < strend) {
+ if (tmp && (norun || regtry(prog, s)))
+ goto got_it;
+ else
+ tmp = doevery;
+ s++;
+ }
+ break;
case EXACTF:
m = STRING(c);
ln = STR_LEN(c);
@@ -1440,12 +1449,11 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
}
minlen = prog->minlen;
- if (do_utf8) {
- if (!(prog->reganch & ROPT_SANY_SEEN))
+ if (do_utf8 && !(prog->reganch & ROPT_CANY_SEEN)) {
if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
}
else {
- if (strend - startpos < minlen) goto phooey;
+ if (strend - startpos < minlen) goto phooey;
}
/* Check validity of program. */
@@ -1488,7 +1496,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
&& mg->mg_len >= 0) {
PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
if (prog->reganch & ROPT_ANCH_GPOS) {
- if (s > PL_reg_ganch)
+ if (s > PL_reg_ganch)
goto phooey;
s = PL_reg_ganch;
}
@@ -2107,6 +2115,18 @@ S_regmatch(pTHX_ regnode *prog)
case SANY:
if (!nextchr && locinput >= PL_regeol)
sayNO;
+ if (do_utf8) {
+ locinput += PL_utf8skip[nextchr];
+ if (locinput > PL_regeol)
+ sayNO;
+ nextchr = UCHARAT(locinput);
+ }
+ else
+ nextchr = UCHARAT(++locinput);
+ break;
+ case CANY:
+ if (!nextchr && locinput >= PL_regeol)
+ sayNO;
nextchr = UCHARAT(++locinput);
break;
case REG_ANY:
@@ -3586,6 +3606,9 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
case SANY:
scan = loceol;
break;
+ case CANY:
+ scan = loceol;
+ break;
case EXACT: /* length of string is 1 */
c = (U8)*STRING(p);
while (scan < loceol && UCHARAT(scan) == c)