diff options
author | Daniel P. Berrange <dan@berrange.com> | 2001-08-03 12:39:33 +0100 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-08-04 17:28:55 +0000 |
commit | f33976b4825a1f900bb28e78ad0509286ad2ffe5 (patch) | |
tree | 58019b92ac58588a5e773e4361cffd43ab20167b /regexec.c | |
parent | 577e12cc4ababd31ea5d99718f699d57deab1b2c (diff) | |
download | perl-f33976b4825a1f900bb28e78ad0509286ad2ffe5.tar.gz |
Decouple SANY into SANY and CANY: the new SANY is /./s,
the new CANY is \C. The problem was reported and the
test case supplied in:
Subject: UTF-8 bugs in string length & single line regex matches
Message-ID: <20010803113932.A19318@berrange.com>
p4raw-id: //depot/perl@11575
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 35 |
1 files changed, 29 insertions, 6 deletions
@@ -490,7 +490,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, if (data) *data->scream_olds = s; } - else if (prog->reganch & ROPT_SANY_SEEN) + else if (prog->reganch & ROPT_CANY_SEEN) s = fbm_instr((U8*)(s + start_shift), (U8*)(strend - end_shift), check, PL_multiline ? FBMrf_MULTILINE : 0); @@ -776,7 +776,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, PL_regdata = prog->data; PL_bostr = startpos; } - s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1); + s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1); if (!s) { #ifdef DEBUGGING char *what = 0; @@ -895,6 +895,15 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta s += do_utf8 ? UTF8SKIP(s) : 1; } break; + case CANY: + while (s < strend) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + s++; + } + break; case EXACTF: m = STRING(c); ln = STR_LEN(c); @@ -1440,12 +1449,11 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * } minlen = prog->minlen; - if (do_utf8) { - if (!(prog->reganch & ROPT_SANY_SEEN)) + if (do_utf8 && !(prog->reganch & ROPT_CANY_SEEN)) { if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey; } else { - if (strend - startpos < minlen) goto phooey; + if (strend - startpos < minlen) goto phooey; } /* Check validity of program. 
*/ @@ -1488,7 +1496,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * && mg->mg_len >= 0) { PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */ if (prog->reganch & ROPT_ANCH_GPOS) { - if (s > PL_reg_ganch) + if (s > PL_reg_ganch) goto phooey; s = PL_reg_ganch; } @@ -2107,6 +2115,18 @@ S_regmatch(pTHX_ regnode *prog) case SANY: if (!nextchr && locinput >= PL_regeol) sayNO; + if (do_utf8) { + locinput += PL_utf8skip[nextchr]; + if (locinput > PL_regeol) + sayNO; + nextchr = UCHARAT(locinput); + } + else + nextchr = UCHARAT(++locinput); + break; + case CANY: + if (!nextchr && locinput >= PL_regeol) + sayNO; nextchr = UCHARAT(++locinput); break; case REG_ANY: @@ -3586,6 +3606,9 @@ S_regrepeat(pTHX_ regnode *p, I32 max) case SANY: scan = loceol; break; + case CANY: + scan = loceol; + break; case EXACT: /* length of string is 1 */ c = (U8)*STRING(p); while (scan < loceol && UCHARAT(scan) == c) |