summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--regcomp.c3
-rw-r--r--regcomp.h9
-rw-r--r--regexec.c27
-rw-r--r--regexp.h1
-rwxr-xr-xt/op/pat.t55
-rwxr-xr-xt/pragma/utf8.t15
6 files changed, 86 insertions, 24 deletions
diff --git a/regcomp.c b/regcomp.c
index 547f756a30..69d114e9e9 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1930,6 +1930,8 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
r->reganch |= ROPT_LOOKBEHIND_SEEN;
if (RExC_seen & REG_SEEN_EVAL)
r->reganch |= ROPT_EVAL_SEEN;
+ if (RExC_seen & REG_SEEN_SANY)
+ r->reganch |= ROPT_SANY_SEEN;
Newz(1002, r->startp, RExC_npar, I32);
Newz(1002, r->endp, RExC_npar, I32);
PL_regdata = r->data; /* for regprop() */
@@ -2638,6 +2640,7 @@ tryagain:
break;
case 'C':
ret = reg_node(pRExC_state, SANY);
+ RExC_seen |= REG_SEEN_SANY;
*flagp |= HASWIDTH|SIMPLE;
nextchar(pRExC_state);
break;
diff --git a/regcomp.h b/regcomp.h
index 066e31f01d..ee9be39f71 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -285,10 +285,11 @@ struct regnode_charclass_class {
#define EXTRA_SIZE(guy) ((sizeof(guy)-1)/sizeof(struct regnode))
-#define REG_SEEN_ZERO_LEN 1
-#define REG_SEEN_LOOKBEHIND 2
-#define REG_SEEN_GPOS 4
-#define REG_SEEN_EVAL 8
+#define REG_SEEN_ZERO_LEN 1
+#define REG_SEEN_LOOKBEHIND 2
+#define REG_SEEN_GPOS 4
+#define REG_SEEN_EVAL 8
+#define REG_SEEN_SANY 16
START_EXTERN_C
diff --git a/regexec.c b/regexec.c
index c70d1b1655..5d9e8ac8fa 100644
--- a/regexec.c
+++ b/regexec.c
@@ -398,7 +398,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
goto fail;
}
- if (prog->check_offset_min == prog->check_offset_max) {
+ if (prog->check_offset_min == prog->check_offset_max &&
+ !(prog->reganch & ROPT_SANY_SEEN)) {
/* Substring at constant offset from beg-of-str... */
I32 slen;
@@ -474,6 +475,10 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
if (data)
*data->scream_olds = s;
}
+ else if (prog->reganch & ROPT_SANY_SEEN)
+ s = fbm_instr((U8*)(s + start_shift),
+ (U8*)(strend - end_shift),
+ check, PL_multiline ? FBMrf_MULTILINE : 0);
else
s = fbm_instr(HOP3(s, start_shift, strend),
HOP3(strend, -end_shift, strbeg),
@@ -1407,7 +1412,8 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
minlen = prog->minlen;
if (do_utf8) {
- if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
+ if (!(prog->reganch & ROPT_SANY_SEEN))
+ if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
}
else {
if (strend - startpos < minlen) goto phooey;
@@ -2075,13 +2081,6 @@ S_regmatch(pTHX_ regnode *prog)
sayNO;
break;
case SANY:
- if (do_utf8) {
- locinput += PL_utf8skip[nextchr];
- if (locinput > PL_regeol)
- sayNO;
- nextchr = UCHARAT(locinput);
- break;
- }
if (!nextchr && locinput >= PL_regeol)
sayNO;
nextchr = UCHARAT(++locinput);
@@ -3563,15 +3562,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
}
break;
case SANY:
- if (do_utf8) {
- loceol = PL_regeol;
- while (hardcount < max && scan < loceol) {
- scan += UTF8SKIP(scan);
- hardcount++;
- }
- } else {
- scan = loceol;
- }
+ scan = loceol;
break;
case EXACT: /* length of string is 1 */
c = (U8)*STRING(p);
diff --git a/regexp.h b/regexp.h
index 3c71060a40..33ace4066e 100644
--- a/regexp.h
+++ b/regexp.h
@@ -54,6 +54,7 @@ typedef struct regexp {
#define ROPT_CHECK_ALL 0x00100
#define ROPT_LOOKBEHIND_SEEN 0x00200
#define ROPT_EVAL_SEEN 0x00400
+#define ROPT_SANY_SEEN 0x00800
/* 0xf800 of reganch is used by PMf_COMPILETIME */
diff --git a/t/op/pat.t b/t/op/pat.t
index 17df867fd9..d7eb9f8fd1 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -4,7 +4,7 @@
# the format supported by op/regexp.t. If you want to add a test
# that does fit that format, add it to op/re_tests, not here.
-print "1..231\n";
+print "1..240\n";
BEGIN {
chdir 't' if -d 't';
@@ -1129,3 +1129,56 @@ print "not " unless "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/;
print "ok $test\n";
$test++;
+$_ = "a\x{100}b";
+if (/(.)(\C)(\C)(.)/) {
+ print "ok 232\n";
+ if ($1 eq "a") {
+ print "ok 233\n";
+ } else {
+ print "not ok 233\n";
+ }
+ if ($2 eq "\xC4") {
+ print "ok 234\n";
+ } else {
+ print "not ok 234\n";
+ }
+ if ($3 eq "\x80") {
+ print "ok 235\n";
+ } else {
+ print "not ok 235\n";
+ }
+ if ($4 eq "b") {
+ print "ok 236\n";
+ } else {
+ print "not ok 236\n";
+ }
+} else {
+ for (232..236) {
+ print "not ok $_\n";
+ }
+}
+$_ = "\x{100}";
+if (/(\C)/g) {
+ print "ok 237\n";
+ if ($1 eq "\xC4") {
+ print "ok 238\n";
+ } else {
+ print "not ok 238\n";
+ }
+} else {
+ for (237..238) {
+ print "not ok $_\n";
+ }
+}
+if (/(\C)/g) {
+ print "ok 239\n";
+ if ($1 eq "\x80") {
+ print "ok 240\n";
+ } else {
+ print "not ok 240\n";
+ }
+} else {
+ for (239..240) {
+ print "not ok $_\n";
+ }
+}
diff --git a/t/pragma/utf8.t b/t/pragma/utf8.t
index 577e6b4e2b..60e6c6e102 100755
--- a/t/pragma/utf8.t
+++ b/t/pragma/utf8.t
@@ -10,7 +10,7 @@ BEGIN {
}
}
-print "1..107\n";
+print "1..109\n";
my $test = 1;
@@ -577,3 +577,16 @@ sub nok_bytes {
$test++; # 107
}
+{
+ # bug id 20001230.002
+
+ use utf8;
+
+ print "not " unless "École" =~ /^\C\C(.)/ && $1 eq 'c';
+ print "ok $test\n";
+ $test++; # 108
+
+ print "not " unless "École" =~ /^\C\C(c)/;
+ print "ok $test\n";
+ $test++; # 109
+}