summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- regexec.c 12
-rwxr-xr-x t/op/pat.t 40
-rw-r--r-- utf8.c 28
3 files changed, 59 insertions, 21 deletions
diff --git a/regexec.c b/regexec.c
index 3aed549382..0f738d1b27 100644
--- a/regexec.c
+++ b/regexec.c
@@ -995,7 +995,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
c = utf8_to_uvchr((U8*)s, &len);
if ( c == c1
&& (ln == len ||
- !ibcmp_utf8(s, do_utf8, strend - s,
+ !ibcmp_utf8(s, do_utf8,
+ strend - s > ln ? ln : strend - s,
m, UTF, ln))
&& (norun || regtry(prog, s)) )
goto got_it;
@@ -1007,7 +1008,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
&& (f == c1 || f == c2)
&& (ln == foldlen ||
!ibcmp_utf8((char *)foldbuf,
- do_utf8, foldlen,
+ do_utf8,
+ foldlen > ln ? ln : foldlen,
m, UTF, ln))
&& (norun || regtry(prog, s)) )
goto got_it;
@@ -1032,7 +1034,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
if ( (c == c1 || c == c2)
&& (ln == len ||
- !ibcmp_utf8(s, do_utf8, strend - s,
+ !ibcmp_utf8(s, do_utf8,
+ strend - s > ln ? ln : strend - s,
m, UTF, ln))
&& (norun || regtry(prog, s)) )
goto got_it;
@@ -1044,7 +1047,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
&& (f == c1 || f == c2)
&& (ln == foldlen ||
!ibcmp_utf8((char *)foldbuf,
- do_utf8, foldlen,
+ do_utf8,
+ foldlen > ln ? ln : foldlen,
m, UTF, ln))
&& (norun || regtry(prog, s)) )
goto got_it;
diff --git a/t/op/pat.t b/t/op/pat.t
index 0eda689cc5..b797bdffbb 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
$| = 1;
-print "1..825\n";
+print "1..828\n";
BEGIN {
chdir 't' if -d 't';
@@ -2520,3 +2520,41 @@ print "# some Unicode properties\n";
$& eq "franc\N{COMBINING CEDILLA}ais" ?
"ok 825\n" : "not ok 825\n";
}
+
+{
+ print "# Does lingering (and useless) UTF8 flag mess up /i matching?\n";
+
+ {
+ my $regex = "ABcde";
+ my $string = "abcDE\x{100}";
+ chop($string);
+ if ($string =~ m/$regex/i) {
+ print "ok 826\n";
+ } else {
+ print "not ok 826\n";
+ }
+ }
+
+ {
+ my $regex = "ABcde\x{100}";
+ my $string = "abcDE";
+ chop($regex);
+ if ($string =~ m/$regex/i) {
+ print "ok 827\n";
+ } else {
+ print "not ok 827\n";
+ }
+ }
+
+ {
+ my $regex = "ABcde\x{100}";
+ my $string = "abcDE\x{100}";
+ chop($regex);
+ chop($string);
+ if ($string =~ m/$regex/i) {
+ print "ok 828\n";
+ } else {
+ print "not ok 828\n";
+ }
+ }
+}
diff --git a/utf8.c b/utf8.c
index 54ab5293cc..0051796840 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1672,9 +1672,9 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, bool u1, register I32 len1, const char *s2
register U8 *be = b + len2;
STRLEN la, lb;
UV ca, cb;
- STRLEN ulen1, ulen2;
- U8 tmpbuf1[UTF8_MAXLEN_FOLD+1];
- U8 tmpbuf2[UTF8_MAXLEN_FOLD+1];
+ STRLEN foldlen1, foldlen2;
+ U8 foldbuf1[UTF8_MAXLEN_FOLD+1];
+ U8 foldbuf2[UTF8_MAXLEN_FOLD+1];
while (a < ae && b < be) {
if (u1) {
@@ -1682,7 +1682,7 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, bool u1, register I32 len1, const char *s2
break;
ca = utf8_to_uvchr((U8*)a, &la);
} else {
- ca = *a;
+ ca = NATIVE_TO_UNI(*a);
la = 1;
}
if (u2) {
@@ -1690,21 +1690,17 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, bool u1, register I32 len1, const char *s2
break;
cb = utf8_to_uvchr((U8*)b, &lb);
} else {
- cb = *b;
+ cb = NATIVE_TO_UNI(*b);
lb = 1;
}
if (ca != cb) {
- if (u1)
- to_uni_fold(NATIVE_TO_UNI(ca), tmpbuf1, &ulen1);
- else
- ulen1 = 1;
- if (u2)
- to_uni_fold(NATIVE_TO_UNI(cb), tmpbuf2, &ulen2);
- else
- ulen2 = 1;
- if (ulen1 != ulen2
- || (ca < 256 && cb < 256 && ca != PL_fold[cb])
- || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
+ to_uni_fold(ca, foldbuf1, &foldlen1);
+ ca = utf8_to_uvchr(foldbuf1, 0);
+
+ to_uni_fold(cb, foldbuf2, &foldlen2);
+ cb = utf8_to_uvchr(foldbuf2, 0);
+
+ if (ca != cb || foldlen1 != foldlen2)
return 1; /* mismatch */
}
a += la;