summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2011-10-13 19:56:45 -0600
committerKarl Williamson <public@khwilliamson.com>2011-10-13 20:44:11 -0600
commit7c1b9f38fcbfdb3a9e1766e02bcb991d1a5452d9 (patch)
tree3c98257ee4c0e05264c07aa2028e2390d865e3f0
parent6af864889434f3aedc5ff52cae277d1cbfa476d6 (diff)
downloadperl-7c1b9f38fcbfdb3a9e1766e02bcb991d1a5452d9.tar.gz
regexec.c: Fix "\x{FB01}\x{FB00}" =~ /ff/i
Only the first character of the string was being checked when scanning for the beginning position of the pattern match. This was so wrong that it looks like it has to be a regression, but I experimented a little and did not find one. I believe (but am not certain) that a multi-char fold has to be involved. The handling of these was so broken before 5.14 that there very well may not be a regression.
-rw-r--r--pod/perldelta.pod7
-rw-r--r--regexec.c3
-rw-r--r--t/re/re_tests4
3 files changed, 11 insertions, 3 deletions
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 4e24e25ae1..5204b37fe8 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -606,6 +606,13 @@ A minor regression, introduced Perl 5.15.0, has been fixed in which some
regular expression Unicode property matches (C<\p{...}>) matched
non-Unicode code points.
+=item *
+
+In case-insensitive regular expression pattern matching on UTF-8
+encoded strings, the scan for the start of a match no longer looks only
+at the first possible position.  This caused matches such as
+C<"f\x{FB00}" =~ /ff/i> to fail.
+
=back
=head1 Known Problems
diff --git a/regexec.c b/regexec.c
index 95462e470c..d063308d17 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1535,7 +1535,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
? utf8_length((U8 *) pat_string, (U8 *) pat_end)
: ln;
- e = HOP3c(strend, -((I32)lnc), s);
+ /* Set the end position to the final character available */
+ e = HOP3c(strend, -1, s);
if (!reginfo && e < s) {
e = s; /* Due to minlen logic of intuit() */
diff --git a/t/re/re_tests b/t/re/re_tests
index 5a9b5db095..9b65f5532b 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1542,8 +1542,8 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer
/^\R\x0A$/ \x0D\x0A yT $& \x0D\x0A
/ff/i \x{FB00}\x{FB01} y $& \x{FB00}
-/ff/i \x{FB01}\x{FB00} yT $& \x{FB00}
+/ff/i \x{FB01}\x{FB00} y $& \x{FB00}
/fi/i \x{FB01}\x{FB00} y $& \x{FB01}
-/fi/i \x{FB00}\x{FB01} yT $& \x{FB01}
+/fi/i \x{FB00}\x{FB01} y $& \x{FB01}
# vim: softtabstop=0 noexpandtab