summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2011-10-13 19:56:45 -0600
committerRicardo Signes <rjbs@cpan.org>2012-08-09 16:04:11 -0400
commit399fb9c0594c29de7dc8815c6596bd6a67ddc9e6 (patch)
tree77cdf4b8bbe1785e259044c80c28591e4d893404
parent6e634c54a0f90c8878c8086142fe3451f8970a9e (diff)
downloadperl-399fb9c0594c29de7dc8815c6596bd6a67ddc9e6.tar.gz
regexec.c: Fix "\x{FB01}\x{FB00}" =~ /ff/i
Only the first character of the string was being checked when scanning for the beginning position of the pattern match. This was so wrong, it looks like it has to be a regression. I experimented a little and did not find any. I believe (but am not certain) that a multi-char fold has to be involved. The handling of these was so broken before 5.14 that there very well may not be a regression.
-rw-r--r--pod/perldelta.pod7
-rw-r--r--regexec.c3
-rw-r--r--t/re/re_tests6
3 files changed, 15 insertions, 1 deletions
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 1f1d4bd2bc..304f70a82f 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -315,6 +315,13 @@ pattern is in UTF-8, the target string is not, and a Latin-1 character
precedes a character in the string that should match the pattern. [perl
#101710]
+=item *
+
+In case-insensitive regular expression pattern matching on UTF-8
+encoded strings, the scan for the start of a match no longer looks only
+at the first possible position. This caused matches such as
+C<"f\x{FB00}" =~ /ff/i> to fail.
+
=back
=head1 Known Problems
diff --git a/regexec.c b/regexec.c
index 2354be1f9f..021ab8e65e 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1507,7 +1507,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
? utf8_length((U8 *) pat_string, (U8 *) pat_end)
: ln;
- e = HOP3c(strend, -((I32)lnc), s);
+ /* Set the end position to the final character available */
+ e = HOP3c(strend, -1, s);
if (!reginfo && e < s) {
e = s; /* Due to minlen logic of intuit() */
diff --git a/t/re/re_tests b/t/re/re_tests
index 35a72203cd..ae124522f8 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1522,4 +1522,10 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer
# See [perl #89750]. This makes sure that the simple fold gets generated
# in that case, to DF.
/[^\x{1E9E}]/i \x{DF} n - -
+
+/ff/i \x{FB00}\x{FB01} y $& \x{FB00}
+/ff/i \x{FB01}\x{FB00} y $& \x{FB00}
+/fi/i \x{FB01}\x{FB00} y $& \x{FB01}
+/fi/i \x{FB00}\x{FB01} y $& \x{FB01}
+
# vim: softtabstop=0 noexpandtab