summary refs log tree commit diff
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2011-11-01 17:57:15 -0600
committer: Ricardo Signes <rjbs@cpan.org> 2012-08-09 16:04:11 -0400
commit: 6e634c54a0f90c8878c8086142fe3451f8970a9e (patch)
tree: abe6c68047103bec2aa7417701559f03b0c9f6b5
parent: c4fc4d72d0bf574a2b7597f0a919aa614d47c6de (diff)
download: perl-6e634c54a0f90c8878c8086142fe3451f8970a9e.tar.gz
PATCH: [perl #101710] Regression with /i, latin1 chars.
The root cause of this bug is that the code assumed the target string was in UTF-8 when it wasn't. As a result, it treated a byte as a UTF-8 start byte when it was not one, and skipped ahead by the character length that start byte implied.
-rw-r--r-- pod/perldelta.pod 8
-rw-r--r-- regexec.c 2
-rw-r--r-- t/re/pat.t 9
3 files changed, 17 insertions, 2 deletions
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index cdb8c830b4..1f1d4bd2bc 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -307,6 +307,14 @@ L</Modules and Pragmata>.
XXX
+=item *
+
+A regression has been fixed that was introduced in 5.14, in C</i>
+regular expression matching, in which a match improperly fails if the
+pattern is in UTF-8, the target string is not, and a Latin-1 character
+precedes a character in the string that should match the pattern. [perl
+#101710]
+
=back
=head1 Known Problems
diff --git a/regexec.c b/regexec.c
index 0dc093f831..2354be1f9f 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1521,7 +1521,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
{
goto got_it;
}
- s += UTF8SKIP(s);
+ s += (utf8_target) ? UTF8SKIP(s) : 1;
}
break;
case BOUNDL:
diff --git a/t/re/pat.t b/t/re/pat.t
index 4ef9663b5e..4eb05c61f2 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -21,7 +21,7 @@ BEGIN {
require './test.pl';
}
-plan tests => 451; # Update this when adding/deleting tests.
+plan tests => 452; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -1167,6 +1167,13 @@ sub run_tests {
is($got,$want,'RT #84294: check that "ab" =~ /((\w+)(?{ push @got, $2 })){2}/ leaves @got in the correct state');
}
+
+ { # [perl #101710]
+ my $pat = "b";
+ utf8::upgrade($pat);
+ like("\xffb", qr/$pat/i, "/i: utf8 pattern, non-utf8 string, latin1-char preceding matching char in string");
+ }
+
} # End of sub run_tests
1;