summary | refs | log | tree | commit | diff
path: root/t
diff options
context:
space:
mode:
author: Rostislav Skudnov <skrostislav@gmail.com> 2014-12-24 08:12:52 +0200
committer: David Mitchell <davem@iabyn.com> 2015-01-13 14:50:52 +0000
commit: 9ce1a4d5ec32720328a5dce6ee796ce4b79d6faf (patch)
tree: 309cec260dfb2798797fe74db28a821592cd4e63 /t
parent: 162b417c061ec9190135629d421e3685e8d31dc0 (diff)
download: perl-9ce1a4d5ec32720328a5dce6ee796ce4b79d6faf.tar.gz
make re_intuit_string() return correct string
Fix #123469 - Bug in split function, with utf8 strings. Each regex has two SV pointers, check_substr and check_utf8, which hold a constant string (if any) corresponding to the longest constant string in the regexp. When the regex is first compiled, only one pointer is set, depending on whether the pattern is utf8 or not; but subsequent usage of the regex can instantiate the other pointer too. So the choice of which of the two strings re_intuit_string() returns should be based on the UTF8ness of the pattern, not on whether check_substr is set.
Diffstat (limited to 't')
-rw-r--r-- t/op/split.t 17
1 files changed, 16 insertions, 1 deletions
diff --git a/t/op/split.t b/t/op/split.t
index 9afdd6e411..5d5c19dc39 100644
--- a/t/op/split.t
+++ b/t/op/split.t
@@ -6,7 +6,7 @@ BEGIN {
set_up_inc('../lib');
}
-plan tests => 125;
+plan tests => 131;
$FS = ':';
@@ -374,6 +374,21 @@ is($cnt, scalar(@ary));
}
{
+ # LATIN SMALL LETTER A WITH DIAERESIS, CYRILLIC SMALL LETTER I
+ for my $pattern ("\x{e4}", "\x{0437}") {
+ utf8::upgrade $pattern;
+ my @res;
+ for my $str ("a${pattern}b", "axb", "a${pattern}b") {
+ @split = split /$pattern/, $str;
+ push @res, scalar(@split);
+ }
+ is($res[0], 2);
+ is($res[1], 1);
+ is($res[2], 2, '#123469 - split with utf8 pattern after handling non-utf8 EXPR');
+ }
+}
+
+{
is (\@a, \@{"a"}, '@a must be global for following test');
$p="";
$n = @a = split /,/,$p;