summaryrefslogtreecommitdiff
path: root/t
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2007-12-17 16:21:46 +0100
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2007-12-17 16:01:32 +0000
commita0a388a13daad79062b3c4b126f22d2f46fc82dd (patch)
treec1f1562353529ecc4edfee3469657379d1adf79d /t
parent5cf57fc61e657066a9798cca42453d9e2f71bd8c (diff)
downloadperl-a0a388a13daad79062b3c4b126f22d2f46fc82dd.tar.gz
Fix various bugs in regex engine with mixed utf8/latin pattern and strings. Related to [perl #36207] among others
Message-ID: <9b18b3110712170621h41de2c76k331971e3660abcb0@mail.gmail.com> p4raw-id: //depot/perl@32628
Diffstat (limited to 't')
-rwxr-xr-xt/op/pat.t23
-rw-r--r--t/op/reg_fold.t37
2 files changed, 57 insertions, 3 deletions
diff --git a/t/op/pat.t b/t/op/pat.t
index 2697157195..7d03eb6b82 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -3406,9 +3406,9 @@ if (!$ENV{PERL_SKIP_PSYCHO_TEST}){
ok($utf8 =~ /(abc|\xe9)/i, "utf8/latin trie");
ok($utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime");
- ok("\xe9" =~ /$utf8/i, "# TODO latin/utf8");
+ ok("\xe9" =~ /$utf8/i, "# latin/utf8");
ok("\xe9" =~ /(abc|$utf8)/i, "# latin/utf8 trie");
- ok($latin1 =~ /$utf8/i, "# TODO latin/utf8 runtime");
+ ok($latin1 =~ /$utf8/i, "# latin/utf8 runtime");
ok($latin1 =~ /(abc|$utf8)/i, "# latin/utf8 trie runtime");
}
@@ -4487,6 +4487,23 @@ sub kt
iseq($1,"\xd6","#45605");
}
+{
+ # Regardless of utf8ness any character matches itself when
+ # doing a case insensitive match. See also [perl #36207]
+ for my $o (0..255) {
+ my @ch=(chr($o),chr($o));
+ utf8::upgrade($ch[1]);
+ for my $u_str (0,1) {
+ for my $u_pat (0,1) {
+ ok( $ch[$u_str]=~/\Q$ch[$u_pat]\E/i,
+ "\$c=~/\$c/i : chr($o) : u_str=$u_str u_pat=$u_pat");
+ ok( $ch[$u_str]=~/\Q$ch[$u_pat]\E|xyz/i,
+ "# \$c=~/\$c|xyz/i : chr($o) : u_str=$u_str u_pat=$u_pat");
+ }
+ }
+ }
+}
+
# Test counter is at bottom of file. Put new tests above here.
#-------------------------------------------------------------------
# Keep the following tests last -- they may crash perl
@@ -4545,6 +4562,6 @@ ok($@=~/\QSequence \k... not terminated in regex;\E/);
iseq(0+$::test,$::TestCount,"Got the right number of tests!");
# Don't forget to update this!
BEGIN {
- $::TestCount = 1965;
+ $::TestCount = 4013;
print "1..$::TestCount\n";
}
diff --git a/t/op/reg_fold.t b/t/op/reg_fold.t
new file mode 100644
index 0000000000..6064ecfa69
--- /dev/null
+++ b/t/op/reg_fold.t
@@ -0,0 +1,37 @@
+use strict;
+use warnings;
+use Test::More;
+my $count=1;
+my @tests;
+use Cwd;
+
+my $file="../lib/unicore/CaseFolding.txt";
+open my $fh,"<",$file
+ or die "Failed to read '$file' from '".cwd()."': $!";
+while (<$fh>) {
+ chomp;
+ my ($line,$comment)= split/\s+#\s+/, $_;
+ my ($cp,$type,@fc)=split/[\s;]+/,$line||'';
+ next unless $type and ($type eq 'F' or $type eq 'C');
+ $_="\\x{$_}" for @fc;
+ my $cpv=hex("0x$cp");
+ my $chr="chr(0x$cp)";
+ my @str;
+ push @str,$chr if $cpv<128 or $cpv>256;
+ if ($cpv<256) {
+ push @str,"do{my \$c=$chr; utf8::upgrade(\$c); \$c}"
+ }
+
+ foreach my $str ( @str ) {
+ my $expr="$str=~/@fc/ix";
+ my $t=($cpv > 256 || $str=~/^do/) ? "unicode" : "latin";
+ push @tests,
+ qq[ok($expr,'$chr=~/@fc/ix - $comment ($t string)')];
+ $tests[-1]="TODO: { local \$TODO='[13:41] <BinGOs> cue *It is all Greek to me* joke.';\n$tests[-1] }"
+ if $cp eq '0390' or $cp eq '03B0';
+ $count++;
+ }
+}
+eval join ";\n","plan tests=>".($count-1),@tests,"1"
+ or die $@;
+__DATA__