Fix various bugs in regex engine with mixed utf8/latin pattern and strings. Related to [perl #36207] among others

Message-ID: <9b18b3110712170621h41de2c76k331971e3660abcb0@mail.gmail.com> p4raw-id: //depot/perl@32628
author: Yves Orton <demerphq@gmail.com> 2007-12-17 16:21:46 +0100
committer: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2007-12-17 16:01:32 +0000
commit: a0a388a13daad79062b3c4b126f22d2f46fc82dd (patch)
tree: c1f1562353529ecc4edfee3469657379d1adf79d /t
parent: 5cf57fc61e657066a9798cca42453d9e2f71bd8c (diff)
download: perl-a0a388a13daad79062b3c4b126f22d2f46fc82dd.tar.gz
2 files changed, 57 insertions, 3 deletions
diff --git a/t/op/pat.t b/t/op/pat.t
index 2697157195..7d03eb6b82 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -3406,9 +3406,9 @@ if (!$ENV{PERL_SKIP_PSYCHO_TEST}){
     ok($utf8 =~ /(abc|\xe9)/i, "utf8/latin trie");
     ok($utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime");
 
-    ok("\xe9" =~ /$utf8/i, "# TODO latin/utf8");
+    ok("\xe9" =~ /$utf8/i, "# latin/utf8");
     ok("\xe9" =~ /(abc|$utf8)/i, "# latin/utf8 trie");
-    ok($latin1 =~ /$utf8/i, "# TODO latin/utf8 runtime");
+    ok($latin1 =~ /$utf8/i, "# latin/utf8 runtime");
     ok($latin1 =~ /(abc|$utf8)/i, "# latin/utf8 trie runtime");
 }
 
@@ -4487,6 +4487,23 @@ sub kt
     iseq($1,"\xd6","#45605");
 }
 
+{
+    # Regardless of utf8ness any character matches itself when 
+    # doing a case insensitive match. See also [perl #36207] 
+    for my $o (0..255) {
+        my @ch=(chr($o),chr($o));
+        utf8::upgrade($ch[1]);
+        for my $u_str (0,1) {
+            for my $u_pat (0,1) {
+                ok( $ch[$u_str]=~/\Q$ch[$u_pat]\E/i,
+                    "\$c=~/\$c/i : chr($o) : u_str=$u_str u_pat=$u_pat");
+                ok( $ch[$u_str]=~/\Q$ch[$u_pat]\E|xyz/i,
+                "# \$c=~/\$c|xyz/i : chr($o) : u_str=$u_str u_pat=$u_pat");
+            }
+        }
+    }
+}
+
 # Test counter is at bottom of file. Put new tests above here.
 #-------------------------------------------------------------------
 # Keep the following tests last -- they may crash perl
@@ -4545,6 +4562,6 @@ ok($@=~/\QSequence \k... not terminated in regex;\E/);
 iseq(0+$::test,$::TestCount,"Got the right number of tests!");
 # Don't forget to update this!
 BEGIN {
-    $::TestCount = 1965;
+    $::TestCount = 4013;
     print "1..$::TestCount\n";
 }
diff --git a/t/op/reg_fold.t b/t/op/reg_fold.t
new file mode 100644
index 0000000000..6064ecfa69
--- /dev/null
+++ b/t/op/reg_fold.t
@@ -0,0 +1,37 @@
+use strict;
+use warnings;
+use Test::More;
+my $count=1;
+my @tests;
+use Cwd;
+
+my $file="../lib/unicore/CaseFolding.txt";
+open my $fh,"<",$file 
+    or die "Failed to read '$file' from '".cwd()."': $!";
+while (<$fh>) {
+    chomp;
+    my ($line,$comment)= split/\s+#\s+/, $_;
+    my ($cp,$type,@fc)=split/[\s;]+/,$line||'';
+    next unless $type and ($type eq 'F' or $type eq 'C');
+    $_="\\x{$_}" for @fc;
+    my $cpv=hex("0x$cp");
+    my $chr="chr(0x$cp)";
+    my @str;
+    push @str,$chr if $cpv<128 or $cpv>256;
+    if ($cpv<256) {
+        push @str,"do{my \$c=$chr; utf8::upgrade(\$c); \$c}"
+    }
+    
+    foreach my $str ( @str ) {    
+        my $expr="$str=~/@fc/ix";
+        my $t=($cpv > 256 || $str=~/^do/) ? "unicode" : "latin";
+        push @tests,
+            qq[ok($expr,'$chr=~/@fc/ix - $comment ($t string)')];
+        $tests[-1]="TODO: { local \$TODO='[13:41] <BinGOs> cue *It is all Greek to me* joke.';\n$tests[-1] }"
+            if $cp eq '0390' or $cp eq '03B0';
+        $count++;
+    }
+}    
+eval join ";\n","plan tests=>".($count-1),@tests,"1"
+    or die $@;
+__DATA__
author	Yves Orton <demerphq@gmail.com>	2007-12-17 16:21:46 +0100
committer	Rafael Garcia-Suarez <rgarciasuarez@gmail.com>	2007-12-17 16:01:32 +0000
commit	a0a388a13daad79062b3c4b126f22d2f46fc82dd (patch)
tree	c1f1562353529ecc4edfee3469657379d1adf79d /t
parent	5cf57fc61e657066a9798cca42453d9e2f71bd8c (diff)
download	perl-a0a388a13daad79062b3c4b126f22d2f46fc82dd.tar.gz