diff options
author | Yves Orton <demerphq@gmail.com> | 2007-12-17 16:21:46 +0100 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2007-12-17 16:01:32 +0000 |
commit | a0a388a13daad79062b3c4b126f22d2f46fc82dd (patch) | |
tree | c1f1562353529ecc4edfee3469657379d1adf79d /t | |
parent | 5cf57fc61e657066a9798cca42453d9e2f71bd8c (diff) | |
download | perl-a0a388a13daad79062b3c4b126f22d2f46fc82dd.tar.gz |
Fix various bugs in regex engine with mixed utf8/latin pattern and strings. Related to [perl #36207] among others
Message-ID: <9b18b3110712170621h41de2c76k331971e3660abcb0@mail.gmail.com>
p4raw-id: //depot/perl@32628
Diffstat (limited to 't')
-rwxr-xr-x | t/op/pat.t | 23 | ||||
-rw-r--r-- | t/op/reg_fold.t | 37 |
2 files changed, 57 insertions, 3 deletions
diff --git a/t/op/pat.t b/t/op/pat.t index 2697157195..7d03eb6b82 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -3406,9 +3406,9 @@ if (!$ENV{PERL_SKIP_PSYCHO_TEST}){ ok($utf8 =~ /(abc|\xe9)/i, "utf8/latin trie"); ok($utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime"); - ok("\xe9" =~ /$utf8/i, "# TODO latin/utf8"); + ok("\xe9" =~ /$utf8/i, "# latin/utf8"); ok("\xe9" =~ /(abc|$utf8)/i, "# latin/utf8 trie"); - ok($latin1 =~ /$utf8/i, "# TODO latin/utf8 runtime"); + ok($latin1 =~ /$utf8/i, "# latin/utf8 runtime"); ok($latin1 =~ /(abc|$utf8)/i, "# latin/utf8 trie runtime"); } @@ -4487,6 +4487,23 @@ sub kt iseq($1,"\xd6","#45605"); } +{ + # Regardless of utf8ness any character matches itself when + # doing a case insensitive match. See also [perl #36207] + for my $o (0..255) { + my @ch=(chr($o),chr($o)); + utf8::upgrade($ch[1]); + for my $u_str (0,1) { + for my $u_pat (0,1) { + ok( $ch[$u_str]=~/\Q$ch[$u_pat]\E/i, + "\$c=~/\$c/i : chr($o) : u_str=$u_str u_pat=$u_pat"); + ok( $ch[$u_str]=~/\Q$ch[$u_pat]\E|xyz/i, + "# \$c=~/\$c|xyz/i : chr($o) : u_str=$u_str u_pat=$u_pat"); + } + } + } +} + # Test counter is at bottom of file. Put new tests above here. #------------------------------------------------------------------- # Keep the following tests last -- they may crash perl @@ -4545,6 +4562,6 @@ ok($@=~/\QSequence \k... not terminated in regex;\E/); iseq(0+$::test,$::TestCount,"Got the right number of tests!"); # Don't forget to update this! BEGIN { - $::TestCount = 1965; + $::TestCount = 4013; print "1..$::TestCount\n"; } diff --git a/t/op/reg_fold.t b/t/op/reg_fold.t new file mode 100644 index 0000000000..6064ecfa69 --- /dev/null +++ b/t/op/reg_fold.t @@ -0,0 +1,37 @@ +use strict; +use warnings; +use Test::More; +my $count=1; +my @tests; +use Cwd; + +my $file="../lib/unicore/CaseFolding.txt"; +open my $fh,"<",$file + or die "Failed to read '$file' from '".cwd()."': $!"; +while (<$fh>) { + chomp; + my ($line,$comment)= split/\s+#\s+/, $_; + my ($cp,$type,@fc)=split/[\s;]+/,$line||''; + next unless $type and ($type eq 'F' or $type eq 'C'); + $_="\\x{$_}" for @fc; + my $cpv=hex("0x$cp"); + my $chr="chr(0x$cp)"; + my @str; + push @str,$chr if $cpv<128 or $cpv>256; + if ($cpv<256) { + push @str,"do{my \$c=$chr; utf8::upgrade(\$c); \$c}" + } + + foreach my $str ( @str ) { + my $expr="$str=~/@fc/ix"; + my $t=($cpv > 256 || $str=~/^do/) ? "unicode" : "latin"; + push @tests, + qq[ok($expr,'$chr=~/@fc/ix - $comment ($t string)')]; + $tests[-1]="TODO: { local \$TODO='[13:41] <BinGOs> cue *It is all Greek to me* joke.';\n$tests[-1] }" + if $cp eq '0390' or $cp eq '03B0'; + $count++; + } +} +eval join ";\n","plan tests=>".($count-1),@tests,"1" + or die $@; +__DATA__ |