diff options
author | Karl Williamson <khw@khw-desktop.(none)> | 2009-11-21 23:45:28 -0700 |
---|---|---|
committer | Karl Williamson <khw@khw-desktop.(none)> | 2009-11-21 23:45:28 -0700 |
commit | 99870f4d255a0df2e3a0799d805210bab9040380 (patch) | |
tree | e78a5ff96e187f53bc711b71e788aa6d75a9dbe8 /t/uni | |
parent | 3fbd6cf4979f01b90b051e3a54e86f412a509f7e (diff) | |
download | perl-99870f4d255a0df2e3a0799d805210bab9040380.tar.gz |
mktables revamp
Diffstat (limited to 't/uni')
-rw-r--r-- | t/uni/cache.t | 3 | ||||
-rw-r--r-- | t/uni/class.t | 164 |
2 files changed, 11 insertions, 156 deletions
diff --git a/t/uni/cache.t b/t/uni/cache.t index c3f7634fcd..df12f33ba2 100644 --- a/t/uni/cache.t +++ b/t/uni/cache.t @@ -8,7 +8,8 @@ plan tests => 1; my $count = 0; unshift @INC, sub { - $count++ if $_[1] eq 'unicore/lib/gc_sc/Hira.pl'; + # XXX Kludge requires exact path, which might change + $count++ if $_[1] eq 'unicore/lib/Sc/Hira.pl'; }; my $s = 'foo'; diff --git a/t/uni/class.t b/t/uni/class.t index 4620ca0cc1..6dd6ee60c5 100644 --- a/t/uni/class.t +++ b/t/uni/class.t @@ -4,7 +4,7 @@ BEGIN { require "test.pl"; } -plan tests => 5092; +plan tests => 10; sub MyUniClass { <<END; @@ -71,164 +71,18 @@ is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_'); # make sure it finds class in other OTHER package is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO'); -# all of these should look in lib/unicore/bc/AL.pl +# lib/unicore/bc/AL.pl $str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}"; -is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}"); -is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}"); -is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}"); -is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}"); +is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070F}"); +is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070F}"); +is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070F}"); +is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070F}"); # make sure InGreek works $str = "[\x{038B}\x{038C}\x{038D}]"; is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); -is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); -is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); -is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); -is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); - -use File::Spec; -my $updir = File::Spec->updir; - -# the %utf8::... hashes are already in existence -# because utf8_pva.pl was run by utf8_heavy.pl - -*utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning - -no warnings 'utf8'; # we do not want warnings about surrogates etc - -sub char_range { - my ($h1, $h2) = @_; - - my $str; - - if (ord('A') == 193 && $h1 < 256) { - my $h3 = ($h2 || $h1) + 1; - if ($h3 - $h1 == 1) { - $str = join "", pack 'U*', $h1 .. $h3; # Using pack since chr doesn't generate Unicode chars for value < 256. - } elsif ($h3 - $h1 > 1) { - for (my $i = $h1; $i <= $h3; $i++) { - $str = join "", $str, pack 'U*', $i; - } - } - } else { - $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); - } - - return $str; -} - -# non-General Category and non-Script -while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) { - my $prop_name = $utf8::PropertyAlias{$abbrev}; - next unless $prop_name; - next if $abbrev eq "gc_sc"; - - for (sort keys %$files) { - my $filename = File::Spec->catfile( - $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl" - ); - - next unless -e $filename; - my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; - - my $str = char_range($h1, $h2); - - for my $p ($prop_name, $abbrev) { - for my $c ($files->{$_}, $_) { - is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1)); - is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1)); - } - } - } -} - -# General Category and Script -for my $p ('gc', 'sc') { - while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) { - my $filename = File::Spec->catfile( - $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl" - ); - - next unless -e $filename; - my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; - - my $str = char_range($h1, $h2); - - for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) { - for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) { - is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1)); - is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1)); - SKIP: { - skip("surrogate", 1) if $abbr eq 'cs'; - test_regexp ($str, $y); - } - } - } - } -} - -# test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.) -SKIP: -{ - skip "Can't reliably derive class names from file names", 576 if $^O eq 'VMS'; - - # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to - # return true. Try to work around this by reading the filenames explicitly - # to get a case sensitive test. N.B. This will fail if filename case is - # not preserved because you might go looking for a class name of CF or cf - # when you really want Cf. Storing case sensitive data in filenames is - # simply not portable. - - my %files; - - my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc'); - opendir D, $dirname or die $!; - @files{readdir(D)} = (); - closedir D; - - for (keys %utf8::PA_reverse) { - my $leafname = "$utf8::PA_reverse{$_}.pl"; - next unless exists $files{$leafname}; - - my $filename = File::Spec->catfile($dirname, $leafname); - - my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; - - my $str = char_range($h1, $h2); - - for my $x ('gc', 'General Category') { - print "# $filename $x $_, $utf8::PA_reverse{$_}\n"; - for my $y ($_, $utf8::PA_reverse{$_}) { - is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1)); - is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1)); - test_regexp ($str, $y); - } - } - } -} - -# test the blocks (InFoobar) -for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) { - my $filename = File::Spec->catfile( - $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl" - ); - - next unless -e $filename; - - print "# In$_ $filename\n"; - - my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; - - my $str = char_range($h1, $h2); - - my $blk = $_; - - SKIP: { - skip($blk, 2) if $blk =~ /surrogates/i; - test_regexp ($str, $blk); - $blk =~ s/^In/Block:/; - test_regexp ($str, $blk); - } -} +# The other tests that are based on looking at the generated files are now +# generated by mktables. The location of that .t file is given by the main +# Makefile that calls mktables. |