#!./perl use strict; # Test charnames.pm. If $ENV{PERL_RUN_SLOW_TESTS} is unset or 0, a random # selection of names is tested, a higher percentage of regular names is tested # than algorithmically-determined names. my $RUN_SLOW_TESTS_EVERY_CODE_POINT = 100; # If $ENV{PERL_RUN_SLOW_TESTS} is at least 1 and less than the number above, # all code points with names are tested. If it is at least that number, all # 1,114,112 Unicode code points are tested. # Because \N{} is compile time, any warnings will get generated before # execution, so have to have an array, and arrange things so no warning # is generated twice to verify that in fact a warning did happen my @WARN; BEGIN { unless(grep /blib/, @INC) { chdir 't' if -d 't'; @INC = '../lib'; } $SIG{__WARN__} = sub { push @WARN, @_ }; } our $local_tests = 'no_plan'; # ---- For the alias extensions require "../t/lib/common.pl"; is("Here\N{EXCLAMATION MARK}?", "Here!?", "Basic sanity, autoload of :full upon \\N"); is("\N{latin: Q}", "Q", "autoload of :short upon \\N"); { use bytes; # TEST -utf8 can switch utf8 on my $res = eval <<'EOE'; use charnames ":full"; "Here: \N{CYRILLIC SMALL LETTER BE}!"; 1 EOE like($@, "above 0xFF", "Verify get warning for \\N{above ff} under 'use bytes' with :full"); ok(! defined $res, "... and result is undefined"); $res = eval <<'EOE'; use charnames 'cyrillic'; "Here: \N{Be}!"; 1 EOE like($@, "CYRILLIC CAPITAL LETTER BE.*above 0xFF", "Verify get warning under 'use bytes' with explicit script"); ok(! defined $res, "... and result is undefined"); $res = eval <<'EOE'; use charnames ':full', ":alias" => { BOM => "LATIN SMALL LETTER B" }; "\N{BOM}"; EOE is ($@, "", "Verify that there is no warning for \\N{below 256} under 'use bytes'"); is ($res, 'b', "Verify that can redefine a standard alias"); } { use charnames ":alias" => { mychar1 => "0xE8000", mychar2 => 983040, # U+F0000 mychar3 => "U+100000", myctrl => 0x80, mylarge => "U+111000", }; is ("\N{PILE OF POO}", chr(0x1F4A9), "Verify :alias alone implies :full"); is ("\N{mychar1}", chr(0xE8000), "Verify that can define hex alias"); is (charnames::viacode(0xE8000), "mychar1", "And that can get the alias back"); is ("\N{mychar2}", chr(0xF0000), "Verify that can define decimal alias"); is (charnames::viacode(0xF0000), "mychar2", "And that can get the alias back"); is ("\N{mychar3}", chr(0x100000), "Verify that can define U+... alias"); is (charnames::viacode(0x100000), "mychar3", "And that can get the alias back"); is ("\N{mylarge}", chr(0x111000), "Verify that can define alias beyond Unicode"); is (charnames::viacode(0x111000), "mylarge", "And that can get the alias back"); is (charnames::viacode(0x80), "myctrl", "Verify that can name a nameless control"); } my $encoded_be; my $encoded_alpha; my $encoded_bet; my $encoded_deseng; # If octal representation of unicode char is \0xyzt, then the utf8 is \3xy\2zt if (ord('A') == 65) { # as on ASCII or UTF-8 machines $encoded_be = "\320\261"; $encoded_alpha = "\316\261"; $encoded_bet = "\327\221"; $encoded_deseng = "\360\220\221\215"; } else { # EBCDIC where UTF-EBCDIC may be used (this may be 1047 specific since # UTF-EBCDIC is codepage specific) $encoded_be = "\270\102\130"; $encoded_alpha = "\264\130"; $encoded_bet = "\270\125\130"; $encoded_deseng = "\336\102\103\124"; } sub to_bytes { unpack"U0a*", shift; } sub get_loose_name ($) { # Modify name to stress the loose tests. # First, all lower case, my $loose_name = lc shift; # Then squeeze out all the blanks not adjacent to hyphens, but make the # spaces that are adjacent to hypens into two, to make sure the code isn't # looking for just one when looking for non-medial hyphens. $loose_name =~ s/ (? { mychar1 => "LATIN SMALL LETTER E", mychar2 => "LATIN CAPITAL LETTER A", myprivate1 => 0xE8000, # Private use area myprivate2 => 0x100000, # Private use area }, ":short", qw( katakana ), ; my $hiragana_be = "\N{HIRAGANA LETTER BE}"; is("\N{mychar1}", "e", "Outer block: verify that \\N{mychar1} works"); is(charnames::vianame("mychar1"), ord("e"), "Outer block: verify that vianame(mychar1) works"); is(charnames::string_vianame("mychar1"), "e", "Outer block: verify that string_vianame(mychar1) works"); is("\N{mychar2}", "A", "Outer block: verify that \\N{mychar2} works"); is(charnames::vianame("mychar2"), ord("A"), "Outer block: verify that vianame(mychar2) works"); is(charnames::string_vianame("mychar2"), "A", "Outer block: verify that string_vianame(mychar2) works"); is("\N{myprivate1}", "\x{E8000}", "Outer block: verify that \\N{myprivate1} works"); cmp_ok(charnames::vianame("myprivate1"), "==", 0xE8000, "Outer block: verify that vianame(myprivate1) works"); is(charnames::string_vianame("myprivate1"), chr(0xE8000), "Outer block: verify that string_vianame(myprivate1) works"); is(charnames::viacode(0xE8000), "myprivate1", "Outer block: verify that myprivate1 viacode works"); is("\N{myprivate2}", "\x{100000}", "Outer block: verify that \\N{myprivate2} works"); cmp_ok(charnames::vianame("myprivate2"), "==", 0x100000, "Outer block: verify that vianame(myprivate2) works"); is(charnames::string_vianame("myprivate2"), chr(0x100000), "Outer block: verify that string_vianame(myprivate2) works"); is(charnames::viacode(0x100000), "myprivate2", "Outer block: verify that myprivate2 viacode works"); is("\N{BE}", "\N{KATAKANA LETTER BE}", "Outer block: verify that \\N uses the correct script "); cmp_ok(charnames::vianame("BE"), "==", ord("\N{KATAKANA LETTER BE}"), "Outer block: verify that vianame uses the correct script"); cmp_ok(charnames::string_vianame("BE"), "==", "\N{KATAKANA LETTER BE}", "Outer block: verify that string_vianame uses the correct script"); is("\N{Hiragana: BE}", $hiragana_be, "Outer block: verify that :short works with \\N"); cmp_ok(charnames::vianame("Hiragana: BE"), "==", ord($hiragana_be), "Outer block: verify that :short works with vianame"); cmp_ok(charnames::string_vianame("Hiragana: BE"), "==", $hiragana_be, "Outer block: verify that :short works with string_vianame"); { use charnames ":full", ":alias" => { mychar1 => "LATIN SMALL LETTER F", myprivate1 => 0xE8001, # Private use area }, # BE is in both hiragana and katakana; see if # different default script delivers different # letter. qw( hiragana ), ; is("\N{mychar1}", "f", "Inner block: verify that \\N{mychar1} is redefined"); is(charnames::vianame("mychar1"), ord("f"), "Inner block: verify that vianame(mychar1) is redefined"); is(charnames::string_vianame("mychar1"), "f", "Inner block: verify that string_vianame(mychar1) is redefined"); eval '"\N{mychar2}"'; like($@, qr/Unknown charname 'mychar2'/, "Inner block: verify that \\N{mychar2} outer definition didn't leak"); ok( ! defined charnames::vianame("mychar2"), "Inner block: verify that vianame(mychar2) outer definition didn't leak"); ok( ! defined charnames::string_vianame("mychar2"), "Inner block: verify that string_vianame(mychar2) outer definition didn't leak"); is("\N{myprivate1}", "\x{E8001}", "Inner block: verify that \\N{myprivate1} is redefined "); cmp_ok(charnames::vianame("myprivate1"), "==", 0xE8001, "Inner block: verify that vianame(myprivate1) is redefined"); is(charnames::string_vianame("myprivate1"), chr(0xE8001), "Inner block: verify that string_vianame(myprivate1) is redefined"); is(charnames::viacode(0xE8001), "myprivate1", "Inner block: verify that myprivate1 viacode is redefined"); ok(! defined charnames::viacode(0xE8000), "Inner block: verify that outer myprivate1 viacode didn't leak"); eval '"\N{myprivate2}"'; like($@, qr/Unknown charname 'myprivate2'/, "Inner block: verify that \\N{myprivate2} outer definition didn't leak"); ok(! defined charnames::vianame("myprivate2"), "Inner block: verify that vianame(myprivate2) outer definition didn't leak"); ok(! defined charnames::string_vianame("myprivate2"), "Inner block: verify that string_vianame(myprivate2) outer definition didn't leak"); ok(! defined charnames::viacode(0x100000), "Inner block: verify that myprivate2 viacode outer definition didn't leak"); is("\N{BE}", $hiragana_be, "Inner block: verify that \\N uses the correct script"); cmp_ok(charnames::vianame("BE"), "==", ord($hiragana_be), "Inner block: verify that vianame uses the correct script"); cmp_ok(charnames::string_vianame("BE"), "==", $hiragana_be, "Inner block: verify that string_vianame uses the correct script"); eval '"\N{Hiragana: BE}"'; like($@, qr/Unknown charname 'Hiragana: BE'/, "Inner block without :short: \\N with short doesn't work"); ok(! defined charnames::vianame("Hiragana: BE"), "Inner block without :short: verify that vianame with short doesn't work"); ok(! defined charnames::string_vianame("Hiragana: BE"), "Inner block without :short: verify that string_vianame with short doesn't work"); { # An inner block where only :short definitions are valid. use charnames ":short"; eval '"\N{mychar1}"'; like($@, qr/Unknown charname 'mychar1'/, "Inner inner block: verify that mychar1 outer definition didn't leak with \\N"); ok( ! defined charnames::vianame("mychar1"), "Inner inner block: verify that mychar1 outer definition didn't leak with vianame"); ok( ! defined charnames::string_vianame("mychar1"), "Inner inner block: verify that mychar1 outer definition didn't leak with string_vianame"); eval '"\N{mychar2}"'; like($@, qr/Unknown charname 'mychar2'/, "Inner inner block: verify that mychar2 outer definition didn't leak with \\N"); ok( ! defined charnames::vianame("mychar2"), "Inner inner block: verify that mychar2 outer definition didn't leak with vianame"); ok( ! defined charnames::string_vianame("mychar2"), "Inner inner block: verify that mychar2 outer definition didn't leak with string_vianame"); eval '"\N{myprivate1}"'; like($@, qr/Unknown charname 'myprivate1'/, "Inner inner block: verify that myprivate1 outer definition didn't leak with \\N"); ok(! defined charnames::vianame("myprivate1"), "Inner inner block: verify that myprivate1 outer definition didn't leak with vianame"); ok(! defined charnames::string_vianame("myprivate1"), "Inner inner block: verify that myprivate1 outer definition didn't leak with string_vianame"); eval '"\N{myprivate2}"'; like($@, qr/Unknown charname 'myprivate2'/, "Inner inner block: verify that myprivate2 outer definition didn't leak with \\N"); ok(! defined charnames::vianame("myprivate2"), "Inner inner block: verify that myprivate2 outer definition didn't leak with vianame"); ok(! defined charnames::string_vianame("myprivate2"), "Inner inner block: verify that myprivate2 outer definition didn't leak with string_vianame"); ok(! defined charnames::viacode(0xE8000), "Inner inner block: verify that mychar1 outer outer definition didn't leak with viacode"); ok(! defined charnames::viacode(0xE8001), "Inner inner block: verify that mychar1 outer definition didn't leak with viacode"); ok(! defined charnames::viacode(0x100000), "Inner inner block: verify that mychar2 outer definition didn't leak with viacode"); eval '"\N{BE}"'; like($@, qr/Unknown charname 'BE'/, "Inner inner block without script: verify that outer :script didn't leak with \\N"); ok(! defined charnames::vianame("BE"), "Inner inner block without script: verify that outer :script didn't leak with vianames"); ok(! defined charnames::string_vianame("BE"), "Inner inner block without script: verify that outer :script didn't leak with string_vianames"); eval '"\N{HIRAGANA LETTER BE}"'; like($@, qr/Unknown charname 'HIRAGANA LETTER BE'/, "Inner inner block without :full: verify that outer :full didn't leak with \\N"); is("\N{Hiragana: BE}", $hiragana_be, "Inner inner block with :short: verify that \\N works with :short"); cmp_ok(charnames::vianame("Hiragana: BE"), "==", ord($hiragana_be), "Inner inner block with :short: verify that vianame works with :short"); cmp_ok(charnames::string_vianame("Hiragana: BE"), "==", $hiragana_be, "Inner inner block with :short: verify that string_vianame works with :short"); } # Back to previous block. All previous tests should work again. is("\N{mychar1}", "f", "Inner block: verify that \\N{mychar1} is redefined"); is(charnames::vianame("mychar1"), ord("f"), "Inner block: verify that vianame(mychar1) is redefined"); is(charnames::string_vianame("mychar1"), "f", "Inner block: verify that string_vianame(mychar1) is redefined"); eval '"\N{mychar2}"'; like($@, qr/Unknown charname 'mychar2'/, "Inner block: verify that \\N{mychar2} outer definition didn't leak"); ok( ! defined charnames::vianame("mychar2"), "Inner block: verify that vianame(mychar2) outer definition didn't leak"); ok( ! defined charnames::string_vianame("mychar2"), "Inner block: verify that string_vianame(mychar2) outer definition didn't leak"); is("\N{myprivate1}", "\x{E8001}", "Inner block: verify that \\N{myprivate1} is redefined "); cmp_ok(charnames::vianame("myprivate1"), "==", 0xE8001, "Inner block: verify that vianame(myprivate1) is redefined"); is(charnames::string_vianame("myprivate1"), chr(0xE8001), "Inner block: verify that string_vianame(myprivate1) is redefined"); is(charnames::viacode(0xE8001), "myprivate1", "Inner block: verify that myprivate1 viacode is redefined"); ok(! defined charnames::viacode(0xE8000), "Inner block: verify that outer myprivate1 viacode didn't leak"); eval '"\N{myprivate2}"'; like($@, qr/Unknown charname 'myprivate2'/, "Inner block: verify that \\N{myprivate2} outer definition didn't leak"); ok(! defined charnames::vianame("myprivate2"), "Inner block: verify that vianame(myprivate2) outer definition didn't leak"); ok(! defined charnames::string_vianame("myprivate2"), "Inner block: verify that string_vianame(myprivate2) outer definition didn't leak"); ok(! defined charnames::viacode(0x100000), "Inner block: verify that myprivate2 viacode outer definition didn't leak"); is("\N{BE}", $hiragana_be, "Inner block: verify that \\N uses the correct script"); cmp_ok(charnames::vianame("BE"), "==", ord($hiragana_be), "Inner block: verify that vianame uses the correct script"); cmp_ok(charnames::string_vianame("BE"), "==", $hiragana_be, "Inner block: verify that string_vianame uses the correct script"); eval '"\N{Hiragana: BE}"'; like($@, qr/Unknown charname 'Hiragana: BE'/, "Inner block without :short: \\N with short doesn't work"); ok(! defined charnames::vianame("Hiragana: BE"), "Inner block without :short: verify that vianame with short doesn't work"); ok(! defined charnames::string_vianame("Hiragana: BE"), "Inner block without :short: verify that string_vianame with short doesn't work"); } # Back to previous block. All tests from that block should work again. is("\N{mychar1}", "e", "Outer block: verify that \\N{mychar1} works"); is(charnames::vianame("mychar1"), ord("e"), "Outer block: verify that vianame(mychar1) works"); is(charnames::string_vianame("mychar1"), "e", "Outer block: verify that string_vianame(mychar1) works"); is("\N{mychar2}", "A", "Outer block: verify that \\N{mychar2} works"); is(charnames::vianame("mychar2"), ord("A"), "Outer block: verify that vianame(mychar2) works"); is(charnames::string_vianame("mychar2"), "A", "Outer block: verify that string_vianame(mychar2) works"); is("\N{myprivate1}", "\x{E8000}", "Outer block: verify that \\N{myprivate1} works"); cmp_ok(charnames::vianame("myprivate1"), "==", 0xE8000, "Outer block: verify that vianame(myprivate1) works"); is(charnames::string_vianame("myprivate1"), chr(0xE8000), "Outer block: verify that string_vianame(myprivate1) works"); is(charnames::viacode(0xE8000), "myprivate1", "Outer block: verify that myprivate1 viacode works"); is("\N{myprivate2}", "\x{100000}", "Outer block: verify that \\N{myprivate2} works"); cmp_ok(charnames::vianame("myprivate2"), "==", 0x100000, "Outer block: verify that vianame(myprivate2) works"); is(charnames::string_vianame("myprivate2"), chr(0x100000), "Outer block: verify that string_vianame(myprivate2) works"); is(charnames::viacode(0x100000), "myprivate2", "Outer block: verify that myprivate2 viacode works"); is("\N{BE}", "\N{KATAKANA LETTER BE}", "Outer block: verify that \\N uses the correct script "); cmp_ok(charnames::vianame("BE"), "==", ord("\N{KATAKANA LETTER BE}"), "Outer block: verify that vianame uses the correct script"); cmp_ok(charnames::string_vianame("BE"), "==", "\N{KATAKANA LETTER BE}", "Outer block: verify that string_vianame uses the correct script"); is("\N{Hiragana: BE}", $hiragana_be, "Outer block: verify that :short works with \\N"); cmp_ok(charnames::vianame("Hiragana: BE"), "==", ord($hiragana_be), "Outer block: verify that :short works with vianame"); cmp_ok(charnames::string_vianame("Hiragana: BE"), "==", $hiragana_be, "Outer block: verify that :short works with string_vianame"); { use charnames qw(:loose new_tai_lue des_eret); is("\N{latincapitallettera}", "A", "Verify that loose matching works"); cmp_ok("\N{high-qa}", "==", chr(0x1980), "Verify that loose script list matching works"); is(charnames::string_vianame("O-i"), chr(0x10426), "Verify that loose script list matching works with string_vianame"); is(charnames::vianame("o i"), 0x1044E, "Verify that loose script list matching works with vianame"); } eval '"\N{latincapitallettera}"'; like($@, qr/Unknown charname 'latincapitallettera'/, "Verify that loose matching caching doesn't leak outside of scope"); { use charnames qw(:loose :short); cmp_ok("\N{co pt-ic:she-i}", "==", chr(0x3E3), "Verify that loose :short matching works"); is(charnames::string_vianame("co pt_ic: She i"), chr(0x3E2), "Verify that loose :short matching works with string_vianame"); is(charnames::vianame(" Arm-en-ian: x e h_"), 0x56D, "Verify that loose :short matching works with vianame"); } } { # Go through the whole Unicode db. It takes quite a while to test # all 1 million code points, so this tests a randomly selected # subset. For now, don't test with \N{}, to avoid filling the internal # cache at compile time; use vianame # For randomized tests below. my $seed; if (defined $ENV{PERL_TEST_CHARNAMES_SEED}) { $seed = srand($ENV{PERL_TEST_CHARNAMES_SEED}); if ($seed != $ENV{PERL_TEST_CHARNAMES_SEED}) { die "srand returned '$seed' instead of '$ENV{PERL_TEST_CHARNAMES_SEED}'"; }; } else { $seed = srand; } my $run_slow_tests = $ENV{PERL_RUN_SLOW_TESTS} || 0; # We will look at the data grouped in "blocks" of the following # size. my $block_size_bits = 7; # above 16 is not sensible my $block_size = 2**$block_size_bits; # There are the regular names, like "SPACE", plus the ones # that are algorithmically determinable, such as "CKJ UNIFIED # IDEOGRAPH-hhhh" where the hhhh is the actual hex code point number # of the character. The percentage of each type to test is # fuzzily independently settable. This breaks down when the block size is # 1 or is large enough that both types of names occur in the same block my $percentage_of_regular_names = ($run_slow_tests) ? 100 : 13; my $percentage_of_algorithmic_names = (100 / $block_size); # 1 test/block # If wants everything tested, do so by changing the block size to 1 so # every character is in its own block, otherwise there is a risk that the # randomness will cause something to be tested more than once at the # expense of testing something else not at all. if ($percentage_of_regular_names >= 100 || $percentage_of_algorithmic_names >= 100) { $block_size_bits = 0; $block_size = 2**$block_size_bits; } # Changing the block size doesn't change anything with regards to # testing the regular names (except if you set it to 1 so that each code # point is in its own block), but will affect the algorithmic names. # If you make the size too big so that blocks include both regular # names and algorithmic, the whole block will be sampled at the sum # of the two rates. If you make it too small, then more algorithmic # names will be tested than you probably intended. my @names; # The names of every code point. # We look at one block past the Unicode maximum, to verify there are # no names in it. my $block_count = 1 + 0x110000 / $block_size; my @regular_names_count = (0) x $block_count ; my @algorithmic_names_count = (0) x $block_count; # Read the DB, and fill in @names with the character names. open my $fh, "<", "../../lib/unicore/UnicodeData.txt" or die "Can't open ../../lib/unicore/UnicodeData.txt: $!"; while (<$fh>) { chomp; my ($code, $name, undef, undef, undef, undef, undef, undef, undef, undef, $u1name) = split ";"; my $decimal = hex $code; # The Unicode version 1 name is used instead of any that are # marked . $name = $u1name if $name eq ""; # In earlier Perls, we reject this code point's name (BELL) $name = "" if $^V lt v5.17.0 && $decimal == 0x1F514; # ALERT overrides BELL $name = 'ALERT' if $decimal == 7; # Some don't have names, leave those array elements undefined next unless $name; # If the name isn't of this special form, it is a regular one. if ($name !~ /First>$/) { my $block = $decimal >> $block_size_bits; $names[$decimal] = $name; $regular_names_count[$block]++; } else { # The next line after a is the , which is the # ending point of the range. $_ = <$fh>; /^(.*?);/; my $end_decimal = hex $1; # Only the CJK (and the Hangul which are instead dealt with below) # ones have names, and they all have the code point as part of the # name, which we can construct if ($name =~ /^> $block_size_bits; $algorithmic_names_count[$block]++; } } } } close $fh; use Unicode::UCD; if (pack("C*", split /\./, Unicode::UCD::UnicodeVersion()) gt v1.1.5) { # The Hangul syllable names aren't in the file above; their names # are algorithmically determinable, but to avoid perpetuating any # programming errors, this file contains the complete list, gathered # from the web. while () { chomp; next unless $_; # Guard against empty lines getting inserted. my ($code, $name) = split ";"; my $decimal = hex $code; $names[$decimal] = $name; my $block = $decimal >> $block_size_bits; $algorithmic_names_count[$block] = 1; } } my @name_aliases; use Unicode::UCD; if (ord('A') != 65 || pack( "C*", split /\./, Unicode::UCD::UnicodeVersion()) ge v6.1.0) { open my $fh, "<", "../../lib/unicore/NameAliases.txt" or die "Can't open ../../lib/unicore/NameAliases.txt: $!"; @name_aliases = <$fh> } else { # If this Unicode version doesn't have the full .txt file, or are on # an EBCDIC platform where they need to be translated, get the data # from prop_invmap() (which should do the translation) and convert it # to the file's format use Unicode::UCD 'prop_invmap'; my ($invlist_ref, $invmap_ref, undef, $default) = prop_invmap('Name_Alias'); for my $i (0 .. @$invlist_ref - 1) { # Convert the aliases for code points that have just one alias to # single element arrays for uniform handling below. if (! ref $invmap_ref->[$i]) { # But we test only the real aliases, not the ones which are # just really placeholders. next if $invmap_ref->[$i] eq $default; $invmap_ref->[$i] = [ $invmap_ref->[$i] ]; } # Change each alias for the code point to the form that the file # has foreach my $j ($invlist_ref->[$i] .. $invlist_ref->[$i+1] - 1) { foreach my $value (@{$invmap_ref->[$i]}) { $value =~ s/: /;/; push @name_aliases, sprintf("%04X;%s\n", $j, $value); } } } } for (@name_aliases) { chomp; s/^\s*#.*//; next unless $_; my ($hex, $name, $type) = split ";"; my $i = CORE::hex $hex; # Make sure that both aliases (the one in UnicodeData, and the one we # just read) return the same code point. test_vianame($i, $hex, $name); test_vianame($i, $hex, $names[$i]) if $names[$i] ne ""; # Set up so that a test below of this code point will use the alias # instead of the less-correct original. We can't test here that # viacode is correct, because the alias file may contain multiple # aliases for the same code point, and viacode should return only the # final one. So don't do it here; instead rely on the loop below to # pick up the test. $names[$i] = $name if $type eq 'correction'; } close $fh; # Now, have all the names populated. Do the tests my $all_pass = 1; # Assume everything will pass. my $block = 0; # Start at the beginning. while ($block < $block_count) { # Calculate how many tests to run on this block, based on the # how many names of each type are in it, and what percentage to # test of each type. my $test_count = 0; if ($algorithmic_names_count[$block]) { $test_count += int($regular_names_count[$block] * $percentage_of_algorithmic_names / 100 + .5); $test_count = 1 unless $test_count; # Make sure at least one } if ($regular_names_count[$block]) { $test_count += int($regular_names_count[$block] * $percentage_of_regular_names / 100 + .5); $test_count = 1 unless $test_count; } # For very small block sizes, we could come up with more tests # than characters in it $test_count = $block_size if $test_count > $block_size; # To avoid testing all the gazillions of code points that have # no names, and are almost certainly going to succeed, we # coalesce all such adjacent blocks into one, and have just one # test for that super-sized block my $end_block = $block; if ($test_count == 0) { $test_count = 1; if ($run_slow_tests < $RUN_SLOW_TESTS_EVERY_CODE_POINT) { $end_block++; # Keep coalescing until find a block that has something in # it. But don't cross plane boundaries (the 16 bits below), # so there is at least one test for every plane. while ($end_block < $block_count && $end_block >> (16 - $block_size_bits) == $block >> (16 - $block_size_bits) && ! $algorithmic_names_count[$end_block] && ! $regular_names_count[$end_block]) { $end_block++; } $end_block--; # Back-off to a block that has no defined names } } # Calculated how many tests. Do them for (1 .. $test_count) { # Randomly choose a code point in the block my $i = $block * $block_size + int(rand(($end_block - $block + 1) * $block_size)); my $hex = sprintf("%04X", $i); if (! $names[$i]) { # These four code points now have names, from NameAlias, but # aren't listed as having names in UnicodeData.txt, so viacode # returns their alias names, not undef next if $i == 0x80 || $i == 0x81 || $i == 0x84 || $i == 0x99; # If there is no name for this code point, all we can # test is that. $all_pass &= ok(! defined charnames::viacode($i), "Verify viacode(0x$hex) is undefined"); } else { # Otherwise, test that the name and code point map # correctly. $all_pass &= test_vianame($i, $hex, $names[$i]); # These four code points have a different Unicode1 name than # regular name, and viacode has already specifically tested # for the regular name if ($i != 0x0a && $i != 0x0c && $i != 0x0d && $i != 0x85) { $all_pass &= is(charnames::viacode($i), $names[$i], "Verify viacode(0x$hex) is \"$names[$i]\""); } # And make sure that a non-algorithmically named code # point doesn't also map to one that is. if ($names[$i] !~ /$hex$/) { if (rand() < .5) { $all_pass &= ok(! defined charnames::vianame("CJK UNIFIED IDEOGRAPH-$hex"), "Verify vianame(\"CJK UNIFIED IDEOGRAPH-$hex\") is undefined"); } else { $all_pass &= ok(! defined charnames::string_vianame("CJK UNIFIED IDEOGRAPH-$hex"), "Verify string_vianame(\"CJK UNIFIED IDEOGRAPH-$hex\") is undefined"); } } } } # Skip to the next untested block. $block = $end_block + 1; } if (open my $fh, "<", "../../lib/unicore/NamedSequences.txt") { while (<$fh>) { chomp; s/^\s*#.*//; next unless $_; my ($name, $codes) = split ";"; my $utf8 = pack("U*", map { hex } split " ", $codes); is(charnames::string_vianame($name), $utf8, "Verify string_vianame(\"$name\") is the proper utf8"); my $loose_name = get_loose_name($name); use charnames ":loose"; is(charnames::string_vianame($loose_name), $utf8, "Verify string_vianame(\"$loose_name\") is the proper utf8"); #diag("$name, $utf8"); } close $fh; } else { use Unicode::UCD; die "Can't open ../../lib/unicore/NamedSequences.txt: $!" if pack("C*", split /\./, Unicode::UCD::UnicodeVersion()) ge v4.1.0; } unless ($all_pass) { diag(<