diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-08-09 14:49:00 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-08-09 14:49:00 +0000 |
commit | 55d7b906b92a1aeb0d5030f2388e4f9daaf4425f (patch) | |
tree | 575bbd8c0ca8258180bf53476c273a41610a7ba4 /lib/Unicode/UCD.t | |
parent | 17baacb7566a50d23189ae645624597ecca41aab (diff) | |
download | perl-55d7b906b92a1aeb0d5030f2388e4f9daaf4425f.tar.gz |
Rename lib/unicode files to lib/unicore to avoid
conflicts between core lib/unicode and Unicode::
files in case-ignoring filesystems.
p4raw-id: //depot/perl@11623
Diffstat (limited to 'lib/Unicode/UCD.t')
-rw-r--r-- | lib/Unicode/UCD.t | 271 |
1 files changed, 271 insertions, 0 deletions
# lib/Unicode/UCD.t -- regression tests for Unicode::UCD.
#
# Exercises charinfo, charblock, charscript, charblocks, charscripts,
# charinrange, compexcl, casefold and casespec.  The expected values are
# tied to the Unicode version asserted below (3.1); they must be revisited
# whenever the bundled Unicode data files are upgraded.

use strict;

# Plain load with no import list, to check that the module loads on its own.
use Unicode::UCD;

use Test;

# The plan must match the number of ok() calls in this file exactly.
BEGIN { plan tests => 162 };

use Unicode::UCD 'charinfo';

my $charinfo;

# Every charinfo() section below checks the same 17 fields, in the same order.

# A plain ASCII capital letter.

$charinfo = charinfo(0x41);

ok($charinfo->{code},           '0041');
ok($charinfo->{name},           'LATIN CAPITAL LETTER A');
ok($charinfo->{category},       'Lu');
ok($charinfo->{combining},      '0');
ok($charinfo->{bidi},           'L');
ok($charinfo->{decomposition},  '');
ok($charinfo->{decimal},        '');
ok($charinfo->{digit},          '');
ok($charinfo->{numeric},        '');
ok($charinfo->{mirrored},       'N');
ok($charinfo->{unicode10},      '');
ok($charinfo->{comment},        '');
ok($charinfo->{upper},          '');
ok($charinfo->{lower},          '0061');
ok($charinfo->{title},          '');
ok($charinfo->{block},          'Basic Latin');
ok($charinfo->{script},         'Latin');

# A capital letter with a canonical decomposition and a Unicode 1.0 name.

$charinfo = charinfo(0x100);

ok($charinfo->{code},           '0100');
ok($charinfo->{name},           'LATIN CAPITAL LETTER A WITH MACRON');
ok($charinfo->{category},       'Lu');
ok($charinfo->{combining},      '0');
ok($charinfo->{bidi},           'L');
ok($charinfo->{decomposition},  '0041 0304');
ok($charinfo->{decimal},        '');
ok($charinfo->{digit},          '');
ok($charinfo->{numeric},        '');
ok($charinfo->{mirrored},       'N');
ok($charinfo->{unicode10},      'LATIN CAPITAL LETTER A MACRON');
ok($charinfo->{comment},        '');
ok($charinfo->{upper},          '');
ok($charinfo->{lower},          '0101');
ok($charinfo->{title},          '');
ok($charinfo->{block},          'Latin Extended-A');
ok($charinfo->{script},         'Latin');

# 0x0590 is in the Hebrew block but unused.
# Every field, including block and script, is expected to be undefined.

$charinfo = charinfo(0x590);

ok($charinfo->{code},           undef);
ok($charinfo->{name},           undef);
ok($charinfo->{category},       undef);
ok($charinfo->{combining},      undef);
ok($charinfo->{bidi},           undef);
ok($charinfo->{decomposition},  undef);
ok($charinfo->{decimal},        undef);
ok($charinfo->{digit},          undef);
ok($charinfo->{numeric},        undef);
ok($charinfo->{mirrored},       undef);
ok($charinfo->{unicode10},      undef);
ok($charinfo->{comment},        undef);
ok($charinfo->{upper},          undef);
ok($charinfo->{lower},          undef);
ok($charinfo->{title},          undef);
ok($charinfo->{block},          undef);
ok($charinfo->{script},         undef);

# 0x05d0 is in the Hebrew block and used.

$charinfo = charinfo(0x5d0);

ok($charinfo->{code},           '05D0');
ok($charinfo->{name},           'HEBREW LETTER ALEF');
ok($charinfo->{category},       'Lo');
ok($charinfo->{combining},      '0');
ok($charinfo->{bidi},           'R');
ok($charinfo->{decomposition},  '');
ok($charinfo->{decimal},        '');
ok($charinfo->{digit},          '');
ok($charinfo->{numeric},        '');
ok($charinfo->{mirrored},       'N');
ok($charinfo->{unicode10},      '');
ok($charinfo->{comment},        '');
ok($charinfo->{upper},          '');
ok($charinfo->{lower},          '');
ok($charinfo->{title},          '');
ok($charinfo->{block},          'Hebrew');
ok($charinfo->{script},         'Hebrew');

# An open syllable in Hangul.

$charinfo = charinfo(0xAC00);

ok($charinfo->{code},           'AC00');
ok($charinfo->{name},           'HANGUL SYLLABLE GA');
ok($charinfo->{category},       'Lo');
ok($charinfo->{combining},      '0');
ok($charinfo->{bidi},           'L');
ok($charinfo->{decomposition},  '1100 1161');
ok($charinfo->{decimal},        '');
ok($charinfo->{digit},          '');
ok($charinfo->{numeric},        '');
ok($charinfo->{mirrored},       'N');
ok($charinfo->{unicode10},      '');
ok($charinfo->{comment},        '');
ok($charinfo->{upper},          '');
ok($charinfo->{lower},          '');
ok($charinfo->{title},          '');
ok($charinfo->{block},          'Hangul Syllables');
ok($charinfo->{script},         'Hangul');

# A closed syllable in Hangul.

$charinfo = charinfo(0xAE00);

ok($charinfo->{code},           'AE00');
ok($charinfo->{name},           'HANGUL SYLLABLE GEUL');
ok($charinfo->{category},       'Lo');
ok($charinfo->{combining},      '0');
ok($charinfo->{bidi},           'L');
ok($charinfo->{decomposition},  '1100 1173 11AF');
ok($charinfo->{decimal},        '');
ok($charinfo->{digit},          '');
ok($charinfo->{numeric},        '');
ok($charinfo->{mirrored},       'N');
ok($charinfo->{unicode10},      '');
ok($charinfo->{comment},        '');
ok($charinfo->{upper},          '');
ok($charinfo->{lower},          '');
ok($charinfo->{title},          '');
ok($charinfo->{block},          'Hangul Syllables');
ok($charinfo->{script},         'Hangul');

# A code point above 0xFFFF; its code is reported with five hex digits,
# it has a block, and no script is expected for it.

$charinfo = charinfo(0x1D400);

ok($charinfo->{code},           '1D400');
ok($charinfo->{name},           'MATHEMATICAL BOLD CAPITAL A');
ok($charinfo->{category},       'Lu');
ok($charinfo->{combining},      '0');
ok($charinfo->{bidi},           'L');
ok($charinfo->{decomposition},  '<font> 0041');
ok($charinfo->{decimal},        '');
ok($charinfo->{digit},          '');
ok($charinfo->{numeric},        '');
ok($charinfo->{mirrored},       'N');
ok($charinfo->{unicode10},      '');
ok($charinfo->{comment},        '');
ok($charinfo->{upper},          '');
ok($charinfo->{lower},          '');
ok($charinfo->{title},          '');
ok($charinfo->{block},          'Mathematical Alphanumeric Symbols');
ok($charinfo->{script},         undef);

use Unicode::UCD qw(charblock charscript);

# 0x0590 is in the Hebrew block but unused.
# Unlike charinfo() above, charblock() is expected to name the block here.

ok(charblock(0x590),            'Hebrew');
ok(charscript(0x590),           undef);

# A character with a non-empty numeric value and a compatibility
# decomposition.

$charinfo = charinfo(0xbe);

ok($charinfo->{code},           '00BE');
ok($charinfo->{name},           'VULGAR FRACTION THREE QUARTERS');
ok($charinfo->{category},       'No');
ok($charinfo->{combining},      '0');
ok($charinfo->{bidi},           'ON');
ok($charinfo->{decomposition},  '<fraction> 0033 2044 0034');
ok($charinfo->{decimal},        '');
ok($charinfo->{digit},          '');
ok($charinfo->{numeric},        '3/4');
ok($charinfo->{mirrored},       'N');
ok($charinfo->{unicode10},      'FRACTION THREE QUARTERS');
ok($charinfo->{comment},        '');
ok($charinfo->{upper},          '');
ok($charinfo->{lower},          '');
ok($charinfo->{title},          '');
ok($charinfo->{block},          'Latin-1 Supplement');
ok($charinfo->{script},         undef);

use Unicode::UCD qw(charblocks charscripts);

# charblocks() is used as a hash reference keyed by block name; each value
# is indexed as a list of ranges whose first element is the range start.

my $charblocks = charblocks();

ok(exists $charblocks->{Thai});
ok($charblocks->{Thai}->[0]->[0], hex('0e00'));
ok(!exists $charblocks->{PigLatin});

# charscripts() is checked the same way, keyed by script name.

my $charscripts = charscripts();

ok(exists $charscripts->{Armenian});
ok($charscripts->{Armenian}->[0]->[0], hex('0531'));
ok(!exists $charscripts->{PigLatin});

# The code point argument may also be given as a hexadecimal string:
# bare, "0x"-prefixed, or "U+"-prefixed.

my $charscript;

$charscript = charscript("12ab");
ok($charscript, 'Ethiopic');

$charscript = charscript("0x12ab");
ok($charscript, 'Ethiopic');

$charscript = charscript("U+12ab");
ok($charscript, 'Ethiopic');

# Given a script name instead of a code point, charscript() is expected to
# return the ranges of that script: [0][0] is the start of the first range
# and [0][1] its end.

my $ranges;

$ranges = charscript('Ogham');
ok($ranges->[0]->[0], hex('1681'));
ok($ranges->[0]->[1], hex('169a'));

use Unicode::UCD qw(charinrange);

# Boundary checks: one below the first code point, the first and last code
# points, and one above the last.

$ranges = charscript('Cherokee');
ok(!charinrange($ranges, "139f"));
ok( charinrange($ranges, "13a0"));
ok( charinrange($ranges, "13f4"));
ok(!charinrange($ranges, "13f5"));

# All expected values in this file assume this Unicode version.

ok(Unicode::UCD::UnicodeVersion, 3.1);

use Unicode::UCD qw(compexcl);

ok(!compexcl(0x0100));
ok( compexcl(0x0958));

use Unicode::UCD qw(casefold);

my $casefold;

# A one-to-one folding (status 'C').

$casefold = casefold(0x41);

ok($casefold->{code}    eq '0041' &&
   $casefold->{status}  eq 'C'    &&
   $casefold->{mapping} eq '0061');

# A folding that expands to two code points (status 'F').

$casefold = casefold(0xdf);

ok($casefold->{code}    eq '00DF' &&
   $casefold->{status}  eq 'F'    &&
   $casefold->{mapping} eq '0073 0073');

# A character with no case folding yields a false value.

ok(!casefold(0x20));

use Unicode::UCD qw(casespec);

my $casespec;

# A character with no special casing yields a false value.

ok(!casespec(0x41));

$casespec = casespec(0xdf);

# The condition is tested with defined() rather than "eq undef": undef
# stringifies to '' in a string comparison, so "eq undef" would also accept
# an empty-string condition and would warn under "use warnings".
ok($casespec->{code}  eq '00DF'      &&
   $casespec->{lower} eq '00DF'      &&
   $casespec->{title} eq '0053 0073' &&
   $casespec->{upper} eq '0053 0053' &&
   !defined $casespec->{condition});

# A special casing entry that carries a condition.

$casespec = casespec(0x307);

ok($casespec->{code}      eq '0307' &&
   $casespec->{lower}     eq '0307' &&
   $casespec->{title}     eq ''     &&
   $casespec->{upper}     eq ''     &&
   $casespec->{condition} eq 'lt AFTER_i');