BEGIN { unless ("A" eq pack('U', 0x41)) { print "1..0 # Unicode::Collate " . "cannot stringify a Unicode code point\n"; exit 0; } if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } use Test; BEGIN { plan tests => 72 }; use strict; use warnings; use Unicode::Collate; ok(1); ######################### # a standard collator (3.1.1) my $Collator = Unicode::Collate->new( table => 'keys.txt', normalization => undef, ); # a collator for hangul sorting, # cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html # http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf my $hangul = Unicode::Collate->new( level => 3, table => undef, normalization => undef, entry => <<'ENTRIES', 0061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A 0041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A #1161 ; [.1800.0020.0002] # initial jungseong A #1163 ; [.1801.0020.0002] # initial jungseong YA 1100 ; [.1831.0020.0002] # choseong KIYEOK 1100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A 1100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA 1101 ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK 1101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A 1101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA 1102 ; [.1833.0020.0002] # choseong NIEUN 1102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A 1102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA 3042 ; [.1921.0020.000E] # HIRAGANA LETTER A 11A8 ; [.FE10.0020.0002] # jongseong KIYEOK 11A9 ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK 1161 ; [.FE20.0020.0002] # jungseong A 1163 ; [.FE21.0020.0002] # jungseong YA ENTRIES ); ok(ref $hangul, "Unicode::Collate"); my $trailwt = Unicode::Collate->new( level => 3, table => undef, normalization => undef, hangul_terminator => 16, entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong 0061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A 0041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A 11A8 ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK 11A9 ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK 1161 ; [.1831.0020.0002] # HANGUL JUNGSEONG A 1163 ; [.1832.0020.0002] # HANGUL JUNGSEONG YA 1100 ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK 1101 ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK 1102 ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN 3042 ; [.1921.0020.000E] # HIRAGANA LETTER A ENTRIES ); ######################### # L(simp)L(simp) vs L(comp): /GGA/ ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); ok($hangul ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); # L(simp) vs L(simp)L(simp): /GA/ vs /GGA/ ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); # T(simp)T(simp) vs T(comp): /AGG/ ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); ok($hangul ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); # T(simp) vs T(simp)T(simp): /AG/ vs /AGG/ ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); ok($hangul ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); # LV vs LLV: /GA/ vs /GNA/ ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); # LVX vs LVV: /GAA/ vs /GA/.latinA ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); # LVX vs LVV: /GAA/ vs /GA/.hiraganaA ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); # LVX vs LVV: /GAA/ vs /GA/.hanja ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); # LVL vs LVT: /GA/./G/ vs /GAG/ ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); # LVT vs LVX: /GAG/ vs /GA/.latinA ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); # LVT vs LVX: /GAG/ vs /GA/.hiraganaA ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); # LVT vs LVX: /GAG/ vs /GA/.hanja ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); # LVT vs LVV: /GAG/ vs /GAA/ ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); ok($hangul ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); # LVL vs LVV: /GA/./G/ vs /GAA/ ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); # LV vs Syl(LV): /GA/ vs /[GA]/ ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}")); ok($hangul ->eq("\x{1100}\x{1161}", "\x{AC00}")); ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}")); # LVT vs Syl(LV)T: /GAG/ vs /[GA]G/ ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); # LVT vs Syl(LVT): /GAG/ vs /[GAG]/ ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); # LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); # LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/ ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); # LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/ ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); ok($hangul ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); ######################### # checks contraction in LVT: # weights of these contractions may be non-sense. my $hangcont = Unicode::Collate->new( level => 3, table => undef, normalization => undef, entry => <<'ENTRIES', 1100 ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK 1101 ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK 1161 ; [.188D.0020.0002] # HANGUL JUNGSEONG A 1162 ; [.188E.0020.0002] # HANGUL JUNGSEONG AE 1163 ; [.188F.0020.0002] # HANGUL JUNGSEONG YA 11A8 ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK 11A9 ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK 1161 11A9 ; [.0000.0000.0000] # A-GG 1100 1163 11A8 ; [.1000.0020.0002] # G-YA-G eq. U+AC39 ENTRIES ); # contracted into VT ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}")); ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}")); # not contracted into LVT but into VT ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}")); ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}")); # contracted into LVT ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}")); ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}")); # LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); # LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/ ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}")); ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}")); 1; __END__