diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2003-07-27 17:13:55 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2003-07-27 17:13:55 +0000 |
commit | aaf52a42e904ca7ae9766d6ce5091ef5b411fda1 (patch) | |
tree | 8d95e356ff76fb232d8420882473851968db4e66 /lib/I18N | |
parent | dc9e9a318f31f03f479a46376098d08742841b9a (diff) | |
download | perl-aaf52a42e904ca7ae9766d6ce5091ef5b411fda1.tar.gz |
Upgrade to I18N::LangTags 0.28.
p4raw-id: //depot/perl@20230
Diffstat (limited to 'lib/I18N')
-rw-r--r-- | lib/I18N/LangTags.pm | 22 |
-rw-r--r-- | lib/I18N/LangTags/List.pm | 194 |
-rw-r--r-- | lib/I18N/LangTags/t/01test.t (renamed from lib/I18N/LangTags/test.pl) | 30 |
3 files changed, 192 insertions, 54 deletions
diff --git a/lib/I18N/LangTags.pm b/lib/I18N/LangTags.pm index ab5ef38245..5fa5692cf2 100644 --- a/lib/I18N/LangTags.pm +++ b/lib/I18N/LangTags.pm @@ -1,5 +1,5 @@ -# Time-stamp: "2002-02-02 20:43:03 MST" +# Time-stamp: "2003-07-20 07:44:42 ADT" # Sean M. Burke <sburke@cpan.org> require 5.000; @@ -17,7 +17,7 @@ require Exporter; ); %EXPORT_TAGS = ('ALL' => \@EXPORT_OK); -$VERSION = "0.27"; +$VERSION = "0.28"; =head1 NAME @@ -529,9 +529,16 @@ sub encode_language_tag { # we can just handle them here with regexps. $tag =~ s/^iw\b/he/i; # Hebrew $tag =~ s/^in\b/id/i; # Indonesian + $tag =~ s/^cre\b/cr/i; # Cree + $tag =~ s/^jw\b/jv/i; # Javanese $tag =~ s/^[ix]-lux\b/lb/i; # Luxemburger $tag =~ s/^[ix]-navajo\b/nv/i; # Navajo $tag =~ s/^ji\b/yi/i; # Yiddish + # SMB 2003 -- Hm. There's a bunch of new XXX->YY variances now, + # but maybe they're all so obscure I can ignore them. "Obscure" + # meaning either that the language is obscure, and/or that the + # XXX form was extant so briefly that it's unlikely it was ever + # used. I hope. # # These go FROM the simplex to complex form, to get # similarity-comparison right. And that's okay, since @@ -770,19 +777,16 @@ Continuations". * Locale::Codes, in C<http://www.perl.com/CPAN/modules/by-module/Locale/> -* ISO 639, "Code for the representation of names of languages", -C<http://www.indigo.ie/egt/standards/iso639/iso639-1-en.html> - * ISO 639-2, "Codes for the representation of names of languages", -including three-letter codes, -C<http://lcweb.loc.gov/standards/iso639-2/bibcodes.html> +including two-letter and three-letter codes, +C<http://www.loc.gov/standards/iso639-2/langcodes.html> * The IANA list of registered languages (hopefully up-to-date), -C<ftp://ftp.isi.edu/in-notes/iana/assignments/languages/> +C<http://www.iana.org/assignments/language-tags> =head1 COPYRIGHT -Copyright (c) 1998-2001 Sean M. Burke. All rights reserved. +Copyright (c) 1998-2003 Sean M. Burke. All rights reserved. 
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. diff --git a/lib/I18N/LangTags/List.pm b/lib/I18N/LangTags/List.pm index 2dbd19a5d7..ebabbf5e39 100644 --- a/lib/I18N/LangTags/List.pm +++ b/lib/I18N/LangTags/List.pm @@ -1,10 +1,10 @@ require 5; package I18N::LangTags::List; -# Time-stamp: "2002-02-02 20:13:58 MST" +# Time-stamp: "2003-07-20 07:31:08 ADT" use strict; use vars qw(%Name $Debug $VERSION); -$VERSION = '0.25'; +$VERSION = '0.26'; # POD at the end. #---------------------------------------------------------------------- @@ -13,17 +13,20 @@ $VERSION = '0.25'; my $seeking = 1; my $count = 0; my($tag,$name); + my $last_name = ''; while(<I18N::LangTags::List::DATA>) { if($seeking) { $seeking = 0 if m/=for woohah/; - } else { - next unless ($tag, $name) = - m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/; + } elsif( ($tag, $name) = + m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/ + ) { $name =~ s/\s*[;\.]*\s*$//g; next unless $name; ++$count; print "<$tag> <$name>\n" if $Debug; - $Name{$tag} = $name; + $last_name = $Name{$tag} = $name; + } elsif (m/Formerly \"([-a-z0-9]+)\"/) { + $Name{$1} = "$last_name (old tag)" if $last_name; } } die "No tags read??" unless $count; @@ -195,6 +198,10 @@ eq Abkhaz =item {ada} : Adangme +=item {ady} : Adyghe + +eq Adygei + =item {aa} : Afar =item {afh} : Afrihili @@ -205,7 +212,9 @@ eq Abkhaz =item [{afa} : Afro-Asiatic (Other)] -=item {aka} : Akan +=item {ak} : Akan + +(Formerly "aka".) =item {akk} : Akkadian @@ -264,8 +273,14 @@ NOT Amharic! NOT Samaritan Aramaic! =item {hy} : Armenian +=item {an} : Aragonese + =item [{art} : Artificial (Other)] +=item {ast} : Asturian + +eq Bable. + =item {as} : Assamese =item [{ath} : Athapascan languages] @@ -276,7 +291,9 @@ eq Athabaskan. eq Athapaskan. eq Athabascan. =item [{map} : Austronesian (Other)] -=item {ava} : Avaric +=item {av} : Avaric + +(Formerly "ava".) 
=item {ae} : Avestan @@ -290,13 +307,20 @@ eq Zend eq Azeri +Notable forms: +{az-Arab} Azerbaijani in Arabic script; +{az-Cyrl} Azerbaijani in Cyrillic script; +{az-Latn} Azerbaijani in Latin script. + =item {ban} : Balinese =item [{bat} : Baltic (Other)] =item {bal} : Baluchi -=item {bam} : Bambara +=item {bm} : Bambara + +(Formerly "bam".) =item [{bai} : Bamileke languages] @@ -403,14 +427,16 @@ eq Nyanja. eq Chinyanja. =item {zh} : Chinese Many forms are mutually un-intelligible in spoken media. -Notable subforms: +Notable forms: +{zh-Hans} Chinese, in simplified script; +{zh-Hant} Chinese, in traditional script; +{zh-tw} Taiwan Chinese; {zh-cn} PRC Chinese; -{zh-hk} Hong Kong Chinese; -{zh-mo} Macau Chinese; {zh-sg} Singapore Chinese; -{zh-tw} Taiwan Chinese; +{zh-mo} Macau Chinese; +{zh-hk} Hong Kong Chinese; {zh-guoyu} Mandarin [Putonghua/Guoyu]; -{zh-hakka} Hakka [formerly i-hakka]; +{zh-hakka} Hakka [formerly "i-hakka"]; {zh-min} Hokkien; {zh-min-nan} Southern Hokkien; {zh-wuu} Shanghaiese; @@ -447,9 +473,9 @@ eq Trukese. eq Chuuk. eq Truk. eq Ruk. eq Corse. -=item {cre} : Cree +=item {cr} : Cree -NOT Creek! +NOT Creek! (Formerly "cre".) =item {mus} : Creek @@ -477,6 +503,8 @@ eq Nakota. eq Latoka. =item {da} : Danish +=item {dar} : Dargwa + =item {day} : Dayak =item {i-default} : Default (Fallthru) Language @@ -491,7 +519,9 @@ messages, for example. =item {din} : Dinka -=item {div} : Divehi +=item {dv} : Divehi + +eq Maldivian. (Formerly "div".) =item {doi} : Dogri @@ -555,13 +585,19 @@ Notable forms: eq Anglo-Saxon. (Historical) +=item {i-enochian} : Enochian (Artificial) + +=item {myv} : Erzya + =item {eo} : Esperanto (Artificial) =item {et} : Estonian -=item {ewe} : Ewe +=item {ee} : Ewe + +(Formerly "ewe".) =item {ewo} : Ewondo @@ -603,7 +639,9 @@ Notable forms: =item {fur} : Friulian -=item {ful} : Fulah +=item {ff} : Fulah + +(Formerly "ful".) =item {gaa} : Ga @@ -615,7 +653,9 @@ NOT Scots! 
eq Galician -=item {lug} : Ganda +=item {lg} : Ganda + +(Formerly "lug".) =item {gay} : Gayo @@ -679,6 +719,10 @@ eq Gwichin =item {hai} : Haida +=item {ht} : Haitian + +eq Haitian Creole + =item {ha} : Hausa =item {haw} : Hawaiian @@ -716,7 +760,13 @@ Hawai'ian =item {is} : Icelandic -=item {ibo} : Igbo +=item {io} : Ido + +(Artificial) + +=item {ig} : Igbo + +(Formerly "ibo".) =item {ijo} : Ijo @@ -733,6 +783,8 @@ Hawai'ian =for etc {in} Indonesian (old tag) +=item {inh} : Ingush + =item {ia} : Interlingua (International Auxiliary Language Association) (Artificial) NOT Interlingue! @@ -773,12 +825,16 @@ Notable forms: (NOT "jp"!) -=item {jw} : Javanese +=item {jv} : Javanese + +(Formerly "jw" because of a typo.) =item {jrb} : Judeo-Arabic =item {jpr} : Judeo-Persian +=item {kbd} : Kabardian + =item {kab} : Kabyle =item {kac} : Kachin @@ -787,13 +843,19 @@ Notable forms: eq Greenlandic "Eskimo" +=item {xal} : Kalmyk + =item {kam} : Kamba =item {kn} : Kannada eq Kanarese. NOT Canadian! -=item {kau} : Kanuri +=item {kr} : Kanuri + +(Formerly "kau".) + +=item {krc} : Karachay-Balkar =item {kaa} : Kara-Kalpak @@ -801,6 +863,10 @@ eq Kanarese. NOT Canadian! =item {ks} : Kashmiri +=item {csb} : Kashubian + +eq Kashub + =item {kaw} : Kawi =item {kk} : Kazakh @@ -829,7 +895,9 @@ eq Gikuyu. =item {kv} : Komi -=item {kon} : Kongo +=item {kg} : Kongo + +(Formerly "kon".) =item {kok} : Konkani @@ -877,13 +945,17 @@ eq Lettish. =item {lb} : Letzeburgesch -eq Luxemburgian, eq Luxemburger. (Formerly i-lux.) +eq Luxemburgian, eq Luxemburger. (Formerly "i-lux".) =for etc {i-lux} Letzeburgesch (old tag) =item {lez} : Lezghian +=item {li} : Limburgish + +eq Limburger, eq Limburgan. NOT Letzeburgesch! + =item {ln} : Lingala =item {lt} : Lithuanian @@ -892,9 +964,13 @@ eq Luxemburgian, eq Luxemburger. (Formerly i-lux.) eq Low Saxon. eq Low German. eq Low Saxon. 
+=item {art-lojban} : Lojban (Artificial) + =item {loz} : Lozi -=item {lub} : Luba-Katanga +=item {lu} : Luba-Katanga + +(Formerly "lub".) =item {lua} : Luba-Lulua @@ -985,6 +1061,8 @@ Don't use this. =item {moh} : Mohawk +=item {mdf} : Moksha + =item {mo} : Moldavian eq Moldovan. @@ -1007,11 +1085,13 @@ Not for normal use. =item {nah} : Nahuatl +=item {nap} : Neapolitan + =item {na} : Nauru =item {nv} : Navajo -eq Navaho. (Formerly i-navajo.) +eq Navaho. (Formerly "i-navajo".) =for etc {i-navajo} Navajo (old tag) @@ -1038,6 +1118,8 @@ eq Nepalese. Notable forms: =item {niu} : Niuean +=item {nog} : Nogai + =item {non} : Old Norse (Historical) @@ -1046,24 +1128,20 @@ eq Nepalese. Notable forms: Do not use this. -=item {se} : Northern Sami - -eq Lappish. eq Lapp. eq (Northern) Saami. - =item {no} : Norwegian Note the two following forms: =item {nb} : Norwegian Bokmal -eq BokmE<aring>l, (A form of Norwegian.) (Formerly no-bok.) +eq BokmE<aring>l, (A form of Norwegian.) (Formerly "no-bok".) =for etc {no-bok} Norwegian Bokmal (old tag) =item {nn} : Norwegian Nynorsk -(A form of Norwegian.) (Formerly no-nyn.) +(A form of Norwegian.) (Formerly "no-nyn".) =for etc {no-nyn} Norwegian Nynorsk (old tag) @@ -1082,9 +1160,9 @@ eq BokmE<aring>l, (A form of Norwegian.) (Formerly no-bok.) eq ProvenE<ccedil>al, eq Provencal -=item {oji} : Ojibwa +=item {oj} : Ojibwa -eq Ojibwe. +eq Ojibwe. (Formerly "oji".) =item {or} : Oriya @@ -1202,6 +1280,18 @@ Large language group. NOT Aramaic! +=item {se} : Northern Sami + +eq Lappish. eq Lapp. eq (Northern) Saami. + +=item {sma} : Southern Sami + +=item {smn} : Inari Sami + +=item {smj} : Lule Sami + +=item {sms} : Skolt Sami + =item [{smi} : Sami languages (Other)] =item {sm} : Samoan @@ -1234,6 +1324,10 @@ NOT Scots Gaelic! eq Serb. NOT Sorbian. +Notable forms: +{sr-Cyrl} : Serbian in Cyrillic script; +{sr-Latn} : Serbian in Latin script. + =item {srr} : Serer =item {shn} : Shan @@ -1250,6 +1344,8 @@ Always use with a subtag. 
Notable forms: {sgn-ni} Nicaraguan Sign Language (ISN); {sgn-us} American Sign Language (ASL). +(And so on with other country codes as the subtag.) + =item {bla} : Siksika eq Blackfoot. eq Pikanii. @@ -1422,6 +1518,8 @@ Same as Setswana. =item {tum} : Tumbuka +=item [{tup} : Tupi languages] + =item {tr} : Turkish (Typically in Roman script) @@ -1430,6 +1528,10 @@ Same as Setswana. (Typically in Arabic script) (Historical) +=item {crh} : Crimean Turkish + +eq Crimean Tatar + =item {tk} : Turkmen eq Turkmeni. @@ -1442,6 +1544,8 @@ eq Tuvan. eq Tuvin. =item {tw} : Twi +=item {udm} : Udmurt + =item {uga} : Ugaritic NOT Ugric! @@ -1462,11 +1566,15 @@ Not a tag for normal use. eq E<Ouml>zbek +Notable forms: +{uz-Cyrl} Uzbek in Cyrillic script; +{uz-Latn} Uzbek in Latin script. + =item {vai} : Vai -=item {ven} : Venda +=item {ve} : Venda -NOT Wendish! NOT Wend! NOT Avestan! +NOT Wendish! NOT Wend! NOT Avestan! (Formerly "ven".) =item {vi} : Vietnamese @@ -1482,6 +1590,8 @@ eq Votian. eq Vod. =item [{wak} : Wakashan languages] +=item {wa} : Walloon + =item {wal} : Walamo eq Wolaytta. @@ -1517,12 +1627,14 @@ or IANA. Example, x-double-dutch eq Yap +=item {ii} : Sichuan Yi + =item {yi} : Yiddish -Formerly "ji". Sometimes in Roman script, sometimes in Hebrew script. +Formerly "ji". Usually in Hebrew script. -=for etc -{ji} Yiddish (old tag) +Notable forms: +{yi-latn} Yiddish in Latin script =item {yo} : Yoruba @@ -1558,7 +1670,7 @@ L<I18N::LangTags|I18N::LangTags> and its "See Also" section. =head1 COPYRIGHT AND DISCLAIMER -Copyright (c) 2001,2002 Sean M. Burke. All rights reserved. +Copyright (c) 2001,2002,2003 Sean M. Burke. All rights reserved. You can redistribute and/or modify this document under the same terms as Perl itself. 
diff --git a/lib/I18N/LangTags/test.pl b/lib/I18N/LangTags/t/01test.t index 88a7bf66ae..86e251778f 100644 --- a/lib/I18N/LangTags/test.pl +++ b/lib/I18N/LangTags/t/01test.t @@ -3,10 +3,10 @@ ######################### We start with some black magic to print on failure. require 5; - # Time-stamp: "2001-06-21 22:59:38 MDT" + # Time-stamp: "2003-07-20 07:36:49 ADT" use strict; use Test; -BEGIN { plan tests => 46 }; +BEGIN { plan tests => 64 }; BEGIN { ok 1 } use I18N::LangTags (':ALL'); @@ -20,8 +20,8 @@ ok !is_language_tag('fr-CA-'); ok !is_language_tag('fr_CA'); ok is_language_tag('fr-ca-joual'); ok !is_language_tag('frca'); -ok is_language_tag('nav'); -ok is_language_tag('nav-shiprock'); +ok is_language_tag('nav'); # (not actual tag) +ok is_language_tag('nav-shiprock'); # (not actual tag) ok !is_language_tag('nav-ceremonial'); # subtag too long ok !is_language_tag('x'); ok !is_language_tag('i'); @@ -44,6 +44,8 @@ ok grep $_ eq 'it', panic_languages('es'); print "# Now the ::List tests...\n"; +print "# Perl v$], I18N::LangTags::List v$I18N::LangTags::List::VERSION\n"; + use I18N::LangTags::List; foreach my $lt (qw( en @@ -58,12 +60,30 @@ foreach my $lt (qw( it-it it-IT it-FR + ak + aka + jv + jw + no + no-nyn + nn + i-lux + lb + wa yi ji + den-syllabic + den-syllabic-western + den-western + den-latin cre-syllabic cre-syllabic-western cre-western cre-latin + cr-syllabic + cr-syllabic-western + cr-western + cr-latin )) { my $name = I18N::LangTags::List::name($lt); if($name) { @@ -75,5 +95,7 @@ foreach my $lt (qw( } } + + print "# So there!\n"; |