summaryrefslogtreecommitdiff
path: root/lib/I18N
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2003-07-27 17:13:55 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2003-07-27 17:13:55 +0000
commit aaf52a42e904ca7ae9766d6ce5091ef5b411fda1 (patch)
tree 8d95e356ff76fb232d8420882473851968db4e66 /lib/I18N
parent dc9e9a318f31f03f479a46376098d08742841b9a (diff)
download perl-aaf52a42e904ca7ae9766d6ce5091ef5b411fda1.tar.gz
Upgrade to I18N::LangTags 0.28.
p4raw-id: //depot/perl@20230
Diffstat (limited to 'lib/I18N')
-rw-r--r-- lib/I18N/LangTags.pm 22
-rw-r--r-- lib/I18N/LangTags/List.pm 194
-rw-r--r-- lib/I18N/LangTags/t/01test.t (renamed from lib/I18N/LangTags/test.pl) 30
3 files changed, 192 insertions, 54 deletions
diff --git a/lib/I18N/LangTags.pm b/lib/I18N/LangTags.pm
index ab5ef38245..5fa5692cf2 100644
--- a/lib/I18N/LangTags.pm
+++ b/lib/I18N/LangTags.pm
@@ -1,5 +1,5 @@
-# Time-stamp: "2002-02-02 20:43:03 MST"
+# Time-stamp: "2003-07-20 07:44:42 ADT"
# Sean M. Burke <sburke@cpan.org>
require 5.000;
@@ -17,7 +17,7 @@ require Exporter;
);
%EXPORT_TAGS = ('ALL' => \@EXPORT_OK);
-$VERSION = "0.27";
+$VERSION = "0.28";
=head1 NAME
@@ -529,9 +529,16 @@ sub encode_language_tag {
# we can just handle them here with regexps.
$tag =~ s/^iw\b/he/i; # Hebrew
$tag =~ s/^in\b/id/i; # Indonesian
+ $tag =~ s/^cre\b/cr/i; # Cree
+ $tag =~ s/^jw\b/jv/i; # Javanese
$tag =~ s/^[ix]-lux\b/lb/i; # Luxemburger
$tag =~ s/^[ix]-navajo\b/nv/i; # Navajo
$tag =~ s/^ji\b/yi/i; # Yiddish
+ # SMB 2003 -- Hm. There's a bunch of new XXX->YY variances now,
+ # but maybe they're all so obscure I can ignore them. "Obscure"
+ # meaning either that the language is obscure, and/or that the
+ # XXX form was extant so briefly that it's unlikely it was ever
+ # used. I hope.
#
# These go FROM the simplex to complex form, to get
# similarity-comparison right. And that's okay, since
@@ -770,19 +777,16 @@ Continuations".
* Locale::Codes, in
C<http://www.perl.com/CPAN/modules/by-module/Locale/>
-* ISO 639, "Code for the representation of names of languages",
-C<http://www.indigo.ie/egt/standards/iso639/iso639-1-en.html>
-
* ISO 639-2, "Codes for the representation of names of languages",
-including three-letter codes,
-C<http://lcweb.loc.gov/standards/iso639-2/bibcodes.html>
+including two-letter and three-letter codes,
+C<http://www.loc.gov/standards/iso639-2/langcodes.html>
* The IANA list of registered languages (hopefully up-to-date),
-C<ftp://ftp.isi.edu/in-notes/iana/assignments/languages/>
+C<http://www.iana.org/assignments/language-tags>
=head1 COPYRIGHT
-Copyright (c) 1998-2001 Sean M. Burke. All rights reserved.
+Copyright (c) 1998-2003 Sean M. Burke. All rights reserved.
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
diff --git a/lib/I18N/LangTags/List.pm b/lib/I18N/LangTags/List.pm
index 2dbd19a5d7..ebabbf5e39 100644
--- a/lib/I18N/LangTags/List.pm
+++ b/lib/I18N/LangTags/List.pm
@@ -1,10 +1,10 @@
require 5;
package I18N::LangTags::List;
-# Time-stamp: "2002-02-02 20:13:58 MST"
+# Time-stamp: "2003-07-20 07:31:08 ADT"
use strict;
use vars qw(%Name $Debug $VERSION);
-$VERSION = '0.25';
+$VERSION = '0.26';
# POD at the end.
#----------------------------------------------------------------------
@@ -13,17 +13,20 @@ $VERSION = '0.25';
my $seeking = 1;
my $count = 0;
my($tag,$name);
+ my $last_name = '';
while(<I18N::LangTags::List::DATA>) {
if($seeking) {
$seeking = 0 if m/=for woohah/;
- } else {
- next unless ($tag, $name) =
- m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/;
+ } elsif( ($tag, $name) =
+ m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/
+ ) {
$name =~ s/\s*[;\.]*\s*$//g;
next unless $name;
++$count;
print "<$tag> <$name>\n" if $Debug;
- $Name{$tag} = $name;
+ $last_name = $Name{$tag} = $name;
+ } elsif (m/Formerly \"([-a-z0-9]+)\"/) {
+ $Name{$1} = "$last_name (old tag)" if $last_name;
}
}
die "No tags read??" unless $count;
@@ -195,6 +198,10 @@ eq Abkhaz
=item {ada} : Adangme
+=item {ady} : Adyghe
+
+eq Adygei
+
=item {aa} : Afar
=item {afh} : Afrihili
@@ -205,7 +212,9 @@ eq Abkhaz
=item [{afa} : Afro-Asiatic (Other)]
-=item {aka} : Akan
+=item {ak} : Akan
+
+(Formerly "aka".)
=item {akk} : Akkadian
@@ -264,8 +273,14 @@ NOT Amharic! NOT Samaritan Aramaic!
=item {hy} : Armenian
+=item {an} : Aragonese
+
=item [{art} : Artificial (Other)]
+=item {ast} : Asturian
+
+eq Bable.
+
=item {as} : Assamese
=item [{ath} : Athapascan languages]
@@ -276,7 +291,9 @@ eq Athabaskan. eq Athapaskan. eq Athabascan.
=item [{map} : Austronesian (Other)]
-=item {ava} : Avaric
+=item {av} : Avaric
+
+(Formerly "ava".)
=item {ae} : Avestan
@@ -290,13 +307,20 @@ eq Zend
eq Azeri
+Notable forms:
+{az-Arab} Azerbaijani in Arabic script;
+{az-Cyrl} Azerbaijani in Cyrillic script;
+{az-Latn} Azerbaijani in Latin script.
+
=item {ban} : Balinese
=item [{bat} : Baltic (Other)]
=item {bal} : Baluchi
-=item {bam} : Bambara
+=item {bm} : Bambara
+
+(Formerly "bam".)
=item [{bai} : Bamileke languages]
@@ -403,14 +427,16 @@ eq Nyanja. eq Chinyanja.
=item {zh} : Chinese
Many forms are mutually un-intelligible in spoken media.
-Notable subforms:
+Notable forms:
+{zh-Hans} Chinese, in simplified script;
+{zh-Hant} Chinese, in traditional script;
+{zh-tw} Taiwan Chinese;
{zh-cn} PRC Chinese;
-{zh-hk} Hong Kong Chinese;
-{zh-mo} Macau Chinese;
{zh-sg} Singapore Chinese;
-{zh-tw} Taiwan Chinese;
+{zh-mo} Macau Chinese;
+{zh-hk} Hong Kong Chinese;
{zh-guoyu} Mandarin [Putonghua/Guoyu];
-{zh-hakka} Hakka [formerly i-hakka];
+{zh-hakka} Hakka [formerly "i-hakka"];
{zh-min} Hokkien;
{zh-min-nan} Southern Hokkien;
{zh-wuu} Shanghaiese;
@@ -447,9 +473,9 @@ eq Trukese. eq Chuuk. eq Truk. eq Ruk.
eq Corse.
-=item {cre} : Cree
+=item {cr} : Cree
-NOT Creek!
+NOT Creek! (Formerly "cre".)
=item {mus} : Creek
@@ -477,6 +503,8 @@ eq Nakota. eq Latoka.
=item {da} : Danish
+=item {dar} : Dargwa
+
=item {day} : Dayak
=item {i-default} : Default (Fallthru) Language
@@ -491,7 +519,9 @@ messages, for example.
=item {din} : Dinka
-=item {div} : Divehi
+=item {dv} : Divehi
+
+eq Maldivian. (Formerly "div".)
=item {doi} : Dogri
@@ -555,13 +585,19 @@ Notable forms:
eq Anglo-Saxon. (Historical)
+=item {i-enochian} : Enochian (Artificial)
+
+=item {myv} : Erzya
+
=item {eo} : Esperanto
(Artificial)
=item {et} : Estonian
-=item {ewe} : Ewe
+=item {ee} : Ewe
+
+(Formerly "ewe".)
=item {ewo} : Ewondo
@@ -603,7 +639,9 @@ Notable forms:
=item {fur} : Friulian
-=item {ful} : Fulah
+=item {ff} : Fulah
+
+(Formerly "ful".)
=item {gaa} : Ga
@@ -615,7 +653,9 @@ NOT Scots!
eq Galician
-=item {lug} : Ganda
+=item {lg} : Ganda
+
+(Formerly "lug".)
=item {gay} : Gayo
@@ -679,6 +719,10 @@ eq Gwichin
=item {hai} : Haida
+=item {ht} : Haitian
+
+eq Haitian Creole
+
=item {ha} : Hausa
=item {haw} : Hawaiian
@@ -716,7 +760,13 @@ Hawai'ian
=item {is} : Icelandic
-=item {ibo} : Igbo
+=item {io} : Ido
+
+(Artificial)
+
+=item {ig} : Igbo
+
+(Formerly "ibo".)
=item {ijo} : Ijo
@@ -733,6 +783,8 @@ Hawai'ian
=for etc
{in} Indonesian (old tag)
+=item {inh} : Ingush
+
=item {ia} : Interlingua (International Auxiliary Language Association)
(Artificial) NOT Interlingue!
@@ -773,12 +825,16 @@ Notable forms:
(NOT "jp"!)
-=item {jw} : Javanese
+=item {jv} : Javanese
+
+(Formerly "jw" because of a typo.)
=item {jrb} : Judeo-Arabic
=item {jpr} : Judeo-Persian
+=item {kbd} : Kabardian
+
=item {kab} : Kabyle
=item {kac} : Kachin
@@ -787,13 +843,19 @@ Notable forms:
eq Greenlandic "Eskimo"
+=item {xal} : Kalmyk
+
=item {kam} : Kamba
=item {kn} : Kannada
eq Kanarese. NOT Canadian!
-=item {kau} : Kanuri
+=item {kr} : Kanuri
+
+(Formerly "kau".)
+
+=item {krc} : Karachay-Balkar
=item {kaa} : Kara-Kalpak
@@ -801,6 +863,10 @@ eq Kanarese. NOT Canadian!
=item {ks} : Kashmiri
+=item {csb} : Kashubian
+
+eq Kashub
+
=item {kaw} : Kawi
=item {kk} : Kazakh
@@ -829,7 +895,9 @@ eq Gikuyu.
=item {kv} : Komi
-=item {kon} : Kongo
+=item {kg} : Kongo
+
+(Formerly "kon".)
=item {kok} : Konkani
@@ -877,13 +945,17 @@ eq Lettish.
=item {lb} : Letzeburgesch
-eq Luxemburgian, eq Luxemburger. (Formerly i-lux.)
+eq Luxemburgian, eq Luxemburger. (Formerly "i-lux".)
=for etc
{i-lux} Letzeburgesch (old tag)
=item {lez} : Lezghian
+=item {li} : Limburgish
+
+eq Limburger, eq Limburgan. NOT Letzeburgesch!
+
=item {ln} : Lingala
=item {lt} : Lithuanian
@@ -892,9 +964,13 @@ eq Luxemburgian, eq Luxemburger. (Formerly i-lux.)
eq Low Saxon. eq Low German. eq Low Saxon.
+=item {art-lojban} : Lojban (Artificial)
+
=item {loz} : Lozi
-=item {lub} : Luba-Katanga
+=item {lu} : Luba-Katanga
+
+(Formerly "lub".)
=item {lua} : Luba-Lulua
@@ -985,6 +1061,8 @@ Don't use this.
=item {moh} : Mohawk
+=item {mdf} : Moksha
+
=item {mo} : Moldavian
eq Moldovan.
@@ -1007,11 +1085,13 @@ Not for normal use.
=item {nah} : Nahuatl
+=item {nap} : Neapolitan
+
=item {na} : Nauru
=item {nv} : Navajo
-eq Navaho. (Formerly i-navajo.)
+eq Navaho. (Formerly "i-navajo".)
=for etc
{i-navajo} Navajo (old tag)
@@ -1038,6 +1118,8 @@ eq Nepalese. Notable forms:
=item {niu} : Niuean
+=item {nog} : Nogai
+
=item {non} : Old Norse
(Historical)
@@ -1046,24 +1128,20 @@ eq Nepalese. Notable forms:
Do not use this.
-=item {se} : Northern Sami
-
-eq Lappish. eq Lapp. eq (Northern) Saami.
-
=item {no} : Norwegian
Note the two following forms:
=item {nb} : Norwegian Bokmal
-eq BokmE<aring>l, (A form of Norwegian.) (Formerly no-bok.)
+eq BokmE<aring>l. (A form of Norwegian.) (Formerly "no-bok".)
=for etc
{no-bok} Norwegian Bokmal (old tag)
=item {nn} : Norwegian Nynorsk
-(A form of Norwegian.) (Formerly no-nyn.)
+(A form of Norwegian.) (Formerly "no-nyn".)
=for etc
{no-nyn} Norwegian Nynorsk (old tag)
@@ -1082,9 +1160,9 @@ eq BokmE<aring>l, (A form of Norwegian.) (Formerly no-bok.)
eq ProvenE<ccedil>al, eq Provencal
-=item {oji} : Ojibwa
+=item {oj} : Ojibwa
-eq Ojibwe.
+eq Ojibwe. (Formerly "oji".)
=item {or} : Oriya
@@ -1202,6 +1280,18 @@ Large language group.
NOT Aramaic!
+=item {se} : Northern Sami
+
+eq Lappish. eq Lapp. eq (Northern) Saami.
+
+=item {sma} : Southern Sami
+
+=item {smn} : Inari Sami
+
+=item {smj} : Lule Sami
+
+=item {sms} : Skolt Sami
+
=item [{smi} : Sami languages (Other)]
=item {sm} : Samoan
@@ -1234,6 +1324,10 @@ NOT Scots Gaelic!
eq Serb. NOT Sorbian.
+Notable forms:
+{sr-Cyrl} Serbian in Cyrillic script;
+{sr-Latn} Serbian in Latin script.
+
=item {srr} : Serer
=item {shn} : Shan
@@ -1250,6 +1344,8 @@ Always use with a subtag. Notable forms:
{sgn-ni} Nicaraguan Sign Language (ISN);
{sgn-us} American Sign Language (ASL).
+(And so on with other country codes as the subtag.)
+
=item {bla} : Siksika
eq Blackfoot. eq Pikanii.
@@ -1422,6 +1518,8 @@ Same as Setswana.
=item {tum} : Tumbuka
+=item [{tup} : Tupi languages]
+
=item {tr} : Turkish
(Typically in Roman script)
@@ -1430,6 +1528,10 @@ Same as Setswana.
(Typically in Arabic script) (Historical)
+=item {crh} : Crimean Turkish
+
+eq Crimean Tatar
+
=item {tk} : Turkmen
eq Turkmeni.
@@ -1442,6 +1544,8 @@ eq Tuvan. eq Tuvin.
=item {tw} : Twi
+=item {udm} : Udmurt
+
=item {uga} : Ugaritic
NOT Ugric!
@@ -1462,11 +1566,15 @@ Not a tag for normal use.
eq E<Ouml>zbek
+Notable forms:
+{uz-Cyrl} Uzbek in Cyrillic script;
+{uz-Latn} Uzbek in Latin script.
+
=item {vai} : Vai
-=item {ven} : Venda
+=item {ve} : Venda
-NOT Wendish! NOT Wend! NOT Avestan!
+NOT Wendish! NOT Wend! NOT Avestan! (Formerly "ven".)
=item {vi} : Vietnamese
@@ -1482,6 +1590,8 @@ eq Votian. eq Vod.
=item [{wak} : Wakashan languages]
+=item {wa} : Walloon
+
=item {wal} : Walamo
eq Wolaytta.
@@ -1517,12 +1627,14 @@ or IANA. Example, x-double-dutch
eq Yap
+=item {ii} : Sichuan Yi
+
=item {yi} : Yiddish
-Formerly "ji". Sometimes in Roman script, sometimes in Hebrew script.
+Formerly "ji". Usually in Hebrew script.
-=for etc
-{ji} Yiddish (old tag)
+Notable forms:
+{yi-latn} Yiddish in Latin script.
=item {yo} : Yoruba
@@ -1558,7 +1670,7 @@ L<I18N::LangTags|I18N::LangTags> and its "See Also" section.
=head1 COPYRIGHT AND DISCLAIMER
-Copyright (c) 2001,2002 Sean M. Burke. All rights reserved.
+Copyright (c) 2001,2002,2003 Sean M. Burke. All rights reserved.
You can redistribute and/or
modify this document under the same terms as Perl itself.
diff --git a/lib/I18N/LangTags/test.pl b/lib/I18N/LangTags/t/01test.t
index 88a7bf66ae..86e251778f 100644
--- a/lib/I18N/LangTags/test.pl
+++ b/lib/I18N/LangTags/t/01test.t
@@ -3,10 +3,10 @@
######################### We start with some black magic to print on failure.
require 5;
- # Time-stamp: "2001-06-21 22:59:38 MDT"
+ # Time-stamp: "2003-07-20 07:36:49 ADT"
use strict;
use Test;
-BEGIN { plan tests => 46 };
+BEGIN { plan tests => 64 };
BEGIN { ok 1 }
use I18N::LangTags (':ALL');
@@ -20,8 +20,8 @@ ok !is_language_tag('fr-CA-');
ok !is_language_tag('fr_CA');
ok is_language_tag('fr-ca-joual');
ok !is_language_tag('frca');
-ok is_language_tag('nav');
-ok is_language_tag('nav-shiprock');
+ok is_language_tag('nav'); # (not actual tag)
+ok is_language_tag('nav-shiprock'); # (not actual tag)
ok !is_language_tag('nav-ceremonial'); # subtag too long
ok !is_language_tag('x');
ok !is_language_tag('i');
@@ -44,6 +44,8 @@ ok grep $_ eq 'it', panic_languages('es');
print "# Now the ::List tests...\n";
+print "# Perl v$], I18N::LangTags::List v$I18N::LangTags::List::VERSION\n";
+
use I18N::LangTags::List;
foreach my $lt (qw(
en
@@ -58,12 +60,30 @@ foreach my $lt (qw(
it-it
it-IT
it-FR
+ ak
+ aka
+ jv
+ jw
+ no
+ no-nyn
+ nn
+ i-lux
+ lb
+ wa
yi
ji
+ den-syllabic
+ den-syllabic-western
+ den-western
+ den-latin
cre-syllabic
cre-syllabic-western
cre-western
cre-latin
+ cr-syllabic
+ cr-syllabic-western
+ cr-western
+ cr-latin
)) {
my $name = I18N::LangTags::List::name($lt);
if($name) {
@@ -75,5 +95,7 @@ foreach my $lt (qw(
}
}
+
+
print "# So there!\n";