diff options
Diffstat (limited to 'lib/I18N')
-rw-r--r-- | lib/I18N/LangTags.pm | 10 | ||||
-rw-r--r-- | lib/I18N/LangTags/ChangeLog | 23 | ||||
-rw-r--r-- | lib/I18N/LangTags/List.pm | 63 | ||||
-rw-r--r-- | lib/I18N/LangTags/README | 2 |
4 files changed, 82 insertions, 16 deletions
diff --git a/lib/I18N/LangTags.pm b/lib/I18N/LangTags.pm index 5fa5692cf2..d64058fb1a 100644 --- a/lib/I18N/LangTags.pm +++ b/lib/I18N/LangTags.pm @@ -1,5 +1,5 @@ -# Time-stamp: "2003-07-20 07:44:42 ADT" +# Time-stamp: "2003-10-10 17:43:04 ADT" # Sean M. Burke <sburke@cpan.org> require 5.000; @@ -17,7 +17,7 @@ require Exporter; ); %EXPORT_TAGS = ('ALL' => \@EXPORT_OK); -$VERSION = "0.28"; +$VERSION = "0.29"; =head1 NAME @@ -333,7 +333,7 @@ More importantly, you assume I<at your peril> that superordinates of $lang1 are mutually intelligible with $lang1. Consider this carefully. -=cut +=cut sub super_languages { my $lang1 = $_[0]; @@ -388,7 +388,7 @@ tags. Think REAL hard about how you use this. YOU HAVE BEEN WARNED. The output is untainted. If you don't know what tainting is, don't worry about it. -=cut +=cut sub locale2language_tag { my $lang = @@ -771,7 +771,7 @@ Character Sets and Languages". Value and Encoded Word Extensions: Character Sets, Languages, and Continuations". -* RFC 2482, C<ftp://ftp.isi.edu/in-notes/rfc2482.txt>, +* RFC 2482, C<ftp://ftp.isi.edu/in-notes/rfc2482.txt>, "Language Tagging in Unicode Plain Text". * Locale::Codes, in diff --git a/lib/I18N/LangTags/ChangeLog b/lib/I18N/LangTags/ChangeLog index f3608f7125..ec76c0c9a0 100644 --- a/lib/I18N/LangTags/ChangeLog +++ b/lib/I18N/LangTags/ChangeLog @@ -1,6 +1,27 @@ Revision history for Perl module I18N::LangTags. - Time-stamp: "2002-02-02 20:45:47 MST" + Time-stamp: "2003-10-10 17:07:55 ADT" +2003-10-10 Sean M. Burke sburke@cpan.org + + * Release 0.29 + + Minor bugfix to I18N::LangTags::List code. Addition of the + is_decent function, and the 02decency.t test for it. + + Better Makefile. Thanks to everyone who told me about the + INSTALLDIRS trick. + + + +2003-07-20 Sean M. Burke sburke@cpan.org + + * Release 0.28 + Doc fixes in I18N::LangTags, plus a few added variances (jw/jv, + cre/cr, etc.) + Lots of updates to I18N::LangTags::List + Deleted rfc3066.txt from dist. + Moved test.pl to t/01test.t and added more tests. + 2002-02-02 Sean M. Burke sburke@cpan.org * Release 0.27 -- minor mods to ::List: diff --git a/lib/I18N/LangTags/List.pm b/lib/I18N/LangTags/List.pm index ebabbf5e39..37ded04eef 100644 --- a/lib/I18N/LangTags/List.pm +++ b/lib/I18N/LangTags/List.pm @@ -1,10 +1,10 @@ require 5; package I18N::LangTags::List; -# Time-stamp: "2003-07-20 07:31:08 ADT" +# Time-stamp: "2003-10-10 17:39:45 ADT" use strict; -use vars qw(%Name $Debug $VERSION); -$VERSION = '0.26'; +use vars qw(%Name %Is_Disrec $Debug $VERSION); +$VERSION = '0.29'; # POD at the end. #---------------------------------------------------------------------- @@ -12,21 +12,23 @@ $VERSION = '0.26'; # read the table out of our own POD! my $seeking = 1; my $count = 0; - my($tag,$name); + my($disrec,$tag,$name); my $last_name = ''; while(<I18N::LangTags::List::DATA>) { if($seeking) { $seeking = 0 if m/=for woohah/; - } elsif( ($tag, $name) = - m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/ + } elsif( ($disrec, $tag, $name) = + m/(\[?)\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/ ) { $name =~ s/\s*[;\.]*\s*$//g; next unless $name; ++$count; print "<$tag> <$name>\n" if $Debug; $last_name = $Name{$tag} = $name; - } elsif (m/Formerly \"([-a-z0-9]+)\"/) { + $Is_Disrec{$tag} = 1 if $disrec; + } elsif (m/[Ff]ormerly \"([-a-z0-9]+)\"/) { $Name{$1} = "$last_name (old tag)" if $last_name; + $Is_Disrec{$1} = 1; } } die "No tags read??" unless $count; @@ -74,6 +76,42 @@ sub name { return "$name (Subform \"$subform\")"; } +#-------------------------------------------------------------------------- + +sub is_decent { + my $tag = lc($_[0] || return 0); + #require I18N::LangTags; + + return 0 unless + $tag =~ + /^(?: # First subtag + [xi] | [a-z]{2,3} + ) + (?: # Subtags thereafter + - # separator + [a-z0-9]{1,8} # subtag + )* + $/xs; + + my @supers = (); + foreach my $bit (split('-', $tag)) { + push @supers, + scalar(@supers) ? ($supers[-1] . '-' . $bit) : $bit; + } + return 0 unless @supers; + shift @supers if $supers[0] =~ m<^(i|x|sgn)$>s; + return 0 unless @supers; + + foreach my $f ($tag, @supers) { + return 0 if $Is_Disrec{$f}; + return 2 if $Name{$f}; + # so that decent subforms of indecent tags are decent + } + return 2 if $Name{$tag}; # not only is it decent, it's known! + return 1; +} + +#-------------------------------------------------------------------------- 1; __DATA__ @@ -98,7 +136,7 @@ prints: =head1 DESCRIPTION -This module provides a function +This module provides a function C<I18N::LangTags::List::name( I<langtag> ) > that takes a language tag (see L<I18N::LangTags|I18N::LangTags>) and returns the best attempt at an English name for it, or @@ -106,6 +144,13 @@ undef if it can't make sense of the tag. The function I18N::LangTags::List::name(...) is not exported. +This module also provides a function +C<I18N::LangTags::List::is_decent( I<langtag> )> that returns true iff +the language tag is syntactically valid and is for general use (like +"fr" or "fr-ca", below). That is, it returns false for tags that are +syntactically invalid and for tags, like "aus", that are listed in +brackets below. This function is not exported. + The map of tags-to-names that it uses is accessable as %I18N::LangTags::List::Name, and it's the same as the list that follows in this documentation, which should be useful @@ -117,7 +162,7 @@ Internet language tags, as defined in RFC 3066, are a formalism for denoting human languages. The two-letter ISO 639-1 language codes are well known (as "en" for English), as are their forms when qualified by a country code ("en-US"). Less well-known are the -arbitrary-length non-ISO codes (like "i-mingo"), and the +arbitrary-length non-ISO codes (like "i-mingo"), and the recently (in 2001) introduced three-letter ISO-639-2 codes. Remember these important facts: diff --git a/lib/I18N/LangTags/README b/lib/I18N/LangTags/README index fbae05f43d..2ac6053016 100644 --- a/lib/I18N/LangTags/README +++ b/lib/I18N/LangTags/README @@ -61,7 +61,7 @@ AVAILABILITY The latest version of I18N::LangTags is available from the Comprehensive Perl Archive Network (CPAN). Visit -<http://www.cpan.org/> to find a CPAN site near you. +<http://www.perl.com/CPAN/> to find a CPAN site near you. COPYRIGHT |