diff options
author | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2004-06-17 09:43:48 +0000 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2004-06-17 09:43:48 +0000 |
commit | 8000a3fa7bb45bbd1016a26c76a82389badfc8ce (patch) | |
tree | 3d3f89390a913a537c00789ce75166ca84b1a2b8 | |
parent | 97e8bd194aec20f01864baa22dfcbed38daa0f40 (diff) | |
download | perl-8000a3fa7bb45bbd1016a26c76a82389badfc8ce.tar.gz |
Upgrade to I18N::LangTags 0.30.
p4raw-id: //depot/perl@22941
-rw-r--r-- | MANIFEST | 10 | ||||
-rw-r--r-- | lib/I18N/LangTags.pm | 112 | ||||
-rw-r--r-- | lib/I18N/LangTags/ChangeLog | 17 | ||||
-rw-r--r-- | lib/I18N/LangTags/Detect.pm | 229 | ||||
-rw-r--r-- | lib/I18N/LangTags/List.pm | 4 | ||||
-rw-r--r-- | lib/I18N/LangTags/t/01_about_verbose.t | 89 | ||||
-rw-r--r-- | lib/I18N/LangTags/t/05_main.t (renamed from lib/I18N/LangTags/t/01test.t) | 5 | ||||
-rw-r--r-- | lib/I18N/LangTags/t/07_listy.t (renamed from lib/I18N/LangTags/t/02decency.t) | 0 | ||||
-rw-r--r-- | lib/I18N/LangTags/t/10_http.t | 104 | ||||
-rw-r--r-- | lib/I18N/LangTags/t/50_super.t | 88 | ||||
-rw-r--r-- | lib/I18N/LangTags/t/55_supers_strict.t | 78 | ||||
-rw-r--r-- | lib/I18N/LangTags/t/80_all_env.t | 47 |
12 files changed, 759 insertions, 24 deletions
@@ -1303,10 +1303,16 @@ lib/I18N/Collate.pm Routines to do strxfrm-based collation lib/I18N/Collate.t See if I18N::Collate works lib/I18N/LangTags/ChangeLog I18N::LangTags lib/I18N/LangTags/List.pm List of tags for human languages +lib/I18N/LangTags/Detect.pm Detect language preferences lib/I18N/LangTags.pm I18N::LangTags lib/I18N/LangTags/README I18N::LangTags -lib/I18N/LangTags/t/01test.t See whether I18N::LangTags works -lib/I18N/LangTags/t/02decency.t See if I18N::LangTags::List::is_decent works +lib/I18N/LangTags/t/01_about_verbose.t See whether I18N::LangTags works +lib/I18N/LangTags/t/05_main.t See whether I18N::LangTags works +lib/I18N/LangTags/t/07_listy.t See whether I18N::LangTags works +lib/I18N/LangTags/t/10_http.t See whether I18N::LangTags works +lib/I18N/LangTags/t/50_super.t See whether I18N::LangTags works +lib/I18N/LangTags/t/55_supers_strict.t See whether I18N::LangTags works +lib/I18N/LangTags/t/80_all_env.t See whether I18N::LangTags works lib/if.pm For "use if" lib/if.t Tests for "use if" lib/importenv.pl Perl routine to get environment into variables diff --git a/lib/I18N/LangTags.pm b/lib/I18N/LangTags.pm index d64058fb1a..f141ab4bd2 100644 --- a/lib/I18N/LangTags.pm +++ b/lib/I18N/LangTags.pm @@ -1,5 +1,5 @@ -# Time-stamp: "2003-10-10 17:43:04 ADT" +# Time-stamp: "2004-03-30 18:21:55 AST" # Sean M. Burke <sburke@cpan.org> require 5.000; @@ -14,10 +14,15 @@ require Exporter; similarity_language_tag is_dialect_of locale2language_tag alternate_language_tags encode_language_tag panic_languages + implicate_supers + implicate_supers_strictly ); %EXPORT_TAGS = ('ALL' => \@EXPORT_OK); -$VERSION = "0.29"; +$VERSION = "0.30"; + +sub uniq { my %seen; return grep(!($seen{$_}++), @_); } # a util function + =head1 NAME @@ -25,16 +30,15 @@ I18N::LangTags - functions for dealing with RFC3066-style language tags =head1 SYNOPSIS - use I18N::LangTags qw(is_language_tag same_language_tag - extract_language_tags super_languages - similarity_language_tag is_dialect_of - locale2language_tag alternate_language_tags - encode_language_tag panic_languages - ); + use I18N::LangTags(); + +...or specify whichever of those functions you want to import, like so: -...or whatever of those functions you want to import. Those are -all the exportable functions -- you're free to import only some, -or none at all. By default, none are imported. If you say: + use I18N::LangTags qw(implicate_supers similarity_language_tag); + +All the exportable functions are listed below -- you're free to import +only some, or none at all. By default, none are imported. If you +say: use I18N::LangTags qw(:ALL) @@ -333,7 +337,7 @@ More importantly, you assume I<at your peril> that superordinates of $lang1 are mutually intelligible with $lang1. Consider this carefully. -=cut +=cut sub super_languages { my $lang1 = $_[0]; @@ -388,7 +392,7 @@ tags. Think REAL hard about how you use this. YOU HAVE BEEN WARNED. The output is untainted. If you don't know what tainting is, don't worry about it. -=cut +=cut sub locale2language_tag { my $lang = @@ -737,6 +741,84 @@ sub panic_languages { return grep !$seen{$_}++, @out, 'en'; } +#--------------------------------------------------------------------------- +#--------------------------------------------------------------------------- + +=item * the function implicate_supers( ...languages... ) + +This takes a list of strings (which are presumed to be language-tags; +strings that aren't, are ignored); and after each one, this function +inserts super-ordinate forms that don't already appear in the list. +The original list, plus these insertions, is returned. + +In other words, it takes this: + + pt-br de-DE en-US fr pt-br-janeiro + +and returns this: + + pt-br pt de-DE de en-US en fr pt-br-janeiro + +This function is most useful in the idiom + + implicate_supers( I18N::LangTags::Detect::detect() ); + +(See L<I18N::LangTags::Detect>.) + + +=item * the function implicate_supers_strictly( ...languages... ) + +This works like C<implicate_supers> except that the implicated +forms are added to the end of the return list. + +In other words, implicate_supers_strictly takes a list of strings +(which are presumed to be language-tags; strings that aren't, are +ignored) and after the whole given list, it inserts the super-ordinate forms +of all given tags, minus any tags that already appear in the input list. + +In other words, it takes this: + + pt-br de-DE en-US fr pt-br-janeiro + +and returns this: + + pt-br de-DE en-US fr pt-br-janeiro pt de en + +The reason this function has "_strictly" in its name is that when +you're processing an Accept-Language list according to the RFCs, if +you interpret the RFCs quite strictly, then you would use +implicate_supers_strictly, but for normal use (i.e., common-sense use, +as far as I'm concerned) you'd use implicate_supers. + +=cut + +sub implicate_supers { + my @languages = grep is_language_tag($_), @_; + my %seen_encoded; + foreach my $lang (@languages) { + $seen_encoded{ I18N::LangTags::encode_language_tag($lang) } = 1 + } + + my(@output_languages); + foreach my $lang (@languages) { + push @output_languages, $lang; + foreach my $s ( I18N::LangTags::super_languages($lang) ) { + # Note that super_languages returns the longest first. + last if $seen_encoded{ I18N::LangTags::encode_language_tag($s) }; + push @output_languages, $s; + } + } + return uniq( @output_languages ); + +} + +sub implicate_supers_strictly { + my @tags = grep is_language_tag($_), @_; + return uniq( @_, map super_languages($_), @_ ); +} + + + ########################################################################### 1; __END__ @@ -771,7 +853,7 @@ Character Sets and Languages". Value and Encoded Word Extensions: Character Sets, Languages, and Continuations". -* RFC 2482, C<ftp://ftp.isi.edu/in-notes/rfc2482.txt>, +* RFC 2482, C<ftp://ftp.isi.edu/in-notes/rfc2482.txt>, "Language Tagging in Unicode Plain Text". * Locale::Codes, in @@ -786,7 +868,7 @@ C<http://www.iana.org/assignments/language-tags> =head1 COPYRIGHT -Copyright (c) 1998-2003 Sean M. Burke. All rights reserved. +Copyright (c) 1998-2004 Sean M. Burke. All rights reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. diff --git a/lib/I18N/LangTags/ChangeLog b/lib/I18N/LangTags/ChangeLog index ec76c0c9a0..e59c637d53 100644 --- a/lib/I18N/LangTags/ChangeLog +++ b/lib/I18N/LangTags/ChangeLog @@ -1,6 +1,21 @@ Revision history for Perl module I18N::LangTags. - Time-stamp: "2003-10-10 17:07:55 ADT" + Time-stamp: "2004-03-30 21:38:00 AST" +2004-03-30 Sean M. Burke sburke@cpan.org + + * Release 0.30 + + New in I18N::LangTags : implicate_supers and + implicate_supers_strictly. + + New module: I18N::LangTags::Detect. + + Some new tests. + + Thanks to Autrijus Tang for catching some errors in my makefile! + + + 2003-10-10 Sean M. Burke sburke@cpan.org * Release 0.29 diff --git a/lib/I18N/LangTags/Detect.pm b/lib/I18N/LangTags/Detect.pm new file mode 100644 index 0000000000..9c451687a7 --- /dev/null +++ b/lib/I18N/LangTags/Detect.pm @@ -0,0 +1,229 @@ + +# Time-stamp: "2004-03-30 17:28:24 AST" + +require 5; +package I18N::LangTags::Detect; +use strict; + +use vars qw( @ISA $VERSION $MATCH_SUPERS $USING_LANGUAGE_TAGS + $USE_LITERALS $MATCH_SUPERS_TIGHTLY); + +BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } } + # define the constant 'DEBUG' at compile-time + +$VERSION = "1.01"; +@ISA = (); +use I18N::LangTags qw(alternate_language_tags locale2language_tag); + +sub uniq { my %seen; return grep(!($seen{$_}++), @_); } + +#--------------------------------------------------------------------------- +# The extent of our functional interface: + +sub detect () { return __PACKAGE__->ambient_langprefs; } + +#=========================================================================== + +sub ambient_langprefs { # always returns things untainted + my $base_class = $_[0]; + + return $base_class->http_accept_langs + if length( $ENV{'REQUEST_METHOD'} || '' ); # I'm a CGI + # it's off in its own routine because it's complicated + + # Not running as a CGI: try to puzzle out from the environment + my @languages; + + foreach my $envname (qw( LANGUAGE LC_ALL LC_MESSAGES LANG )) { + next unless $ENV{$envname}; + DEBUG and print "Noting \$$envname: $ENV{$envname}\n"; + push @languages, + map locale2language_tag($_), + # if it's a lg tag, fine, pass thru (untainted) + # if it's a locale ID, try converting to a lg tag (untainted), + # otherwise nix it. + + split m/[,:]/, + $ENV{$envname} + ; + last; # first one wins + } + + if(&_try_use('Win32::Locale')) { + # If we have that module installed... + push @languages, Win32::Locale::get_language() || '' + if defined &Win32::Locale::get_language; + } + + @languages = map {; $_, alternate_language_tags($_) } @languages; + + return uniq(@languages) if wantarray; + return $languages[0]; +} + +#--------------------------------------------------------------------------- + +sub http_accept_langs { + # Deal with HTTP "Accept-Language:" stuff. Hassle. + # This code is more lenient than RFC 3282, which you must read. + # Hm. Should I just move this into I18N::LangTags at some point? + no integer; + + my $in = (@_ > 1) ? $_[1] : $ENV{'HTTP_ACCEPT_LANGUAGE'}; + # (always ends up untainting) + + return() unless defined $in and length $in; + + $in =~ s/\([^\)]*\)//g; # nix just about any comment + + if( $in =~ m/^\s*([a-zA-Z][-a-zA-Z]+)\s*$/s ) { + # Very common case: just one language tag + return lc $1; + } elsif( $in =~ m/^\s*[a-zA-Z][-a-zA-Z]+(?:\s*,\s*[a-zA-Z][-a-zA-Z]+)*\s*$/s ) { + # Common case these days: just "foo, bar, baz" + return map lc($_), $in =~ m/([a-zA-Z][-a-zA-Z]+)/g; + } + + # Else it's complicated... + + $in =~ s/\s+//g; # Yes, we can just do without the WS! + my @in = $in =~ m/([^,]+)/g; + my %pref; + + my $q; + foreach my $tag (@in) { + next unless $tag =~ + m/^([a-zA-Z][-a-zA-Z]+) + (?: + ;q= + ( + \d* # a bit too broad of a RE, but so what. + (?: + \.\d+ + )? + ) + )? + $ + /sx + ; + $q = (defined $2 and length $2) ? $2 : 1; + #print "$1 with q=$q\n"; + push @{ $pref{$q} }, lc $1; + } + + return # Read off %pref, in descending key order... + map @{$pref{$_}}, + sort {$b <=> $a} + keys %pref; +} + +#=========================================================================== + +my %tried = (); + # memoization of whether we've used this module, or found it unusable. + +sub _try_use { # Basically a wrapper around "require Modulename" + # "Many men have tried..." "They tried and failed?" "They tried and died." + return $tried{$_[0]} if exists $tried{$_[0]}; # memoization + + my $module = $_[0]; # ASSUME sane module name! + { no strict 'refs'; + return($tried{$module} = 1) + if defined(%{$module . "::Lexicon"}) or defined(@{$module . "::ISA"}); + # weird case: we never use'd it, but there it is! + } + + print " About to use $module ...\n" if DEBUG; + { + local $SIG{'__DIE__'}; + eval "require $module"; # used to be "use $module", but no point in that. + } + if($@) { + print "Error using $module \: $@\n" if DEBUG > 1; + return $tried{$module} = 0; + } else { + print " OK, $module is used\n" if DEBUG; + return $tried{$module} = 1; + } +} + +#--------------------------------------------------------------------------- +1; +__END__ + + +=head1 NAME + +I18N::LangTags::Detect - detect the user's language preferences + +=head1 SYNOPSIS + + use I18N::LangTags::Detect; + my @user_wants = I18N::LangTags::Detect::detect(); + +=head1 DESCRIPTION + +It is a common problem to want to detect what language(s) the user would +prefer output in. + +=head1 FUNCTIONS + +This module defines one public function, +C<I18N::LangTags::Detect::detect()>. This function is not exported +(nor is even exportable), and it takes no parameters. + +In scalar context, the function returns the most preferred language +tag (or undef if no preference was seen). + +In list context (which is usually what you want), +the function returns a +(possibly empty) list of language tags representing (best first) what +languages the user apparently would accept output in. You will +probably want to pass the output of this through +C<I18N::LangTags::implicate_supers_tightly(...)> +or +C<I18N::LangTags::implicate_supers(...)>, like so: + + my @languages = + I18N::LangTags::implicate_supers_tightly( + I18N::LangTags::Detect::detect() + ); + + +=head1 ENVIRONMENT + +This module looks for several environment variables, including +REQUEST_METHOD, HTTP_ACCEPT_LANGUAGE, +LANGUAGE, LC_ALL, LC_MESSAGES, and LANG. + +It will also use the L<Win32::Locale> module, if it's installed. + + +=head1 SEE ALSO + +L<I18N::LangTags>, L<Win32::Locale>, L<Locale::Maketext>. + +(This module's core code started out as a routine in Locale::Maketext; +but I moved it here once I realized it was more generally useful.) + + +=head1 COPYRIGHT + +Copyright (c) 1998-2004 Sean M. Burke. All rights reserved. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +The programs and documentation in this dist are distributed in +the hope that they will be useful, but without any warranty; without +even the implied warranty of merchantability or fitness for a +particular purpose. + + +=head1 AUTHOR + +Sean M. Burke C<sburke@cpan.org> + +=cut + +# a tip: Put a bit of chopped up pickled ginger in your salad. It's tasty! diff --git a/lib/I18N/LangTags/List.pm b/lib/I18N/LangTags/List.pm index 37ded04eef..ca2f05923b 100644 --- a/lib/I18N/LangTags/List.pm +++ b/lib/I18N/LangTags/List.pm @@ -136,7 +136,7 @@ prints: =head1 DESCRIPTION -This module provides a function +This module provides a function C<I18N::LangTags::List::name( I<langtag> ) > that takes a language tag (see L<I18N::LangTags|I18N::LangTags>) and returns the best attempt at an English name for it, or @@ -162,7 +162,7 @@ Internet language tags, as defined in RFC 3066, are a formalism for denoting human languages. The two-letter ISO 639-1 language codes are well known (as "en" for English), as are their forms when qualified by a country code ("en-US"). Less well-known are the -arbitrary-length non-ISO codes (like "i-mingo"), and the +arbitrary-length non-ISO codes (like "i-mingo"), and the recently (in 2001) introduced three-letter ISO-639-2 codes. Remember these important facts: diff --git a/lib/I18N/LangTags/t/01_about_verbose.t b/lib/I18N/LangTags/t/01_about_verbose.t new file mode 100644 index 0000000000..3abc68d537 --- /dev/null +++ b/lib/I18N/LangTags/t/01_about_verbose.t @@ -0,0 +1,89 @@ + +require 5; +# Time-stamp: "2004-03-30 17:02:59 AST" + +# Summary of, well, things. + +use Test; +BEGIN {plan tests => 2}; + +ok 1; + +use I18N::LangTags; +use I18N::LangTags::List; +use I18N::LangTags::Detect; + +#chdir "t" if -e "t"; + +{ + my @out; + push @out, + "\n\nPerl v", + defined($^V) ? sprintf('%vd', $^V) : $], + " under $^O ", + (defined(&Win32::BuildNumber) and defined &Win32::BuildNumber()) + ? ("(Win32::BuildNumber ", &Win32::BuildNumber(), ")") : (), + (defined $MacPerl::Version) + ? ("(MacPerl version $MacPerl::Version)") : (), + "\n" + ; + + # Ugly code to walk the symbol tables: + my %v; + my @stack = (''); # start out in %:: + my $this; + my $count = 0; + my $pref; + while(@stack) { + $this = shift @stack; + die "Too many packages?" if ++$count > 1000; + next if exists $v{$this}; + next if $this eq 'main'; # %main:: is %:: + + #print "Peeking at $this => ${$this . '::VERSION'}\n"; + + if(defined ${$this . '::VERSION'} ) { + $v{$this} = ${$this . '::VERSION'} + } elsif( + defined *{$this . '::ISA'} or defined &{$this . '::import'} + or ($this ne '' and grep defined *{$_}{'CODE'}, values %{$this . "::"}) + # If it has an ISA, an import, or any subs... + ) { + # It's a class/module with no version. + $v{$this} = undef; + } else { + # It's probably an unpopulated package. + ## $v{$this} = '...'; + } + + $pref = length($this) ? "$this\::" : ''; + push @stack, map m/^(.+)::$/ ? "$pref$1" : (), keys %{$this . '::'}; + #print "Stack: @stack\n"; + } + push @out, " Modules in memory:\n"; + delete @v{'', '[none]'}; + foreach my $p (sort {lc($a) cmp lc($b)} keys %v) { + $indent = ' ' x (2 + ($p =~ tr/:/:/)); + push @out, ' ', $indent, $p, defined($v{$p}) ? " v$v{$p};\n" : ";\n"; + } + push @out, sprintf "[at %s (local) / %s (GMT)]\n", + scalar(gmtime), scalar(localtime); + my $x = join '', @out; + $x =~ s/^/#/mg; + print $x; +} + +print "# Running", + (chr(65) eq 'A') ? " in an ASCII world.\n" : " in a non-ASCII world.\n", + "#\n", +; + +print "# \@INC:\n", map("# [$_]\n", @INC), "#\n#\n"; + +print "# \%INC:\n"; +foreach my $x (sort {lc($a) cmp lc($b)} keys %INC) { + print "# [$x] = [", $INC{$x} || '', "]\n"; +} + +ok 1; + diff --git a/lib/I18N/LangTags/t/01test.t b/lib/I18N/LangTags/t/05_main.t index 86e251778f..056baafc55 100644 --- a/lib/I18N/LangTags/t/01test.t +++ b/lib/I18N/LangTags/t/05_main.t @@ -1,9 +1,6 @@ -# Before `make install' is performed this script should be runnable with -# `make test'. After `make install' it should work as `perl test.pl' -######################### We start with some black magic to print on failure. require 5; - # Time-stamp: "2003-07-20 07:36:49 ADT" + # Time-stamp: "2004-03-30 17:52:14 AST" use strict; use Test; BEGIN { plan tests => 64 }; diff --git a/lib/I18N/LangTags/t/02decency.t b/lib/I18N/LangTags/t/07_listy.t index a56a798a33..a56a798a33 100644 --- a/lib/I18N/LangTags/t/02decency.t +++ b/lib/I18N/LangTags/t/07_listy.t diff --git a/lib/I18N/LangTags/t/10_http.t b/lib/I18N/LangTags/t/10_http.t new file mode 100644 index 0000000000..377056b319 --- /dev/null +++ b/lib/I18N/LangTags/t/10_http.t @@ -0,0 +1,104 @@ + +# Time-stamp: "2004-03-30 16:59:14 AST" + +use I18N::LangTags::Detect; + +use Test; +BEGIN { plan tests => 87 }; + +my @in = grep m/\S/, split /\n/, q{ + +[ sv ] sv +[ en ] en +[ en fi ] en, fi +[ en-us ] en-us +[ en-us ] en-US +[ en-us ] EN-US + +[ en-au en i-klingon en-gb en-us mt-mt mt ja ] EN-au, JA;q=0.14, i-klingon;q=0.83, en-gb;q=0.71, en-us;q=0.57, mt-mt;q=0.43, mt;q=0.29, en;q=0.86 +[ en-au en i-klingon en-gb en-us mt-mt mt tli ja ] EN-au, tli;q=0.201, JA;q=0.14, i-klingon;q=0.83, en-gb;q=0.71, en-us;q=0.57, mt-mt;q=0.43, mt;q=0.29, en;q=0.86 +[ en-au en en-gb en-us ja ] en-au, ja;q=0.20, en-gb;q=0.60, en-us;q=0.40, en;q=0.80 + +[ en-au en en-gb en-us mt-mt mt ja ] EN-au, JA;q=0.14, en-gb;q=0.71, en-us;q=0.57, mt-mt;q=0.43, mt;q=0.29, en;q=0.86 +[ en-au en en-gb en-us ja ] en-au, ja;q=0.20, en-gb;q=0.60, en-us;q=0.40, en;q=0.80 +[ en fr ] en;q=1,fr;q=.5 +[ en fr ] en;q=1,fr;q=.99 +[ en ru ko ] en, ru;q=0.7, ko;q=0.3 +[ en ru ko ] en, ru;q=0.7, KO;q=0.3 +[ en-us en ] en-us, en;q=0.50 +[ en fr ] fr ; q = 0.9, en +[ en fr ] en,fr;q=.90 +[ ru en-uk en fr ] ru, en-UK;q=0.5, en;q=0.3, fr;q=0.1 +[ en-us fr es-mx ] en-us,fr;q=0.7,es-mx;q=0.3 +[ en-us en ] en-us, en;q=0.50 + +[ da en-gb en ] da, en-gb;q=0.8, en;q=0.7 +[ da en-gb en ] da, en;q=0.7, en-gb;q=0.8 +[ da en-gb en ] da, en-gb;q=0.8, en;q=0.7 +[ da en-gb en ] da,en;q=0.7,en-gb;q=0.8 +[ da en-gb en ] da, en-gb ; q=0.8, en ; q=0.7 +[ da en-gb en ] da , en-gb ; q = 0.8 , en ; q =0.7 +[ da en-gb en ] da (yup, Danish) , en-gb ; q = 0.8 , en ; q =0.7 + +[ no dk en-uk en-us ] en-UK;q=0.7, en-US;q=0.6, no;q=1.0, dk;q=0.8 +[ no dk en-uk en-us ] en-US;q=0.6, en-UK;q=0.7, no;q=1.0, dk;q=0.8 +[ no dk en-uk en-us ] en-UK;q=0.7, no;q=1.0, en-US;q=0.6, dk;q=0.8 +[ no dk en-uk en-us ] en-UK;q=0.7, no;q=1.0, dk;q=0.8, en-US;q=0.6 + +[ fi en ] fi;q=1, en;q=0.2 +[ de-de de en en-us en-gb ] de-DE, de;q=0.80, en;q=0.60, en-US;q=0.40, en-GB;q=0.20 +[ ru ] ru; q=1, *; q=0.1 +[ ru en ] ru, en; q=0.1 +[ ja en ] ja,en;q=0.5 +[ en ] en; q=1.0 +[ ja ] ja; q=1.0 +[ ja ] ja; q=1.0 +[ en ja ] en; q=0.5, ja; q=0.5 +[ fr-ca fr en ] fr-ca, fr;q=0.8, en;q=0.7 +[ NIX ] NIX +}; + +foreach my $in (@in) { + $in =~ s/^\s*\[([^\]]+)\]\s*//s or die "Bad input: $in"; + my @should = do { my $x = $1; $x =~ m/(\S+)/g }; + + if($in eq 'NIX') { $in = ''; @should = (); } + + local $ENV{'HTTP_ACCEPT_LANGUAGE'}; + + foreach my $modus ( + sub { + print "# Testing with arg...\n"; + $ENV{'HTTP_ACCEPT_LANGUAGE'} = 'PLORK'; + return $_[0]; + }, + sub { + print "# Testing wath HTTP_ACCEPT_LANGUAGE...\n"; + $ENV{'HTTP_ACCEPT_LANGUAGE'} = $_[0]; + return(); + }, + ) { + my @args = &$modus($in); + + # //////////////////////////////////////////////////// + my @out = I18N::LangTags::Detect->http_accept_langs(@args); + # \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ + + if( + @out == @should + and lc( join "\e", @out ) eq lc( join "\e", @should ) + ) { + print "# Happily got [@out] from [$in]\n"; + ok 1; + } else { + ok 0; + print "#Got: [@out]\n", + "# but wanted: [@should]\n", + "# < \"$in\"\n#\n"; + } + } +} + +print "#\n#\n# Bye-bye!\n"; +ok 1; + diff --git a/lib/I18N/LangTags/t/50_super.t b/lib/I18N/LangTags/t/50_super.t new file mode 100644 index 0000000000..9923c84068 --- /dev/null +++ b/lib/I18N/LangTags/t/50_super.t @@ -0,0 +1,88 @@ + +# Time-stamp: "2004-03-30 17:46:17 AST" + +use Test; +BEGIN { plan tests => 26 }; +print "#\n# Testing normal (tight) insertion of super-ordinate language tags...\n#\n"; + +use I18N::LangTags qw(implicate_supers); + +my @in = grep m/\S/, split /[\n\r]/, q{ + NIX => NIX + sv => sv + en => en + hai => hai + + pt-br => pt-br pt + pt-br fr => pt-br pt fr + pt-br fr pt => pt-br fr pt + + pt-br fr pt de => pt-br fr pt de + de pt-br fr pt => de pt-br fr pt + de pt-br fr => de pt-br pt fr + hai pt-br fr => hai pt-br pt fr + + # Now test multi-part complicateds: + pt-br-janeiro => pt-br-janeiro pt-br pt + pt-br-janeiro fr => pt-br-janeiro pt-br pt fr + pt-br-janeiro de fr => pt-br-janeiro pt-br pt de fr + pt-br-janeiro de pt fr => pt-br-janeiro pt-br de pt fr + + pt-br-janeiro pt-br-saopaolo => pt-br-janeiro pt-br pt pt-br-saopaolo + pt-br-janeiro fr pt-br-saopaolo => pt-br-janeiro pt-br pt fr pt-br-saopaolo + pt-br-janeiro de pt-br-saopaolo fr => pt-br-janeiro pt-br pt de pt-br-saopaolo fr + pt-br-janeiro de pt-br fr pt-br-saopaolo => pt-br-janeiro de pt-br pt fr pt-br-saopaolo + + pt-br de en fr pt-br-janeiro => pt-br pt de en fr pt-br-janeiro + pt-br de en fr => pt-br pt de en fr + + ja pt-br-janeiro fr => ja pt-br-janeiro pt-br pt fr + ja pt-br-janeiro de fr => ja pt-br-janeiro pt-br pt de fr + ja pt-br-janeiro de pt fr => ja pt-br-janeiro pt-br de pt fr + + pt-br-janeiro de pt-br fr => pt-br-janeiro de pt-br pt fr +# an odd case, since we don't filter for uniqueness in this sub + +}; + +sub uniq { my %seen; return grep(!($seen{$_}++), @_); } + +foreach my $in (@in) { + $in =~ s/^\s+//s; + $in =~ s/\s+$//s; + $in =~ s/#.+//s; + next unless $in =~ m/\S/; + + my(@in, @should); + { + die "What kind of line is <$in>?!" + unless $in =~ m/^(.+)=>(.+)$/s; + + my($i,$s) = ($1, $2); + @in = ($i =~ m/(\S+)/g); + @should = ($s =~ m/(\S+)/g); + #print "{@in}{@should}\n"; + } + my @out = implicate_supers( + ("@in" eq 'NIX') ? () : @in + ); + #print "O: ", join(' ', map "<$_>", @out), "\n"; + @out = 'NIX' unless @out; + + + if( @out == @should + and lc( join "\e", @out ) eq lc( join "\e", @should ) + ) { + print "# Happily got [@out] from [$in]\n"; + ok 1; + } else { + ok 0; + print "#!!Got: [@out]\n", + "#!! but wanted: [@should]\n", + "#!! from \"$in\"\n#\n"; + } +} + +print "#\n#\n# Bye-bye!\n"; +ok 1; + diff --git a/lib/I18N/LangTags/t/55_supers_strict.t b/lib/I18N/LangTags/t/55_supers_strict.t new file mode 100644 index 0000000000..3b285157a1 --- /dev/null +++ b/lib/I18N/LangTags/t/55_supers_strict.t @@ -0,0 +1,78 @@ + +# Time-stamp: "2004-03-30 17:49:58 AST" +#sub I18N::LangTags::Detect::DEBUG () {10} +use I18N::LangTags qw(implicate_supers_strictly); + +use Test; +BEGIN { plan tests => 19 }; + +print "#\n# Testing strict (non-tight) insertion of super-ordinate language tags...\n#\n"; + +my @in = grep m/\S/, split /[\n\r]/, q{ + NIX => NIX + sv => sv + en => en + hai => hai + + pt-br => pt-br pt + pt-br fr => pt-br fr pt + pt-br fr pt => pt-br fr pt + pt-br fr pt de => pt-br fr pt de + de pt-br fr pt => de pt-br fr pt + de pt-br fr => de pt-br fr pt + hai pt-br fr => hai pt-br fr pt + +# Now test multi-part complicateds: + pt-br-janeiro fr => pt-br-janeiro fr pt-br pt +pt-br-janeiro de fr => pt-br-janeiro de fr pt-br pt +pt-br-janeiro de pt fr => pt-br-janeiro de pt fr pt-br + +ja pt-br-janeiro fr => ja pt-br-janeiro fr pt-br pt +ja pt-br-janeiro de fr => ja pt-br-janeiro de fr pt-br pt +ja pt-br-janeiro de pt fr => ja pt-br-janeiro de pt fr pt-br + +pt-br-janeiro de pt-br fr => pt-br-janeiro de pt-br fr pt + # an odd case, since we don't filter for uniqueness in this sub + +}; + + +foreach my $in (@in) { + $in =~ s/^\s+//s; + $in =~ s/\s+$//s; + $in =~ s/#.+//s; + next unless $in =~ m/\S/; + + my(@in, @should); + { + die "What kind of line is <$in>?!" + unless $in =~ m/^(.+)=>(.+)$/s; + + my($i,$s) = ($1, $2); + @in = ($i =~ m/(\S+)/g); + @should = ($s =~ m/(\S+)/g); + #print "{@in}{@should}\n"; + } + my @out = I18N::LangTags::implicate_supers_strictly( + ("@in" eq 'NIX') ? () : @in + ); + #print "O: ", join(' ', map "<$_>", @out), "\n"; + @out = 'NIX' unless @out; + + + if( @out == @should + and lc( join "\e", @out ) eq lc( join "\e", @should ) + ) { + print "# Happily got [@out] from [$in]\n"; + ok 1; + } else { + ok 0; + print "#!!Got: [@out]\n", + "#!! but wanted: [@should]\n", + "#!! from \"$in\"\n#\n"; + } +} + +print "#\n#\n# Bye-bye!\n"; +ok 1; + diff --git a/lib/I18N/LangTags/t/80_all_env.t b/lib/I18N/LangTags/t/80_all_env.t new file mode 100644 index 0000000000..e93a6f5575 --- /dev/null +++ b/lib/I18N/LangTags/t/80_all_env.t @@ -0,0 +1,47 @@ + +require 5; +use Test; +# Time-stamp: "2004-03-30 17:51:06 AST" +BEGIN { plan tests => 9; } +use I18N::LangTags::Detect 1.01; +print "# Hi there...\n"; +ok 1; + +print "# Make sure we can assign to ENV entries\n", + "# (Otherwise we can't run the subsequent tests)...\n"; +$ENV{'MYORP'} = 'Zing'; ok $ENV{'MYORP'}, 'Zing'; +$ENV{'SWUZ'} = 'KLORTHO HOOBOY'; ok $ENV{'SWUZ'}, 'KLORTHO HOOBOY'; + +delete $ENV{'MYORP'}; +delete $ENV{'SWUZ'}; + +sub show { print "# (Seeing [@_] at line ", (caller)[2], ")\n"; return @_ } + +print "# Test LANG...\n"; +$ENV{'REQUEST_METHOD'} = ''; +$ENV{'LANG'} = 'Eu_MT'; +$ENV{'LANGUAGE'} = ''; +ok show I18N::LangTags::Detect::detect(); + +print "# Test LANGUAGE...\n"; +$ENV{'LANG'} = ''; +$ENV{'LANGUAGE'} = 'Eu-MT'; +ok show I18N::LangTags::Detect::detect(); + + +print "# Test HTTP_ACCEPT_LANGUAGE...\n"; +$ENV{'REQUEST_METHOD'} = 'GET'; +$ENV{'HTTP_ACCEPT_LANGUAGE'} = 'eu-MT'; +ok show I18N::LangTags::Detect::detect(); + +$ENV{'HTTP_ACCEPT_LANGUAGE'} = 'x-plorp, zaz, eu-MT, i-klung'; +ok show I18N::LangTags::Detect::detect(); + +$ENV{'HTTP_ACCEPT_LANGUAGE'} = 'x-plorp, zaz, eU-Mt, i-klung'; +ok show I18N::LangTags::Detect::detect(); + + + +print "# Byebye!\n"; +ok 1; + |