summaryrefslogtreecommitdiff
path: root/t/pragma/locale.t
diff options
context:
space:
mode:
Diffstat (limited to 't/pragma/locale.t')
-rwxr-xr-xt/pragma/locale.t454
1 files changed, 454 insertions, 0 deletions
diff --git a/t/pragma/locale.t b/t/pragma/locale.t
new file mode 100755
index 0000000000..d4b73b8f91
--- /dev/null
+++ b/t/pragma/locale.t
@@ -0,0 +1,454 @@
+#!./perl -wT
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+ require Config; import Config;
+ if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) {
+ print "1..0\n";
+ exit;
+ }
+}
+
+use strict;
+
+my $have_setlocale = 0;
+eval {
+ require POSIX;
+ import POSIX ':locale_h';
+ $have_setlocale++;
+};
+
+print "1..", ($have_setlocale ? 102 : 98), "\n";
+
+use vars qw($a
+ $English $German $French $Spanish
+ @C @English @German @French @Spanish
+ $Locale @Locale %iLocale %UPPER %lower @Neoalpha);
+
+$a = 'abc %';
+
+sub ok {
+ my ($n, $result) = @_;
+
+ print 'not ' unless ($result);
+ print "ok $n\n";
+}
+
+# First we'll do a lot of taint checking for locales.
+# This is the easiest to test, actually, as any locale,
+# even the default locale will taint under 'use locale'.
+
+sub is_tainted { # hello, camel two.
+ local $^W; # no warnings 'undef'
+ my $dummy;
+ not eval { $dummy = join("", @_), kill 0; 1 }
+}
+
+sub check_taint ($$) {
+ ok $_[0], is_tainted($_[1]);
+}
+
+sub check_taint_not ($$) {
+ ok $_[0], not is_tainted($_[1]);
+}
+
+use locale; # engage locale and therefore locale taint.
+
+check_taint_not 1, $a;
+
+check_taint 2, uc($a);
+check_taint 3, "\U$a";
+check_taint 4, ucfirst($a);
+check_taint 5, "\u$a";
+check_taint 6, lc($a);
+check_taint 7, "\L$a";
+check_taint 8, lcfirst($a);
+check_taint 9, "\l$a";
+
+check_taint 10, sprintf('%e', 123.456);
+check_taint 11, sprintf('%f', 123.456);
+check_taint 12, sprintf('%g', 123.456);
+check_taint_not 13, sprintf('%d', 123.456);
+check_taint_not 14, sprintf('%x', 123.456);
+
+$_ = $a; # untaint $_
+
+$_ = uc($a); # taint $_
+
+check_taint 15, $_;
+
+/(\w)/; # taint $&, $`, $', $+, $1.
+check_taint 16, $&;
+check_taint 17, $`;
+check_taint 18, $';
+check_taint 19, $+;
+check_taint 20, $1;
+check_taint_not 21, $2;
+
+/(.)/; # untaint $&, $`, $', $+, $1.
+check_taint_not 22, $&;
+check_taint_not 23, $`;
+check_taint_not 24, $';
+check_taint_not 25, $+;
+check_taint_not 26, $1;
+check_taint_not 27, $2;
+
+/(\W)/; # taint $&, $`, $', $+, $1.
+check_taint 28, $&;
+check_taint 29, $`;
+check_taint 30, $';
+check_taint 31, $+;
+check_taint 32, $1;
+check_taint_not 33, $2;
+
+/(\s)/; # taint $&, $`, $', $+, $1.
+check_taint 34, $&;
+check_taint 35, $`;
+check_taint 36, $';
+check_taint 37, $+;
+check_taint 38, $1;
+check_taint_not 39, $2;
+
+/(\S)/; # taint $&, $`, $', $+, $1.
+check_taint 40, $&;
+check_taint 41, $`;
+check_taint 42, $';
+check_taint 43, $+;
+check_taint 44, $1;
+check_taint_not 45, $2;
+
+$_ = $a; # untaint $_
+
+check_taint_not 46, $_;
+
+/(b)/; # this must not taint
+check_taint_not 47, $&;
+check_taint_not 48, $`;
+check_taint_not 49, $';
+check_taint_not 50, $+;
+check_taint_not 51, $1;
+check_taint_not 52, $2;
+
+$_ = $a; # untaint $_
+
+check_taint_not 53, $_;
+
+$b = uc($a); # taint $b
+s/(.+)/$b/; # this must taint only the $_
+
+check_taint 54, $_;
+check_taint_not 55, $&;
+check_taint_not 56, $`;
+check_taint_not 57, $';
+check_taint_not 58, $+;
+check_taint_not 59, $1;
+check_taint_not 60, $2;
+
+$_ = $a; # untaint $_
+
+s/(.+)/b/; # this must not taint
+check_taint_not 61, $_;
+check_taint_not 62, $&;
+check_taint_not 63, $`;
+check_taint_not 64, $';
+check_taint_not 65, $+;
+check_taint_not 66, $1;
+check_taint_not 67, $2;
+
+$b = $a; # untaint $b
+
+($b = $a) =~ s/\w/$&/;
+check_taint 68, $b; # $b should be tainted.
+check_taint_not 69, $a; # $a should be not.
+
+$_ = $a; # untaint $_
+
+s/(\w)/\l$1/; # this must taint
+check_taint 70, $_;
+check_taint 71, $&;
+check_taint 72, $`;
+check_taint 73, $';
+check_taint 74, $+;
+check_taint 75, $1;
+check_taint_not 76, $2;
+
+$_ = $a; # untaint $_
+
+s/(\w)/\L$1/; # this must taint
+check_taint 77, $_;
+check_taint 78, $&;
+check_taint 79, $`;
+check_taint 80, $';
+check_taint 81, $+;
+check_taint 82, $1;
+check_taint_not 83, $2;
+
+$_ = $a; # untaint $_
+
+s/(\w)/\u$1/; # this must taint
+check_taint 84, $_;
+check_taint 85, $&;
+check_taint 86, $`;
+check_taint 87, $';
+check_taint 88, $+;
+check_taint 89, $1;
+check_taint_not 90, $2;
+
+$_ = $a; # untaint $_
+
+s/(\w)/\U$1/; # this must taint
+check_taint 91, $_;
+check_taint 92, $&;
+check_taint 93, $`;
+check_taint 94, $';
+check_taint 95, $+;
+check_taint 96, $1;
+check_taint_not 97, $2;
+
+# After all this tainting $a should be cool.
+
+check_taint_not 98, $a;
+
+# I think we've seen quite enough of taint.
+# Let us do some *real* locale work now,
+# unless setlocale() is missing (i.e. minitest).
+
+exit unless $have_setlocale;
+
+sub getalnum {
+ sort grep /\w/, map { chr } 0..255
+}
+
+sub locatelocale ($$@) {
+ my ($lcall, $alnum, @try) = @_;
+
+ undef $$lcall;
+
+ for (@try) {
+ local $^W = 0; # suppress "Subroutine LC_ALL redefined"
+ if (setlocale(&LC_ALL, $_)) {
+ $$lcall = $_;
+ @$alnum = &getalnum;
+ last;
+ }
+ }
+
+ @$alnum = () unless (defined $$lcall);
+}
+
+# Find some default locale
+
+locatelocale(\$Locale, \@Locale, qw(C POSIX));
+
+# Find some English locale
+
+locatelocale(\$English, \@English,
+ qw(en_US.ISO8859-1 en_GB.ISO8859-1
+ en en_US en_UK en_IE en_CA en_AU en_NZ
+ english english.iso88591
+ american american.iso88591
+ british british.iso88591
+ ));
+
+# Find some German locale
+
+locatelocale(\$German, \@German,
+ qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1
+ de de_DE de_AT de_CH
+ german german.iso88591));
+
+# Find some French locale
+
+locatelocale(\$French, \@French,
+ qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1
+ fr fr_FR fr_BE fr_CA fr_CH
+ french french.iso88591));
+
+# Find some Spanish locale
+
+locatelocale(\$Spanish, \@Spanish,
+ qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1
+ es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1
+ es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1
+ es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1
+ es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1
+ es es_AR es_BO es_CL
+ es_CO es_CR es_EC
+ es_ES es_GT es_MX
+ es_NI es_PA es_PE
+ es_PY es_SV es_UY es_VE
+ spanish spanish.iso88591));
+
+# Select the largest of the alpha(num)bets.
+
+($Locale, @Locale) = ($English, @English)
+ if (length(@English) > length(@Locale));
+($Locale, @Locale) = ($German, @German)
+ if (length(@German) > length(@Locale));
+($Locale, @Locale) = ($French, @French)
+ if (length(@French) > length(@Locale));
+($Locale, @Locale) = ($Spanish, @Spanish)
+ if (length(@Spanish) > length(@Locale));
+
+print "# Locale = $Locale\n";
+print "# Alnum_ = @Locale\n";
+
+{
+ local $^W = 0;
+ setlocale(&LC_ALL, $Locale);
+}
+
+{
+ my $i = 0;
+
+ for (@Locale) {
+ $iLocale{$_} = $i++;
+ }
+}
+
+# Sieve the uppercase and the lowercase.
+
+for (@Locale) {
+ if (/[^\d_]/) { # skip digits and the _
+ if (lc eq $_) {
+ $UPPER{$_} = uc;
+ } else {
+ $lower{$_} = lc;
+ }
+ }
+}
+
+# Find the alphabets that are not alphabets in the default locale.
+
+{
+ no locale;
+
+ for (keys %UPPER, keys %lower) {
+ push(@Neoalpha, $_) if (/\W/);
+ }
+}
+
+@Neoalpha = sort @Neoalpha;
+
+# Test \w.
+
+{
+ my $word = join('', @Neoalpha);
+
+ $word =~ /^(\w*)$/;
+
+ print 'not ' if ($1 ne $word);
+}
+print "ok 99\n";
+
+# Find places where the collation order differs from the default locale.
+
+print "# testing 100\n";
+{
+ my (@k, $i, $j, @d);
+
+ {
+ no locale;
+
+ @k = sort (keys %UPPER, keys %lower);
+ }
+
+ for ($i = 0; $i < @k; $i++) {
+ for ($j = $i + 1; $j < @k; $j++) {
+ if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) {
+ push(@d, [$k[$j], $k[$i]]);
+ }
+ }
+ }
+
+ # Cross-check those places.
+
+ for (@d) {
+ ($i, $j) = @$_;
+ if ($i gt $j) {
+ print "# failed 100 at:\n";
+ print "# i = $i, j = $j, i ",
+ $i le $j ? 'le' : 'gt', " j\n";
+ print 'not ';
+ last;
+ }
+ }
+}
+print "ok 100\n";
+
+# Cross-check whole character set.
+
+print "# testing 101\n";
+for (map { chr } 0..255) {
+ if (/\w/ and /\W/) { print 'not '; last }
+ if (/\d/ and /\D/) { print 'not '; last }
+ if (/\s/ and /\S/) { print 'not '; last }
+ if (/\w/ and /\D/ and not /_/ and
+ not (exists $UPPER{$_} or exists $lower{$_})) {
+ print "# failed 101 at:\n";
+ print "# ", ord($_), " '$_'\n";
+ print 'not ';
+ last;
+ }
+}
+print "ok 101\n";
+
+# The @Locale should be internally consistent.
+
+print "# testing 102\n";
+{
+ my ($from, $to, $lesser, $greater, @test, %test, $test);
+
+ for (0..9) {
+ # Select a slice.
+ $from = int(($_*@Locale)/10);
+ $to = $from + int(@Locale/10);
+ $to = $#Locale if ($to > $#Locale);
+ $lesser = join('', @Locale[$from..$to]);
+ # Select a slice one character on.
+ $from++; $to++;
+ $to = $#Locale if ($to > $#Locale);
+ $greater = join('', @Locale[$from..$to]);
+ @test =
+ (
+ 'not ($lesser lt $greater)', # 0
+ 'not ($lesser le $greater)', # 1
+ 'not ($lesser ne $greater)', # 2
+ ' ($lesser eq $greater)', # 3
+ ' ($lesser ge $greater)', # 4
+ ' ($lesser gt $greater)', # 5
+ ' ($greater lt $lesser )', # 6
+ ' ($greater le $lesser )', # 7
+ 'not ($greater ne $lesser )', # 8
+ ' ($greater eq $lesser )', # 9
+ 'not ($greater ge $lesser )', # 10
+ 'not ($greater gt $lesser )', # 11
+ # Well, these two are sort of redundant
+ # because @Locale was derived using cmp.
+ 'not (($lesser cmp $greater) == -1)', # 12
+ 'not (($greater cmp $lesser ) == 1)' # 13
+ );
+ @test{@test} = 0 x @test;
+ $test = 0;
+ for my $ti (@test) { $test{$ti} = eval $ti ; $test ||= $test{$ti} }
+ if ($test) {
+ print "# failed 102 at:\n";
+ print "# lesser = '$lesser'\n";
+ print "# greater = '$greater'\n";
+ print "# (greater) from = $from, to = $to\n";
+ for my $ti (@test) {
+ printf("# %-40s %-4s", $ti,
+ $test{$ti} ? 'FAIL' : 'ok');
+ if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
+ printf("(%s == %4d)", $1, eval $1);
+ }
+ print "\n";
+ }
+
+ print 'not ';
+ last;
+ }
+ }
+}
+print "ok 102\n";