1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
|
#!./perl -wT
# The switches above turn on warnings (-w) and taint mode (-T); the taint
# checks in this script are meaningless without -T.
# Announce the test plan: this script reports 104 numbered results.
print "1..104\n";
# If a t/ directory exists, move into it, and make ../lib the only entry
# in @INC.
BEGIN {
chdir 't' if -d 't';
@INC = '../lib';
}
use strict;
# Import the locale-related names (setlocale, LC_ALL, ...) from POSIX.
use POSIX qw(locale_h);
# Package globals used throughout the script: the names of the locales
# found, the word characters of each, and the case-mapping tables.
use vars qw($a
$English $German $French $Spanish
@C @English @German @French @Spanish
$Locale @Locale %iLocale %UPPER %lower @Neoalpha);
# An untainted sample string containing word characters, whitespace and a
# non-word character, so \w, \W, \s and \S can all match it.
$a = 'abc %';
# Report the outcome of test number $n on standard output:
# "ok N" when $result is true, "not ok N" otherwise.
sub ok {
    my ($n, $result) = @_;
    my $prefix = $result ? '' : 'not ';
    print $prefix, "ok $n\n";
}
# First we'll do a lot of taint checking for locales.
# This is the easiest to test, actually, as any locale,
# even the default locale will taint under 'use locale'.
# Return true if the eval block below dies, false if it runs to completion.
# The block joins all arguments into a throwaway string in the same
# statement as a no-op "kill 0".  Under taint mode that statement is
# expected to die with an "Insecure dependency" error when any argument is
# tainted, so a true return means "tainted" (the idiom from the Camel book,
# 2nd edition).  Keep join and kill in one statement: the check relies on it.
sub is_tainted { # hello, camel two.
my $dummy;
not eval { $dummy = join("", @_), kill 0; 1 }
}
# Test number $_[0] passes when the value $_[1] is tainted.
# The arguments are used through @_ directly so that magical variables
# such as $& are examined in place.
sub check_taint ($$) {
    my $tainted = is_tainted($_[1]);
    ok($_[0], $tainted);
}
# Test number $_[0] passes when the value $_[1] is NOT tainted.
# The arguments are used through @_ directly so that magical variables
# such as $& are examined in place.
sub check_taint_not ($$) {
    my $clean = !is_tainted($_[1]);
    ok($_[0], $clean);
}
# 'use locale' applies to the rest of the file; every expectation below
# is about which results become tainted once it is in effect.
use locale; # engage locale and therefore locale taint.
# The plain literal assigned to $a must still be clean.
check_taint_not 1, $a;
# Case mapping, whether through the functions or the string escapes,
# is expected to produce tainted results.
check_taint 2, uc($a);
check_taint 3, "\U$a";
check_taint 4, ucfirst($a);
check_taint 5, "\u$a";
check_taint 6, lc($a);
check_taint 7, "\L$a";
check_taint 8, lcfirst($a);
check_taint 9, "\l$a";
# Floating-point sprintf formats are expected to taint (presumably because
# the decimal separator is locale-dependent) ...
check_taint 10, sprintf('%e', 123.456);
check_taint 11, sprintf('%f', 123.456);
check_taint 12, sprintf('%g', 123.456);
# ... while the integer formats are expected to stay clean.
check_taint_not 13, sprintf('%d', 123.456);
check_taint_not 14, sprintf('%x', 123.456);
# Match against a tainted $_ and check which match variables pick up
# taint.  Each group runs one pattern and then inspects $&, $`, $', $+,
# $1 and (the never-set) $2.
$_ = $a; # untaint $_
$_ = uc($a); # taint $_
check_taint 15, $_;
# \w is a locale-dependent class: the match variables must be tainted.
/(\w)/; # taint $&, $`, $', $+, $1.
check_taint 16, $&;
check_taint 17, $`;
check_taint 18, $';
check_taint 19, $+;
check_taint 20, $1;
check_taint_not 21, $2;
# A plain '.' is expected to leave the match variables clean, even though
# $_ itself is still tainted.
/(.)/; # untaint $&, $`, $', $+, $1.
check_taint_not 22, $&;
check_taint_not 23, $`;
check_taint_not 24, $';
check_taint_not 25, $+;
check_taint_not 26, $1;
check_taint_not 27, $2;
# The remaining classes \W, \s and \S are expected to taint like \w.
/(\W)/; # taint $&, $`, $', $+, $1.
check_taint 28, $&;
check_taint 29, $`;
check_taint 30, $';
check_taint 31, $+;
check_taint 32, $1;
check_taint_not 33, $2;
/(\s)/; # taint $&, $`, $', $+, $1.
check_taint 34, $&;
check_taint 35, $`;
check_taint 36, $';
check_taint 37, $+;
check_taint 38, $1;
check_taint_not 39, $2;
/(\S)/; # taint $&, $`, $', $+, $1.
check_taint 40, $&;
check_taint 41, $`;
check_taint 42, $';
check_taint 43, $+;
check_taint 44, $1;
check_taint_not 45, $2;
# A literal pattern against a clean $_ must leave everything clean.
$_ = $a; # untaint $_
check_taint_not 46, $_;
/(b)/; # this must not taint
check_taint_not 47, $&;
check_taint_not 48, $`;
check_taint_not 49, $';
check_taint_not 50, $+;
check_taint_not 51, $1;
check_taint_not 52, $2;
# Substituting a tainted replacement through a pattern with no
# locale-dependent class: only the target ($_) becomes tainted.
$_ = $a; # untaint $_
check_taint_not 53, $_;
$b = uc($a); # taint $b
s/(.+)/$b/; # this must taint only the $_
check_taint 54, $_;
check_taint_not 55, $&;
check_taint_not 56, $`;
check_taint_not 57, $';
check_taint_not 58, $+;
check_taint_not 59, $1;
check_taint_not 60, $2;
# Same pattern with a clean literal replacement: nothing becomes tainted.
$_ = $a; # untaint $_
s/(.+)/b/; # this must not taint
check_taint_not 61, $_;
check_taint_not 62, $&;
check_taint_not 63, $`;
check_taint_not 64, $';
check_taint_not 65, $+;
check_taint_not 66, $1;
check_taint_not 67, $2;
# Copy $a into $b and substitute on the copy using \w: the copy is
# tainted, the original is untouched.
$b = $a; # untaint $b
($b = $a) =~ s/\w/$&/;
check_taint 68, $b; # $b should be tainted.
check_taint_not 69, $a; # $a should be not.
# Substitutions that match with \w and apply a case escape (\l, \L, \u,
# \U) in the replacement: $_ and every match variable are expected to be
# tainted; $2, which is never set, stays clean.
$_ = $a; # untaint $_
s/(\w)/\l$1/; # this must taint
check_taint 70, $_;
check_taint 71, $&;
check_taint 72, $`;
check_taint 73, $';
check_taint 74, $+;
check_taint 75, $1;
check_taint_not 76, $2;
$_ = $a; # untaint $_
s/(\w)/\L$1/; # this must taint
check_taint 77, $_;
check_taint 78, $&;
check_taint 79, $`;
check_taint 80, $';
check_taint 81, $+;
check_taint 82, $1;
check_taint_not 83, $2;
$_ = $a; # untaint $_
s/(\w)/\u$1/; # this must taint
check_taint 84, $_;
check_taint 85, $&;
check_taint 86, $`;
check_taint 87, $';
check_taint 88, $+;
check_taint 89, $1;
check_taint_not 90, $2;
$_ = $a; # untaint $_
s/(\w)/\U$1/; # this must taint
check_taint 91, $_;
check_taint 92, $&;
check_taint 93, $`;
check_taint 94, $';
check_taint 95, $+;
check_taint 96, $1;
check_taint_not 97, $2;
# $a was only ever read above, so it must still be clean.
# After all this tainting $a should be cool.
check_taint_not 98, $a;
# I think we've seen quite enough of taint.
# Let us do some *real* locale work now.
# Return, in sorted order, every character with code 0..255 that matches
# \w.  Both the \w test and the sort are written inside the file's
# 'use locale' scope, so they are meant to reflect whatever locale is
# current when the sub is called.
sub getalnum {
    my @word_chars;
    for my $code (0 .. 255) {
        my $char = chr $code;
        push @word_chars, $char if $char =~ /\w/;
    }
    return sort @word_chars;
}
# Try each locale name in @try, in order, until setlocale(LC_ALL, ...)
# accepts one.
#   $lcall - scalar ref; receives the accepted name, or undef if none works
#   $alnum - array ref; receives getalnum()'s result under the accepted
#            locale, or is emptied if none works
#   @try   - candidate locale names
sub locatelocale ($$@) {
    my ($lcall, $alnum, @try) = @_;
    undef $$lcall;
    foreach my $candidate (@try) {
        local $^W = 0; # suppress "Subroutine LC_ALL redefined"
        next unless setlocale(LC_ALL, $candidate);
        $$lcall = $candidate;
        @$alnum = getalnum();
        last;
    }
    @$alnum = () unless (defined $$lcall);
}
# Probe for usable locales.  Within each call the candidates are tried in
# the order listed and the first one accepted wins: names with an explicit
# ISO8859-1 codeset first, then the bare language code, then
# language_COUNTRY names, then the spelled-out aliases.

# Find some default locale
locatelocale(\$Locale, \@Locale, 'C', 'POSIX');

# Find some English locale
locatelocale(\$English, \@English,
             qw(en_US.ISO8859-1 en_GB.ISO8859-1),
             qw(en en_US en_UK en_IE en_CA en_AU en_NZ),
             qw(english english.iso88591),
             qw(american american.iso88591),
             qw(british british.iso88591));

# Find some German locale
{
    my @countries = qw(DE AT CH);
    locatelocale(\$German, \@German,
                 map("de_$_.ISO8859-1", @countries),
                 'de',
                 map("de_$_", @countries),
                 qw(german german.iso88591));
}

# Find some French locale
{
    my @countries = qw(FR BE CA CH);
    locatelocale(\$French, \@French,
                 map("fr_$_.ISO8859-1", @countries),
                 'fr',
                 map("fr_$_", @countries),
                 qw(french french.iso88591));
}

# Find some Spanish locale
{
    my @countries = qw(AR BO CL CO CR EC ES GT MX NI PA PE PY SV UY VE);
    locatelocale(\$Spanish, \@Spanish,
                 map("es_$_.ISO8859-1", @countries),
                 'es',
                 map("es_$_", @countries),
                 qw(spanish spanish.iso88591));
}
# Select the largest of the alpha(num)bets.
# Candidates are considered in the order English, German, French, Spanish
# and replace the current choice only when they have strictly more word
# characters.  The element counts are compared directly: length(@array)
# would put the array in scalar context and return the number of digits
# in its count, so e.g. 99 and 63 characters would compare as equal.
for my $candidate ([$English, \@English], [$German,  \@German],
                   [$French,  \@French],  [$Spanish, \@Spanish]) {
    my ($name, $alnum) = @$candidate;
    ($Locale, @Locale) = ($name, @$alnum) if @$alnum > @Locale;
}
# Show which locale was chosen and its word characters as TAP diagnostics.
print "# Locale = $Locale\n", "# Alnum_ = @Locale\n";
# Switch the process to the chosen locale (warnings silenced around the
# call, as in locatelocale).
{
    local $^W = 0;
    setlocale(LC_ALL, $Locale);
}
# Record each character's position within @Locale.
@iLocale{@Locale} = (0 .. $#Locale);
# Sieve the uppercase and the lowercase.
# A character that lc() leaves unchanged is stored in %UPPER, mapped to
# its uc() form; any other character is stored in %lower, mapped to its
# lc() form.  Digits and the underscore are skipped.
for my $char (@Locale) {
    next unless $char =~ /[^\d_]/; # skip digits and the _
    if (lc($char) eq $char) {
        $UPPER{$char} = uc($char);
    }
    else {
        $lower{$char} = lc($char);
    }
}
# Cross-check the upper and the lower: wherever the two tables can be
# chained, going through one and then the other must lead back to the
# starting character.
# Yes, this is broken when the upper<->lower changes the number of
# the glyphs (e.g. the German sharp-s aka double-s aka sz-ligature,
# or the Dutch IJ or the Spanish LL or ...)
# But so far all the implementations do this wrong so we can do it wrong too.
for my $char (keys %UPPER) {
    my $round_trip = $lower{ $UPPER{$char} };
    next unless defined $round_trip;
    next if $char eq $round_trip;
    print 'not ';
    last;
}
print "ok 99\n";
for my $char (keys %lower) {
    my $round_trip = $UPPER{ $lower{$char} };
    next unless defined $round_trip;
    next if $char eq $round_trip;
    print 'not ';
    last;
}
print "ok 100\n";
# Find the alphabets that are not alphabets in the default locale:
# with locale handling switched off, collect every cased character
# that matches \W.
{
    no locale;
    push @Neoalpha, grep { /\W/ } keys %UPPER, keys %lower;
}
@Neoalpha = sort @Neoalpha;
# Test \w: with locale handling back on, the string made of all those
# characters must match \w* in full.
{
    my $word = join '', @Neoalpha;
    $word =~ /^(\w*)$/;
    print 'not ' unless $1 eq $word;
}
print "ok 101\n";
# Find places where the collation order differs from the default locale.
{
    # The cased characters, sorted with locale handling switched off.
    my @default_order;
    {
        no locale;
        @default_order = sort (keys %UPPER, keys %lower);
    }
    # Collect every pair whose relative order in @Locale is the reverse
    # of its default order, stored as [earlier-in-@Locale, later].
    my @swapped;
    for my $lo (0 .. $#default_order) {
        for my $hi ($lo + 1 .. $#default_order) {
            my ($first, $second) = @default_order[$lo, $hi];
            push @swapped, [$second, $first]
                if $iLocale{$second} < $iLocale{$first};
        }
    }
    # Cross-check those places: the string comparison operators must
    # agree with the order recorded in @Locale.
    for my $pair (@swapped) {
        my ($before, $after) = @$pair;
        next unless $before gt $after;
        print "# i = $before, j = $after, i ",
              $before le $after ? 'le' : 'gt', " j\n";
        print 'not ';
        last;
    }
}
print "ok 102\n";
# Cross-check whole character set.  No character may match a class and
# its complement at once, and every word character that is neither a
# digit nor the underscore must appear in one of the case tables.
for my $char (map { chr } 0 .. 255) {
    next unless ($char =~ /\w/ && $char =~ /\W/)
             || ($char =~ /\d/ && $char =~ /\D/)
             || ($char =~ /\s/ && $char =~ /\S/)
             || ($char =~ /\w/ && $char =~ /\D/ && $char !~ /_/
                 && !(exists $UPPER{$char} || exists $lower{$char}));
    print 'not ';
    last;
}
print "ok 103\n";
# The @Locale should be internally consistent: a run of consecutive
# characters must compare as less than the run starting one character
# later, under every string comparison operator.
{
    my $width = int(@Locale / 10);
    for my $tenth (0 .. 9) {
        # Select a slice.
        my $from = int(($tenth * @Locale) / 10);
        my $to   = $from + $width;
        $to = $#Locale if $to > $#Locale;
        my $lesser = join('', @Locale[$from .. $to]);
        # Select a slice one character on.
        $from++;
        $to++;
        $to = $#Locale if $to > $#Locale;
        my $greater = join('', @Locale[$from .. $to]);
        # Comparisons that must all hold ...
        my @expect_true = (
            $lesser  lt $greater,
            $lesser  le $greater,
            $lesser  ne $greater,
            $greater ne $lesser,
            $greater ge $lesser,
            $greater gt $lesser,
            # Well, these two are sort of redundant because @Locale
            # was derived using cmp.
            ($lesser  cmp $greater) == -1,
            ($greater cmp $lesser)  == 1,
        );
        # ... and comparisons that must all fail.
        my @expect_false = (
            $lesser  eq $greater,
            $lesser  ge $greater,
            $lesser  gt $greater,
            $greater lt $lesser,
            $greater le $lesser,
            $greater eq $lesser,
        );
        if ((grep { !$_ } @expect_true) or (grep { $_ } @expect_false)) {
            print 'not ';
            last;
        }
    }
}
print "ok 104\n";
|