summaryrefslogtreecommitdiff
path: root/t/porting/libperl.t
blob: 18c180112a9653a389dc4da5963cc83fb4d2c584 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
#!/usr/bin/perl -w

# Try opening libperl.a with nm, and verifying it has the kind of
# symbols we expect, and no symbols we should avoid.
#
# Fail softly, expect things only on known platforms:
# - linux
# - darwin (OS X), both x86 and ppc
# - freebsd
# and on other platforms, and if things seem odd, just give up (skip_all).
#
# Also, if the rarely-used builds options -DPERL_GLOBAL_STRUCT or
# -DPERL_GLOBAL_STRUCT_PRIVATE are used, verify that they did what
# they were meant to do, hide the global variables (see perlguts for
# the details).
#
# Debugging tip: nm output (this script's input) can be faked by
# giving one command line argument for this script: it should be
# either the filename to read, or "-" for STDIN.  You can also append
# "@style" (where style is a supported nm style, like "gnu" or "darwin")
# to this filename for "cross-parsing".
#
# Some terminology:
# - "text" symbols are code
# - "data" symbols are data (duh), with subdivisions:
#   - "bss": (Block-Started-by-Symbol: originally from IBM assembler...),
#     uninitialized data, which often even doesn't exist in the object
#     file as such, only its size does, which is then created on demand
#     by the loader
#  - "const": initialized read-only data, like string literals
#  - "common": uninitialized data unless initialized...
#    (the full story is too long for here, see "man nm")
#  - "data": initialized read-write data
#    (somewhat confusingly below: "data data", but it makes code simpler)
#  - "undefined": external symbol referred to by an object,
#    most likely a text symbol.  Can be either a symbol defined by
#    a Perl object file but referred to by other Perl object files,
#    or a completely external symbol from libc, or other system libraries.

BEGIN {
    chdir 't' if -d 't';
    @INC = '../lib';
    require "./test.pl";
}

use strict;

use Config;

if ($Config{cc} =~ /g\+\+/) {
    # XXX Could use c++filt, maybe.
    skip_all "on g++";
}

my $libperl_a;

for my $f (qw(../libperl.a libperl.a)) {
  if (-f $f) {
    $libperl_a = $f;
    last;
  }
}

unless (defined $libperl_a) {
  skip_all "no libperl.a";
}

print "# \$^O = $^O\n";
print "# \$Config{cc} = $Config{cc}\n";
print "# libperl = $libperl_a\n";

my $nm;
my $nm_opt = '';
my $nm_style;
my $nm_fh;
my $nm_err_tmp = "libperl$$";

END {
    # this is still executed when we skip_all above, avoid a warning
    unlink $nm_err_tmp if $nm_err_tmp;
}

my $fake_input;
my $fake_style;

if (@ARGV == 1) {
    $fake_input = shift @ARGV;
    print "# Faking nm output from $fake_input\n";
    if ($fake_input =~ s/\@(.+)$//) {
        $fake_style = $1;
        print "# Faking nm style from $fake_style\n";
        if ($fake_style eq 'gnu' ||
            $fake_style eq 'linux' ||
            $fake_style eq 'freebsd') {
            $nm_style = 'gnu'
        } elsif ($fake_style eq 'darwin' || $fake_style eq 'osx') {
            $nm_style = 'darwin'
        } else {
            die "$0: Unknown explicit nm style '$fake_style'\n";
        }
    }
}

unless (defined $nm_style) {
    if ($^O eq 'linux') {
        # The 'gnu' style could be equally well be called 'bsd' style,
        # since the output format of the GNU binutils nm is really BSD.
        $nm_style = 'gnu';
    } elsif ($^O eq 'freebsd') {
        $nm_style = 'gnu';
    } elsif ($^O eq 'darwin') {
        $nm_style = 'darwin';
    }
}

if (defined $nm_style) {
    if ($nm_style eq 'gnu') {
        $nm = '/usr/bin/nm';
    } elsif ($nm_style eq 'darwin') {
        $nm = '/usr/bin/nm';
        # With the -m option we get better information than the BSD-like
        # default: with the default, a lot of symbols get dumped into 'S'
        # or 's', for example one cannot tell the difference between const
        # and non-const data symbols.
        $nm_opt = '-m';
    } else {
        die "$0: Unexpected nm style '$nm_style'\n";
    }
}

unless (defined $nm) {
  skip_all "no nm";
}

unless (defined $nm_style) {
  skip_all "no nm style";
}

print "# nm = $nm\n";
print "# nm_style = $nm_style\n";
print "# nm_opt = $nm_opt\n";

unless (-x $nm) {
    skip_all "no executable nm $nm";
}

if ($nm_style eq 'gnu' && !defined $fake_style) {
    open(my $gnu_verify, "$nm --version|") or
        skip_all "nm failed: $!";
    my $gnu_verified;
    while (<$gnu_verify>) {
        if (/^GNU nm/) {
            $gnu_verified = 1;
            last;
        }
    }
    unless ($gnu_verified) {
        skip_all "no GNU nm";
    }
}

if (defined $fake_input) {
    if ($fake_input eq '-') {
        open($nm_fh, "<&STDIN") or
            skip_all "Duping STDIN failed: $!";
    } else {
        open($nm_fh, "<", $fake_input) or
            skip_all "Opening '$fake_input' failed: $!";
    }
    undef $nm_err_tmp; # In this case there will be no nm errors.
} else {
    open($nm_fh, "$nm $nm_opt $libperl_a 2>$nm_err_tmp |") or
        skip_all "$nm $nm_opt $libperl_a failed: $!";
}

sub is_perlish_symbol {
    $_[0] =~ /^(?:PL_|Perl|PerlIO)/;
}

# XXX Implement "internal test" for this script (option -t?)
# to verify that the parsing does what it's intended to.

sub nm_parse_gnu {
    my $symbols = shift;
    my $line = $_;
    if (m{^(\w+\.o):$}) {
        # object file name
        $symbols->{obj}{$1}++;
        $symbols->{o} = $1;
        return;
    } else {
        die "$0: undefined current object: $line"
            unless defined $symbols->{o};
        # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
        if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
            if (/^[Rr] (\w+)$/) {
                # R: read only (const)
                $symbols->{data}{const}{$1}{$symbols->{o}}++;
            } elsif (/^r .+$/) {
                # Skip local const (read only).
            } elsif (/^[Tti] (\w+)(\..+)?$/) {
                $symbols->{text}{$1}{$symbols->{o}}++;
            } elsif (/^C (\w+)$/) {
                $symbols->{data}{common}{$1}{$symbols->{o}}++;
            } elsif (/^[BbSs] (\w+)(\.\d+)?$/) {
                # Bb: uninitialized data (bss)
                # Ss: uninitialized data "for small objects"
                $symbols->{data}{bss}{$1}{$symbols->{o}}++;
            } elsif (/^0{16} D _LIB_VERSION$/) {
                # Skip the _LIB_VERSION (not ours, probably libm)
            } elsif (/^[DdGg] (\w+)$/) {
                # Dd: initialized data
                # Gg: initialized "for small objects"
                $symbols->{data}{data}{$1}{$symbols->{o}}++;
            } elsif (/^. \.?(\w+)$/) {
                # Skip the unknown types.
                print "# Unknown type: $line ($symbols->{o})\n";
            }
            return;
        } elsif (/^ {8}(?: {8})? U _?(\w+)$/) {
            my ($symbol) = $1;
            return if is_perlish_symbol($symbol);
            $symbols->{undef}{$symbol}{$symbols->{o}}++;
            return;
	}
    }
    print "# Unexpected nm output '$line' ($symbols->{o})\n";
}

sub nm_parse_darwin {
    my $symbols = shift;
    my $line = $_;
    if (m{^(?:.+)?libperl\.a\((\w+\.o)\):$}) {
        # object file name
        $symbols->{obj}{$1}++;
        $symbols->{o} = $1;
        return;
    } else {
        die "$0: undefined current object: $line" unless defined $symbols->{o};
        # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
        if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
            # String literals can live in different sections
            # depending on the compiler and os release, assumedly
            # also linker flags.
            if (/^\(__TEXT,__(?:const|cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) {
                my ($symbol, $suffix) = ($1, $2);
                # Ignore function-local constants like
                # _Perl_av_extend_guts.oom_array_extend
                return if defined $suffix && /__TEXT,__const/;
                # Ignore the cstring unnamed strings.
                return if $symbol =~ /^L\.str\d+$/;
                $symbols->{data}{const}{$symbol}{$symbols->{o}}++;
            } elsif (/^\(__TEXT,__text\) (?:non-)?external _(\w+)$/) {
                $symbols->{text}{$1}{$symbols->{o}}++;
            } elsif (/^\(__DATA,__(const|data|bss|common)\) (?:non-)?external _(\w+)(\.\w+)?$/) {
                my ($dtype, $symbol, $suffix) = ($1, $2, $3);
                # Ignore function-local constants like
                # _Perl_pp_gmtime.dayname
                return if defined $suffix;
                $symbols->{data}{$dtype}{$symbol}{$symbols->{o}}++;
            } elsif (/^\(__DATA,__const\) non-external _\.memset_pattern\d*$/) {
                # Skip this, whatever it is (some inlined leakage from
                # darwin libc?)
            } elsif (/^\(__TEXT,__eh_frame/) {
                # Skip the eh_frame (exception handling) symbols.
                return;
            } elsif (/^\(__\w+,__\w+\) /) {
                # Skip the unknown types.
                print "# Unknown type: $line ($symbols->{o})\n";
            }
            return;
        } elsif (/^ {8}(?: {8})? \(undefined(?: \[lazy bound\])?\) external _?(.+)/) {
            # darwin/ppc marks most undefined text symbols
            # as "[lazy bound]".
            my ($symbol) = $1;
            return if is_perlish_symbol($symbol);
            $symbols->{undef}{$symbol}{$symbols->{o}}++;
            return;
        }
    }
    print "# Unexpected nm output '$line' ($symbols->{o})\n";
}

my $nm_parse;

if ($nm_style eq 'gnu') {
    $nm_parse = \&nm_parse_gnu;
} elsif ($nm_style eq 'darwin') {
    $nm_parse = \&nm_parse_darwin;
}

unless (defined $nm_parse) {
    skip_all "no nm parser ($nm_style $nm_style, \$^O $^O)";
}

my %symbols;

while (<$nm_fh>) {
    next if /^$/;
    chomp;
    $nm_parse->(\%symbols);
}

# use Data::Dumper; print Dumper(\%symbols);

if (keys %symbols == 0) {
    skip_all "no symbols\n";
}

# These should always be true for everyone.

ok($symbols{obj}{'pp.o'}, "has object pp.o");
ok($symbols{text}{'Perl_peep'}, "has text Perl_peep");
ok($symbols{text}{'Perl_pp_uc'}{'pp.o'}, "has text Perl_pp_uc in pp.o");
ok(exists $symbols{data}{const}, "has data const symbols");
ok($symbols{data}{const}{PL_no_mem}{'globals.o'}, "has PL_no_mem");

my $DEBUGGING = $Config{ccflags} =~ /-DDEBUGGING/ ? 1 : 0;

my $GS  = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT\b/ ? 1 : 0;
my $GSP = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT_PRIVATE/ ? 1 : 0;

print "# GS  = $GS\n";
print "# GSP = $GSP\n";

my %data_symbols;

for my $dtype (sort keys %{$symbols{data}}) {
    for my $symbol (sort keys %{$symbols{data}{$dtype}}) {
        $data_symbols{$symbol}++;
    }
}

# The following tests differ between vanilla vs $GSP or $GS.

if ($GSP) {
    print "# -DPERL_GLOBAL_STRUCT_PRIVATE\n";
    ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed");
    ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr");

    ok(! exists $symbols{data}{bss}, "has no data bss symbols");
    ok(! exists $symbols{data}{data} ||
            # clang with ASAN seems to add this symbol to every object file:
            !grep($_ ne '__unnamed_1', keys %{$symbols{data}{data}}),
        "has no data data symbols");
    ok(! exists $symbols{data}{common}, "has no data common symbols");

    # -DPERL_GLOBAL_STRUCT_PRIVATE should NOT have
    # the extra text symbol for accessing the vars
    # (as opposed to "just" -DPERL_GLOBAL_STRUCT)
    ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars");
} elsif ($GS) {
    print "# -DPERL_GLOBAL_STRUCT\n";
    ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed");
    ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr");

    ok(! exists $symbols{data}{bss}, "has no data bss symbols");

    # These PerlIO data symbols are left visible with
    # -DPERL_GLOBAL_STRUCT (as opposed to -DPERL_GLOBAL_STRUCT_PRIVATE)
    my @PerlIO =
        qw(
           PerlIO_byte
           PerlIO_crlf
           PerlIO_pending
           PerlIO_perlio
           PerlIO_raw
           PerlIO_remove
           PerlIO_stdio
           PerlIO_unix
           PerlIO_utf8
          );

    # PL_magic_vtables is const with -DPERL_GLOBAL_STRUCT_PRIVATE but
    # otherwise not const -- because of SWIG which wants to modify
    # the table.  Evil SWIG, eeevil.

    # my_cxt_index is used with PERL_IMPLICIT_CONTEXT, which
    # -DPERL_GLOBAL_STRUCT has turned on.
    eq_array([sort keys %{$symbols{data}{data}}],
             [sort('PL_VarsPtr',
                   @PerlIO,
                   'PL_magic_vtables',
                   'my_cxt_index')],
             "data data symbols");

    # Only one data common symbol, our "supervariable".
    eq_array([sort keys %{$symbols{data}{common}}],
             ['PL_Vars'],
             "data common symbols");

    ok($symbols{data}{data}{PL_VarsPtr}{'globals.o'}, "has PL_VarsPtr");
    ok($symbols{data}{common}{PL_Vars}{'globals.o'}, "has PL_Vars");

    # -DPERL_GLOBAL_STRUCT has extra text symbol for accessing the vars.
    ok($symbols{text}{Perl_GetVars}{'util.o'}, "has Perl_GetVars");
} else {
    print "# neither -DPERL_GLOBAL_STRUCT nor -DPERL_GLOBAL_STRUCT_PRIVATE\n";

    if ( !$symbols{data}{common} ) {
        # This is likely because Perl was compiled with
        # -Accflags="-fno-common"
        $symbols{data}{common} = $symbols{data}{bss};
    }

    ok($symbols{data}{common}{PL_hash_seed}{'globals.o'}, "has PL_hash_seed");
    ok($symbols{data}{data}{PL_ppaddr}{'globals.o'}, "has PL_ppaddr");

    # None of the GLOBAL_STRUCT* business here.
    ok(! exists $symbols{data}{data}{PL_VarsPtr}, "has no PL_VarsPtr");
    ok(! exists $symbols{data}{common}{PL_Vars}, "has no PL_Vars");
    ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars");
}

ok(keys %{$symbols{undef}}, "has undefined symbols");

# memchr, memcmp, memcpy should be used all over the place.
#
# chmod, socket, getenv, sigaction, sqrt, time are system/library
# calls that should each see at least one use.
my @good = qw(memchr memcmp memcpy
              chmod socket getenv sigaction sqrt time);
if ($Config{usedl}) {
    push @good, 'dlopen';
}
for my $good (@good) {
    my @o = exists $symbols{undef}{$good} ?
        sort keys %{ $symbols{undef}{$good} } : ();
    ok(@o, "uses $good (@o)");
}

# gets is horribly unsafe.
#
# fgets should not be used (Perl has its own API), even without perlio.
#
# tmpfile is unsafe.
#
# strcpy, strcat, strncpy, strncpy are unsafe.
#
# sprintf and vsprintf should not be used because
# Perl has its own safer and more portable implementations.
# (One exception: for certain floating point outputs
# the native sprintf is still used, see below.)
#
# XXX: add atoi() to @bad - unsafe and undefined failure modes.
#
my @bad = qw(gets fgets
             tmpfile
             strcpy strcat strncpy strncat tmpfile
             sprintf vsprintf);
for my $bad (@bad) {
    my @o = exists $symbols{undef}{$bad} ?
        sort keys %{ $symbols{undef}{$bad} } : ();
    # While sprintf() is bad in the general case,
    # some platforms implement Gconvert via sprintf, in sv.o.
    if ($bad eq 'sprintf' &&
        $Config{d_Gconvert} =~ /^sprintf/ &&
        @o == 1 && $o[0] eq 'sv.o') {
      SKIP: {
        skip("uses sprintf for Gconvert in sv.o");
      }
    } else {
        is(@o, 0, "uses no $bad (@o)");
    }
}

if (defined $nm_err_tmp) {
    if (open(my $nm_err_fh, $nm_err_tmp)) {
        my $error;
        while (<$nm_err_fh>) {
            # OS X has weird error where nm warns about
            # "no name list" but then outputs fine.
            if (/nm: no name list/ && $^O eq 'darwin') {
                print "# $^O ignoring $nm output: $_";
                next;
            }
            warn "$0: Unexpected $nm error: $_";
            $error++;
        }
        die "$0: Unexpected $nm errors\n" if $error;
    } else {
        warn "Failed to open '$nm_err_tmp': $!\n";
    }
}

done_testing();