summaryrefslogtreecommitdiff
path: root/dist/Devel-PPPort/devel/scanprov
blob: 5194e69d18e55eb3f6b04e4493c972dc5d22173d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
#!/usr/bin/perl -w
$|=1;
################################################################################
#
#  scanprov -- scan Perl headers for macros, and add known exceptions, and
#              functions we weren't able to otherwise find.  Thus the purpose
#              of this file has been expanded beyond what its name says.
#
#  Besides the normal options, 'mode=clean' is understood as 'write', but
#  first remove any scanprov lines added in previous runs of this.
#
#  The lines added have a code to signify they are added by us:
#   F means it is a function in embed.fnc that the normal routines didn't find
#   K means it is a macro in config.h, hence is provided, and documented
#   M means it is a provided by D:P macro
#   X means it is a known exceptional item
#   Z means it is an unprovided macro without documentation
#
#  The regeneration routines do not know the prototypes for the macros scanned
#  for, which is gotten from documentation in the source.  (If they were
#  documented, they would be put in parts/apidoc.fnc, and test cases generated
#  for them in mktodo.pl).  Therefore these are all undocumented, except for
#  things from config.h which are all documented there, and many of which are
#  just defined or not defined, and hence can't be tested.  Thus looking for
#  them here is the most convenient option, which is why it's done here.
#
#  The scope of this program has also expanded to look in almost all header
#  files for almost all macros that aren't documented nor provided.  This
#  allows ppport.h --api-info=/foo/ to return when a given element actually
#  came into existence, which can be a time saver for developers of the perl
#  core.
#
#  It would be best if people would add documentation to them in the perl
#  source, and then this portion of this function would be minimized.
#
#  On Linux nm and other uses by D:P, these are the remaining unused capital
#  flags: HJLOQY
#
################################################################################
#
#  Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
#  Version 2.x, Copyright (C) 2001, Paul Marquess.
#  Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
#
#  This program is free software; you can redistribute it and/or
#  modify it under the same terms as Perl itself.
#
################################################################################

use strict;
use Getopt::Long;

require './parts/ppptools.pl';
require './parts/inc/inctools';
require './devel/devtools.pl';

our %opt = (
  mode    => 'check',
  install => '/tmp/perl/install/default',
  blead   => 'bleadperl',
  debug   => 0,
 'debug-start' => "",
);

GetOptions(\%opt, qw( install=s mode=s blead=s debug=i debug-start=s)) or die;

my $clean = $opt{mode} eq 'clean';
my $write = $clean || $opt{mode} eq 'write';
my $debug = $opt{debug};

# Get the list of known macros.  Functions are calculated separately below
my %embed = map { $_->{flags}{m} ? ( $_->{name} => 1 ) : () }
            parse_embed(qw(parts/embed.fnc parts/apidoc.fnc));

# @provided is set to everthing provided
my @provided = map { /^(\w+)/ ? $1 : () } `$^X ppport.h --list-provided`;

# There are a few exceptions that have to be dealt with specially.  Add these
# to the list of things to scan for.
my $hard_to_test_ref = known_but_hard_to_test_for();
push @provided, keys %$hard_to_test_ref;

my $base_dir = 'parts/base';
my $todo_dir = 'parts/todo';

# The identifying text placed in every entry by this program
my $id_text = "added by $0";

if ($write) {

    # Get the list of files
    my @files = all_files_in_dir($base_dir);

    # If asked to, first strip out the results of previous incarnations of
    # this script
    if ($clean) {
        print "Cleaning previous $0 runs\n";
        foreach my $file (@files) {
            open my $fh, "+<", $file or die "$file: $!\n";
            my @lines = <$fh>;
            my $orig_count = @lines;
            @lines = grep { $_ !~ /$id_text/ } @lines;
            next if @lines == $orig_count;  # No need to write if unchanged.
            truncate $fh, 0;
            seek $fh, 0, 0;
            print $fh @lines;
            close $fh or die "$file: $!\n";
        }
    }

    # The file list is returned sorted, and so the min version is in the 0th
    # element
    my $file =  $files[0];
    my $min_perl = $file;
    $min_perl =~ s,.*/,,;    # The name is the integer of __MIN_PERL__

    # There are a very few special cases that we may not find in scanning, but
    # exist all the way back.  Add them now to avoid throwing later things
    # off.
    print "-- $file --\n";
    open my $fh, "+<", $file or die "$file: $!\n";
    my @lines = <$fh>;
    my $count = @lines;
    for (qw(RETVAL CALL THIS)) { # These are also in hard_to_test_for(),
                                 # so can't be in blead, as they are skipped
                                 # in testing, so no real need to check that
                                 # they aren't dups.
        my $line = format_output_line($_, 'X');
        next if grep { /$line/ } @lines;
        print "Adding $_ to $file\n";
        push @lines, $line;
    }
    if ($count != @lines) {
        @lines = sort symbol_order @lines;
        truncate $fh, 0;
        seek $fh, 0, 0;
        print $fh @lines;
    }
    close $fh;

    # Now we're going to add the hard to test symbols.  The hash has been
    # manually populated and commited, with the version number ppport supports
    # them to.
    #
    # This is a hash ref with the keys being all symbols found in all the
    # files in the directory, and the values being the perl versions of each
    # symbol.
    my $todo = parse_todo($todo_dir);

    # The keys of $hard_to_test_ref are the symbols, and the values are
    # subhashes, with each 'version' key being its proper perl version.
    # Below, we invert %hard_to_test, so that the keys are the version, and
    # the values are the symbols that go in that version
    my %add_by_version;
    for my $hard (keys %$hard_to_test_ref) {

        # But if someone ups the min version we support, we don't want to add
        # something less than that.
        my $version = int_parse_version($hard_to_test_ref->{$hard});
        $version = $min_perl if $version < $min_perl;
        $version = format_version_line($version);

        push @{$add_by_version{$version}}, $hard
                unless grep { $todo->{$_}->{version} eq $hard } keys %$todo;
    }

    # Only a few files will have exceptions that apply to them.  Rewrite each
    foreach my $version (keys %add_by_version) {
        my $file = "$todo_dir/" . int_parse_version($version);
        print "-- Adding known exceptions to $file --\n";
        open my $fh, "+<", $file or die "$file: $!\n";
        my @lines = <$fh>;
        my $count = @lines;
        push @lines, format_version_line($version) . "\n" unless @lines;
        foreach my $symbol (@{$add_by_version{$version}}) {
            my $line = format_output_line($symbol, 'X');
            unless (grep { /$line/ } @lines) {;
                print "adding $symbol\n";
                push @lines, $line unless grep { /$line/ } @lines;
            }
        }
        if (@lines != $count) {
            @lines = sort symbol_order @lines;
            truncate $fh, 0;
            seek $fh, 0, 0;
            print $fh @lines;
        }
        close $fh;
    }
}

# Now that we've added the exceptions to a few files, we can parse
# and deal with all of them.
my $perls_ref = get_and_sort_perls(\%opt);

die "Couldn't find any perls" unless @$perls_ref > 1;

find_first_mentions($perls_ref,   # perls to look in
                    \@provided,   # List of symbol names to look for
                    '*.h',        # Look in all hdrs.
                    1,            # Strip comments
                   'M'
                   );

# Now look for functions that we didn't test in mktodo.pl, generally because
# these were hidden behind #ifdef's.
my $base_ref = parse_todo($base_dir);
my @functions = parse_embed(qw(parts/embed.fnc));

# We could just gather data for the publicly available ones, but having this
# information available for everything is useful.
#@functions = grep { exists $_->{flags}{A} } @functions;

# The ones we don't have info on are the ones in embed.fnc that aren't in the
# base files.  Certain of these will only be in the Perl_foo form.
my @missing = map { exists $base_ref->{$_->{name}}
                    ? ()
                    : ((exists $_->{flags}{p} && exists $_->{flags}{o})
                       ? ((exists $base_ref->{$_->{"Perl_$_->{name}"}}
                           ? ()
                           : "Perl_$_->{name}"))
                       : $_->{name})
                  } @functions;

# These symbols will be found in the autogen'd files, and they may be
# commented out in them.
find_first_mentions($perls_ref,
                    \@missing,
                    [ 'embed.h', 'proto.h' ],
                    0,          # Don't strip comments
                   'F'
                   );

sub symbol_order    # Sort based on first word on line
{
    my $stripped_a = $a =~ s/ ^ \s* //rx;
    $stripped_a =~ s/ \s.* //x;

    my $stripped_b = $b =~ s/ ^ \s* //rx;
    $stripped_b =~ s/ \s.* //x;

    return dictionary_order($stripped_a, $stripped_b);
}

sub format_output_line
{
    my $sym = shift;
    my $code = shift;

    return sprintf "%-30s # $code $id_text\n", $sym;
}

sub find_first_mentions
{
    my $perls_ref =    shift;   # List of perls to look in
    my $look_for_ref = shift;   # List of symbol names to look for
    my $hdrs =         shift;   # Glob of hdrs to look in
    my $strip_comments = shift;
    my $code           = shift; # Mark entries as having this type

    use feature 'state';
    state $first_perl = 1;

    $hdrs = [ $hdrs ] unless ref $hdrs;

    my %remaining;
    $remaining{$_} = $code for @$look_for_ref;

    my %v;

    # We look in descending order of perl versions.  Each time through the
    # loop %remaining is narrowed.
    for my $p (@$perls_ref) {
        print "checking perl $p->{version}...\n";

        # Get the hdr files associated with this version
        my $archlib = `$p->{path} -MConfig -l -e 'print \$Config{archlib}'`;
        chomp $archlib;
        local @ARGV;
        push @ARGV, glob "$archlib/CORE/$_" for @$hdrs;

        # %sym's keys are every single thing that looks like an identifier
        # (beginning with a non-digit \w, followed by \w*) that occurs in any
        # header, regardless of where (outside of comments).  For macros, it
        # can't end in an underscore, nor be like 'AbCd', which are marks for
        # internal.
        my %sym;

        local $/ = undef;
        while (<<>>) {  # Read in the whole next file as one string.

            # This would override function definitions with macro ones
            next if $code eq 'M' && $ARGV =~ m! / embed\.h $ !x;

            my $is_config_h = $ARGV =~ m! / config\.h $ !x;

            my $contents = $_;

            # Strip initial '/*' in config.h /*#define... lines.  This just
            # means the item isn't available on the platform this program is
            # being run on.
            $contents =~ s! ^ /\* \s* (?=\#\s*define\s) !!mx if $is_config_h;

            # Strip comments, from perl faq
            if ($strip_comments) {
                $contents =~ s#/\*[^*]*\*+([^/*][^*]*\*+)*/|("(\\.|[^"\\])*"|'(\\.|[^'\\])*'|.[^/"'\\]*)#defined $2 ? $2 : ""#gse;
            }

            # For macros, we look for #defines
            if ($code eq 'M') {
                my %defines;

                while ($contents =~ m/ ^ \s* \# \s* define \s+

                                       # A symbol not ending in underscore
                                       ( [A-Za-z][_A-Za-z0-9]*[A-Za-z0-9] )
                                     /mxg)
                {
                    my $this_define = $1;

                    # These are internal and not of external interest, so just
                    # noise if we were to index them
                    next if $this_define =~ / ^ PERL_ARGS_ASSERT /x;

                    # Names like AbCd are internal
                    next if $this_define =~ /[[:upper:]][[:lower:]][[:upper:]][[:lower:]]/;

                    $defines{$this_define}++;
                }
                $sym{$_}++ for keys %defines;

                # For functions, etc we get all the symbols for the latest
                # perl passed in, but for macros, it is just the ones for the
                # known documented ones, and we have to find the rest.  This
                # allows us to keep the logic for that in just one place:
                # here.
                if ($first_perl) {

                    # config.h symbols are documented; the rest aren't, so use
                    # different flags so downstream processing knows which are
                    # which.
                    my $new_code = ($is_config_h) ? 'K' : 'Z';

                    foreach my $define (keys %defines) {

                        # Don't override input 'M' symbols, or duplicates.
                        next if defined $remaining{$define};
                        $remaining{$define} = $new_code;
                    }
                }
            }
            else {  # Look for potential function names; remember comments
                    # have been stripped off.
                $sym{$_}++ for /(\b[^\W\d]\w*)/g;
            }
        }

        # %remaining is narrowed to include only those identifier-like things
        # that are mentioned in one of the input hdrs in this release.  (If it
        # isn't even mentioned, it won't exist in the release.)  For those not
        # mentioned, a key is added of the identifier-like thing in %v.  It is
        # a subkey of this release's "todo" release, which is the next higher
        # one.  If we are at version n, we have already done version n+1 and
        # the provided element was mentioned there, and now it no longer is.
        # We take that to mean that to mean that the element became provided
        # for in n+1.
        foreach my $symbol (keys %remaining) {
            next if defined $sym{$symbol};  # Still exists in this release

            # Gone in this release, must have come into existence in the next
            # higher one.
            $v{$p->{todo}}{$symbol} = delete $remaining{$symbol};
        }

        $first_perl = 0;
    }

    # After all releases, assume that anything still defined came into
    # existence in that earliest release.
    $v{$perls_ref->[-1]{file}}{$_} = $remaining{$_} for keys %remaining;

    # Read in the parts/base files.  The hash ref has keys being all symbols
    # found in all the files in base/, which are all we are concerned with
    # became defined in.
    my $base_ref = parse_todo($base_dir);


    # Now add the results from above.  At this point, The keys of %v are the 7
    # digit BCD version numbers, and their subkeys are the symbols provided by
    # D:P that are first mentioned in this version, like this:
    #   '5009002' => {
    #                  'MY_CXT_CLONE' => 1,
    #                  'SV_NOSTEAL' => 1,
    #                  'UTF8_MAXBYTES' => 1
    #                },

    for my $version (keys %v) {

        # Things listed in blead (the most recent file) are special.  They are
        # there by default because we haven't found them anywhere, so they
        # don't really exist as far as we can determine, so shouldn't be
        # listed as existing.
        next if $version > $perls_ref->[0]->{file};

        # @new becomes the symbols for $version not already in the file for it
        my @new = sort symbol_order grep { !exists $base_ref->{$_} }
                                                                keys %{$v{$version}};
        @new or next; # Nothing new, skip writing

        my $file = $version;
        $file =~ s/\.//g;
        $file = "$base_dir/$file";
        -e $file or die "non-existent: $file\n";
        print "-- $file --\n";
        if ($write) {
            open my $fh, "+<", $file or die "$file: $!\n";
            my @lines = <$fh>;
            my $count = @lines;
            for my $new (@new) {
                my $line = format_output_line($new, $v{$version}{$new});
                next if grep { /$line/ } @lines;
                print "adding $new\n";
                push @lines, $line;
            }
            if (@lines != $count) {
                @lines = sort symbol_order @lines;
                truncate $fh, 0;
                seek $fh, 0, 0;
                print $fh @lines;
            }
            close $fh;
        }
    }
}