summaryrefslogtreecommitdiff
path: root/Porting/checkURL.pl
blob: db55c4953661d5a81903dcaec598c0aa1047b2f5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/perl

use strict;
use warnings 'all';

use LWP::Simple qw /$ua getstore/;

# %urls maps each URL found to a hash whose keys are the files mentioning it:
#   $urls{$url}{$file} = 1
my %urls;

# URLs that are deliberately never checked; they are filtered out below.
my @dummy = qw(
	   http://something.here
	   http://www.pvhp.com
	      );
my %dummy;

# Turn the list into a lookup set (values are irrelevant, only keys matter).
@dummy{@dummy} = ();

# Scan the POD files up to three directory levels down, plus the top-level
# README*/INSTALL files, relative to the current directory.
foreach my $file (<*/*.pod */*/*.pod */*/*/*.pod README README.* INSTALL>) {
    # Three-argument open: the file name can never be taken for a mode or pipe.
    open my $fh, '<', $file or die "Failed to open $file: $!\n";
    while (my $line = <$fh>) {
        # /g in a loop picks up every URL on the line, not just the first.
        # The (?!\w<) lookahead stops the match before a word character that
        # is directly followed by '<' (e.g. a POD escape such as E<gt>).
        # NOTE: the character class contains no '/', so a match ends at the
        # first '/' after the "scheme://" prefix.
        while ($line =~ m{((?:http|ftp)://(?:(?!\w<)[-\w~?@=.])+)}g) {
            my $url = $1;
            # Drop a sentence-ending full stop that got swallowed by the match.
            $url =~ s/\.$//;
            # Filter after normalising so "http://something.here." is skipped too.
            next if exists $dummy{$url};
            $urls{$url}{$file} = 1;
        }
    }
    close $fh;
}

# fisher_yates_shuffle(\@deck)
#
# Shuffles the referenced array in place using the Fisher-Yates algorithm.
#
#   $deck - reference to the array to permute; it is modified directly.
#
# Returns nothing.  Arrays with zero or one element are left untouched
# (the index range below is empty for them), so an empty deck is safe.
sub fisher_yates_shuffle {
    my $deck = shift;  # $deck is a reference to an array

    # Walk from the last index down to 1, swapping each slot with a randomly
    # chosen slot at or below it.
    for my $i (reverse 1 .. $#$deck) {
        my $j = int rand($i + 1);
        @$deck[$i, $j] = @$deck[$j, $i];
    }

    return;
}

# Work queue: one entry per distinct URL (the keys of %urls).
my @urls = keys %urls;

# Randomise the queue in place before it is split into batches.
# NOTE(review): presumably done so that URLs from the same file or host are
# spread over different batches -- confirm.
fisher_yates_shuffle(\@urls);

# todo()
#
# Progress report: writes the number of URLs still waiting in @urls to
# STDERR, in the form "(N URLs)".
sub todo {
    my $remaining = @urls;   # array in scalar context = element count
    warn "($remaining URLs)\n";
}

# Batch sizing.
#   $MAXURL  - number of URLs handed to each child process.
#   $MAXFORK - number of children forked per round.
#   $MAXPROC - only used to derive $MAXFORK.
#              NOTE(review): looks like an overall budget of URLs in flight
#              per round -- confirm.
my $MAXPROC = 40;
my $MAXURL  = 10;
my $MAXFORK = $MAXPROC < $MAXURL ? 1 : $MAXPROC / $MAXURL;

# Turn on autoflush for both STDERR and STDOUT; STDOUT is left selected.
select(STDERR); $| = 1;
select(STDOUT); $| = 1;

# Each round: report progress, fork up to $MAXFORK children with a batch of
# URLs each, then wait for all of them before starting the next round.
while (@urls) {
    my @list;   # $list[$i] is the batch assigned to child number $i
    my $pid;    # result of the most recent fork (0 in the child)
    my $i;      # declared outside the loop: the child needs its own index

    todo();

    for ($i = 0; $i < $MAXFORK; $i++) {
        # Queue drained: do not fork children that would have nothing to do.
        # The enclosing while guarantees at least one child was forked, so
        # $pid is already set when this triggers.
        last unless @urls;

        $list[$i] = [ splice @urls, 0, $MAXURL ];
        $pid = fork;
        die "Failed to fork: $!\n" unless defined $pid;
        last unless $pid; # Child.
    }

    if ($pid) {
        # Parent.
        warn "(waiting)\n";
        1 until -1 == wait; # Reap.
    } else {
        # Child: fetch every URL of its batch, discarding the body, and
        # report each one whose status is not 200 together with the files
        # that mention it.
        foreach my $url (@{$list[$i]}) {
            my $code = getstore($url, "/dev/null");
            next if $code == 200;
            my $f = join ", " => keys %{$urls {$url}};
            printf "%03d  %s: %s\n" => $code, $url, $f;
        }

        # Never fall back into the parent's while loop.
        exit;
    }
}

__END__