blob: db55c4953661d5a81903dcaec598c0aa1047b2f5 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
#!/usr/bin/perl
use strict;
use warnings 'all';
use LWP::Simple qw /$ua getstore/;
# %urls maps each URL found in the documentation to a hash whose keys are
# the files that mention it:  $urls{$url}{$file} = 1
my %urls;

# URLs that are never checked (presumably placeholders/examples used in the
# documentation -- confirm before extending the list).
my @dummy = qw(
    http://something.here
    http://www.pvhp.com
);
my %dummy;
@dummy{@dummy} = ();    # hash slice: turn the list into a lookup set

# Scan every POD file up to three directory levels down, plus the top-level
# README*/INSTALL files, relative to the current directory.
foreach my $file (<*/*.pod */*/*.pod */*/*/*.pod README README.* INSTALL>) {
    # Three-argument open: the file name comes from a glob and must never be
    # interpreted as an open mode or a pipe.
    open my $fh, '<', $file or die "Failed to open $file: $!\n";
    while (my $line = <$fh>) {
        # /g in a loop: record every URL on the line, not just the first one.
        # The lookahead stops the match before a word character followed by
        # "<" (e.g. the start of a POD formatting code such as "E<").
        while ($line =~ m{((?:http|ftp)://(?:(?!\w<)[-\w~?@=.])+)}g) {
            my $url = $1;
            $url =~ s/\.$//;    # a trailing "." is sentence punctuation
            # Test the exclusion list only after stripping the dot, so that
            # "http://www.pvhp.com." is skipped as well.
            next if exists $dummy{$url};
            $urls{$url}{$file} = 1;
        }
    }
    close $fh;
}
# fisher_yates_shuffle(\@deck)
#
# Shuffles the array referenced by the single argument in place using the
# Fisher-Yates algorithm: walking from the last index down to 1, each
# element is swapped with a randomly chosen element at or below its own
# position.
#
# Arrays with zero or one element are left untouched.  (Counting down with
# "while (--$i)" from an empty array starts at -1, which is true, and then
# dies on a negative subscript; iterating over an explicit index range
# avoids that.)
#
# Returns nothing; the caller's array is modified.
sub fisher_yates_shuffle {
    my $deck = shift;    # $deck is a reference to an array
    for my $i (reverse 1 .. $#{$deck}) {
        my $j = int rand($i + 1);    # 0 <= $j <= $i
        @{$deck}[$i, $j] = @{$deck}[$j, $i];
    }
    return;
}
# Work queue: every distinct URL collected in %urls, in shuffled order.
my @urls = do {
    my @queue = keys %urls;
    fisher_yates_shuffle(\@queue);
    @queue;
};
# todo()
#
# Progress indicator: writes the number of URLs still waiting in @urls to
# STDERR as "(N URLs)".
sub todo {
    my $remaining = @urls;    # array in scalar context: element count
    warn "($remaining URLs)\n";
}
# Batch sizing.  Each child process checks up to $MAXURL URLs, and up to
# $MAXFORK children are started per round.  $MAXFORK is derived as
# $MAXPROC / $MAXURL (at least 1).
# NOTE(review): $MAXPROC looks like a per-round budget of URLs in flight
# rather than a process count -- confirm the intended meaning.
my $MAXPROC = 40;
my $MAXURL  = 10;
my $MAXFORK = $MAXPROC < $MAXURL ? 1 : $MAXPROC / $MAXURL;

# Unbuffer both STDERR and STDOUT; STDOUT is left selected.
select(STDERR); $| = 1;
select(STDOUT); $| = 1;

# Each pass: report progress, hand out batches to freshly forked children,
# then wait for all of them before starting the next round.
while (@urls) {
    my @list;    # $list[$i] is the batch assigned to the $i-th child
    my $pid;
    my $i;

    todo();

    # Stop forking as soon as the queue is empty, so that no child is
    # started with nothing to do.  The first iteration always runs because
    # the enclosing loop guarantees @urls is non-empty.
    for ($i = 0; $i < $MAXFORK && @urls; $i++) {
        $list[$i] = [ splice @urls, 0, $MAXURL ];
        $pid = fork;
        die "Failed to fork: $!\n" unless defined $pid;
        last unless $pid;    # Child: leave the loop with $i = own batch index.
    }

    if ($pid) {
        # Parent.
        warn "(waiting)\n";
        1 until -1 == wait;    # Reap until there are no children left.
    }
    else {
        # Child: fetch each URL of the batch, discarding the content, and
        # report every status code other than 200 together with the files
        # that mention the URL.
        foreach my $url (@{ $list[$i] }) {
            my $code = getstore $url, "/dev/null";
            next if $code == 200;
            # Sorted so that report lines are stable from run to run.
            my $f = join ", " => sort keys %{ $urls{$url} };
            printf "%03d %s: %s\n" => $code, $url, $f;
        }
        exit;
    }
}
__END__
|