1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
|
if($^O eq "MacOS") {
print "1..0\n";
exit(0);
}
unless (-f "CAN_TALK_TO_OURSELF") {
print "1..0 # Skipped: Can't talk to ourself (misconfigured system)\n";
exit;
}
$| = 1; # autoflush
require IO::Socket; # make sure this work before we try to make a HTTP::Daemon
# First we make ourself a daemon in another process
my $D = shift || '';
if ($D eq 'daemon') {
require HTTP::Daemon;
my $d = new HTTP::Daemon Timeout => 10;
print "Please to meet you at: <URL:", $d->url, ">\n";
open(STDOUT, $^O eq 'MSWin32' ? ">nul" : $^O eq 'VMS' ? ">NL:" : ">/dev/null");
while ($c = $d->accept) {
$r = $c->get_request;
if ($r) {
my $p = ($r->uri->path_segments)[1];
$p =~ s/\W//g;
my $func = lc("httpd_" . $r->method . "_$p");
#print STDERR "Calling $func...\n";
if (defined &$func) {
&$func($c, $r);
}
else {
$c->send_error(404);
}
}
$c = undef; # close connection
}
print STDERR "HTTP Server terminated\n";
exit;
}
else {
use Config;
my $perl = $Config{'perlpath'};
$perl = $^X if $^O eq 'VMS' or -x $^X and $^X =~ m,^([a-z]:)?/,i;
open(DAEMON , "$perl robot/ua.t daemon |") or die "Can't exec daemon: $!";
}
print "1..8\n";
$greating = <DAEMON>;
$greating =~ /(<[^>]+>)/;
require URI;
my $base = URI->new($1);
sub url {
my $u = URI->new(@_);
$u = $u->abs($_[1]) if @_ > 1;
$u->as_string;
}
print "Will access HTTP server at $base\n";
require LWP::RobotUA;
require HTTP::Request;
$ua = new LWP::RobotUA 'lwp-spider/0.1', 'gisle@aas.no';
$ua->delay(0.05); # rather quick robot
#----------------------------------------------------------------
sub httpd_get_robotstxt
{
my($c,$r) = @_;
$c->send_basic_header;
$c->print("Content-Type: text/plain");
$c->send_crlf;
$c->send_crlf;
$c->print("User-Agent: *
Disallow: /private
");
}
sub httpd_get_someplace
{
my($c,$r) = @_;
$c->send_basic_header;
$c->print("Content-Type: text/plain");
$c->send_crlf;
$c->send_crlf;
$c->print("Okidok\n");
}
$res = $ua->get( url("/someplace", $base) );
#print $res->as_string;
print "not " unless $res->is_success;
print "ok 1\n";
$res = $ua->get( url("/private/place", $base) );
#print $res->as_string;
print "not " unless $res->code == 403
and $res->message =~ /robots.txt/;
print "ok 2\n";
$res = $ua->get( url("/foo", $base) );
#print $res->as_string;
print "not " unless $res->code == 404; # not found
print "ok 3\n";
# Let the robotua generate "Service unavailable/Retry After response";
$ua->delay(1);
$ua->use_sleep(0);
$res = $ua->get( url("/foo", $base) );
#print $res->as_string;
print "not " unless $res->code == 503 # Unavailable
and $res->header("Retry-After");
print "ok 4\n";
#----------------------------------------------------------------
print "Terminating server...\n";
sub httpd_get_quit
{
my($c) = @_;
$c->send_error(503, "Bye, bye");
exit; # terminate HTTP server
}
$ua->delay(0);
$res = $ua->get( url("/quit", $base) );
print "not " unless $res->code == 503 and $res->content =~ /Bye, bye/;
print "ok 5\n";
#---------------------------------------------------------------
$ua->delay(1);
# host_wait() should be around 60s now
print "not " unless abs($ua->host_wait($base->host_port) - 60) < 5;
print "ok 6\n";
# Number of visits to this place should be
print "not " unless $ua->no_visits($base->host_port) == 4;
print "ok 7\n";
# RobotUA used to have problem with mailto URLs.
$ENV{SENDMAIL} = "dummy";
$res = $ua->get("mailto:gisle\@aas.no");
#print $res->as_string;
print "not " unless $res->code == 400 && $res->message eq "Library does not allow method GET for 'mailto:' URLs";
print "ok 8\n";
|