diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2015-02-14 18:44:00 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2015-02-14 18:44:00 +0000 |
commit | 20f161ca116b8a4fc7ac986a317d7f6d43e5c173 (patch) | |
tree | e61bb7f98a2c80dd9264c5f3810c4765419e64b7 /bin | |
download | libwww-perl-tarball-master.tar.gz |
libwww-perl-6.13HEADlibwww-perl-6.13master
Diffstat (limited to 'bin')
-rwxr-xr-x | bin/lwp-download | 330 | ||||
-rwxr-xr-x | bin/lwp-dump | 120 | ||||
-rwxr-xr-x | bin/lwp-mirror | 105 | ||||
-rwxr-xr-x | bin/lwp-request | 552 |
4 files changed, 1107 insertions, 0 deletions
diff --git a/bin/lwp-download b/bin/lwp-download new file mode 100755 index 0000000..bf32a85 --- /dev/null +++ b/bin/lwp-download @@ -0,0 +1,330 @@ +#!/usr/bin/perl -w + +=head1 NAME + +lwp-download - Fetch large files from the web + +=head1 SYNOPSIS + +B<lwp-download> [B<-a>] [B<-s>] <I<url>> [<I<local path>>] + +=head1 DESCRIPTION + +The B<lwp-download> program will save the file at I<url> to a local +file. + +If I<local path> is not specified, then the current directory is +assumed. + +If I<local path> is a directory, then the last segment of the path of the +I<url> is appended to form a local filename. If the I<url> path ends with +slash the name "index" is used. With the B<-s> option pick up the last segment +of the filename from server provided sources like the Content-Disposition +header or any redirect URLs. A file extension to match the server reported +Content-Type might also be appended. If a file with the produced filename +already exists, then B<lwp-download> will prompt before it overwrites and will +fail if its standard input is not a terminal. This form of invocation will +also fail is no acceptable filename can be derived from the sources mentioned +above. + +If I<local path> is not a directory, then it is simply used as the +path to save into. If the file already exists it's overwritten. + +The I<lwp-download> program is implemented using the I<libwww-perl> +library. It is better suited to down load big files than the +I<lwp-request> program because it does not store the file in memory. +Another benefit is that it will keep you updated about its progress +and that you don't have much options to worry about. + +Use the C<-a> option to save the file in text (ascii) mode. Might +make a difference on DOSish systems. + +=head1 EXAMPLE + +Fetch the newest and greatest perl version: + + $ lwp-download http://www.perl.com/CPAN/src/latest.tar.gz + Saving to 'latest.tar.gz'... + 11.4 MB received in 8 seconds (1.43 MB/sec) + +=head1 AUTHOR + +Gisle Aas <gisle@aas.no> + +=cut + +#' get emacs out of quote mode + +use strict; + +use LWP::UserAgent (); +use LWP::MediaTypes qw(guess_media_type media_suffix); +use URI (); +use HTTP::Date (); +use Encode; +use Encode::Locale; + +my $progname = $0; +$progname =~ s,.*/,,; # only basename left in progname +$progname =~ s,.*\\,, if $^O eq "MSWin32"; +$progname =~ s/\.\w*$//; # strip extension if any + +#parse option +use Getopt::Std; +my %opt; +unless (getopts('as', \%opt)) { + usage(); +} + +my $url = URI->new(decode(locale => shift) || usage()); +my $argfile = encode(locale_fs => decode(locale => shift)); +usage() if defined($argfile) && !length($argfile); +my $VERSION = "6.09"; + +my $ua = LWP::UserAgent->new( + agent => "lwp-download/$VERSION ", + keep_alive => 1, + env_proxy => 1, +); + +my $file; # name of file we download into +my $length; # total number of bytes to download +my $flength; # formatted length +my $size = 0; # number of bytes received +my $start_t; # start time of download +my $last_dur; # time of last callback + +my $shown = 0; # have we called the show() function yet + +$SIG{INT} = sub { die "Interrupted\n"; }; + +$| = 1; # autoflush + +my $res = $ua->request(HTTP::Request->new(GET => $url), + sub { + unless(defined $file) { + my $res = $_[1]; + + my $directory; + if (defined $argfile && -d $argfile) { + ($directory, $argfile) = ($argfile, undef); + } + + unless (defined $argfile) { + # find a suitable name to use + $file = $opt{s} && $res->filename; + + # if this fails we try to make something from the URL + unless ($file) { + $file = ($url->path_segments)[-1]; + if (!defined($file) || !length($file)) { + $file = "index"; + my $suffix = media_suffix($res->content_type); + $file .= ".$suffix" if $suffix; + } + elsif ($url->scheme eq 'ftp' || + $file =~ /\.t[bg]z$/ || + $file =~ /\.tar(\.(Z|gz|bz2?))?$/ + ) { + # leave the filename as it was + } + else { + my $ct = guess_media_type($file); + unless ($ct eq $res->content_type) { + # need a better suffix for this type + my $suffix = media_suffix($res->content_type); + $file .= ".$suffix" if $suffix; + } + } + } + + # validate that we don't have a harmful filename now. The server + # might try to trick us into doing something bad. + if (!length($file) || + $file =~ s/([^a-zA-Z0-9_\.\-\+\~])/sprintf "\\x%02x", ord($1)/ge || + $file =~ /^\./ + ) + { + die "Will not save <$url> as \"$file\".\nPlease override file name on the command line.\n"; + } + + if (defined $directory) { + require File::Spec; + $file = File::Spec->catfile($directory, $file); + } + + # Check if the file is already present + if (-l $file) { + die "Will not save <$url> to link \"$file\".\nPlease override file name on the command line.\n"; + } + elsif (-f _) { + die "Will not save <$url> as \"$file\" without verification.\nEither run from terminal or override file name on the command line.\n" + unless -t; + $shown = 1; + print "Overwrite $file? [y] "; + my $ans = <STDIN>; + unless (defined($ans) && $ans =~ /^y?\n/) { + if (defined $ans) { + print "Ok, aborting.\n"; + } + else { + print "\nAborting.\n"; + } + exit 1; + } + $shown = 0; + } + elsif (-e _) { + die "Will not save <$url> as \"$file\". Path exists.\n"; + } + else { + print "Saving to '$file'...\n"; + use Fcntl qw(O_WRONLY O_EXCL O_CREAT); + sysopen(FILE, $file, O_WRONLY|O_EXCL|O_CREAT) || + die "Can't open $file: $!"; + } + } + else { + $file = $argfile; + } + unless (fileno(FILE)) { + open(FILE, ">", $file) || die "Can't open $file: $!\n"; + } + binmode FILE unless $opt{a}; + $length = $res->content_length; + $flength = fbytes($length) if defined $length; + $start_t = time; + $last_dur = 0; + } + + print FILE $_[0] or die "Can't write to $file: $!\n"; + $size += length($_[0]); + + if (defined $length) { + my $dur = time - $start_t; + if ($dur != $last_dur) { # don't update too often + $last_dur = $dur; + my $perc = $size / $length; + my $speed; + $speed = fbytes($size/$dur) . "/sec" if $dur > 3; + my $secs_left = fduration($dur/$perc - $dur); + $perc = int($perc*100); + my $show = "$perc% of $flength"; + $show .= " (at $speed, $secs_left remaining)" if $speed; + show($show, 1); + } + } + else { + show( fbytes($size) . " received"); + } + } +); + +if (fileno(FILE)) { + close(FILE) || die "Can't write to $file: $!\n"; + + show(""); # clear text + print "\r"; + print fbytes($size); + print " of ", fbytes($length) if defined($length) && $length != $size; + print " received"; + my $dur = time - $start_t; + if ($dur) { + my $speed = fbytes($size/$dur) . "/sec"; + print " in ", fduration($dur), " ($speed)"; + } + print "\n"; + + if (my $mtime = $res->last_modified) { + utime time, $mtime, $file; + } + + if ($res->header("X-Died") || !$res->is_success) { + if (my $died = $res->header("X-Died")) { + print "$died\n"; + } + if (-t) { + print "Transfer aborted. Delete $file? [n] "; + my $ans = <STDIN>; + if (defined($ans) && $ans =~ /^y\n/) { + unlink($file) && print "Deleted.\n"; + } + elsif ($length > $size) { + print "Truncated file kept: ", fbytes($length - $size), " missing\n"; + } + else { + print "File kept.\n"; + } + exit 1; + } + else { + print "Transfer aborted, $file kept\n"; + } + } + exit 0; +} + +# Did not manage to create any file +print "\n" if $shown; +if (my $xdied = $res->header("X-Died")) { + print "$progname: Aborted\n$xdied\n"; +} +else { + print "$progname: ", $res->status_line, "\n"; +} +exit 1; + + +sub fbytes +{ + my $n = int(shift); + if ($n >= 1024 * 1024) { + return sprintf "%.3g MB", $n / (1024.0 * 1024); + } + elsif ($n >= 1024) { + return sprintf "%.3g KB", $n / 1024.0; + } + else { + return "$n bytes"; + } +} + +sub fduration +{ + use integer; + my $secs = int(shift); + my $hours = $secs / (60*60); + $secs -= $hours * 60*60; + my $mins = $secs / 60; + $secs %= 60; + if ($hours) { + return "$hours hours $mins minutes"; + } + elsif ($mins >= 2) { + return "$mins minutes"; + } + else { + $secs += $mins * 60; + return "$secs seconds"; + } +} + + +BEGIN { + my @ani = qw(- \ | /); + my $ani = 0; + + sub show + { + my($mess, $show_ani) = @_; + print "\r$mess" . (" " x (75 - length $mess)); + print $show_ani ? "$ani[$ani++]\b" : " "; + $ani %= @ani; + $shown++; + } +} + +sub usage +{ + die "Usage: $progname [-a] <url> [<lpath>]\n"; +} diff --git a/bin/lwp-dump b/bin/lwp-dump new file mode 100755 index 0000000..4faa414 --- /dev/null +++ b/bin/lwp-dump @@ -0,0 +1,120 @@ +#!/usr/bin/perl -w + +use strict; +use LWP::UserAgent (); +use Getopt::Long qw(GetOptions); +use Encode; +use Encode::Locale; + +my $VERSION = "6.09"; + +GetOptions(\my %opt, + 'parse-head', + 'max-length=n', + 'keep-client-headers', + 'method=s', + 'agent=s', + 'request', +) || usage(); + +my $url = shift || usage(); +@ARGV && usage(); + +sub usage { + (my $progname = $0) =~ s,.*/,,; + die <<"EOT"; +Usage: $progname [options] <url> + +Recognized options are: + --agent <str> + --keep-client-headers + --max-length <n> + --method <str> + --parse-head + --request + +EOT +} + +my $ua = LWP::UserAgent->new( + parse_head => $opt{'parse-head'} || 0, + keep_alive => 1, + env_proxy => 1, + agent => $opt{agent} || "lwp-dump/$VERSION ", +); + +my $req = HTTP::Request->new($opt{method} || 'GET' => decode(locale => $url)); +my $res = $ua->simple_request($req); +$res->remove_header(grep /^Client-/, $res->header_field_names) + unless $opt{'keep-client-headers'} or + ($res->header("Client-Warning") || "") eq "Internal response"; + +if ($opt{request}) { + $res->request->dump; + print "\n"; +} + +$res->dump(maxlength => $opt{'max-length'}); + +__END__ + +=head1 NAME + +lwp-dump - See what headers and content is returned for a URL + +=head1 SYNOPSIS + +B<lwp-dump> [ I<options> ] I<URL> + +=head1 DESCRIPTION + +The B<lwp-dump> program will get the resource identified by the URL and then +dump the response object to STDOUT. This will display the headers returned and +the initial part of the content, escaped so that it's safe to display even +binary content. The escapes syntax used is the same as for Perl's double +quoted strings. If there is no content the string "(no content)" is shown in +its place. + +The following options are recognized: + +=over + +=item B<--agent> I<str> + +Override the user agent string passed to the server. + +=item B<--keep-client-headers> + +LWP internally generate various C<Client-*> headers that are stripped by +B<lwp-dump> in order to show the headers exactly as the server provided them. +This option will suppress this. + +=item B<--max-length> I<n> + +How much of the content to show. The default is 512. Set this +to 0 for unlimited. + +If the content is longer then the string is chopped at the +limit and the string "...\n(### more bytes not shown)" +appended. + +=item B<--method> I<str> + +Use the given method for the request instead of the default "GET". + +=item B<--parse-head> + +By default B<lwp-dump> will not try to initialize headers by looking at the +head section of HTML documents. This option enables this. This corresponds to +L<LWP::UserAgent/"parse_head">. + +=item B<--request> + +Also dump the request sent. + +=back + +=head1 SEE ALSO + +L<lwp-request>, L<LWP>, L<HTTP::Message/"dump"> + diff --git a/bin/lwp-mirror b/bin/lwp-mirror new file mode 100755 index 0000000..5d8c401 --- /dev/null +++ b/bin/lwp-mirror @@ -0,0 +1,105 @@ +#!/usr/bin/perl -w + +# Simple mirror utility using LWP + +=head1 NAME + +lwp-mirror - Simple mirror utility + +=head1 SYNOPSIS + + lwp-mirror [-v] [-t timeout] <url> <local file> + +=head1 DESCRIPTION + +This program can be used to mirror a document from a WWW server. The +document is only transferred if the remote copy is newer than the local +copy. If the local copy is newer nothing happens. + +Use the C<-v> option to print the version number of this program. + +The timeout value specified with the C<-t> option. The timeout value +is the time that the program will wait for response from the remote +server before it fails. The default unit for the timeout value is +seconds. You might append "m" or "h" to the timeout value to make it +minutes or hours, respectively. + +Because this program is implemented using the LWP library, it only +supports the protocols that LWP supports. + +=head1 SEE ALSO + +L<lwp-request>, L<LWP> + +=head1 AUTHOR + +Gisle Aas <gisle@aas.no> + +=cut + + +use LWP::Simple qw(mirror is_success status_message $ua); +use Getopt::Std; +use Encode; +use Encode::Locale; + +$progname = $0; +$progname =~ s,.*/,,; # use basename only +$progname =~ s/\.\w*$//; #strip extension if any + +$VERSION = "6.09"; + +$opt_h = undef; # print usage +$opt_v = undef; # print version +$opt_t = undef; # timeout + +unless (getopts("hvt:")) { + usage(); +} + +if ($opt_v) { + require LWP; + my $DISTNAME = 'libwww-perl-' . LWP::Version(); + die <<"EOT"; +This is lwp-mirror version $VERSION ($DISTNAME) + +Copyright 1995-1999, Gisle Aas. + +This program is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. +EOT +} + +$url = decode(locale => shift) or usage(); +$file = encode(locale_fs => decode(locale => shift)) or usage(); +usage() if $opt_h or @ARGV; + +if (defined $opt_t) { + $opt_t =~ /^(\d+)([smh])?/; + die "$progname: Illegal timeout value!\n" unless defined $1; + $timeout = $1; + $timeout *= 60 if ($2 eq "m"); + $timeout *= 3600 if ($2 eq "h"); + $ua->timeout($timeout); +} + +$rc = mirror($url, $file); + +if ($rc == 304) { + print STDERR "$progname: $file is up to date\n" +} +elsif (!is_success($rc)) { + print STDERR "$progname: $rc ", status_message($rc), " ($url)\n"; + exit 1; +} +exit; + + +sub usage +{ + die <<"EOT"; +Usage: $progname [-options] <url> <file> + -v print version number of program + -t <timeout> Set timeout value +EOT +} diff --git a/bin/lwp-request b/bin/lwp-request new file mode 100755 index 0000000..d934404 --- /dev/null +++ b/bin/lwp-request @@ -0,0 +1,552 @@ +#!/usr/bin/perl -w + +# Simple user agent using LWP library. + +=head1 NAME + +lwp-request, GET, POST, HEAD - Simple command line user agent + +=head1 SYNOPSIS + +B<lwp-request> [B<-afPuUsSedvhx>] [B<-m> I<method>] [B<-b> I<base URL>] [B<-t> I<timeout>] + [B<-i> I<if-modified-since>] [B<-c> I<content-type>] + [B<-C> I<credentials>] [B<-p> I<proxy-url>] [B<-o> I<format>] I<url>... + +=head1 DESCRIPTION + +This program can be used to send requests to WWW servers and your +local file system. The request content for POST and PUT +methods is read from stdin. The content of the response is printed on +stdout. Error messages are printed on stderr. The program returns a +status value indicating the number of URLs that failed. + +The options are: + +=over 4 + +=item -m <method> + +Set which method to use for the request. If this option is not used, +then the method is derived from the name of the program. + +=item -f + +Force request through, even if the program believes that the method is +illegal. The server might reject the request eventually. + +=item -b <uri> + +This URI will be used as the base URI for resolving all relative URIs +given as argument. + +=item -t <timeout> + +Set the timeout value for the requests. The timeout is the amount of +time that the program will wait for a response from the remote server +before it fails. The default unit for the timeout value is seconds. +You might append "m" or "h" to the timeout value to make it minutes or +hours, respectively. The default timeout is '3m', i.e. 3 minutes. + +=item -i <time> + +Set the If-Modified-Since header in the request. If I<time> is the +name of a file, use the modification timestamp for this file. If +I<time> is not a file, it is parsed as a literal date. Take a look at +L<HTTP::Date> for recognized formats. + +=item -c <content-type> + +Set the Content-Type for the request. This option is only allowed for +requests that take a content, i.e. POST and PUT. You can +force methods to take content by using the C<-f> option together with +C<-c>. The default Content-Type for POST is +C<application/x-www-form-urlencoded>. The default Content-type for +the others is C<text/plain>. + +=item -p <proxy-url> + +Set the proxy to be used for the requests. The program also loads +proxy settings from the environment. You can disable this with the +C<-P> option. + +=item -P + +Don't load proxy settings from environment. + +=item -H <header> + +Send this HTTP header with each request. You can specify several, e.g.: + + lwp-request \ + -H 'Referer: http://other.url/' \ + -H 'Host: somehost' \ + http://this.url/ + +=item -C <username>:<password> + +Provide credentials for documents that are protected by Basic +Authentication. If the document is protected and you did not specify +the username and password with this option, then you will be prompted +to provide these values. + +=back + +The following options controls what is displayed by the program: + +=over 4 + +=item -u + +Print request method and absolute URL as requests are made. + +=item -U + +Print request headers in addition to request method and absolute URL. + +=item -s + +Print response status code. This option is always on for HEAD requests. + +=item -S + +Print response status chain. This shows redirect and authorization +requests that are handled by the library. + +=item -e + +Print response headers. This option is always on for HEAD requests. + +=item -E + +Print response status chain with full response headers. + +=item -d + +Do B<not> print the content of the response. + +=item -o <format> + +Process HTML content in various ways before printing it. If the +content type of the response is not HTML, then this option has no +effect. The legal format values are; I<text>, I<ps>, I<links>, +I<html> and I<dump>. + +If you specify the I<text> format then the HTML will be formatted as +plain latin1 text. If you specify the I<ps> format then it will be +formatted as Postscript. + +The I<links> format will output all links found in the HTML document. +Relative links will be expanded to absolute ones. + +The I<html> format will reformat the HTML code and the I<dump> format +will just dump the HTML syntax tree. + +Note that the C<HTML-Tree> distribution needs to be installed for this +option to work. In addition the C<HTML-Format> distribution needs to +be installed for I<-o text> or I<-o ps> to work. + +=item -v + +Print the version number of the program and quit. + +=item -h + +Print usage message and quit. + +=item -a + +Set text(ascii) mode for content input and output. If this option is not +used, content input and output is done in binary mode. + +=back + +Because this program is implemented using the LWP library, it will +only support the protocols that LWP supports. + +=head1 SEE ALSO + +L<lwp-mirror>, L<LWP> + +=head1 COPYRIGHT + +Copyright 1995-1999 Gisle Aas. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=head1 AUTHOR + +Gisle Aas <gisle@aas.no> + +=cut + +$progname = $0; +$progname =~ s,.*[\\/],,; # use basename only +$progname =~ s/\.\w*$//; # strip extension, if any + +$VERSION = "6.09"; + + +require LWP; + +use URI; +use URI::Heuristic qw(uf_uri); +use Encode; +use Encode::Locale; + +use HTTP::Status qw(status_message); +use HTTP::Date qw(time2str str2time); + + +# This table lists the methods that are allowed. It should really be +# a superset for all methods supported for every scheme that may be +# supported by the library. Currently it might be a bit too HTTP +# specific. You might use the -f option to force a method through. +# +# "" = No content in request, "C" = Needs content in request +# +%allowed_methods = ( + GET => "", + HEAD => "", + POST => "C", + PUT => "C", + DELETE => "", + TRACE => "", + OPTIONS => "", +); + + +# We make our own specialization of LWP::UserAgent that asks for +# user/password if document is protected. +{ + package RequestAgent; + @ISA = qw(LWP::UserAgent); + + sub new + { + my $self = LWP::UserAgent::new(@_); + $self->agent("lwp-request/$main::VERSION "); + $self; + } + + sub get_basic_credentials + { + my($self, $realm, $uri) = @_; + if ($main::options{'C'}) { + return split(':', $main::options{'C'}, 2); + } + elsif (-t) { + my $netloc = $uri->host_port; + print STDERR "Enter username for $realm at $netloc: "; + my $user = <STDIN>; + chomp($user); + return (undef, undef) unless length $user; + print STDERR "Password: "; + system("stty -echo"); + my $password = <STDIN>; + system("stty echo"); + print STDERR "\n"; # because we disabled echo + chomp($password); + return ($user, $password); + } + else { + return (undef, undef) + } + } +} + +$method = uc(lc($progname) eq "lwp-request" ? "GET" : $progname); + +# Parse command line +use Getopt::Long; + +my @getopt_args = ( + 'a', # content i/o in text(ascii) mode + 'm=s', # set method + 'f', # make request even if method is not in %allowed_methods + 'b=s', # base url + 't=s', # timeout + 'i=s', # if-modified-since + 'c=s', # content type for POST + 'C=s', # credentials for basic authorization + 'H=s@', # extra headers, form "Header: value string" + # + 'u', # display method and URL of request + 'U', # display request headers also + 's', # display status code + 'S', # display whole chain of status codes + 'e', # display response headers (default for HEAD) + 'E', # display whole chain of headers + 'd', # don't display content + # + 'h', # print usage + 'v', # print version + # + 'p=s', # proxy URL + 'P', # don't load proxy setting from environment + # + 'o=s', # output format +); + +Getopt::Long::config("noignorecase", "bundling"); +unless (GetOptions(\%options, @getopt_args)) { + usage(); +} +if ($options{'v'}) { + require LWP; + my $DISTNAME = 'libwww-perl-' . LWP::Version(); + die <<"EOT"; +This is lwp-request version $VERSION ($DISTNAME) + +Copyright 1995-1999, Gisle Aas. + +This program is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. +EOT +} + +usage() if $options{'h'} || !@ARGV; + +# Create the user agent object +$ua = RequestAgent->new; + +# Load proxy settings from *_proxy environment variables. +$ua->env_proxy unless $options{'P'}; + +$method = uc($options{'m'}) if defined $options{'m'}; + +if ($options{'f'}) { + if ($options{'c'}) { + $allowed_methods{$method} = "C"; # force content + } + else { + $allowed_methods{$method} = ""; + } +} +elsif (!defined $allowed_methods{$method}) { + die "$progname: $method is not an allowed method\n"; +} + +if ($options{'S'} || $options{'E'}) { + $options{'U'} = 1 if $options{'E'}; + $options{'E'} = 1 if $options{'e'}; + $options{'S'} = 1; + $options{'s'} = 1; + $options{'u'} = 1; +} + +if ($method eq "HEAD") { + $options{'s'} = 1; + $options{'e'} = 1 unless $options{'d'}; + $options{'d'} = 1; +} + +$options{'u'} = 1 if $options{'U'}; +$options{'s'} = 1 if $options{'e'}; + +if (defined $options{'t'}) { + $options{'t'} =~ /^(\d+)([smh])?/; + die "$progname: Illegal timeout value!\n" unless defined $1; + $timeout = $1; + if (defined $2) { + $timeout *= 60 if $2 eq "m"; + $timeout *= 3600 if $2 eq "h"; + } + $ua->timeout($timeout); +} + +if (defined $options{'i'}) { + if (-e $options{'i'}) { + $time = (stat _)[9]; + } + else { + $time = str2time($options{'i'}); + die "$progname: Illegal time syntax for -i option\n" + unless defined $time; + } + $options{'i'} = time2str($time); +} + +$content = undef; +$user_ct = undef; +if ($allowed_methods{$method} eq "C") { + # This request needs some content + unless (defined $options{'c'}) { + # set default content type + $options{'c'} = ($method eq "POST") ? + "application/x-www-form-urlencoded" + : "text/plain"; + } + else { + die "$progname: Illegal Content-type format\n" + unless $options{'c'} =~ m,^[\w\-]+/[\w\-.+]+(?:\s*;.*)?$,; + $user_ct++; + } + print STDERR "Please enter content ($options{'c'}) to be ${method}ed:\n" + if -t; + binmode STDIN unless -t or $options{'a'}; + $content = join("", <STDIN>); +} +else { + die "$progname: Can't set Content-type for $method requests\n" + if defined $options{'c'}; +} + +# Set up a request. We will use the same request object for all URLs. +$request = HTTP::Request->new($method); +$request->header('If-Modified-Since', $options{'i'}) if defined $options{'i'}; +for my $user_header (@{ $options{'H'} || [] }) { + my ($header_name, $header_value) = split /\s*:\s*/, $user_header, 2; + $header_name =~ s/^\s+//; + if (lc($header_name) eq "user-agent") { + $header_value .= $ua->agent if $header_value =~ /\s\z/; + $ua->agent($header_value); + } + else { + $request->push_header($header_name, $header_value); + } +} +#$request->header('Accept', '*/*'); +if ($options{'c'}) { # will always be set for request that wants content + my $header = ($user_ct ? 'header' : 'init_header'); + $request->$header('Content-Type', $options{'c'}); + $request->header('Content-Length', length $content); # Not really needed + $request->content($content); +} + +$errors = 0; + +sub show { + my $r = shift; + my $last = shift; + print $method, " ", $r->request->uri->as_string, "\n" if $options{'u'}; + print $r->request->headers_as_string, "\n" if $options{'U'}; + print $r->status_line, "\n" if $options{'s'}; + print $r->headers_as_string, "\n" if $options{'E'} or $last; +} + +# Ok, now we perform the requests, one URL at a time +while ($url = shift) { + # Create the URL object, but protect us against bad URLs + eval { + if ($url =~ /^\w+:/ || $options{'b'}) { # is there any scheme specification + $url = URI->new(decode(locale => $url), decode(locale => $options{'b'})); + $url = $url->abs(decode(locale => $options{'b'})) if $options{'b'}; + } + else { + $url = uf_uri($url); + } + }; + if ($@) { + $@ =~ s/ at .* line \d+.*//; + print STDERR $@; + $errors++; + next; + } + + $ua->proxy($url->scheme, decode(locale => $options{'p'})) if $options{'p'}; + + # Send the request and get a response back from the server + $request->uri($url); + $response = $ua->request($request); + + if ($options{'S'}) { + for my $r ($response->redirects) { + show($r); + } + } + show($response, $options{'e'}); + + unless ($options{'d'}) { + if ($options{'o'} && + $response->content_type eq 'text/html') { + eval { + require HTML::Parse; + }; + if ($@) { + if ($@ =~ m,^Can't locate HTML/Parse.pm in \@INC,) { + die "The HTML-Tree distribution need to be installed for the -o option to be used.\n"; + } + else { + die $@; + } + } + my $html = HTML::Parse::parse_html($response->content); + { + $options{'o'} eq 'ps' && do { + require HTML::FormatPS; + my $f = HTML::FormatPS->new; + print $f->format($html); + last; + }; + $options{'o'} eq 'text' && do { + require HTML::FormatText; + my $f = HTML::FormatText->new; + print $f->format($html); + last; + }; + $options{'o'} eq 'html' && do { + print $html->as_HTML; + last; + }; + $options{'o'} eq 'links' && do { + my $base = $response->base; + $base = $options{'b'} if $options{'b'}; + for ( @{ $html->extract_links } ) { + my($link, $elem) = @$_; + my $tag = uc $elem->tag; + $link = URI->new($link)->abs($base)->as_string; + print "$tag\t$link\n"; + } + last; + }; + $options{'o'} eq 'dump' && do { + $html->dump; + last; + }; + # It is bad to not notice this before now :-( + die "Illegal -o option value ($options{'o'})\n"; + } + } + else { + binmode STDOUT unless $options{'a'}; + print $response->content; + } + } + + $errors++ unless $response->is_success; +} + +exit $errors; + + +sub usage +{ + die <<"EOT"; +Usage: $progname [-options] <url>... + -m <method> use method for the request (default is '$method') + -f make request even if $progname believes method is illegal + -b <base> Use the specified URL as base + -t <timeout> Set timeout value + -i <time> Set the If-Modified-Since header on the request + -c <conttype> use this content-type for POST, PUT, CHECKIN + -a Use text mode for content I/O + -p <proxyurl> use this as a proxy + -P don't load proxy settings from environment + -H <header> send this HTTP header (you can specify several) + -C <username>:<password> + provide credentials for basic authentication + + -u Display method and URL before any response + -U Display request headers (implies -u) + -s Display response status code + -S Display response status chain (implies -u) + -e Display response headers (implies -s) + -E Display whole chain of headers (implies -S and -U) + -d Do not display content + -o <format> Process HTML content in various ways + + -v Show program version + -h Print this message +EOT +} |