summaryrefslogtreecommitdiff
path: root/lib/File
diff options
context:
space:
mode:
authorChris 'BinGOs' Williams <chris@bingosnet.co.uk>2009-09-02 19:53:47 +0200
committerSteffen Mueller <smueller@cpan.org>2009-09-02 19:53:47 +0200
commit99f226bb0b01e17a9592fd646bb56b069a5200e4 (patch)
tree59ac4d7f6297874be8cf5cfe0e960be63cf6b6e3 /lib/File
parent25209816f53a8d9986b7b64f9d3cf1004452aacc (diff)
downloadperl-99f226bb0b01e17a9592fd646bb56b069a5200e4.tar.gz
Move File::Fetch from lib to ext
Diffstat (limited to 'lib/File')
-rw-r--r--lib/File/Fetch.pm1382
-rw-r--r--lib/File/Fetch/t/01_File-Fetch.t253
2 files changed, 0 insertions, 1635 deletions
diff --git a/lib/File/Fetch.pm b/lib/File/Fetch.pm
deleted file mode 100644
index d093560126..0000000000
--- a/lib/File/Fetch.pm
+++ /dev/null
@@ -1,1382 +0,0 @@
-package File::Fetch;
-
-use strict;
-use FileHandle;
-use File::Temp;
-use File::Copy;
-use File::Spec;
-use File::Spec::Unix;
-use File::Basename qw[dirname];
-
-use Cwd qw[cwd];
-use Carp qw[carp];
-use IPC::Cmd qw[can_run run QUOTE];
-use File::Path qw[mkpath];
-use File::Temp qw[tempdir];
-use Params::Check qw[check];
-use Module::Load::Conditional qw[can_load];
-use Locale::Maketext::Simple Style => 'gettext';
-
-use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
- $BLACKLIST $METHOD_FAIL $VERSION $METHODS
- $FTP_PASSIVE $TIMEOUT $DEBUG $WARN
- ];
-
-$VERSION = '0.20';
-$VERSION = eval $VERSION; # avoid warnings with development releases
-$PREFER_BIN = 0; # XXX TODO implement
-$FROM_EMAIL = 'File-Fetch@example.com';
-$USER_AGENT = "File::Fetch/$VERSION";
-$BLACKLIST = [qw|ftp|];
-$METHOD_FAIL = { };
-$FTP_PASSIVE = 1;
-$TIMEOUT = 0;
-$DEBUG = 0;
-$WARN = 1;
-
-### methods available to fetch the file depending on the scheme
-$METHODS = {
- http => [ qw|lwp wget curl lftp lynx| ],
- ftp => [ qw|lwp netftp wget curl lftp ncftp ftp| ],
- file => [ qw|lwp lftp file| ],
- rsync => [ qw|rsync| ]
-};
-
-### silly warnings ###
-local $Params::Check::VERBOSE = 1;
-local $Params::Check::VERBOSE = 1;
-local $Module::Load::Conditional::VERBOSE = 0;
-local $Module::Load::Conditional::VERBOSE = 0;
-
-### see what OS we are on, important for file:// uris ###
-use constant ON_WIN => ($^O eq 'MSWin32');
-use constant ON_VMS => ($^O eq 'VMS');
-use constant ON_UNIX => (!ON_WIN);
-use constant HAS_VOL => (ON_WIN);
-use constant HAS_SHARE => (ON_WIN);
-
-
-=pod
-
-=head1 NAME
-
-File::Fetch - A generic file fetching mechanism
-
-=head1 SYNOPSIS
-
- use File::Fetch;
-
- ### build a File::Fetch object ###
- my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
-
- ### fetch the uri to cwd() ###
- my $where = $ff->fetch() or die $ff->error;
-
- ### fetch the uri to /tmp ###
- my $where = $ff->fetch( to => '/tmp' );
-
- ### parsed bits from the uri ###
- $ff->uri;
- $ff->scheme;
- $ff->host;
- $ff->path;
- $ff->file;
-
-=head1 DESCRIPTION
-
-File::Fetch is a generic file fetching mechanism.
-
-It allows you to fetch any file pointed to by a C<ftp>, C<http>,
-C<file>, or C<rsync> uri by a number of different means.
-
-See the C<HOW IT WORKS> section further down for details.
-
-=head1 ACCESSORS
-
-A C<File::Fetch> object has the following accessors
-
-=over 4
-
-=item $ff->uri
-
-The uri you passed to the constructor
-
-=item $ff->scheme
-
-The scheme from the uri (like 'file', 'http', etc)
-
-=item $ff->host
-
-The hostname in the uri. Will be empty if host was originally
-'localhost' for a 'file://' url.
-
-=item $ff->vol
-
-On operating systems with the concept of a volume the second element
-of a file:// is considered to the be volume specification for the file.
-Thus on Win32 this routine returns the volume, on other operating
-systems this returns nothing.
-
-On Windows this value may be empty if the uri is to a network share, in
-which case the 'share' property will be defined. Additionally, volume
-specifications that use '|' as ':' will be converted on read to use ':'.
-
-On VMS, which has a volume concept, this field will be empty because VMS
-file specifications are converted to absolute UNIX format and the volume
-information is transparently included.
-
-=item $ff->share
-
-On systems with the concept of a network share (currently only Windows) returns
-the sharename from a file://// url. On other operating systems returns empty.
-
-=item $ff->path
-
-The path from the uri, will be at least a single '/'.
-
-=item $ff->file
-
-The name of the remote file. For the local file name, the
-result of $ff->output_file will be used.
-
-=cut
-
-
-##########################
-### Object & Accessors ###
-##########################
-
-{
- ### template for autogenerated accessors ###
- my $Tmpl = {
- scheme => { default => 'http' },
- host => { default => 'localhost' },
- path => { default => '/' },
- file => { required => 1 },
- uri => { required => 1 },
- vol => { default => '' }, # windows for file:// uris
- share => { default => '' }, # windows for file:// uris
- _error_msg => { no_override => 1 },
- _error_msg_long => { no_override => 1 },
- };
-
- for my $method ( keys %$Tmpl ) {
- no strict 'refs';
- *$method = sub {
- my $self = shift;
- $self->{$method} = $_[0] if @_;
- return $self->{$method};
- }
- }
-
- sub _create {
- my $class = shift;
- my %hash = @_;
-
- my $args = check( $Tmpl, \%hash ) or return;
-
- bless $args, $class;
-
- if( lc($args->scheme) ne 'file' and not $args->host ) {
- return File::Fetch->_error(loc(
- "Hostname required when fetching from '%1'",$args->scheme));
- }
-
- for (qw[path file]) {
- unless( $args->$_() ) { # 5.5.x needs the ()
- return File::Fetch->_error(loc("No '%1' specified",$_));
- }
- }
-
- return $args;
- }
-}
-
-=item $ff->output_file
-
-The name of the output file. This is the same as $ff->file,
-but any query parameters are stripped off. For example:
-
- http://example.com/index.html?x=y
-
-would make the output file be C<index.html> rather than
-C<index.html?x=y>.
-
-=back
-
-=cut
-
-sub output_file {
- my $self = shift;
- my $file = $self->file;
-
- $file =~ s/\?.*$//g;
-
- return $file;
-}
-
-### XXX do this or just point to URI::Escape?
-# =head2 $esc_uri = $ff->escaped_uri
-#
-# =cut
-#
-# ### most of this is stolen straight from URI::escape
-# { ### Build a char->hex map
-# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
-#
-# sub escaped_uri {
-# my $self = shift;
-# my $uri = $self->uri;
-#
-# ### Default unsafe characters. RFC 2732 ^(uric - reserved)
-# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
-# $escapes{$1} || $self->_fail_hi($1)/ge;
-#
-# return $uri;
-# }
-#
-# sub _fail_hi {
-# my $self = shift;
-# my $char = shift;
-#
-# $self->_error(loc(
-# "Can't escape '%1', try using the '%2' module instead",
-# sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
-# ));
-# }
-#
-# sub output_file {
-#
-# }
-#
-#
-# }
-
-=head1 METHODS
-
-=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
-
-Parses the uri and creates a corresponding File::Fetch::Item object,
-that is ready to be C<fetch>ed and returns it.
-
-Returns false on failure.
-
-=cut
-
-sub new {
- my $class = shift;
- my %hash = @_;
-
- my ($uri);
- my $tmpl = {
- uri => { required => 1, store => \$uri },
- };
-
- check( $tmpl, \%hash ) or return;
-
- ### parse the uri to usable parts ###
- my $href = __PACKAGE__->_parse_uri( $uri ) or return;
-
- ### make it into a FFI object ###
- my $ff = File::Fetch->_create( %$href ) or return;
-
-
- ### return the object ###
- return $ff;
-}
-
-### parses an uri to a hash structure:
-###
-### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
-###
-### becomes:
-###
-### $href = {
-### scheme => 'ftp',
-### host => 'ftp.cpan.org',
-### path => '/pub/mirror',
-### file => 'index.html'
-### };
-###
-### In the case of file:// urls there maybe be additional fields
-###
-### For systems with volume specifications such as Win32 there will be
-### a volume specifier provided in the 'vol' field.
-###
-### 'vol' => 'volumename'
-###
-### For windows file shares there may be a 'share' key specified
-###
-### 'share' => 'sharename'
-###
-### Note that the rules of what a file:// url means vary by the operating system
-### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
-### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
-### not '/foo/bar.txt'
-###
-### Similarly if the host interpreting the url is VMS then
-### file:///disk$user/my/notes/note12345.txt' means
-### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
-### if it is unix where it means /disk$user/my/notes/note12345.txt'.
-### Except for some cases in the File::Spec methods, Perl on VMS will generally
-### handle UNIX format file specifications.
-###
-### This means it is impossible to serve certain file:// urls on certain systems.
-###
-### Thus are the problems with a protocol-less specification. :-(
-###
-
-sub _parse_uri {
- my $self = shift;
- my $uri = shift or return;
-
- my $href = { uri => $uri };
-
- ### find the scheme ###
- $uri =~ s|^(\w+)://||;
- $href->{scheme} = $1;
-
- ### See rfc 1738 section 3.10
- ### http://www.faqs.org/rfcs/rfc1738.html
- ### And wikipedia for more on windows file:// urls
- ### http://en.wikipedia.org/wiki/File://
- if( $href->{scheme} eq 'file' ) {
-
- my @parts = split '/',$uri;
-
- ### file://hostname/...
- ### file://hostname/...
- ### normalize file://localhost with file:///
- $href->{host} = $parts[0] || '';
-
- ### index in @parts where the path components begin;
- my $index = 1;
-
- ### file:////hostname/sharename/blah.txt
- if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
-
- $href->{host} = $parts[2] || ''; # avoid warnings
- $href->{share} = $parts[3] || ''; # avoid warnings
-
- $index = 4 # index after the share
-
- ### file:///D|/blah.txt
- ### file:///D:/blah.txt
- } elsif (HAS_VOL) {
-
- ### this code comes from dmq's patch, but:
- ### XXX if volume is empty, wouldn't that be an error? --kane
- ### if so, our file://localhost test needs to be fixed as wel
- $href->{vol} = $parts[1] || '';
-
- ### correct D| style colume descriptors
- $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
-
- $index = 2; # index after the volume
- }
-
- ### rebuild the path from the leftover parts;
- $href->{path} = join '/', '', splice( @parts, $index, $#parts );
-
- } else {
- ### using anything but qw() in hash slices may produce warnings
- ### in older perls :-(
- @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
- }
-
- ### split the path into file + dir ###
- { my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
- $href->{path} = $parts[1];
- $href->{file} = $parts[2];
- }
-
- ### host will be empty if the target was 'localhost' and the
- ### scheme was 'file'
- $href->{host} = '' if ($href->{host} eq 'localhost') and
- ($href->{scheme} eq 'file');
-
- return $href;
-}
-
-=head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] )
-
-Fetches the file you requested and returns the full path to the file.
-
-By default it writes to C<cwd()>, but you can override that by specifying
-the C<to> argument:
-
- ### file fetch to /tmp, full path to the file in $where
- $where = $ff->fetch( to => '/tmp' );
-
- ### file slurped into $scalar, full path to the file in $where
- ### file is downloaded to a temp directory and cleaned up at exit time
- $where = $ff->fetch( to => \$scalar );
-
-Returns the full path to the downloaded file on success, and false
-on failure.
-
-=cut
-
-sub fetch {
- my $self = shift or return;
- my %hash = @_;
-
- my $target;
- my $tmpl = {
- to => { default => cwd(), store => \$target },
- };
-
- check( $tmpl, \%hash ) or return;
-
- my ($to, $fh);
- ### you want us to slurp the contents
- if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
- $to = tempdir( 'FileFetch.XXXXXX', CLEANUP => 1 );
-
- ### plain old fetch
- } else {
- $to = $target;
-
- ### On VMS force to VMS format so File::Spec will work.
- $to = VMS::Filespec::vmspath($to) if ON_VMS;
-
- ### create the path if it doesn't exist yet ###
- unless( -d $to ) {
- eval { mkpath( $to ) };
-
- return $self->_error(loc("Could not create path '%1'",$to)) if $@;
- }
- }
-
- ### set passive ftp if required ###
- local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
-
- ### we dont use catfile on win32 because if we are using a cygwin tool
- ### under cmd.exe they wont understand windows style separators.
- my $out_to = ON_WIN ? $to.'/'.$self->output_file
- : File::Spec->catfile( $to, $self->output_file );
-
- for my $method ( @{ $METHODS->{$self->scheme} } ) {
- my $sub = '_'.$method.'_fetch';
-
- unless( __PACKAGE__->can($sub) ) {
- $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
- $method));
- next;
- }
-
- ### method is blacklisted ###
- next if grep { lc $_ eq $method } @$BLACKLIST;
-
- ### method is known to fail ###
- next if $METHOD_FAIL->{$method};
-
- ### there's serious issues with IPC::Run and quoting of command
- ### line arguments. using quotes in the wrong place breaks things,
- ### and in the case of say,
- ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
- ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
- ### it doesn't matter how you quote, it always fails.
- local $IPC::Cmd::USE_IPC_RUN = 0;
-
- if( my $file = $self->$sub(
- to => $out_to
- )){
-
- unless( -e $file && -s _ ) {
- $self->_error(loc("'%1' said it fetched '%2', ".
- "but it was not created",$method,$file));
-
- ### mark the failure ###
- $METHOD_FAIL->{$method} = 1;
-
- next;
-
- } else {
-
- ### slurp mode?
- if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
-
- ### open the file
- open my $fh, $file or do {
- $self->_error(
- loc("Could not open '%1': %2", $file, $!));
- return;
- };
-
- ### slurp
- $$target = do { local $/; <$fh> };
-
- }
-
- my $abs = File::Spec->rel2abs( $file );
- return $abs;
-
- }
- }
- }
-
-
- ### if we got here, we looped over all methods, but we weren't able
- ### to fetch it.
- return;
-}
-
-########################
-### _*_fetch methods ###
-########################
-
-### LWP fetching ###
-sub _lwp_fetch {
- my $self = shift;
- my %hash = @_;
-
- my ($to);
- my $tmpl = {
- to => { required => 1, store => \$to }
- };
- check( $tmpl, \%hash ) or return;
-
- ### modules required to download with lwp ###
- my $use_list = {
- LWP => '0.0',
- 'LWP::UserAgent' => '0.0',
- 'HTTP::Request' => '0.0',
- 'HTTP::Status' => '0.0',
- URI => '0.0',
-
- };
-
- if( can_load(modules => $use_list) ) {
-
- ### setup the uri object
- my $uri = URI->new( File::Spec::Unix->catfile(
- $self->path, $self->file
- ) );
-
- ### special rules apply for file:// uris ###
- $uri->scheme( $self->scheme );
- $uri->host( $self->scheme eq 'file' ? '' : $self->host );
- $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
-
- ### set up the useragent object
- my $ua = LWP::UserAgent->new();
- $ua->timeout( $TIMEOUT ) if $TIMEOUT;
- $ua->agent( $USER_AGENT );
- $ua->from( $FROM_EMAIL );
- $ua->env_proxy;
-
- my $res = $ua->mirror($uri, $to) or return;
-
- ### uptodate or fetched ok ###
- if ( $res->code == 304 or $res->code == 200 ) {
- return $to;
-
- } else {
- return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
- $res->code, HTTP::Status::status_message($res->code),
- $res->status_line));
- }
-
- } else {
- $METHOD_FAIL->{'lwp'} = 1;
- return;
- }
-}
-
-### Net::FTP fetching
-sub _netftp_fetch {
- my $self = shift;
- my %hash = @_;
-
- my ($to);
- my $tmpl = {
- to => { required => 1, store => \$to }
- };
- check( $tmpl, \%hash ) or return;
-
- ### required modules ###
- my $use_list = { 'Net::FTP' => 0 };
-
- if( can_load( modules => $use_list ) ) {
-
- ### make connection ###
- my $ftp;
- my @options = ($self->host);
- push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
- unless( $ftp = Net::FTP->new( @options ) ) {
- return $self->_error(loc("Ftp creation failed: %1",$@));
- }
-
- ### login ###
- unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
- return $self->_error(loc("Could not login to '%1'",$self->host));
- }
-
- ### set binary mode, just in case ###
- $ftp->binary;
-
- ### create the remote path
- ### remember remote paths are unix paths! [#11483]
- my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
-
- ### fetch the file ###
- my $target;
- unless( $target = $ftp->get( $remote, $to ) ) {
- return $self->_error(loc("Could not fetch '%1' from '%2'",
- $remote, $self->host));
- }
-
- ### log out ###
- $ftp->quit;
-
- return $target;
-
- } else {
- $METHOD_FAIL->{'netftp'} = 1;
- return;
- }
-}
-
-### /bin/wget fetch ###
-sub _wget_fetch {
- my $self = shift;
- my %hash = @_;
-
- my ($to);
- my $tmpl = {
- to => { required => 1, store => \$to }
- };
- check( $tmpl, \%hash ) or return;
-
- ### see if we have a wget binary ###
- if( my $wget = can_run('wget') ) {
-
- ### no verboseness, thanks ###
- my $cmd = [ $wget, '--quiet' ];
-
- ### if a timeout is set, add it ###
- push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
-
- ### run passive if specified ###
- push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
-
- ### set the output document, add the uri ###
- push @$cmd, '--output-document', $to, $self->uri;
-
- ### with IPC::Cmd > 0.41, this is fixed in teh library,
- ### and there's no need for special casing any more.
- ### DO NOT quote things for IPC::Run, it breaks stuff.
- # $IPC::Cmd::USE_IPC_RUN
- # ? ($to, $self->uri)
- # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
-
- ### shell out ###
- my $captured;
- unless(run( command => $cmd,
- buffer => \$captured,
- verbose => $DEBUG
- )) {
- ### wget creates the output document always, even if the fetch
- ### fails.. so unlink it in that case
- 1 while unlink $to;
-
- return $self->_error(loc( "Command failed: %1", $captured || '' ));
- }
-
- return $to;
-
- } else {
- $METHOD_FAIL->{'wget'} = 1;
- return;
- }
-}
-
-### /bin/lftp fetch ###
-sub _lftp_fetch {
- my $self = shift;
- my %hash = @_;
-
- my ($to);
- my $tmpl = {
- to => { required => 1, store => \$to }
- };
- check( $tmpl, \%hash ) or return;
-
- ### see if we have a wget binary ###
- if( my $lftp = can_run('lftp') ) {
-
- ### no verboseness, thanks ###
- my $cmd = [ $lftp, '-f' ];
-
- my $fh = File::Temp->new;
-
- my $str;
-
- ### if a timeout is set, add it ###
- $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
-
- ### run passive if specified ###
- $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
-
- ### set the output document, add the uri ###
- ### quote the URI, because lftp supports certain shell
- ### expansions, most notably & for backgrounding.
- ### ' quote does nto work, must be "
- $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
-
- if( $DEBUG ) {
- my $pp_str = join ' ', split $/, $str;
- print "# lftp command: $pp_str\n";
- }
-
- ### write straight to the file.
- $fh->autoflush(1);
- print $fh $str;
-
- ### the command needs to be 1 string to be executed
- push @$cmd, $fh->filename;
-
- ### with IPC::Cmd > 0.41, this is fixed in teh library,
- ### and there's no need for special casing any more.
- ### DO NOT quote things for IPC::Run, it breaks stuff.
- # $IPC::Cmd::USE_IPC_RUN
- # ? ($to, $self->uri)
- # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
-
-
- ### shell out ###
- my $captured;
- unless(run( command => $cmd,
- buffer => \$captured,
- verbose => $DEBUG
- )) {
- ### wget creates the output document always, even if the fetch
- ### fails.. so unlink it in that case
- 1 while unlink $to;
-
- return $self->_error(loc( "Command failed: %1", $captured || '' ));
- }
-
- return $to;
-
- } else {
- $METHOD_FAIL->{'lftp'} = 1;
- return;
- }
-}
-
-
-
-### /bin/ftp fetch ###
-sub _ftp_fetch {
- my $self = shift;
- my %hash = @_;
-
- my ($to);
- my $tmpl = {
- to => { required => 1, store => \$to }
- };
- check( $tmpl, \%hash ) or return;
-
- ### see if we have a ftp binary ###
- if( my $ftp = can_run('ftp') ) {
-
- my $fh = FileHandle->new;
-
- local $SIG{CHLD} = 'IGNORE';
-
- unless ($fh->open("|$ftp -n")) {
- return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
- }
-
- my @dialog = (
- "lcd " . dirname($to),
- "open " . $self->host,
- "user anonymous $FROM_EMAIL",
- "cd /",
- "cd " . $self->path,
- "binary",
- "get " . $self->file . " " . $self->output_file,
- "quit",
- );
-
- foreach (@dialog) { $fh->print($_, "\n") }
- $fh->close or return;
-
- return $to;
- }
-}
-
-### lynx is stupid - it decompresses any .gz file it finds to be text
-### use /bin/lynx to fetch files
-sub _lynx_fetch {
- my $self = shift;
- my %hash = @_;
-
- my ($to);
- my $tmpl = {
- to => { required => 1, store => \$to }
- };
- check( $tmpl, \%hash ) or return;
-
- ### see if we have a lynx binary ###
- if( my $lynx = can_run('lynx') ) {
-
- unless( IPC::Cmd->can_capture_buffer ) {
- $METHOD_FAIL->{'lynx'} = 1;
-
- return $self->_error(loc(
- "Can not capture buffers. Can not use '%1' to fetch files",
- 'lynx' ));
- }
-
- ### check if the HTTP resource exists ###
- if ($self->uri =~ /^https?:\/\//i) {
- my $cmd = [
- $lynx,
- '-head',
- '-source',
- "-auth=anonymous:$FROM_EMAIL",
- ];
-
- push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
-
- push @$cmd, $self->uri;
-
- ### shell out ###
- my $head;
- unless(run( command => $cmd,
- buffer => \$head,
- verbose => $DEBUG )
- ) {
- return $self->_error(loc("Command failed: %1", $head || ''));
- }
-
- unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
- return $self->_error(loc("Command failed: %1", $head || ''));
- }
- }
-
- ### write to the output file ourselves, since lynx ass_u_mes to much
- my $local = FileHandle->new(">$to")
- or return $self->_error(loc(
- "Could not open '%1' for writing: %2",$to,$!));
-
- ### dump to stdout ###
- my $cmd = [
- $lynx,
- '-source',
- "-auth=anonymous:$FROM_EMAIL",
- ];
-
- push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
-
- ### DO NOT quote things for IPC::Run, it breaks stuff.
- push @$cmd, $self->uri;
-
- ### with IPC::Cmd > 0.41, this is fixed in teh library,
- ### and there's no need for special casing any more.
- ### DO NOT quote things for IPC::Run, it breaks stuff.
- # $IPC::Cmd::USE_IPC_RUN
- # ? $self->uri
- # : QUOTE. $self->uri .QUOTE;
-
-
- ### shell out ###
- my $captured;
- unless(run( command => $cmd,
- buffer => \$captured,
- verbose => $DEBUG )
- ) {
- return $self->_error(loc("Command failed: %1", $captured || ''));
- }
-
- ### print to local file ###
- ### XXX on a 404 with a special error page, $captured will actually
- ### hold the contents of that page, and make it *appear* like the
- ### request was a success, when really it wasn't :(
- ### there doesn't seem to be an option for lynx to change the exit
- ### code based on a 4XX status or so.
- ### the closest we can come is using --error_file and parsing that,
- ### which is very unreliable ;(
- $local->print( $captured );
- $local->close or return;
-
- return $to;
-
- } else {
- $METHOD_FAIL->{'lynx'} = 1;
- return;
- }
-}
-
-### use /bin/ncftp to fetch files
-sub _ncftp_fetch {
- my $self = shift;
- my %hash = @_;
-
- my ($to);
- my $tmpl = {
- to => { required => 1, store => \$to }
- };
- check( $tmpl, \%hash ) or return;
-
- ### we can only set passive mode in interactive sesssions, so bail out
- ### if $FTP_PASSIVE is set
- return if $FTP_PASSIVE;
-
- ### see if we have a ncftp binary ###
- if( my $ncftp = can_run('ncftp') ) {
-
- my $cmd = [
- $ncftp,
- '-V', # do not be verbose
- '-p', $FROM_EMAIL, # email as password
- $self->host, # hostname
- dirname($to), # local dir for the file
- # remote path to the file
- ### DO NOT quote things for IPC::Run, it breaks stuff.
- $IPC::Cmd::USE_IPC_RUN
- ? File::Spec::Unix->catdir( $self->path, $self->file )
- : QUOTE. File::Spec::Unix->catdir(
- $self->path, $self->file ) .QUOTE
-
- ];
-
- ### shell out ###
- my $captured;
- unless(run( command => $cmd,
- buffer => \$captured,
- verbose => $DEBUG )
- ) {
- return $self->_error(loc("Command failed: %1", $captured || ''));
- }
-
- return $to;
-
- } else {
- $METHOD_FAIL->{'ncftp'} = 1;
- return;
- }
-}
-
-### use /bin/curl to fetch files
-sub _curl_fetch {
- my $self = shift;
- my %hash = @_;
-
- my ($to);
- my $tmpl = {
- to => { required => 1, store => \$to }
- };
- check( $tmpl, \%hash ) or return;
-
- if (my $curl = can_run('curl')) {
-
- ### these long opts are self explanatory - I like that -jmb
- my $cmd = [ $curl, '-q' ];
-
- push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
-
- push(@$cmd, '--silent') unless $DEBUG;
-
- ### curl does the right thing with passive, regardless ###
- if ($self->scheme eq 'ftp') {
- push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
- }
-
- ### curl doesn't follow 302 (temporarily moved) etc automatically
- ### so we add --location to enable that.
- push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
-
- ### with IPC::Cmd > 0.41, this is fixed in teh library,
- ### and there's no need for special casing any more.
- ### DO NOT quote things for IPC::Run, it breaks stuff.
- # $IPC::Cmd::USE_IPC_RUN
- # ? ($to, $self->uri)
- # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
-
-
- my $captured;
- unless(run( command => $cmd,
- buffer => \$captured,
- verbose => $DEBUG )
- ) {
-
- return $self->_error(loc("Command failed: %1", $captured || ''));
- }
-
- return $to;
-
- } else {
- $METHOD_FAIL->{'curl'} = 1;
- return;
- }
-}
-
-
-### use File::Copy for fetching file:// urls ###
-###
-### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
-### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
-###
-
-sub _file_fetch {
- my $self = shift;
- my %hash = @_;
-
- my ($to);
- my $tmpl = {
- to => { required => 1, store => \$to }
- };
- check( $tmpl, \%hash ) or return;
-
-
-
- ### prefix a / on unix systems with a file uri, since it would
- ### look somewhat like this:
- ### file:///home/kane/file
- ### wheras windows file uris for 'c:\some\dir\file' might look like:
- ### file:///C:/some/dir/file
- ### file:///C|/some/dir/file
- ### or for a network share '\\host\share\some\dir\file':
- ### file:////host/share/some/dir/file
- ###
- ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
- ### file://vms.host.edu/disk$user/my/notes/note12345.txt
- ###
-
- my $path = $self->path;
- my $vol = $self->vol;
- my $share = $self->share;
-
- my $remote;
- if (!$share and $self->host) {
- return $self->_error(loc(
- "Currently %1 cannot handle hosts in %2 urls",
- 'File::Fetch', 'file://'
- ));
- }
-
- if( $vol ) {
- $path = File::Spec->catdir( split /\//, $path );
- $remote = File::Spec->catpath( $vol, $path, $self->file);
-
- } elsif( $share ) {
- ### win32 specific, and a share name, so we wont bother with File::Spec
- $path =~ s|/+|\\|g;
- $remote = "\\\\".$self->host."\\$share\\$path";
-
- } else {
- ### File::Spec on VMS can not currently handle UNIX syntax.
- my $file_class = ON_VMS
- ? 'File::Spec::Unix'
- : 'File::Spec';
-
- $remote = $file_class->catfile( $path, $self->file );
- }
-
- ### File::Copy is littered with 'die' statements :( ###
- my $rv = eval { File::Copy::copy( $remote, $to ) };
-
- ### something went wrong ###
- if( !$rv or $@ ) {
- return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
- $remote, $to, $!, $@));
- }
-
- return $to;
-}
-
-### use /usr/bin/rsync to fetch files
-sub _rsync_fetch {
- my $self = shift;
- my %hash = @_;
-
- my ($to);
- my $tmpl = {
- to => { required => 1, store => \$to }
- };
- check( $tmpl, \%hash ) or return;
-
- if (my $rsync = can_run('rsync')) {
-
- my $cmd = [ $rsync ];
-
- ### XXX: rsync has no I/O timeouts at all, by default
- push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
-
- push(@$cmd, '--quiet') unless $DEBUG;
-
- ### DO NOT quote things for IPC::Run, it breaks stuff.
- push @$cmd, $self->uri, $to;
-
- ### with IPC::Cmd > 0.41, this is fixed in teh library,
- ### and there's no need for special casing any more.
- ### DO NOT quote things for IPC::Run, it breaks stuff.
- # $IPC::Cmd::USE_IPC_RUN
- # ? ($to, $self->uri)
- # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
-
- my $captured;
- unless(run( command => $cmd,
- buffer => \$captured,
- verbose => $DEBUG )
- ) {
-
- return $self->_error(loc("Command %1 failed: %2",
- "@$cmd" || '', $captured || ''));
- }
-
- return $to;
-
- } else {
- $METHOD_FAIL->{'rsync'} = 1;
- return;
- }
-}
-
-#################################
-#
-# Error code
-#
-#################################
-
-=pod
-
-=head2 $ff->error([BOOL])
-
-Returns the last encountered error as string.
-Pass it a true value to get the C<Carp::longmess()> output instead.
-
-=cut
-
-### error handling the way Archive::Extract does it
-sub _error {
- my $self = shift;
- my $error = shift;
-
- $self->_error_msg( $error );
- $self->_error_msg_long( Carp::longmess($error) );
-
- if( $WARN ) {
- carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
- }
-
- return;
-}
-
-sub error {
- my $self = shift;
- return shift() ? $self->_error_msg_long : $self->_error_msg;
-}
-
-
-1;
-
-=pod
-
-=head1 HOW IT WORKS
-
-File::Fetch is able to fetch a variety of uris, by using several
-external programs and modules.
-
-Below is a mapping of what utilities will be used in what order
-for what schemes, if available:
-
- file => LWP, lftp, file
- http => LWP, wget, curl, lftp, lynx
- ftp => LWP, Net::FTP, wget, curl, lftp, ncftp, ftp
- rsync => rsync
-
-If you'd like to disable the use of one or more of these utilities
-and/or modules, see the C<$BLACKLIST> variable further down.
-
-If a utility or module isn't available, it will be marked in a cache
-(see the C<$METHOD_FAIL> variable further down), so it will not be
-tried again. The C<fetch> method will only fail when all options are
-exhausted, and it was not able to retrieve the file.
-
-A special note about fetching files from an ftp uri:
-
-By default, all ftp connections are done in passive mode. To change
-that, see the C<$FTP_PASSIVE> variable further down.
-
-Furthermore, ftp uris only support anonymous connections, so no
-named user/password pair can be passed along.
-
-C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
-further down.
-
-=head1 GLOBAL VARIABLES
-
-The behaviour of File::Fetch can be altered by changing the following
-global variables:
-
-=head2 $File::Fetch::FROM_EMAIL
-
-This is the email address that will be sent as your anonymous ftp
-password.
-
-Default is C<File-Fetch@example.com>.
-
-=head2 $File::Fetch::USER_AGENT
-
-This is the useragent as C<LWP> will report it.
-
-Default is C<File::Fetch/$VERSION>.
-
-=head2 $File::Fetch::FTP_PASSIVE
-
-This variable controls whether the environment variable C<FTP_PASSIVE>
-and any passive switches to commandline tools will be set to true.
-
-Default value is 1.
-
-Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
-files, since passive mode can only be set interactively for this binary
-
-=head2 $File::Fetch::TIMEOUT
-
-When set, controls the network timeout (counted in seconds).
-
-Default value is 0.
-
-=head2 $File::Fetch::WARN
-
-This variable controls whether errors encountered internally by
-C<File::Fetch> should be C<carp>'d or not.
-
-Set to false to silence warnings. Inspect the output of the C<error()>
-method manually to see what went wrong.
-
-Defaults to C<true>.
-
-=head2 $File::Fetch::DEBUG
-
-This enables debugging output when calling commandline utilities to
-fetch files.
-This also enables C<Carp::longmess> errors, instead of the regular
-C<carp> errors.
-
-Good for tracking down why things don't work with your particular
-setup.
-
-Default is 0.
-
-=head2 $File::Fetch::BLACKLIST
-
-This is an array ref holding blacklisted modules/utilities for fetching
-files with.
-
-To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
-set $File::Fetch::BLACKLIST to:
-
- $File::Fetch::BLACKLIST = [qw|lwp netftp|]
-
-The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
-
-See the note on C<MAPPING> below.
-
-=head2 $File::Fetch::METHOD_FAIL
-
-This is a hashref registering what modules/utilities were known to fail
-for fetching files (mostly because they weren't installed).
-
-You can reset this cache by assigning an empty hashref to it, or
-individually remove keys.
-
-See the note on C<MAPPING> below.
-
-=head1 MAPPING
-
-
-Here's a quick mapping for the utilities/modules, and their names for
-the $BLACKLIST, $METHOD_FAIL and other internal functions.
-
- LWP => lwp
- Net::FTP => netftp
- wget => wget
- lynx => lynx
- ncftp => ncftp
- ftp => ftp
- curl => curl
- rsync => rsync
- lftp => lftp
-
-=head1 FREQUENTLY ASKED QUESTIONS
-
-=head2 So how do I use a proxy with File::Fetch?
-
-C<File::Fetch> currently only supports proxies with LWP::UserAgent.
-You will need to set your environment variables accordingly. For
-example, to use an ftp proxy:
-
- $ENV{ftp_proxy} = 'foo.com';
-
-Refer to the LWP::UserAgent manpage for more details.
-
-=head2 I used 'lynx' to fetch a file, but its contents is all wrong!
-
-C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
-which we in turn capture. If that content is a 'custom' error file
-(like, say, a C<404 handler>), you will get that contents instead.
-
-Sadly, C<lynx> doesn't support any options to return a different exit
-code on non-C<200 OK> status, giving us no way to tell the difference
-between a 'successfull' fetch and a custom error page.
-
-Therefor, we recommend to only use C<lynx> as a last resort. This is
-why it is at the back of our list of methods to try as well.
-
-=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
-
-C<File::Fetch> is relatively smart about things. When trying to write
-a file to disk, it removes the C<query parameters> (see the
-C<output_file> method for details) from the file name before creating
-it. In most cases this suffices.
-
-If you have any other characters you need to escape, please install
-the C<URI::Escape> module from CPAN, and pre-encode your URI before
-passing it to C<File::Fetch>. You can read about the details of URIs
-and URI encoding here:
-
- http://www.faqs.org/rfcs/rfc2396.html
-
-=head1 TODO
-
-=over 4
-
-=item Implement $PREFER_BIN
-
-To indicate to rather use commandline tools than modules
-
-=back
-
-=head1 BUG REPORTS
-
-Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
-
-=head1 AUTHOR
-
-This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
-
-=head1 COPYRIGHT
-
-This library is free software; you may redistribute and/or modify it
-under the same terms as Perl itself.
-
-
-=cut
-
-# Local variables:
-# c-indentation-style: bsd
-# c-basic-offset: 4
-# indent-tabs-mode: nil
-# End:
-# vim: expandtab shiftwidth=4:
-
-
-
-
diff --git a/lib/File/Fetch/t/01_File-Fetch.t b/lib/File/Fetch/t/01_File-Fetch.t
deleted file mode 100644
index 1cd7e8d126..0000000000
--- a/lib/File/Fetch/t/01_File-Fetch.t
+++ /dev/null
@@ -1,253 +0,0 @@
-BEGIN { chdir 't' if -d 't' };
-
-use strict;
-use lib '../lib';
-
-use Test::More 'no_plan';
-
-use Cwd qw[cwd];
-use File::Basename qw[basename];
-use Data::Dumper;
-
-use_ok('File::Fetch');
-
-### optionally set debugging ###
-$File::Fetch::DEBUG = $File::Fetch::DEBUG = 1 if $ARGV[0];
-$IPC::Cmd::DEBUG = $IPC::Cmd::DEBUG = 1 if $ARGV[0];
-
-unless( $ENV{PERL_CORE} ) {
- warn qq[
-
-####################### NOTE ##############################
-
-Some of these tests assume you are connected to the
-internet. If you are not, or if certain protocols or hosts
-are blocked and/or firewalled, these tests could fail due
-to no fault of the module itself.
-
-###########################################################
-
-];
-
- sleep 3 unless $File::Fetch::DEBUG;
-}
-
-### show us the tools IPC::Cmd will use to run binary programs
-if( $File::Fetch::DEBUG ) {
- ### stupid 'used only once' warnings ;(
- diag( "IPC::Run enabled: " .
- $IPC::Cmd::USE_IPC_RUN || $IPC::Cmd::USE_IPC_RUN );
- diag( "IPC::Run available: " . IPC::Cmd->can_use_ipc_run );
- diag( "IPC::Run vesion: $IPC::Run::VERSION" );
- diag( "IPC::Open3 enabled: " .
- $IPC::Cmd::USE_IPC_OPEN3 || $IPC::Cmd::USE_IPC_OPEN3 );
- diag( "IPC::Open3 available: " . IPC::Cmd->can_use_ipc_open3 );
- diag( "IPC::Open3 vesion: $IPC::Open3::VERSION" );
-}
-
-### _parse_uri tests
-### these go on all platforms
-my @map = (
- { uri => 'ftp://cpan.org/pub/mirror/index.txt',
- scheme => 'ftp',
- host => 'cpan.org',
- path => '/pub/mirror/',
- file => 'index.txt'
- },
- { uri => 'rsync://cpan.pair.com/CPAN/MIRRORING.FROM',
- scheme => 'rsync',
- host => 'cpan.pair.com',
- path => '/CPAN/',
- file => 'MIRRORING.FROM',
- },
- { uri => 'http://localhost/tmp/index.txt',
- scheme => 'http',
- host => 'localhost', # host is empty only on 'file://'
- path => '/tmp/',
- file => 'index.txt',
- },
-
- ### only test host part, the rest is OS dependant
- { uri => 'file://localhost/tmp/index.txt',
- host => '', # host should be empty on 'file://'
- },
-);
-
-### these only if we're not on win32/vms
-push @map, (
- { uri => 'file:///usr/local/tmp/foo.txt',
- scheme => 'file',
- host => '',
- path => '/usr/local/tmp/',
- file => 'foo.txt',
- },
- { uri => 'file://hostname/tmp/foo.txt',
- scheme => 'file',
- host => 'hostname',
- path => '/tmp/',
- file => 'foo.txt',
- },
-) if not &File::Fetch::ON_WIN and not &File::Fetch::ON_VMS;
-
-### these only on win32
-push @map, (
- { uri => 'file:////hostname/share/tmp/foo.txt',
- scheme => 'file',
- host => 'hostname',
- share => 'share',
- path => '/tmp/',
- file => 'foo.txt',
- },
- { uri => 'file:///D:/tmp/foo.txt',
- scheme => 'file',
- host => '',
- vol => 'D:',
- path => '/tmp/',
- file => 'foo.txt',
- },
- { uri => 'file:///D|/tmp/foo.txt',
- scheme => 'file',
- host => '',
- vol => 'D:',
- path => '/tmp/',
- file => 'foo.txt',
- },
-) if &File::Fetch::ON_WIN;
-
-
-### sanity tests
-{ like( $File::Fetch::USER_AGENT, qr/$File::Fetch::VERSION/,
- "User agent contains version" );
- like( $File::Fetch::FROM_EMAIL, qr/@/,
- q[Email contains '@'] );
-}
-
-### parse uri tests ###
-for my $entry (@map ) {
- my $uri = $entry->{'uri'};
-
- my $href = File::Fetch->_parse_uri( $uri );
- ok( $href, "Able to parse uri '$uri'" );
-
- for my $key ( sort keys %$entry ) {
- is( $href->{$key}, $entry->{$key},
- " '$key' ok ($entry->{$key}) for $uri");
- }
-}
-
-### File::Fetch->new tests ###
-for my $entry (@map) {
- my $ff = File::Fetch->new( uri => $entry->{uri} );
-
- ok( $ff, "Object for uri '$entry->{uri}'" );
- isa_ok( $ff, "File::Fetch", " Object" );
-
- for my $acc ( keys %$entry ) {
- is( $ff->$acc(), $entry->{$acc},
- " Accessor '$acc' ok ($entry->{$acc})" );
- }
-}
-
-### fetch() tests ###
-
-### file:// tests ###
-{
- my $prefix = &File::Fetch::ON_UNIX ? 'file://' : 'file:///';
- my $uri = $prefix . cwd() .'/'. basename($0);
-
- for (qw[lwp lftp file]) {
- _fetch_uri( file => $uri, $_ );
- }
-}
-
-### ftp:// tests ###
-{ my $uri = 'ftp://ftp.funet.fi/pub/CPAN/index.html';
- for (qw[lwp netftp wget curl lftp ncftp]) {
-
- ### STUPID STUPID warnings ###
- next if $_ eq 'ncftp' and $File::Fetch::FTP_PASSIVE
- and $File::Fetch::FTP_PASSIVE;
-
- _fetch_uri( ftp => $uri, $_ );
- }
-}
-
-### http:// tests ###
-{ for my $uri ( 'http://www.cpan.org/index.html',
- 'http://www.cpan.org/index.html?q=1',
- 'http://www.cpan.org/index.html?q=1&y=2',
- ) {
- for (qw[lwp wget curl lftp lynx]) {
- _fetch_uri( http => $uri, $_ );
- }
- }
-}
-
-### rsync:// tests ###
-{ my $uri = 'rsync://cpan.pair.com/CPAN/MIRRORING.FROM';
-
- for (qw[rsync]) {
- _fetch_uri( rsync => $uri, $_ );
- }
-}
-
-sub _fetch_uri {
- my $type = shift;
- my $uri = shift;
- my $method = shift or return;
-
- SKIP: {
- skip "'$method' fetching tests disabled under perl core", 4
- if $ENV{PERL_CORE};
-
- ### stupid warnings ###
- $File::Fetch::METHODS =
- $File::Fetch::METHODS = { $type => [$method] };
-
- ### fetch regularly
- my $ff = File::Fetch->new( uri => $uri );
-
- ok( $ff, "FF object for $uri (fetch with $method)" );
-
- for my $to ( 'tmp', do { \my $o } ) { SKIP: {
-
-
- my $how = ref $to ? 'slurp' : 'file';
- my $skip = ref $to ? 4 : 3;
-
- ok( 1, " Fetching '$uri' in $how mode" );
-
- my $file = $ff->fetch( to => $to );
-
- skip "You do not have '$method' installed/available", $skip
- if $File::Fetch::METHOD_FAIL->{$method} &&
- $File::Fetch::METHOD_FAIL->{$method};
-
- ### if the file wasn't fetched, it may be a network/firewall issue
- skip "Fetch failed; no network connectivity for '$type'?", $skip
- unless $file;
-
- ok( $file, " File ($file) fetched with $method ($uri)" );
-
- ### check we got some contents if we were meant to slurp
- if( ref $to ) {
- ok( $$to, " Contents slurped" );
- }
-
- ok( $file && -s $file,
- " File has size" );
- is( $file && basename($file), $ff->output_file,
- " File has expected name" );
-
- unlink $file;
- }}
- }
-}
-
-
-
-
-
-
-
-