diff options
Diffstat (limited to 'lib/File/Listing.pm')
-rw-r--r-- | lib/File/Listing.pm | 435 |
1 files changed, 435 insertions, 0 deletions
diff --git a/lib/File/Listing.pm b/lib/File/Listing.pm new file mode 100644 index 0000000..c8d72a7 --- /dev/null +++ b/lib/File/Listing.pm @@ -0,0 +1,435 @@ +package File::Listing; + +sub Version { $VERSION; } +$VERSION = "6.04"; + +require Exporter; +@ISA = qw(Exporter); +@EXPORT = qw(parse_dir); + +use strict; + +use Carp (); +use HTTP::Date qw(str2time); + + + +sub parse_dir ($;$$$) +{ + my($dir, $tz, $fstype, $error) = @_; + + $fstype ||= 'unix'; + $fstype = "File::Listing::" . lc $fstype; + + my @args = $_[0]; + push(@args, $tz) if(@_ >= 2); + push(@args, $error) if(@_ >= 4); + + $fstype->parse(@args); +} + + +sub line { Carp::croak("Not implemented yet"); } +sub init { } # Dummy sub + + +sub file_mode ($) +{ + Carp::croak("Input to file_mode() must be a 10 character string.") + unless length($_[0]) == 10; + + # This routine was originally borrowed from Graham Barr's + # Net::FTP package. + + local $_ = shift; + my $mode = 0; + my($type); + + s/^(.)// and $type = $1; + + # When the set-group-ID bit (file mode bit 02000) is set, and the group + # execution bit (file mode bit 00020) is unset, and it is a regular file, + # some implementations of `ls' use the letter `S', others use `l' or `L'. + # Convert this `S'. + + s/[Ll](...)$/S$1/; + + while (/(.)/g) { + $mode <<= 1; + $mode |= 1 if $1 ne "-" && + $1 ne 'S' && + $1 ne 'T'; + } + + $mode |= 0004000 if /^..s....../i; + $mode |= 0002000 if /^.....s.../i; + $mode |= 0001000 if /^........t/i; + + # De facto standard definitions. From 'stat.h' on Solaris 9. + + $type eq "p" and $mode |= 0010000 or # fifo + $type eq "c" and $mode |= 0020000 or # character special + $type eq "d" and $mode |= 0040000 or # directory + $type eq "b" and $mode |= 0060000 or # block special + $type eq "-" and $mode |= 0100000 or # regular + $type eq "l" and $mode |= 0120000 or # symbolic link + $type eq "s" and $mode |= 0140000 or # socket + $type eq "D" and $mode |= 0150000 or # door + Carp::croak("Unknown file type: $type"); + + $mode; +} + + +sub parse +{ + my($pkg, $dir, $tz, $error) = @_; + + # First let's try to determine what kind of dir parameter we have + # received. We allow both listings, reference to arrays and + # file handles to read from. + + if (ref($dir) eq 'ARRAY') { + # Already splitted up + } + elsif (ref($dir) eq 'GLOB') { + # A file handle + } + elsif (ref($dir)) { + Carp::croak("Illegal argument to parse_dir()"); + } + elsif ($dir =~ /^\*\w+(::\w+)+$/) { + # This scalar looks like a file handle, so we assume it is + } + else { + # A normal scalar listing + $dir = [ split(/\n/, $dir) ]; + } + + $pkg->init(); + + my @files = (); + if (ref($dir) eq 'ARRAY') { + for (@$dir) { + push(@files, $pkg->line($_, $tz, $error)); + } + } + else { + local($_); + while (<$dir>) { + chomp; + push(@files, $pkg->line($_, $tz, $error)); + } + } + wantarray ? @files : \@files; +} + + + +package File::Listing::unix; + +use HTTP::Date qw(str2time); + +# A place to remember current directory from last line parsed. +use vars qw($curdir @ISA); + +@ISA = qw(File::Listing); + + + +sub init +{ + $curdir = ''; +} + + +sub line +{ + shift; # package name + local($_) = shift; + my($tz, $error) = @_; + + s/\015//g; + #study; + + my ($kind, $size, $date, $name); + if (($kind, $size, $date, $name) = + /^([\-FlrwxsStTdD]{10}) # Type and permission bits + .* # Graps + \D(\d+) # File size + \s+ # Some space + (\w{3}\s+\d+\s+(?:\d{1,2}:\d{2}|\d{4})|\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}) # Date + \s+ # Some more space + (.*)$ # File name + /x ) + + { + return if $name eq '.' || $name eq '..'; + $name = "$curdir/$name" if length $curdir; + my $type = '?'; + if ($kind =~ /^l/ && $name =~ /(.*) -> (.*)/ ) { + $name = $1; + $type = "l $2"; + } + elsif ($kind =~ /^[\-F]/) { # (hopefully) a regular file + $type = 'f'; + } + elsif ($kind =~ /^[dD]/) { + $type = 'd'; + $size = undef; # Don't believe the reported size + } + return [$name, $type, $size, str2time($date, $tz), + File::Listing::file_mode($kind)]; + + } + elsif (/^(.+):$/ && !/^[dcbsp].*\s.*\s.*:$/ ) { + my $dir = $1; + return () if $dir eq '.'; + $curdir = $dir; + return (); + } + elsif (/^[Tt]otal\s+(\d+)$/ || /^\s*$/) { + return (); + } + elsif (/not found/ || # OSF1, HPUX, and SunOS return + # "$file not found" + /No such file/ || # IRIX returns + # "UX:ls: ERROR: Cannot access $file: No such file or directory" + # Solaris returns + # "$file: No such file or directory" + /cannot find/ # Windows NT returns + # "The system cannot find the path specified." + ) { + return () unless defined $error; + &$error($_) if ref($error) eq 'CODE'; + warn "Error: $_\n" if $error eq 'warn'; + return (); + } + elsif ($_ eq '') { # AIX, and Linux return nothing + return () unless defined $error; + &$error("No such file or directory") if ref($error) eq 'CODE'; + warn "Warning: No such file or directory\n" if $error eq 'warn'; + return (); + } + else { + # parse failed, check if the dosftp parse understands it + File::Listing::dosftp->init(); + return(File::Listing::dosftp->line($_,$tz,$error)); + } + +} + + + +package File::Listing::dosftp; + +use HTTP::Date qw(str2time); + +# A place to remember current directory from last line parsed. +use vars qw($curdir @ISA); + +@ISA = qw(File::Listing); + + + +sub init +{ + $curdir = ''; +} + + +sub line +{ + shift; # package name + local($_) = shift; + my($tz, $error) = @_; + + s/\015//g; + + my ($date, $size_or_dir, $name, $size); + + # 02-05-96 10:48AM 1415 src.slf + # 09-10-96 09:18AM <DIR> sl_util + if (($date, $size_or_dir, $name) = + /^(\d\d-\d\d-\d\d\s+\d\d:\d\d\wM) # Date and time info + \s+ # Some space + (<\w{3}>|\d+) # Dir or Size + \s+ # Some more space + (.+)$ # File name + /x ) + { + return if $name eq '.' || $name eq '..'; + $name = "$curdir/$name" if length $curdir; + my $type = '?'; + if ($size_or_dir eq '<DIR>') { + $type = "d"; + $size = ""; # directories have no size in the pc listing + } + else { + $type = 'f'; + $size = $size_or_dir; + } + return [$name, $type, $size, str2time($date, $tz), undef]; + } + else { + return () unless defined $error; + &$error($_) if ref($error) eq 'CODE'; + warn "Can't parse: $_\n" if $error eq 'warn'; + return (); + } + +} + + + +package File::Listing::vms; +@File::Listing::vms::ISA = qw(File::Listing); + +package File::Listing::netware; +@File::Listing::netware::ISA = qw(File::Listing); + + + +package File::Listing::apache; + +use vars qw(@ISA); + +@ISA = qw(File::Listing); + + +sub init { } + + +sub line { + shift; # package name + local($_) = shift; + my($tz, $error) = @_; # ignored for now... + + s!</?t[rd][^>]*>! !g; # clean away various table stuff + if (m!<A\s+HREF=\"([^\"]+)\">.*</A>.*?(\d+)-([a-zA-Z]+|\d+)-(\d+)\s+(\d+):(\d+)\s+(?:([\d\.]+[kMG]?|-))!i) { + my($filename, $filesize) = ($1, $7); + my($d,$m,$y, $H,$M) = ($2,$3,$4,$5,$6); + if ($m =~ /^\d+$/) { + ($d,$y) = ($y,$d) # iso date + } + else { + $m = _monthabbrev_number($m); + } + + $filesize = 0 if $filesize eq '-'; + if ($filesize =~ s/k$//i) { + $filesize *= 1024; + } + elsif ($filesize =~ s/M$//) { + $filesize *= 1024*1024; + } + elsif ($filesize =~ s/G$//) { + $filesize *= 1024*1024*1024; + } + $filesize = int $filesize; + + require Time::Local; + my $filetime = Time::Local::timelocal(0,$M,$H,$d,$m-1,_guess_year($y)-1900); + my $filetype = ($filename =~ s|/$|| ? "d" : "f"); + return [$filename, $filetype, $filesize, $filetime, undef]; + } + + return (); +} + + +sub _guess_year { + my $y = shift; + if ($y >= 90) { + $y = 1900+$y; + } + elsif ($y < 100) { + $y = 2000+$y; + } + $y; +} + + +sub _monthabbrev_number { + my $mon = shift; + +{'Jan' => 1, + 'Feb' => 2, + 'Mar' => 3, + 'Apr' => 4, + 'May' => 5, + 'Jun' => 6, + 'Jul' => 7, + 'Aug' => 8, + 'Sep' => 9, + 'Oct' => 10, + 'Nov' => 11, + 'Dec' => 12, + }->{$mon}; +} + + +1; + +__END__ + +=head1 NAME + +File::Listing - parse directory listing + +=head1 SYNOPSIS + + use File::Listing qw(parse_dir); + $ENV{LANG} = "C"; # dates in non-English locales not supported + for (parse_dir(`ls -l`)) { + ($name, $type, $size, $mtime, $mode) = @$_; + next if $type ne 'f'; # plain file + #... + } + + # directory listing can also be read from a file + open(LISTING, "zcat ls-lR.gz|"); + $dir = parse_dir(\*LISTING, '+0000'); + +=head1 DESCRIPTION + +This module exports a single function called parse_dir(), which can be +used to parse directory listings. + +The first parameter to parse_dir() is the directory listing to parse. +It can be a scalar, a reference to an array of directory lines or a +glob representing a filehandle to read the directory listing from. + +The second parameter is the time zone to use when parsing time stamps +in the listing. If this value is undefined, then the local time zone is +assumed. + +The third parameter is the type of listing to assume. Currently +supported formats are 'unix', 'apache' and 'dosftp'. The default +value is 'unix'. Ideally, the listing type should be determined +automatically. + +The fourth parameter specifies how unparseable lines should be treated. +Values can be 'ignore', 'warn' or a code reference. Warn means that +the perl warn() function will be called. If a code reference is +passed, then this routine will be called and the return value from it +will be incorporated in the listing. The default is 'ignore'. + +Only the first parameter is mandatory. + +The return value from parse_dir() is a list of directory entries. In +a scalar context the return value is a reference to the list. The +directory entries are represented by an array consisting of [ +$filename, $filetype, $filesize, $filetime, $filemode ]. The +$filetype value is one of the letters 'f', 'd', 'l' or '?'. The +$filetime value is the seconds since Jan 1, 1970. The +$filemode is a bitmask like the mode returned by stat(). + +=head1 COPYRIGHT + +Copyright 1996-2010, Gisle Aas + +Based on lsparse.pl (from Lee McLoughlin's ftp mirror package) and +Net::FTP's parse_dir (Graham Barr). + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. |