diff options
author | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2005-06-21 12:01:07 +0000 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2005-06-21 12:01:07 +0000 |
commit | 39713df493e0bfd95f001dd75d5926ec9fa719bf (patch) | |
tree | b6c3afb1e853cdc0a6391b74fc963bfc05d756df /lib/Archive | |
parent | 69c678eb3bbbed7a7896fd9a2e52b0bbb93d4c3e (diff) | |
download | perl-39713df493e0bfd95f001dd75d5926ec9fa719bf.tar.gz |
Add Archive::Tar 1.24, except ptar for now
p4raw-id: //depot/perl@24922
Diffstat (limited to 'lib/Archive')
-rw-r--r-- | lib/Archive/Tar.pm | 1468 | ||||
-rw-r--r-- | lib/Archive/Tar/Constant.pm | 72 | ||||
-rw-r--r-- | lib/Archive/Tar/File.pm | 579 | ||||
-rw-r--r-- | lib/Archive/Tar/t/01_use.t | 7 | ||||
-rw-r--r-- | lib/Archive/Tar/t/02_methods.t | 770 | ||||
-rw-r--r-- | lib/Archive/Tar/t/03_file.t | 139 | ||||
-rw-r--r-- | lib/Archive/Tar/t/99_pod.t | 18 | ||||
-rw-r--r-- | lib/Archive/Tar/t/src/long/b | 1 | ||||
-rw-r--r-- | lib/Archive/Tar/t/src/long/bar.tar | bin | 0 -> 10240 bytes | |||
-rw-r--r-- | lib/Archive/Tar/t/src/long/foo.tgz | bin | 0 -> 331 bytes | |||
-rw-r--r-- | lib/Archive/Tar/t/src/short/b | 1 | ||||
-rw-r--r-- | lib/Archive/Tar/t/src/short/bar.tar | bin | 0 -> 10240 bytes | |||
-rw-r--r-- | lib/Archive/Tar/t/src/short/foo.tgz | bin | 0 -> 145 bytes |
13 files changed, 3055 insertions, 0 deletions
diff --git a/lib/Archive/Tar.pm b/lib/Archive/Tar.pm new file mode 100644 index 0000000000..e2b16314ac --- /dev/null +++ b/lib/Archive/Tar.pm @@ -0,0 +1,1468 @@ +### the gnu tar specification: +### http://www.gnu.org/software/tar/manual/html_mono/tar.html +### +### and the pax format spec, which tar derives from: +### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html + +package Archive::Tar; +require 5.005_03; + +use strict; +use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD + $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING]; + +$DEBUG = 0; +$WARN = 1; +$FOLLOW_SYMLINK = 0; +$VERSION = "1.24"; +$CHOWN = 1; +$CHMOD = 1; +$DO_NOT_USE_PREFIX = 0; + +BEGIN { + use Config; + $HAS_PERLIO = $Config::Config{useperlio}; + + ### try and load IO::String anyway, so you can dynamically + ### switch between perlio and IO::String + eval { + require IO::String; + import IO::String; + }; + $HAS_IO_STRING = $@ ? 0 : 1; + +} + +use Cwd; +use IO::File; +use Carp qw(carp croak); +use File::Spec (); +use File::Spec::Unix (); +use File::Path (); + +use Archive::Tar::File; +use Archive::Tar::Constant; + +=head1 NAME + +Archive::Tar - module for manipulations of tar archives + +=head1 SYNOPSIS + + use Archive::Tar; + my $tar = Archive::Tar->new; + + $tar->read('origin.tgz',1); + $tar->extract(); + + $tar->add_files('file/foo.pl', 'docs/README'); + $tar->add_data('file/baz.txt', 'This is the contents now'); + + $tar->rename('oldname', 'new/file/name'); + + $tar->write('files.tar'); + +=head1 DESCRIPTION + +Archive::Tar provides an object oriented mechanism for handling tar +files. It provides class methods for quick and easy files handling +while also allowing for the creation of tar file objects for custom +manipulation. If you have the IO::Zlib module installed, +Archive::Tar will also support compressed or gzipped tar files. + +An object of class Archive::Tar represents a .tar(.gz) archive full +of files and things. + +=head1 Object Methods + +=head2 Archive::Tar->new( [$file, $compressed] ) + +Returns a new Tar object. If given any arguments, C<new()> calls the +C<read()> method automatically, passing on the arguments provided to +the C<read()> method. + +If C<new()> is invoked with arguments and the C<read()> method fails +for any reason, C<new()> returns undef. + +=cut + +my $tmpl = { + _data => [ ], + _file => 'Unknown', +}; + +### install get/set accessors for this object. +for my $key ( keys %$tmpl ) { + no strict 'refs'; + *{__PACKAGE__."::$key"} = sub { + my $self = shift; + $self->{$key} = $_[0] if @_; + return $self->{$key}; + } +} + +sub new { + my $class = shift; + $class = ref $class if ref $class; + + ### copying $tmpl here since a shallow copy makes it use the + ### same aref, causing for files to remain in memory always. + my $obj = bless { _data => [ ], _file => 'Unknown' }, $class; + + if (@_) { + return unless $obj->read( @_ ); + } + + return $obj; +} + +=head2 $tar->read ( $filename|$handle, $compressed, {opt => 'val'} ) + +Read the given tar file into memory. +The first argument can either be the name of a file or a reference to +an already open filehandle (or an IO::Zlib object if it's compressed) +The second argument indicates whether the file referenced by the first +argument is compressed. + +The C<read> will I<replace> any previous content in C<$tar>! + +The second argument may be considered optional if IO::Zlib is +installed, since it will transparently Do The Right Thing. +Archive::Tar will warn if you try to pass a compressed file if +IO::Zlib is not available and simply return. + +The third argument can be a hash reference with options. Note that +all options are case-sensitive. + +=over 4 + +=item limit + +Do not read more than C<limit> files. This is useful if you have +very big archives, and are only interested in the first few files. + +=item extract + +If set to true, immediately extract entries when reading them. This +gives you the same memory break as the C<extract_archive> function. +Note however that entries will not be read into memory, but written +straight to disk. + +=back + +All files are stored internally as C<Archive::Tar::File> objects. +Please consult the L<Archive::Tar::File> documentation for details. + +Returns the number of files read in scalar context, and a list of +C<Archive::Tar::File> objects in list context. + +=cut + +sub read { + my $self = shift; + my $file = shift; + my $gzip = shift || 0; + my $opts = shift || {}; + + unless( defined $file ) { + $self->_error( qq[No file to read from!] ); + return; + } else { + $self->_file( $file ); + } + + my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) ) + or return; + + my $data = $self->_read_tar( $handle, $opts ) or return; + + $self->_data( $data ); + + return wantarray ? @$data : scalar @$data; +} + +sub _get_handle { + my $self = shift; + my $file = shift; return unless defined $file; + return $file if ref $file; + + my $gzip = shift || 0; + my $mode = shift || READ_ONLY->( ZLIB ); # default to read only + + my $fh; my $bin; + + ### only default to ZLIB if we're not trying to /write/ to a handle ### + if( ZLIB and $gzip || MODE_READ->( $mode ) ) { + + ### IO::Zlib will Do The Right Thing, even when passed + ### a plain file ### + $fh = new IO::Zlib; + + } else { + if( $gzip ) { + $self->_error(qq[Compression not available - Install IO::Zlib!]); + return; + + } else { + $fh = new IO::File; + $bin++; + } + } + + unless( $fh->open( $file, $mode ) ) { + $self->_error( qq[Could not create filehandle for '$file': $!!] ); + return; + } + + binmode $fh if $bin; + + return $fh; +} + +sub _read_tar { + my $self = shift; + my $handle = shift or return; + my $opts = shift || {}; + + my $count = $opts->{limit} || 0; + my $extract = $opts->{extract} || 0; + + ### set a cap on the amount of files to extract ### + my $limit = 0; + $limit = 1 if $count > 0; + + my $tarfile = [ ]; + my $chunk; + my $read = 0; + my $real_name; # to set the name of a file when + # we're encountering @longlink + my $data; + + LOOP: + while( $handle->read( $chunk, HEAD ) ) { + ### IO::Zlib doesn't support this yet + my $offset = eval { tell $handle } || 'unknown'; + + unless( $read++ ) { + my $gzip = GZIP_MAGIC_NUM; + if( $chunk =~ /$gzip/ ) { + $self->_error( qq[Cannot read compressed format in tar-mode] ); + return; + } + } + + ### if we can't read in all bytes... ### + last if length $chunk != HEAD; + + ### Apparently this should really be two blocks of 512 zeroes, + ### but GNU tar sometimes gets it wrong. See comment in the + ### source code (tar.c) to GNU cpio. + next if $chunk eq TAR_END; + + my $entry; + unless( $entry = Archive::Tar::File->new( chunk => $chunk ) ) { + $self->_error( qq[Couldn't read chunk at offset $offset] ); + next; + } + + ### ignore labels: + ### http://www.gnu.org/manual/tar/html_node/tar_139.html + next if $entry->is_label; + + if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) { + + if ( $entry->is_file && !$entry->validate ) { + ### sometimes the chunk is rather fux0r3d and a whole 512 + ### bytes ends p in the ->name area. + ### clean it up, if need be + my $name = $entry->name; + $name = substr($name, 0, 100) if length $name > 100; + $name =~ s/\n/ /g; + + $self->_error( $name . qq[: checksum error] ); + next LOOP; + } + + my $block = BLOCK_SIZE->( $entry->size ); + + $data = $entry->get_content_by_ref; + + ### just read everything into memory + ### can't do lazy loading since IO::Zlib doesn't support 'seek' + ### this is because Compress::Zlib doesn't support it =/ + ### this reads in the whole data in one read() call. + if( $handle->read( $$data, $block ) < $block ) { + $self->_error( qq[Read error on tarfile (missing data) ']. + $entry->full_path ."' at offset $offset" ); + next; + } + + ### throw away trailing garbage ### + substr ($$data, $entry->size) = ""; + + ### part II of the @LongLink munging -- need to do /after/ + ### the checksum check. + if( $entry->is_longlink ) { + ### weird thing in tarfiles -- if the file is actually a + ### @LongLink, the data part seems to have a trailing ^@ + ### (unprintable) char. to display, pipe output through less. + ### but that doesn't *always* happen.. so check if the last + ### character is a control character, and if so remove it + ### at any rate, we better remove that character here, or tests + ### like 'eq' and hashlook ups based on names will SO not work + ### remove it by calculating the proper size, and then + ### tossing out everything that's longer than that size. + + ### count number of nulls + my $nulls = $$data =~ tr/\0/\0/; + + ### cut data + size by that many bytes + $entry->size( $entry->size - $nulls ); + substr ($$data, $entry->size) = ""; + } + } + + ### clean up of the entries.. posix tar /apparently/ has some + ### weird 'feature' that allows for filenames > 255 characters + ### they'll put a header in with as name '././@LongLink' and the + ### contents will be the name of the /next/ file in the archive + ### pretty crappy and kludgy if you ask me + + ### set the name for the next entry if this is a @LongLink; + ### this is one ugly hack =/ but needed for direct extraction + if( $entry->is_longlink ) { + $real_name = $data; + next; + } elsif ( defined $real_name ) { + $entry->name( $$real_name ); + $entry->prefix(''); + undef $real_name; + } + + $self->_extract_file( $entry ) if $extract + && !$entry->is_longlink + && !$entry->is_unknown + && !$entry->is_label; + + ### Guard against tarfiles with garbage at the end + last LOOP if $entry->name eq ''; + + ### push only the name on the rv if we're extracting + ### -- for extract_archive + push @$tarfile, ($extract ? $entry->name : $entry); + + if( $limit ) { + $count-- unless $entry->is_longlink || $entry->is_dir; + last LOOP unless $count; + } + } continue { + undef $data; + } + + return $tarfile; +} + +=head2 $tar->contains_file( $filename ) + +Check if the archive contains a certain file. +It will return true if the file is in the archive, false otherwise. + +Note however, that this function does an exact match using C<eq> +on the full path. So it cannot compensate for case-insensitive file- +systems or compare 2 paths to see if they would point to the same +underlying file. + +=cut + +sub contains_file { + my $self = shift; + my $full = shift or return; + + return 1 if $self->_find_entry($full); + return; +} + +=head2 $tar->extract( [@filenames] ) + +Write files whose names are equivalent to any of the names in +C<@filenames> to disk, creating subdirectories as necessary. This +might not work too well under VMS. +Under MacPerl, the file's modification time will be converted to the +MacOS zero of time, and appropriate conversions will be done to the +path. However, the length of each element of the path is not +inspected to see whether it's longer than MacOS currently allows (32 +characters). + +If C<extract> is called without a list of file names, the entire +contents of the archive are extracted. + +Returns a list of filenames extracted. + +=cut + +sub extract { + my $self = shift; + my @files; + + ### you requested the extraction of only certian files + if( @_ ) { + for my $file (@_) { + my $found; + for my $entry ( @{$self->_data} ) { + next unless $file eq $entry->full_path; + + ### we found the file you're looking for + push @files, $entry; + $found++; + } + + unless( $found ) { + return $self->_error( qq[Could not find '$file' in archive] ); + } + } + + ### just grab all the file items + } else { + @files = $self->get_files; + } + + ### nothing found? that's an error + unless( scalar @files ) { + $self->_error( qq[No files found for ] . $self->_file ); + return; + } + + ### now extract them + for my $entry ( @files ) { + unless( $self->_extract_file( $entry ) ) { + $self->_error(q[Could not extract ']. $entry->full_path .q['] ); + return; + } + } + + return @files; +} + +=head2 $tar->extract_file( $file, [$extract_path] ) + +Write an entry, whose name is equivalent to the file name provided to +disk. Optionally takes a second parameter, which is the full (unix) +path (including filename) the entry will be written to. + +For example: + + $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' ); + +Returns true on success, false on failure. + +=cut + +sub extract_file { + my $self = shift; + my $file = shift or return; + my $alt = shift; + + my $entry = $self->_find_entry( $file ) + or $self->_error( qq[Could not find an entry for '$file'] ), return; + + return $self->_extract_file( $entry, $alt ); +} + +sub _extract_file { + my $self = shift; + my $entry = shift or return; + my $alt = shift; + my $cwd = cwd(); + + ### you wanted an alternate extraction location ### + my $name = defined $alt ? $alt : $entry->full_path; + + ### splitpath takes a bool at the end to indicate + ### that it's splitting a dir + my ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name, + $entry->is_dir ); + my $dir; + ### is $name an absolute path? ### + if( File::Spec->file_name_is_absolute( $dirs ) ) { + $dir = $dirs; + + ### it's a relative path ### + } else { + my @dirs = File::Spec::Unix->splitdir( $dirs ); + my @cwd = File::Spec->splitdir( $cwd ); + $dir = File::Spec->catdir(@cwd, @dirs); + } + + if( -e $dir && !-d _ ) { + $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] ); + return; + } + + unless ( -d _ ) { + eval { File::Path::mkpath( $dir, 0, 0777 ) }; + if( $@ ) { + $self->_error( qq[Could not create directory '$dir': $@] ); + return; + } + } + + ### we're done if we just needed to create a dir ### + return 1 if $entry->is_dir; + + my $full = File::Spec->catfile( $dir, $file ); + + if( $entry->is_unknown ) { + $self->_error( qq[Unknown file type for file '$full'] ); + return; + } + + if( length $entry->type && $entry->is_file ) { + my $fh = IO::File->new; + $fh->open( '>' . $full ) or ( + $self->_error( qq[Could not open file '$full': $!] ), + return + ); + + if( $entry->size ) { + binmode $fh; + syswrite $fh, $entry->data or ( + $self->_error( qq[Could not write data to '$full'] ), + return + ); + } + + close $fh or ( + $self->_error( qq[Could not close file '$full'] ), + return + ); + + } else { + $self->_make_special_file( $entry, $full ) or return; + } + + utime time, $entry->mtime - TIME_OFFSET, $full or + $self->_error( qq[Could not update timestamp] ); + + if( $CHOWN && CAN_CHOWN ) { + chown $entry->uid, $entry->gid, $full or + $self->_error( qq[Could not set uid/gid on '$full'] ); + } + + ### only chmod if we're allowed to, but never chmod symlinks, since they'll + ### change the perms on the file they're linking too... + if( $CHMOD and not -l $full ) { + chmod $entry->mode, $full or + $self->_error( qq[Could not chown '$full' to ] . $entry->mode ); + } + + return 1; +} + +sub _make_special_file { + my $self = shift; + my $entry = shift or return; + my $file = shift; return unless defined $file; + + my $err; + + if( $entry->is_symlink ) { + my $fail; + if( ON_UNIX ) { + symlink( $entry->linkname, $file ) or $fail++; + + } else { + $self->_extract_special_file_as_plain_file( $entry, $file ) + or $fail++; + } + + $err = qq[Making symbolink link from '] . $entry->linkname . + qq[' to '$file' failed] if $fail; + + } elsif ( $entry->is_hardlink ) { + my $fail; + if( ON_UNIX ) { + link( $entry->linkname, $file ) or $fail++; + + } else { + $self->_extract_special_file_as_plain_file( $entry, $file ) + or $fail++; + } + + $err = qq[Making hard link from '] . $entry->linkname . + qq[' to '$file' failed] if $fail; + + } elsif ( $entry->is_fifo ) { + ON_UNIX && !system('mknod', $file, 'p') or + $err = qq[Making fifo ']. $entry->name .qq[' failed]; + + } elsif ( $entry->is_blockdev or $entry->is_chardev ) { + my $mode = $entry->is_blockdev ? 'b' : 'c'; + + ON_UNIX && !system('mknod', $file, $mode, + $entry->devmajor, $entry->devminor) or + $err = qq[Making block device ']. $entry->name .qq[' (maj=] . + $entry->devmajor . qq[ min=] . $entry->devminor . + qq[) failed.]; + + } elsif ( $entry->is_socket ) { + ### the original doesn't do anything special for sockets.... ### + 1; + } + + return $err ? $self->_error( $err ) : 1; +} + +### don't know how to make symlinks, let's just extract the file as +### a plain file +sub _extract_special_file_as_plain_file { + my $self = shift; + my $entry = shift or return; + my $file = shift; return unless defined $file; + + my $err; + TRY: { + my $orig = $self->_find_entry( $entry->linkname ); + + unless( $orig ) { + $err = qq[Could not find file '] . $entry->linkname . + qq[' in memory.]; + last TRY; + } + + ### clone the entry, make it appear as a normal file ### + my $clone = $entry->clone; + $clone->_downgrade_to_plainfile; + $self->_extract_file( $clone, $file ) or last TRY; + + return 1; + } + + return $self->_error($err); +} + +=head2 $tar->list_files( [\@properties] ) + +Returns a list of the names of all the files in the archive. + +If C<list_files()> is passed an array reference as its first argument +it returns a list of hash references containing the requested +properties of each file. The following list of properties is +supported: name, size, mtime (last modified date), mode, uid, gid, +linkname, uname, gname, devmajor, devminor, prefix. + +Passing an array reference containing only one element, 'name', is +special cased to return a list of names rather than a list of hash +references, making it equivalent to calling C<list_files> without +arguments. + +=cut + +sub list_files { + my $self = shift; + my $aref = shift || [ ]; + + unless( $self->_data ) { + $self->read() or return; + } + + if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) { + return map { $_->full_path } @{$self->_data}; + } else { + + #my @rv; + #for my $obj ( @{$self->_data} ) { + # push @rv, { map { $_ => $obj->$_() } @$aref }; + #} + #return @rv; + + ### this does the same as the above.. just needs a +{ } + ### to make sure perl doesn't confuse it for a block + return map { my $o=$_; + +{ map { $_ => $o->$_() } @$aref } + } @{$self->_data}; + } +} + +sub _find_entry { + my $self = shift; + my $file = shift; + + unless( defined $file ) { + $self->_error( qq[No file specified] ); + return; + } + + for my $entry ( @{$self->_data} ) { + my $path = $entry->full_path; + return $entry if $path eq $file; + } + + $self->_error( qq[No such file in archive: '$file'] ); + return; +} + +=head2 $tar->get_files( [@filenames] ) + +Returns the C<Archive::Tar::File> objects matching the filenames +provided. If no filename list was passed, all C<Archive::Tar::File> +objects in the current Tar object are returned. + +Please refer to the C<Archive::Tar::File> documentation on how to +handle these objects. + +=cut + +sub get_files { + my $self = shift; + + return @{ $self->_data } unless @_; + + my @list; + for my $file ( @_ ) { + push @list, grep { defined } $self->_find_entry( $file ); + } + + return @list; +} + +=head2 $tar->get_content( $file ) + +Return the content of the named file. + +=cut + +sub get_content { + my $self = shift; + my $entry = $self->_find_entry( shift ) or return; + + return $entry->data; +} + +=head2 $tar->replace_content( $file, $content ) + +Make the string $content be the content for the file named $file. + +=cut + +sub replace_content { + my $self = shift; + my $entry = $self->_find_entry( shift ) or return; + + return $entry->replace_content( shift ); +} + +=head2 $tar->rename( $file, $new_name ) + +Rename the file of the in-memory archive to $new_name. + +Note that you must specify a Unix path for $new_name, since per tar +standard, all files in the archive must be Unix paths. + +Returns true on success and false on failure. + +=cut + +sub rename { + my $self = shift; + my $file = shift; return unless defined $file; + my $new = shift; return unless defined $new; + + my $entry = $self->_find_entry( $file ) or return; + + return $entry->rename( $new ); +} + +=head2 $tar->remove (@filenamelist) + +Removes any entries with names matching any of the given filenames +from the in-memory archive. Returns a list of C<Archive::Tar::File> +objects that remain. + +=cut + +sub remove { + my $self = shift; + my @list = @_; + + my %seen = map { $_->full_path => $_ } @{$self->_data}; + delete $seen{ $_ } for @list; + + $self->_data( [values %seen] ); + + return values %seen; +} + +=head2 $tar->clear + +C<clear> clears the current in-memory archive. This effectively gives +you a 'blank' object, ready to be filled again. Note that C<clear> +only has effect on the object, not the underlying tarfile. + +=cut + +sub clear { + my $self = shift or return; + + $self->_data( [] ); + $self->_file( '' ); + + return 1; +} + + +=head2 $tar->write ( [$file, $compressed, $prefix] ) + +Write the in-memory archive to disk. The first argument can either +be the name of a file or a reference to an already open filehandle (a +GLOB reference). If the second argument is true, the module will use +IO::Zlib to write the file in a compressed format. If IO::Zlib is +not available, the C<write> method will fail and return. + +Note that when you pass in a filehandle, the compression argument +is ignored, as all files are printed verbatim to your filehandle. +If you wish to enable compression with filehandles, use an +C<IO::Zlib> filehandle instead. + +Specific levels of compression can be chosen by passing the values 2 +through 9 as the second parameter. + +The third argument is an optional prefix. All files will be tucked +away in the directory you specify as prefix. So if you have files +'a' and 'b' in your archive, and you specify 'foo' as prefix, they +will be written to the archive as 'foo/a' and 'foo/b'. + +If no arguments are given, C<write> returns the entire formatted +archive as a string, which could be useful if you'd like to stuff the +archive into a socket or a pipe to gzip or something. + +=cut + +sub write { + my $self = shift; + my $file = shift; $file = '' unless defined $file; + my $gzip = shift || 0; + my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; + my $dummy = ''; + + ### only need a handle if we have a file to print to ### + my $handle = length($file) + ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) ) + or return ) + : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h } + : $HAS_IO_STRING ? IO::String->new + : __PACKAGE__->no_string_support(); + + + + for my $entry ( @{$self->_data} ) { + ### entries to be written to the tarfile ### + my @write_me; + + ### only now will we change the object to reflect the current state + ### of the name and prefix fields -- this needs to be limited to + ### write() only! + my $clone = $entry->clone; + + + ### so, if you don't want use to use the prefix, we'll stuff + ### everything in the name field instead + if( $DO_NOT_USE_PREFIX ) { + + ### you might have an extended prefix, if so, set it in the clone + ### XXX is ::Unix right? + $clone->name( length $ext_prefix + ? File::Spec::Unix->catdir( $ext_prefix, + $clone->full_path) + : $clone->full_path ); + $clone->prefix( '' ); + + ### otherwise, we'll have to set it properly -- prefix part in the + ### prefix and name part in the name field. + } else { + + ### split them here, not before! + my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path ); + + ### you might have an extended prefix, if so, set it in the clone + ### XXX is ::Unix right? + $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix ) + if length $ext_prefix; + + $clone->prefix( $prefix ); + $clone->name( $name ); + } + + ### names are too long, and will get truncated if we don't add a + ### '@LongLink' file... + my $make_longlink = ( length($clone->name) > NAME_LENGTH or + length($clone->prefix) > PREFIX_LENGTH + ) || 0; + + ### perhaps we need to make a longlink file? + if( $make_longlink ) { + my $longlink = Archive::Tar::File->new( + data => LONGLINK_NAME, + $clone->full_path, + { type => LONGLINK } + ); + + unless( $longlink ) { + $self->_error( qq[Could not create 'LongLink' entry for ] . + qq[oversize file '] . $clone->full_path ."'" ); + return; + }; + + push @write_me, $longlink; + } + + push @write_me, $clone; + + ### write the one, optionally 2 a::t::file objects to the handle + for my $clone (@write_me) { + + ### if the file is a symlink, there are 2 options: + ### either we leave the symlink intact, but then we don't write any + ### data OR we follow the symlink, which means we actually make a + ### copy. if we do the latter, we have to change the TYPE of the + ### clone to 'FILE' + my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK; + my $data_ok = !$clone->is_symlink && $clone->has_content; + + ### downgrade to a 'normal' file if it's a symlink we're going to + ### treat as a regular file + $clone->_downgrade_to_plainfile if $link_ok; + + ### get the header for this block + my $header = $self->_format_tar_entry( $clone ); + unless( $header ) { + $self->_error(q[Could not format header for: ] . + $clone->full_path ); + return; + } + + unless( print $handle $header ) { + $self->_error(q[Could not write header for: ] . + $clone->full_path); + return; + } + + if( $link_ok or $data_ok ) { + unless( print $handle $clone->data ) { + $self->_error(q[Could not write data for: ] . + $clone->full_path); + return; + } + + ### pad the end of the clone if required ### + print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK + } + + } ### done writing these entries + } + + ### write the end markers ### + print $handle TAR_END x 2 or + return $self->_error( qq[Could not write tar end markers] ); + ### did you want it written to a file, or returned as a string? ### + return length($file) ? 1 + : $HAS_PERLIO ? $dummy + : do { seek $handle, 0, 0; local $/; <$handle> } +} + +sub _format_tar_entry { + my $self = shift; + my $entry = shift or return; + my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; + my $no_prefix = shift || 0; + + my $file = $entry->name; + my $prefix = $entry->prefix; $prefix = '' unless defined $prefix; + + ### remove the prefix from the file name + ### not sure if this is still neeeded --kane + ### no it's not -- Archive::Tar::File->_new_from_file will take care of + ### this for us. Even worse, this would break if we tried to add a file + ### like x/x. + #if( length $prefix ) { + # $file =~ s/^$match//; + #} + + $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix) + if length $ext_prefix; + + ### not sure why this is... ### + my $l = PREFIX_LENGTH; # is ambiguous otherwise... + substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH; + + my $f1 = "%06o"; my $f2 = "%11o"; + + ### this might be optimizable with a 'changed' flag in the file objects ### + my $tar = pack ( + PACK, + $file, + + (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]), + (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]), + + "", # checksum field - space padded a bit down + + (map { $entry->$_() } qw[type linkname magic]), + + $entry->version || TAR_VERSION, + + (map { $entry->$_() } qw[uname gname]), + (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]), + + ($no_prefix ? '' : $prefix) + ); + + ### add the checksum ### + substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar)); + + return $tar; +} + +=head2 $tar->add_files( @filenamelist ) + +Takes a list of filenames and adds them to the in-memory archive. + +The path to the file is automatically converted to a Unix like +equivalent for use in the archive, and, if on MacOS, the file's +modification time is converted from the MacOS epoch to the Unix epoch. +So tar archives created on MacOS with B<Archive::Tar> can be read +both with I<tar> on Unix and applications like I<suntar> or +I<Stuffit Expander> on MacOS. + +Be aware that the file's type/creator and resource fork will be lost, +which is usually what you want in cross-platform archives. + +Returns a list of C<Archive::Tar::File> objects that were just added. + +=cut + +sub add_files { + my $self = shift; + my @files = @_ or return; + + my @rv; + for my $file ( @files ) { + unless( -e $file ) { + $self->_error( qq[No such file: '$file'] ); + next; + } + + my $obj = Archive::Tar::File->new( file => $file ); + unless( $obj ) { + $self->_error( qq[Unable to add file: '$file'] ); + next; + } + + push @rv, $obj; + } + + push @{$self->{_data}}, @rv; + + return @rv; +} + +=head2 $tar->add_data ( $filename, $data, [$opthashref] ) + +Takes a filename, a scalar full of data and optionally a reference to +a hash with specific options. + +Will add a file to the in-memory archive, with name C<$filename> and +content C<$data>. Specific properties can be set using C<$opthashref>. +The following list of properties is supported: name, size, mtime +(last modified date), mode, uid, gid, linkname, uname, gname, +devmajor, devminor, prefix. (On MacOS, the file's path and +modification times are converted to Unix equivalents.) + +Returns the C<Archive::Tar::File> object that was just added, or +C<undef> on failure. + +=cut + +sub add_data { + my $self = shift; + my ($file, $data, $opt) = @_; + + my $obj = Archive::Tar::File->new( data => $file, $data, $opt ); + unless( $obj ) { + $self->_error( qq[Unable to add file: '$file'] ); + return; + } + + push @{$self->{_data}}, $obj; + + return $obj; +} + +=head2 $tar->error( [$BOOL] ) + +Returns the current errorstring (usually, the last error reported). +If a true value was specified, it will give the C<Carp::longmess> +equivalent of the error, in effect giving you a stacktrace. + +For backwards compatibility, this error is also available as +C<$Archive::Tar::error> although it is much recommended you use the +method call instead. + +=cut + +{ + $error = ''; + my $longmess; + + sub _error { + my $self = shift; + my $msg = $error = shift; + $longmess = Carp::longmess($error); + + ### set Archive::Tar::WARN to 0 to disable printing + ### of errors + if( $WARN ) { + carp $DEBUG ? $longmess : $msg; + } + + return; + } + + sub error { + my $self = shift; + return shift() ? $longmess : $error; + } +} + + +=head2 $bool = $tar->has_io_string + +Returns true if we currently have C<IO::String> support loaded. + +Either C<IO::String> or C<perlio> support is needed to support writing +stringified archives. Currently, C<perlio> is the preffered method, if +available. + +See the C<GLOBAL VARIABLES> section to see how to change this preference. + +=cut + +sub has_io_string { return $HAS_IO_STRING; } + +=head2 $bool = $tar->has_perlio + +Returns true if we currently have C<perlio> support loaded. + +This requires C<perl-5.8> or higher, compiled with C<perlio> + +Either C<IO::String> or C<perlio> support is needed to support writing +stringified archives. Currently, C<perlio> is the preffered method, if +available. + +See the C<GLOBAL VARIABLES> section to see how to change this preference. + +=cut + +sub has_perlio { return $HAS_PERLIO; } + + +=head1 Class Methods + +=head2 Archive::Tar->create_archive($file, $compression, @filelist) + +Creates a tar file from the list of files provided. The first +argument can either be the name of the tar file to create or a +reference to an open file handle (e.g. a GLOB reference). + +The second argument specifies the level of compression to be used, if +any. Compression of tar files requires the installation of the +IO::Zlib module. Specific levels of compression may be +requested by passing a value between 2 and 9 as the second argument. +Any other value evaluating as true will result in the default +compression level being used. + +Note that when you pass in a filehandle, the compression argument +is ignored, as all files are printed verbatim to your filehandle. +If you wish to enable compression with filehandles, use an +C<IO::Zlib> filehandle instead. + +The remaining arguments list the files to be included in the tar file. +These files must all exist. Any files which don't exist or can't be +read are silently ignored. + +If the archive creation fails for any reason, C<create_archive> will +return false. Please use the C<error> method to find the cause of the +failure. + +Note that this method does not write C<on the fly> as it were; it +still reads all the files into memory before writing out the archive. +Consult the FAQ below if this is a problem. + +=cut + +sub create_archive { + my $class = shift; + + my $file = shift; return unless defined $file; + my $gzip = shift || 0; + my @files = @_; + + unless( @files ) { + return $class->_error( qq[Cowardly refusing to create empty archive!] ); + } + + my $tar = $class->new; + $tar->add_files( @files ); + return $tar->write( $file, $gzip ); +} + +=head2 Archive::Tar->list_archive ($file, $compressed, [\@properties]) + +Returns a list of the names of all the files in the archive. The +first argument can either be the name of the tar file to list or a +reference to an open file handle (e.g. a GLOB reference). + +If C<list_archive()> is passed an array reference as its third +argument it returns a list of hash references containing the requested +properties of each file. The following list of properties is +supported: name, size, mtime (last modified date), mode, uid, gid, +linkname, uname, gname, devmajor, devminor, prefix. + +Passing an array reference containing only one element, 'name', is +special cased to return a list of names rather than a list of hash +references. + +=cut + +sub list_archive { + my $class = shift; + my $file = shift; return unless defined $file; + my $gzip = shift || 0; + + my $tar = $class->new($file, $gzip); + return unless $tar; + + return $tar->list_files( @_ ); +} + +=head2 Archive::Tar->extract_archive ($file, $gzip) + +Extracts the contents of the tar file. The first argument can either +be the name of the tar file to create or a reference to an open file +handle (e.g. a GLOB reference). All relative paths in the tar file will +be created underneath the current working directory. + +C<extract_archive> will return a list of files it extracted. +If the archive extraction fails for any reason, C<extract_archive> +will return false. Please use the C<error> method to find the cause +of the failure. + +=cut + +sub extract_archive { + my $class = shift; + my $file = shift; return unless defined $file; + my $gzip = shift || 0; + + my $tar = $class->new( ) or return; + + return $tar->read( $file, $gzip, { extract => 1 } ); +} + +=head2 Archive::Tar->can_handle_compressed_files + +A simple checking routine, which will return true if C<Archive::Tar> +is able to uncompress compressed archives on the fly with C<IO::Zlib>, +or false if C<IO::Zlib> is not installed. + +You can use this as a shortcut to determine whether C<Archive::Tar> +will do what you think before passing compressed archives to its +C<read> method. + +=cut + +sub can_handle_compressed_files { return ZLIB ? 1 : 0 } + +sub no_string_support { + croak("You have to install IO::String to support writing archives to strings"); +} + +1; + +__END__ + +=head1 GLOBAL VARIABLES + +=head2 $Archive::Tar::FOLLOW_SYMLINK + +Set this variable to C<1> to make C<Archive::Tar> effectively make a +copy of the file when extracting. Default is C<0>, which +means the symlink stays intact. Of course, you will have to pack the +file linked to as well. + +This option is checked when you write out the tarfile using C<write> +or C<create_archive>. + +This works just like C</bin/tar>'s C<-h> option. + +=head2 $Archive::Tar::CHOWN + +By default, C<Archive::Tar> will try to C<chown> your files if it is +able to. In some cases, this may not be desired. In that case, set +this variable to C<0> to disable C<chown>-ing, even if it were +possible. + +The default is C<1>. + +=head2 $Archive::Tar::CHMOD + +By default, C<Archive::Tar> will try to C<chmod> your files to +whatever mode was specified for the particular file in the archive. +In some cases, this may not be desired. In that case, set this +variable to C<0> to disable C<chmod>-ing. + +The default is C<1>. + +=head2 $Archive::Tar::DO_NOT_USE_PREFIX + +By default, C<Archive::Tar> will try to put paths that are over +100 characters in the C<prefix> field of your tar header. However, +some older tar programs do not implement this spec. To retain +compatibility with these older versions, you can set the +C<$DO_NOT_USE_PREFIX> variable to a true value, and C<Archive::Tar> +will use an alternate way of dealing with paths over 100 characters +by using the C<GNU Extended Header> feature. + +The default is C<0>. + +=head2 $Archive::Tar::DEBUG + +Set this variable to C<1> to always get the C<Carp::longmess> output +of the warnings, instead of the regular C<carp>. This is the same +message you would get by doing: + + $tar->error(1); + +Defaults to C<0>. + +=head2 $Archive::Tar::WARN + +Set this variable to C<0> if you do not want any warnings printed. +Personally I recommend against doing this, but people asked for the +option. Also, be advised that this is of course not threadsafe. + +Defaults to C<1>. + +=head2 $Archive::Tar::error + +Holds the last reported error. Kept for historical reasons, but its +use is very much discouraged. Use the C<error()> method instead: + + warn $tar->error unless $tar->extract; + +=head2 $Archive::Tar::HAS_PERLIO + +This variable holds a boolean indicating if we currently have +C<perlio> support loaded. This will be enabled for any perl +greater than C<5.8> compiled with C<perlio>. + +If you feel strongly about disabling it, set this variable to +C<false>. Note that you will then need C<IO::String> installed +to support writing stringified archives. + +Don't change this variable unless you B<really> know what you're +doing. + +=head2 $Archive::Tar::HAS_IO_STRING + +This variable holds a boolean indicating if we currently have +C<IO::String> support loaded. This will be enabled for any perl +that has a loadable C<IO::String> module. + +If you feel strongly about disabling it, set this variable to +C<false>. Note that you will then need C<perlio> support from +your perl to be able to write stringified archives. + +Don't change this variable unless you B<really> know what you're +doing. + +=head1 FAQ + +=over 4 + +=item What's the minimum perl version required to run Archive::Tar? + +You will need perl version 5.005_03 or newer. + +=item Isn't Archive::Tar slow? + +Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar> +However, it's very portable. If speed is an issue, consider using +C</bin/tar> instead. + +=item Isn't Archive::Tar heavier on memory than /bin/tar? + +Yes it is, see previous answer. Since C<Compress::Zlib> and therefore +C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little +choice but to read the archive into memory. +This is ok if you want to do in-memory manipulation of the archive. +If you just want to extract, use the C<extract_archive> class method +instead. It will optimize and write to disk immediately. + +=item Can't you lazy-load data instead? + +No, not easily. See previous question. + +=item How much memory will an X kb tar file need? + +Probably more than X kb, since it will all be read into memory. If +this is a problem, and you don't need to do in memory manipulation +of the archive, consider using C</bin/tar> instead. + +=item What do you do with unsupported filetypes in an archive? + +C<Unix> has a few filetypes that aren't supported on other platforms, +like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just +try to make a copy of the original file, rather than throwing an error. + +This does require you to read the entire archive in to memory first, +since otherwise we wouldn't know what data to fill the copy with. +(This means that you cannot use the class methods on archives that +have incompatible filetypes and still expect things to work). + +For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that +the extraction of this particular item didn't work. + +=back + +=head1 TODO + +=over 4 + +=item Check if passed in handles are open for read/write + +Currently I don't know of any portable pure perl way to do this. +Suggestions welcome. + +=back + +=head1 AUTHOR + +This module by +Jos Boumans E<lt>kane@cpan.orgE<gt>. + +=head1 ACKNOWLEDGEMENTS + +Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney and +especially Andrew Savige for their help and suggestions. + +=head1 COPYRIGHT + +This module is +copyright (c) 2002 Jos Boumans E<lt>kane@cpan.orgE<gt>. +All rights reserved. + +This library is free software; +you may redistribute and/or modify it under the same +terms as Perl itself. + +=cut diff --git a/lib/Archive/Tar/Constant.pm b/lib/Archive/Tar/Constant.pm new file mode 100644 index 0000000000..fe5bb14726 --- /dev/null +++ b/lib/Archive/Tar/Constant.pm @@ -0,0 +1,72 @@ +package Archive::Tar::Constant; + +BEGIN { + require Exporter; + $VERSION= '0.02'; + @ISA = qw[Exporter]; + @EXPORT = qw[ + FILE HARDLINK SYMLINK CHARDEV BLOCKDEV DIR FIFO SOCKET UNKNOWN + BUFFER HEAD READ_ONLY WRITE_ONLY UNPACK PACK TIME_OFFSET ZLIB + BLOCK_SIZE TAR_PAD TAR_END ON_UNIX BLOCK CAN_READLINK MAGIC + TAR_VERSION UNAME GNAME CAN_CHOWN MODE CHECK_SUM UID GID + GZIP_MAGIC_NUM MODE_READ LONGLINK LONGLINK_NAME PREFIX_LENGTH + LABEL NAME_LENGTH STRIP_MODE + ]; + + require Time::Local if $^O eq "MacOS"; +} + +use constant FILE => 0; +use constant HARDLINK => 1; +use constant SYMLINK => 2; +use constant CHARDEV => 3; +use constant BLOCKDEV => 4; +use constant DIR => 5; +use constant FIFO => 6; +use constant SOCKET => 8; +use constant UNKNOWN => 9; +use constant LONGLINK => 'L'; +use constant LABEL => 'V'; + +use constant BUFFER => 4096; +use constant HEAD => 512; +use constant BLOCK => 512; + +use constant BLOCK_SIZE => sub { my $n = int($_[0]/BLOCK); $n++ if $_[0] % BLOCK; $n * BLOCK }; +use constant TAR_PAD => sub { my $x = shift || return; return "\0" x (BLOCK - ($x % BLOCK) ) }; +use constant TAR_END => "\0" x BLOCK; + +use constant READ_ONLY => sub { shift() ? 'rb' : 'r' }; +use constant WRITE_ONLY => sub { $_[0] ? 'wb' . shift : 'w' }; +use constant MODE_READ => sub { $_[0] =~ /^r/ ? 1 : 0 }; + +# Pointless assigment to make -w shut up +my $getpwuid; $getpwuid = 'unknown' unless eval { my $f = getpwuid (0); }; +my $getgrgid; $getgrgid = 'unknown' unless eval { my $f = getgrgid (0); }; +use constant UNAME => sub { $getpwuid || scalar getpwuid( shift() ) }; +use constant GNAME => sub { $getgrgid || scalar getgrgid( shift() ) }; +use constant UID => $>; +use constant GID => (split ' ', $) )[0]; + +use constant MODE => do { 0666 & (0777 & ~umask) }; +use constant STRIP_MODE => sub { shift() & 0777 }; +use constant CHECK_SUM => " "; + +use constant UNPACK => 'A100 A8 A8 A8 A12 A12 A8 A1 A100 A6 A2 A32 A32 A8 A8 A155 x12'; +use constant PACK => 'a100 a8 a8 a8 a12 a12 A8 a1 a100 a6 a2 a32 a32 a8 a8 a155 x12'; +use constant NAME_LENGTH => 100; +use constant PREFIX_LENGTH => 155; + +use constant TIME_OFFSET => ($^O eq "MacOS") ? Time::Local::timelocal(0,0,0,1,0,70) : 0; +use constant MAGIC => "ustar"; +use constant TAR_VERSION => "00"; +use constant LONGLINK_NAME => '././@LongLink'; + +use constant ZLIB => do { eval { require IO::Zlib }; $@ ? 0 : 1 }; +use constant GZIP_MAGIC_NUM => qr/^(?:\037\213|\037\235)/; + +use constant CAN_CHOWN => do { ($> == 0 and $^O ne "MacOS" and $^O ne "MSWin32") }; +use constant CAN_READLINK => ($^O ne 'MSWin32' and $^O !~ /RISC(?:[ _])?OS/i and $^O ne 'VMS'); +use constant ON_UNIX => ($^O ne 'MSWin32' and $^O ne 'MacOS' and $^O ne 'VMS'); + +1; diff --git a/lib/Archive/Tar/File.pm b/lib/Archive/Tar/File.pm new file mode 100644 index 0000000000..a310ee9de9 --- /dev/null +++ b/lib/Archive/Tar/File.pm @@ -0,0 +1,579 @@ +package Archive::Tar::File; +use strict; + +use IO::File; +use File::Spec::Unix (); +use File::Spec (); +use File::Basename (); +use Archive::Tar::Constant; + +use vars qw[@ISA $VERSION]; +@ISA = qw[Archive::Tar]; +$VERSION = '0.02'; + +### set value to 1 to oct() it during the unpack ### +my $tmpl = [ + name => 0, # string + mode => 1, # octal + uid => 1, # octal + gid => 1, # octal + size => 1, # octal + mtime => 1, # octal + chksum => 1, # octal + type => 0, # character + linkname => 0, # string + magic => 0, # string + version => 0, # 2 bytes + uname => 0, # string + gname => 0, # string + devmajor => 1, # octal + devminor => 1, # octal + prefix => 0, + +### end UNPACK items ### + raw => 0, # the raw data chunk + data => 0, # the data associated with the file -- + # This might be very memory intensive +]; + +### install get/set accessors for this object. +for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) { + my $key = $tmpl->[$i]; + no strict 'refs'; + *{__PACKAGE__."::$key"} = sub { + my $self = shift; + $self->{$key} = $_[0] if @_; + + ### just in case the key is not there or undef or something ### + { local $^W = 0; + return $self->{$key}; + } + } +} + +=head1 NAME + +Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar + +=head1 SYNOPSIS + + my @items = $tar->get_files; + + print $_->name, ' ', $_->size, "\n" for @items; + + print $object->get_content; + $object->replace_content('new content'); + + $object->rename( 'new/full/path/to/file.c' ); + +=head1 DESCRIPTION + +Archive::Tar::Files provides a neat little object layer for in-memory +extracted files. It's mostly used internally in Archive::Tar to tidy +up the code, but there's no reason users shouldn't use this API as +well. + +=head2 Accessors + +A lot of the methods in this package are accessors to the various +fields in the tar header: + +=over 4 + +=item name + +The file's name + +=item mode + +The file's mode + +=item uid + +The user id owning the file + +=item gid + +The group id owning the file + +=item size + +File size in bytes + +=item mtime + +Modification time. Adjusted to mac-time on MacOS if required + +=item chksum + +Checksum field for the tar header + +=item type + +File type -- numeric, but comparable to exported constants -- see +Archive::Tar's documentation + +=item linkname + +If the file is a symlink, the file it's pointing to + +=item magic + +Tar magic string -- not useful for most users + +=item version + +Tar version string -- not useful for most users + +=item uname + +The user name that owns the file + +=item gname + +The group name that owns the file + +=item devmajor + +Device major number in case of a special file + +=item devminor + +Device minor number in case of a special file + +=item prefix + +Any directory to prefix to the extraction path, if any + +=item raw + +Raw tar header -- not useful for most users + +=back + +=head1 Methods + +=head2 new( file => $path ) + +Returns a new Archive::Tar::File object from an existing file. + +Returns undef on failure. + +=head2 new( data => $path, $data, $opt ) + +Returns a new Archive::Tar::File object from data. + +C<$path> defines the file name (which need not exist), C<$data> the +file contents, and C<$opt> is a reference to a hash of attributes +which may be used to override the default attributes (fields in the +tar header), which are described above in the Accessors section. + +Returns undef on failure. + +=head2 new( chunk => $chunk ) + +Returns a new Archive::Tar::File object from a raw 512-byte tar +archive chunk. + +Returns undef on failure. + +=cut + +sub new { + my $class = shift; + my $what = shift; + + my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) : + ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) : + ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) : + undef; + + return $obj; +} + +### copies the data, creates a clone ### +sub clone { + my $self = shift; + return bless { %$self }, ref $self; +} + +sub _new_from_chunk { + my $class = shift; + my $chunk = shift or return; + + ### makes it start at 0 actually... :) ### + my $i = -1; + my %entry = map { + $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ + } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); + + my $obj = bless \%entry, $class; + + ### magic is a filetype string.. it should have something like 'ustar' or + ### something similar... if the chunk is garbage, skip it + return unless $obj->magic !~ /\W/; + + ### store the original chunk ### + $obj->raw( $chunk ); + + $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) ); + $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) ); + + + return $obj; + +} + +sub _new_from_file { + my $class = shift; + my $path = shift or return; + my $type = __PACKAGE__->_filetype($path); + my $data = ''; + + unless ($type == DIR) { + my $fh = IO::File->new; + $fh->open($path) or return; + + ### binmode needed to read files properly on win32 ### + binmode $fh; + $data = do { local $/; <$fh> }; + close $fh; + } + + my @items = qw[mode uid gid size mtime]; + my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9]; + + ### you *must* set size == 0 on symlinks, or the next entry will be + ### though of as the contents of the symlink, which is wrong. + ### this fixes bug #7937 + $hash{size} = 0 if ($type == DIR or $type == SYMLINK); + $hash{mtime} -= TIME_OFFSET; + + ### strip the high bits off the mode, which we don't need to store + $hash{mode} = STRIP_MODE->( $hash{mode} ); + + + ### probably requires some file path munging here ... ### + ### name and prefix are set later + my $obj = { + %hash, + name => '', + chksum => CHECK_SUM, + type => $type, + linkname => ($type == SYMLINK and CAN_READLINK) + ? readlink $path + : '', + magic => MAGIC, + version => TAR_VERSION, + uname => UNAME->( $hash{uid} ), + gname => GNAME->( $hash{gid} ), + devmajor => 0, # not handled + devminor => 0, # not handled + prefix => '', + data => $data, + }; + + bless $obj, $class; + + ### fix up the prefix and file from the path + my($prefix,$file) = $obj->_prefix_and_file( $path ); + $obj->prefix( $prefix ); + $obj->name( $file ); + + return $obj; +} + +sub _new_from_data { + my $class = shift; + my $path = shift or return; + my $data = shift; return unless defined $data; + my $opt = shift; + + my $obj = { + data => $data, + name => '', + mode => MODE, + uid => UID, + gid => GID, + size => length $data, + mtime => time - TIME_OFFSET, + chksum => CHECK_SUM, + type => FILE, + linkname => '', + magic => MAGIC, + version => TAR_VERSION, + uname => UNAME->( UID ), + gname => GNAME->( GID ), + devminor => 0, + devmajor => 0, + prefix => '', + }; + + ### overwrite with user options, if provided ### + if( $opt and ref $opt eq 'HASH' ) { + for my $key ( keys %$opt ) { + + ### don't write bogus options ### + next unless exists $obj->{$key}; + $obj->{$key} = $opt->{$key}; + } + } + + bless $obj, $class; + + ### fix up the prefix and file from the path + my($prefix,$file) = $obj->_prefix_and_file( $path ); + $obj->prefix( $prefix ); + $obj->name( $file ); + + return $obj; +} + +sub _prefix_and_file { + my $self = shift; + my $path = shift; + + my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir ); + my @dirs = File::Spec->splitdir( $dirs ); + + ### so sometimes the last element is '' -- probably when trailing + ### dir slashes are encountered... this is is of course pointless, + ### so remove it + pop @dirs while @dirs and not length $dirs[-1]; + + ### if it's a directory, then $file might be empty + $file = pop @dirs if $self->is_dir and not length $file; + + my $prefix = File::Spec::Unix->catdir( + grep { length } $vol, @dirs + ); + return( $prefix, $file ); +} + +sub _filetype { + my $self = shift; + my $file = shift or return; + + return SYMLINK if (-l $file); # Symlink + + return FILE if (-f _); # Plain file + + return DIR if (-d _); # Directory + + return FIFO if (-p _); # Named pipe + + return SOCKET if (-S _); # Socket + + return BLOCKDEV if (-b _); # Block special + + return CHARDEV if (-c _); # Character special + + ### shouldn't happen, this is when making archives, not reading ### + return LONGLINK if ( $file eq LONGLINK_NAME ); + + return UNKNOWN; # Something else (like what?) + +} + +### this method 'downgrades' a file to plain file -- this is used for +### symlinks when FOLLOW_SYMLINKS is true. +sub _downgrade_to_plainfile { + my $entry = shift; + $entry->type( FILE ); + $entry->mode( MODE ); + $entry->linkname(''); + + return 1; +} + +=head2 full_path + +Returns the full path from the tar header; this is basically a +concatenation of the C<prefix> and C<name> fields. + +=cut + +sub full_path { + my $self = shift; + + ### if prefix field is emtpy + return $self->name unless defined $self->prefix and length $self->prefix; + + ### or otherwise, catfile'd + return File::Spec::Unix->catfile( $self->prefix, $self->name ); +} + + +=head2 validate + +Done by Archive::Tar internally when reading the tar file: +validate the header against the checksum to ensure integer tar file. + +Returns true on success, false on failure + +=cut + +sub validate { + my $self = shift; + + my $raw = $self->raw; + + ### don't know why this one is different from the one we /write/ ### + substr ($raw, 148, 8) = " "; + return unpack ("%16C*", $raw) == $self->chksum ? 1 : 0; +} + +=head2 has_content + +Returns a boolean to indicate whether the current object has content. +Some special files like directories and so on never will have any +content. This method is mainly to make sure you don't get warnings +for using uninitialized values when looking at an object's content. + +=cut + +sub has_content { + my $self = shift; + return defined $self->data() && length $self->data() ? 1 : 0; +} + +=head2 get_content + +Returns the current content for the in-memory file + +=cut + +sub get_content { + my $self = shift; + $self->data( ); +} + +=head2 get_content_by_ref + +Returns the current content for the in-memory file as a scalar +reference. Normal users won't need this, but it will save memory if +you are dealing with very large data files in your tar archive, since +it will pass the contents by reference, rather than make a copy of it +first. + +=cut + +sub get_content_by_ref { + my $self = shift; + + return \$self->{data}; +} + +=head2 replace_content( $content ) + +Replace the current content of the file with the new content. This +only affects the in-memory archive, not the on-disk version until +you write it. + +Returns true on success, false on failure. + +=cut + +sub replace_content { + my $self = shift; + my $data = shift || ''; + + $self->data( $data ); + $self->size( length $data ); + return 1; +} + +=head2 rename( $new_name ) + +Rename the current file to $new_name. + +Note that you must specify a Unix path for $new_name, since per tar +standard, all files in the archive must be Unix paths. + +Returns true on success and false on failure. + +=cut + +sub rename { + my $self = shift; + my $path = shift or return; + + my ($prefix,$file) = $self->_prefix_and_file( $path ); + + $self->name( $file ); + $self->prefix( $prefix ); + + return 1; +} + +=head1 Convenience methods + +To quickly check the type of a C<Archive::Tar::File> object, you can +use the following methods: + +=over 4 + +=item is_file + +Returns true if the file is of type C<file> + +=item is_dir + +Returns true if the file is of type C<dir> + +=item is_hardlink + +Returns true if the file is of type C<hardlink> + +=item is_symlink + +Returns true if the file is of type C<symlink> + +=item is_chardev + +Returns true if the file is of type C<chardev> + +=item is_blockdev + +Returns true if the file is of type C<blockdev> + +=item is_fifo + +Returns true if the file is of type C<fifo> + +=item is_socket + +Returns true if the file is of type C<socket> + +=item is_longlink + +Returns true if the file is of type C<LongLink>. +Should not happen after a successful C<read>. + +=item is_label + +Returns true if the file is of type C<Label>. +Should not happen after a successful C<read>. + +=item is_unknown + +Returns true if the file type is C<unknown> + +=back + +=cut + +#stupid perl5.5.3 needs to warn if it's not numeric +sub is_file { local $^W; FILE == $_[0]->type } +sub is_dir { local $^W; DIR == $_[0]->type } +sub is_hardlink { local $^W; HARDLINK == $_[0]->type } +sub is_symlink { local $^W; SYMLINK == $_[0]->type } +sub is_chardev { local $^W; CHARDEV == $_[0]->type } +sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type } +sub is_fifo { local $^W; FIFO == $_[0]->type } +sub is_socket { local $^W; SOCKET == $_[0]->type } +sub is_unknown { local $^W; UNKNOWN == $_[0]->type } +sub is_longlink { local $^W; LONGLINK eq $_[0]->type } +sub is_label { local $^W; LABEL eq $_[0]->type } + +1; diff --git a/lib/Archive/Tar/t/01_use.t b/lib/Archive/Tar/t/01_use.t new file mode 100644 index 0000000000..0641086306 --- /dev/null +++ b/lib/Archive/Tar/t/01_use.t @@ -0,0 +1,7 @@ +use Test::More tests => 2; +use strict; + +use_ok('Archive::Tar') or diag 'Archive::Tar not found -- exit' && die; + +my $tar = new Archive::Tar; +isa_ok( $tar, 'Archive::Tar', 'Object created' ); diff --git a/lib/Archive/Tar/t/02_methods.t b/lib/Archive/Tar/t/02_methods.t new file mode 100644 index 0000000000..20e37b4d22 --- /dev/null +++ b/lib/Archive/Tar/t/02_methods.t @@ -0,0 +1,770 @@ +BEGIN { + if( $ENV{PERL_CORE} ) { + chdir '../lib/Archive/Tar' if -d '../lib/Archive/Tar'; + } + use lib '../../..'; +} + +BEGIN { chdir 't' if -d 't' } + +use Test::More 'no_plan'; +use strict; +use lib '../lib'; + +use Cwd; +use IO::File; +use File::Copy; +use File::Path; +use File::Spec (); +use File::Spec::Unix (); +use File::Basename (); +use Data::Dumper; + +use Archive::Tar; +use Archive::Tar::Constant; + +### XXX TODO: +### * change to fullname +### * add tests for global variables + +### set up the environment ### +my @EXPECT_NORMAL = ( + ### dirs filename contents + [ [], 'c', qr/^iiiiiiiiiiii\s*$/ ], + [ [], 'd', qr/^uuuuuuuu\s*$/ ], +); + +### includes binary data +my $ALL_CHARS = join '', "\r\n", map( chr, 1..255 ), "zzz\n\r"; + +### @EXPECTBIN is used to ensure that $tarbin is written in the right +### order and that the contents and order match exactly when extracted +my @EXPECTBIN = ( + ### dirs filename contents ### + [ [], 'bIn11', $ALL_CHARS x 11 ], + [ [], 'bIn3', $ALL_CHARS x 3 ], + [ [], 'bIn4', $ALL_CHARS x 4 ], + [ [], 'bIn1', $ALL_CHARS ], + [ [], 'bIn2', $ALL_CHARS x 2 ], +); + +### @EXPECTX is used to ensure that $tarx is written in the right +### order and that the contents and order match exactly when extracted +### the 'x/x' extraction used to fail before A::T 1.08 +my @EXPECTX = ( + ### dirs filename contents + [ [ 'x' ], 'k', '', ], + [ [ 'x' ], 'x', 'j', ], # failed before A::T 1.08 +); + +my $LONG_FILE = qq[directory/really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-long-directory-name/myfile]; + +### wintendo can't deal with too long paths, so we might have to skip tests ### +my $TOO_LONG = ($^O eq 'MSWin32' or $^O eq 'cygwin') + && length( cwd(). $LONG_FILE ) > 247; + +### warn if we are going to skip long file names +$TOO_LONG ? diag("No long filename support - long filename extraction disabled") + : ( push @EXPECT_NORMAL, [ [], $LONG_FILE, qr/^hello\s*$/] ) ; + +my @ROOT = grep { length } 'src', $TOO_LONG ? 'short' : 'long'; + +my $ZLIB = eval { require IO::Zlib; 1 } ? 1 : 0; +my $NO_UNLINK = $ARGV[0] ? 1 : 0; + +### enable debugging? +$Archive::Tar::DEBUG = 1 if $ARGV[1]; + +### tests for binary and x/x files +my $TARBIN = Archive::Tar->new; +my $TARX = Archive::Tar->new; + +### paths to a .tar and .tgz file to use for tests +my $TAR_FILE = File::Spec->catfile( @ROOT, 'bar.tar' ); +my $TGZ_FILE = File::Spec->catfile( @ROOT, 'foo.tgz' ); +my $OUT_TAR_FILE = File::Spec->catfile( @ROOT, 'out.tar' ); +my $OUT_TGZ_FILE = File::Spec->catfile( @ROOT, 'out.tgz' ); + +copy( File::Basename::basename($0), 'copy' ); +my $COMPRESS_FILE = 'copy'; +chmod 0644, $COMPRESS_FILE; + +### done setting up environment ### + + +### did we probe IO::Zlib support ok? ### +{ is( Archive::Tar->can_handle_compressed_files, $ZLIB, + "Proper IO::Zlib support detected" ); +} + + +### tar error tests +{ my $tar = Archive::Tar->new; + + ok( $tar, "Object created" ); + isa_ok( $tar, 'Archive::Tar'); + + local $Archive::Tar::WARN = 0; + + ### should be empty to begin with + is( $tar->error, '', "The error string is empty" ); + + ### try a read on nothing + my @list = $tar->read(); + + ok(!(scalar @list), "Function read returns 0 files on error" ); + ok( $tar->error, " error string is non empty" ); + like( $tar->error, qr/No file to read from/, + " error string from create()" ); + unlike( $tar->error, qr/add/, " error string does not contain add" ); + + ### now, add empty data + my $obj = $tar->add_data( '' ); + + ok( !$obj, "'add_data' returns undef on error" ); + ok( $tar->error, " error string is non empty" ); + like( $tar->error, qr/add/, " error string contains add" ); + unlike( $tar->error, qr/create/," error string does not contain create" ); + + ### check if ->error eq $error + is( $tar->error, $Archive::Tar::error, + '$error matches error() method' ); +} + +### read tests ### +{ ### normal tar + gz compressed file + my $archive = $TAR_FILE; + my $compressed = $TGZ_FILE; + my $tar = Archive::Tar->new; + my $gzip = 0; + + ### check we got the object + ok( $tar, "Object created" ); + isa_ok( $tar, 'Archive::Tar'); + + for my $type( $archive, $compressed ) { + my $state = $gzip ? 'compressed' : 'uncompressed'; + + SKIP: { + + ### skip gz compressed archives wihtout IO::Zlib + skip( "No IO::Zlib - cannot read compressed archives", + 4 + 2 * (scalar @EXPECT_NORMAL) + ) if( $gzip and !$ZLIB); + + ### ->read test + { my @list = $tar->read( $type ); + my $cnt = scalar @list; + my $expect = scalar __PACKAGE__->get_expect(); + + ok( $cnt, "Reading $state file using 'read()'" ); + is( $cnt, $expect, " All files accounted for" ); + + for my $file ( @list ) { + ok( $file, "Got File object" ); + isa_ok( $file, "Archive::Tar::File" ); + + next unless $file->is_file; + + my $name = $file->full_path; + my($expect_name, $expect_content) = + get_expect_name_and_contents( $name, \@EXPECT_NORMAL ); + + ### ->fullname! + ok($expect_name," Found expected file '$name'" ); + + like($tar->get_content($name), $expect_content, + " Content OK" ); + } + } + + + ### list_archive test + { my @list = Archive::Tar->list_archive( $archive ); + my $cnt = scalar @list; + my $expect = scalar __PACKAGE__->get_expect(); + + ok( $cnt, "Reading $state file using 'list_archive'"); + is( $cnt, $expect, " All files accounted for" ); + + for my $file ( @list ) { + next if __PACKAGE__->is_dir( $file ); # directories + + my($expect_name, $expect_content) = + get_expect_name_and_contents( $file, \@EXPECT_NORMAL ); + + ok( $expect_name, + " Found expected file '$file'" ); + } + } + } + + ### now we try gz compressed archives + $gzip++; + } +} + +### add files tests ### +{ my @add = map { File::Spec->catfile( @ROOT, @$_ ) } ['b']; + my @addunix = map { File::Spec::Unix->catfile( @ROOT, @$_ ) } ['b']; + my $tar = Archive::Tar->new; + + ### check we got the object + ok( $tar, "Object created" ); + isa_ok( $tar, 'Archive::Tar'); + + ### add the files + { my @files = $tar->add_files( @add ); + + is( scalar @files, scalar @add, + "Adding files"); + is( $files[0]->name, 'b', " Proper name" ); + is( $files[0]->is_file, 1, " Proper type" ); + like( $files[0]->get_content, qr/^bbbbbbbbbbb\s*$/, + " Content OK" ); + + ### check if we have then in our tar object + for my $file ( @addunix ) { + ok( $tar->contains_file($file), + " File found in archive" ); + } + } + + ### check adding files doesn't conflict with a secondary archive + ### old A::T bug, we should keep testing for it + { my $tar2 = Archive::Tar->new; + my @added = $tar2->add_files( $COMPRESS_FILE ); + my @count = $tar2->list_files; + + is( scalar @added, 1, "Added files to secondary archive" ); + is( scalar @added, scalar @count, + " Does not conflict with first archive" ); + + ### check the adding of directories + my @add_dirs = File::Spec->catfile( @ROOT ); + my @dirs = $tar2->add_files( @add_dirs ); + is( scalar @dirs, scalar @add_dirs, + "Adding dirs"); + ok( $dirs[0]->is_dir, " Proper type" ); + } +} + +### add data tests ### +{ + { ### standard data ### + my @to_add = ( 'a', 'aaaaa' ); + my $tar = Archive::Tar->new; + + ### check we got the object + ok( $tar, "Object created" ); + isa_ok( $tar, 'Archive::Tar'); + + ### add a new file item as data + my $obj = $tar->add_data( @to_add ); + + ok( $obj, "Adding data" ); + is( $obj->name, $to_add[0], " Proper name" ); + is( $obj->is_file, 1, " Proper type" ); + like( $obj->get_content, qr/^$to_add[1]\s*$/, + " Content OK" ); + } + + { ### binary data + + ### dir/file structure -- x/y always went ok, x/x used to extract + ### in the wrong way -- this test catches that + for my $list ( [$TARBIN, \@EXPECTBIN], + [$TARX, \@EXPECTX], + ) { + ### XXX GLOBAL! changes may affect other tests! + my($tar,$struct) = @$list; + + for my $aref ( @$struct ) { + my ($dirs,$file,$data) = @$aref; + + my $path = File::Spec::Unix->catfile( + grep { length } @$dirs, $file ); + + my $obj = $tar->add_data( $path, $data ); + + ok( $obj, "Adding data '$file'" ); + is( $obj->full_path, $path, + " Proper name" ); + ok( $obj->is_file, " Proper type" ); + is( $obj->get_content, $data, + " Content OK" ); + } + } + } +} + +### rename/replace_content tests ### +{ my $tar = Archive::Tar->new; + my $from = 'c'; + my $to = 'e'; + + ### read in the file, check the proper files are there + ok( $tar->read( $TAR_FILE ), "Read in '$TAR_FILE'" ); + ok( $tar->get_files($from), " Found file '$from'" ); + { local $Archive::Tar::WARN = 0; + ok(!$tar->get_files($to), " File '$to' not yet found" ); + } + + ### rename an entry, check the rename has happened + ok( $tar->rename( $from, $to ), " Renamed '$from' to '$to'" ); + ok( $tar->get_files($to), " File '$to' now found" ); + { local $Archive::Tar::WARN = 0; + ok(!$tar->get_files($from), " File '$from' no longer found'"); + } + + ### now, replace the content + my($expect_name, $expect_content) = + get_expect_name_and_contents( $from, \@EXPECT_NORMAL ); + + like( $tar->get_content($to), $expect_content, + "Original content of '$from' in '$to'" ); + ok( $tar->replace_content( $to, $from ), + " Set content for '$to' to '$from'" ); + is( $tar->get_content($to), $from, + " Content for '$to' is indeed '$from'" ); +} + +### remove tests ### +{ my $remove = 'c'; + my $tar = Archive::Tar->new; + + ok( $tar->read( $TAR_FILE ), "Read in '$TAR_FILE'" ); + + ### remove returns the files left, which should be equal to list_files + is( scalar($tar->remove($remove)), scalar($tar->list_files), + "Removing file '$remove'" ); + + ### so what's left should be all expected files minus 1 + is( scalar($tar->list_files), scalar(__PACKAGE__->get_expect) - 1, + " Proper files remaining" ); +} + +### write + read + extract tests ### +SKIP: { + skip('no IO::String', 326) if !$Archive::Tar::HAS_PERLIO && + !$Archive::Tar::HAS_IO_STRING; + + my $tar = Archive::Tar->new; + my $new = Archive::Tar->new; + ok( $tar->read( $TAR_FILE ), "Read in '$TAR_FILE'" ); + + for my $aref ( [$tar, \@EXPECT_NORMAL], + [$TARBIN, \@EXPECTBIN], + [$TARX, \@EXPECTX] + ) { + my($obj,$struct) = @$aref; + + ### check if we stringify it ok + { my $string = $obj->write; + ok( $string, "Stringified tar file has size" ); + cmp_ok( length($string) % BLOCK, '==', 0, + "Tar archive stringified" ); + } + + ### write tar tests + { my $out = $OUT_TAR_FILE; + + { ### write() + ok( $obj->write($out), + "Wrote tarfile using 'write'" ); + check_tar_file( $out ); + check_tar_object( $obj, $struct ); + + ### now read it in again + ok( $new->read( $out ), + "Read '$out' in again" ); + + check_tar_object( $new, $struct ); + + ### now extract it again + ok( $new->extract, "Extracted '$out' with 'extract'" ); + check_tar_extract( $new, $struct ); + + rm( $out ) unless $NO_UNLINK; + } + + + { ### create_archive() + ok( Archive::Tar->create_archive( $out, 0, $COMPRESS_FILE ), + "Wrote tarfile using 'create_archive'" ); + check_tar_file( $out ); + + ### now extract it again + ok( Archive::Tar->extract_archive( $out ), + "Extracted file using 'extract_archive'"); + rm( $out ) unless $NO_UNLINK; + } + } + + ## write tgz tests + { my $out = $OUT_TGZ_FILE; + + SKIP: { + + ### weird errors from scalar(@x,@y,@z), dot it this way... + my $file_cnt; + map { $file_cnt += scalar @$_ } \@EXPECT_NORMAL, \@EXPECTBIN, + \@EXPECTX; + + my $cnt = 5 + # the tests below + (5*3*2) + # check_tgz_file + # check_tar_object fixed tests + (3 * 2 * (2 + $file_cnt)) + + ((4*$file_cnt) + 1);# check_tar_extract tests + + skip( "No IO::Zlib - cannot write compressed archives", $cnt ) + unless $ZLIB; + + { ### write() + ok($obj->write($out, 1), + "Writing compressed file using 'write'" ); + check_tgz_file( $out ); + check_tar_object( $obj, $struct ); + + ### now read it in again + ok( $new->read( $out ), + "Read '$out' in again" ); + check_tar_object( $new, $struct ); + + ### now extract it again + ok( $new->extract, + "Extracted '$out' again" ); + check_tar_extract( $new, $struct ); + + rm( $out ) unless $NO_UNLINK; + } + + { ### create_archive() + ok( Archive::Tar->create_archive( $out, 1, $COMPRESS_FILE ), + "Wrote gzip file using 'create_archive'" ); + check_tgz_file( $out ); + + ### now extract it again + ok( Archive::Tar->extract_archive( $out, 1 ), + "Extracted file using 'extract_archive'"); + rm( $out ) unless $NO_UNLINK; + } + } + } + } +} + + +### limited read + extract tests ### +{ my $tar = Archive::Tar->new; + my @files = $tar->read( $TAR_FILE, 0, { limit => 1 } ); + my $obj = $files[0]; + + is( scalar @files, 1, "Limited read" ); + + my ($name,$content) = get_expect_name_and_contents( + $obj->full_path, \@EXPECT_NORMAL ); + + is( $obj->name, $name, " Expected file found" ); + + ### extract this single file to cwd() + for my $meth (qw[extract extract_file]) { + ok( $tar->$meth( $obj->full_path ), + "Extracted '$name' to cwd() with $meth" ); + ok( -e $obj->full_path, " Extracted file exists" ); + rm( $obj->full_path ) unless $NO_UNLINK; + } + + ### extract this file to @ROOT + ### can only do that with 'extract_file', not with 'extract' + for my $meth (qw[extract_file]) { + my $outpath = File::Spec->catdir( @ROOT ); + my $outfile = File::Spec->catfile( $outpath, $$ ); #$obj->full_path ); + + ok( $tar->$meth( $obj->full_path, $outfile ), + "Extracted file '$name' to $outpath with $meth" ); + ok( -e $outfile, " Extracted file '$outfile' exists" ); + rm( $outfile ) unless $NO_UNLINK; + } + +} + + +### clear tests ### +{ my $tar = Archive::Tar->new; + my @files = $tar->read( $TAR_FILE ); + + my $cnt = $tar->list_files(); + ok( $cnt, "Found old data" ); + ok( $tar->clear, " Clearing old data" ); + + my $new_cnt = $tar->list_files; + ok( !$new_cnt, " Old data cleared" ); +} + +### $DO_NOT_USE_PREFIX tests +{ my $tar = Archive::Tar->new; + + + ### first write a tar file without prefix + { my ($obj) = $tar->add_files( $COMPRESS_FILE ); + my $dir = ''; # dir is empty! + my $file = File::Basename::basename( $COMPRESS_FILE ); + + ok( $obj, "File added" ); + isa_ok( $obj, "Archive::Tar::File" ); + + ### internal storage ### + is( $obj->name, $file, " Name set to '$file'" ); + is( $obj->prefix, $dir, " Prefix set to '$dir'" ); + + ### write the tar file without a prefix in it + local $Archive::Tar::DO_NOT_USE_PREFIX = 1; + ok( $tar->write( $OUT_TAR_FILE ), + " Tar file written" ); + + ### and forget all about it... + $tar->clear; + } + + ### now read it back in, there should be no prefix + { ok( $tar->read( $OUT_TAR_FILE ), + "Tar file read in again" ); + + my ($obj) = $tar->get_files; + ok( $obj, " File retrieved" ); + isa_ok( $obj, "Archive::Tar::File" ); + + is( $obj->name, $COMPRESS_FILE, + " Name now set to '$COMPRESS_FILE'" ); + is( $obj->prefix, '', " Prefix now empty" ); + + my $re = quotemeta $COMPRESS_FILE; + like( $obj->raw, qr/^$re/, " Prefix + name in name slot of header" ); + } + + rm( $OUT_TAR_FILE ) unless $NO_UNLINK; +} + +### clean up stuff +END { + for my $struct ( \@EXPECT_NORMAL, \@EXPECTBIN, \@EXPECTX ) { + for my $aref (@$struct) { + + my $dir = $aref->[0]->[0]; + rmtree $dir if $dir && -d $dir && not $NO_UNLINK; + } + } + + my ($dir) = File::Spec::Unix->splitdir( $LONG_FILE ); + rmtree $dir if $dir && -d $dir && not $NO_UNLINK; +} + +########################### +### helper subs ### +########################### +sub get_expect { + return map { + split '/', $_ + } map { + File::Spec::Unix->catfile( + grep { defined } @{$_->[0]}, $_->[1] + ) + } @EXPECT_NORMAL; +} + +sub is_dir { + my $file = pop(); + return $file =~ m|/$| ? 1 : 0; +} + +sub rm { + my $x = shift; + is_dir($x) ? rmtree($x) : unlink $x; +} + +sub check_tar_file { + my $file = shift; + my $filesize = -s $file; + my $contents = slurp_binfile( $file ); + + ok( defined( $contents ), " File read" ); + ok( $filesize, " File written size=$filesize" ); + + cmp_ok( $filesize % BLOCK, '==', 0, + " File size is a multiple of 512" ); + + cmp_ok( length($contents), '==', $filesize, + " File contents match size" ); + + is( TAR_END x 2, substr( $contents, -(BLOCK*2) ), + " Ends with 1024 null bytes" ); + + return $contents; +} + +sub check_tgz_file { + my $file = shift; + my $filesize = -s $file; + my $contents = slurp_gzfile( $file ); + my $uncompressedsize = length $contents; + + ok( defined( $contents ), " File read and uncompressed" ); + ok( $filesize, " File written size=$filesize uncompressed size=$uncompressedsize" ); + + cmp_ok( $uncompressedsize % BLOCK, '==', 0, + " Uncompressed size is a multiple of 512" ); + + is( TAR_END x 2, substr($contents, -(BLOCK*2)), + " Ends with 1024 null bytes" ); + + cmp_ok( $filesize, '<', $uncompressedsize, + " Compressed size < uncompressed size" ); + + return $contents; +} + +sub check_tar_object { + my $obj = shift; + my $struct = shift or return; + + ### amount of files (not dirs!) there should be in the object + my $expect = scalar @$struct; + my @files = grep { $_->is_file } $obj->get_files; + + ### count how many files there are in the object + ok( scalar @files, " Found some files in the archive" ); + is( scalar @files, $expect, " Found expected number of files" ); + + for my $file (@files) { + + ### XXX ->fullname + #my $path = File::Spec::Unix->catfile( + # grep { length } $file->prefix, $file->name ); + my($ename,$econtent) = + get_expect_name_and_contents( $file->full_path, $struct ); + + ok( $file->is_file, " It is a file" ); + is( $file->full_path, $ename, + " Name matches expected name" ); + like( $file->get_content, $econtent, + " Content as expected" ); + } +} + +sub check_tar_extract { + my $tar = shift; + my $struct = shift; + + my @dirs; + for my $file ($tar->get_files) { + push @dirs, $file && next if $file->is_dir; + + + my $path = $file->full_path; + my($ename,$econtent) = + get_expect_name_and_contents( $path, $struct ); + + + is( $ename, $path, " Expected file found" ); + ok( -e $path, " File '$path' exists" ); + + my $fh; + open $fh, "$path" or warn "Error opening file '$path': $!\n"; + binmode $fh; + + ok( $fh, " Opening file" ); + + my $content = do{local $/;<$fh>}; chomp $content; + like( $content, qr/$econtent/, + " Contents OK" ); + + unlink $path unless $NO_UNLINK; + + ### alternate extract path tests + ### to abs and rel paths + { for my $outpath ( File::Spec->catdir( @ROOT ), + File::Spec->rel2abs( + File::Spec->catdir( @ROOT ) + ) + ) { + + my $outfile = File::Spec->catfile( $outpath, $$ ); + + ok( $tar->extract_file( $file->full_path, $outfile ), + " Extracted file '$path' to $outfile" ); + ok( -e $outfile," Extracted file '$outfile' exists" ); + + rm( $outfile ) unless $NO_UNLINK; + } + } + } + + ### now check if list_files is returning the same info as get_files + is_deeply( [$tar->list_files], [ map { $_->full_path } $tar->get_files], + " Verified via list_files as well" ); + + #do { rmtree $_->full_path if -d $_->full_path && not $NO_UNLINK } + # for @dirs; +} + +sub slurp_binfile { + my $file = shift; + my $fh = IO::File->new; + + $fh->open( $file ) or warn( "Error opening '$file': $!" ), return undef; + + binmode $fh; + local $/; + return <$fh>; +} + +sub slurp_gzfile { + my $file = shift; + my $str; + my $buff; + + require IO::Zlib; + my $fh = new IO::Zlib; + $fh->open( $file, READ_ONLY->(1) ) + or warn( "Error opening '$file' with IO::Zlib" ), return undef; + + $str .= $buff while $fh->read( $buff, 4096 ) > 0; + $fh->close(); + return $str; +} + +sub get_expect_name_and_contents { + my $find = shift; + my $struct = shift or return; + + ### find the proper name + contents for this file from + ### the expect structure + my ($name, $content) = + map { + @$_; + } grep { + $_->[0] eq $find + } map { + [ ### full path ### + File::Spec::Unix->catfile( + grep { length } @{$_->[0]}, $_->[1] + ), + ### regex + $_->[2], + ] + } @$struct; + + ### not a qr// yet? + unless( ref $content ) { + my $x = quotemeta ($content || ''); + $content = qr/$x/; + } + + unless( $name ) { + warn "Could not find '$find' in " . Dumper $struct; + } + + return ($name, $content); +} + +__END__ diff --git a/lib/Archive/Tar/t/03_file.t b/lib/Archive/Tar/t/03_file.t new file mode 100644 index 0000000000..9d4e7553ee --- /dev/null +++ b/lib/Archive/Tar/t/03_file.t @@ -0,0 +1,139 @@ +### This program tests Archive::Tar::File ### + +use Test::More 'no_plan'; +use strict; + +use File::Spec::Unix (); + +use Archive::Tar::File; +use Archive::Tar::Constant; + +my $all_chars = join '', "\r\n", map( chr, 0..255 ), "zzz\n\r"; +my $start_time = time() - 1 - TIME_OFFSET; +my $replace_contents = $all_chars x 42; + +my $rename_path = 'x/yy/42'; +my ($rename_dir, $rename_file) = dir_and_file( $rename_path ); + +my @test_files = ( + ### pathname contents optional hash of attributes ### + [ 'x/bIn1', $all_chars ], + [ 'bIn2', $all_chars x 2 ], + [ 'bIn0', '' ], + + ### keep this one as the last entry + [ 'x/yy/z', '', { type => DIR, + mode => 0777, + uid => 42, + gid => 43, + uname => 'Ford', + gname => 'Prefect', + mtime => $start_time } ], +); + +### new( data => ... ) tests ### +for my $f ( @test_files ) { + my $unix_path = $f->[0]; + my $contents = $f->[1]; + my $attr = $f->[2] || {}; + my ($dir, $file) = dir_and_file( $unix_path ); + + my $obj = Archive::Tar::File->new( data => $unix_path, $contents, $attr ); + + isa_ok( $obj, 'Archive::Tar::File', "Object created" ); + is( $obj->name, $file, " name '$file' ok" ); + is( $obj->prefix, $dir, " prefix '$dir' ok" ); + is( $obj->size, length($contents), " size ok" ); + is( $obj->mode, exists($attr->{mode}) ? $attr->{mode} : MODE, + " mode ok" ); + is( $obj->uid, exists($attr->{uid}) ? $attr->{uid} : UID, + " uid ok" ); + is( $obj->gid, exists($attr->{gid}) ? $attr->{gid} : GID, + " gid ok" ); + is( $obj->uname, exists($attr->{uname}) ? $attr->{uname} : UNAME->(UID ), + " uname ok" ); + is( $obj->gname, exists($attr->{gname}) ? $attr->{gname} : GNAME->( GID ), + " gname ok" ); + is( $obj->type, exists($attr->{type}) ? $attr->{type} : FILE, + " type ok" ); + if (exists($attr->{mtime})) { + is( $obj->mtime, $attr->{mtime}, " mtime matches" ); + } else { + cmp_ok( $obj->mtime, '>', $start_time, " mtime after start time" ); + } + ok( $obj->chksum, " chksum ok" ); + ok( $obj->version, " version ok" ); + ok( ! $obj->linkname, " linkname ok" ); + ok( ! $obj->devmajor, " devmajor ok" ); + ok( ! $obj->devminor, " devminor ok" ); + ok( ! $obj->raw, " raw ok" ); + + ### test type checkers + SKIP: { + skip "Attributes defined, may not be plainfile", 11 if keys %$attr; + + ok( $obj->is_file, " Object is a file" ); + + for my $name (qw[dir hardlink symlink chardev blockdev fifo + socket unknown longlink label ] + ) { + my $method = 'is_' . $name; + + ok(!$obj->$method(), " Object is not a '$name'"); + } + } + + ### Use "ok" not "is" to avoid binary data screwing up the screen ### + ok( $obj->get_content eq $contents, " get_content ok" ); + ok( ${$obj->get_content_by_ref} eq $contents, + " get_content_by_ref ok" ); + is( $obj->has_content, length($contents) ? 1 : 0, + " has_content ok" ); + ok( $obj->replace_content( $replace_contents ), + " replace_content ok" ); + ok( $obj->get_content eq $replace_contents, " get_content ok" ); + ok( $obj->replace_content( $contents ), " replace_content ok" ); + ok( $obj->get_content eq $contents, " get_content ok" ); + + ok( $obj->rename( $rename_path ), " rename ok" ); + is( $obj->name, $rename_file, " name '$file' ok" ); + is( $obj->prefix, $rename_dir, " prefix '$dir' ok" ); + ok( $obj->rename( $unix_path ), " rename ok" ); + is( $obj->name, $file, " name '$file' ok" ); + is( $obj->prefix, $dir, " prefix '$dir' ok" ); + + ### clone tests ### + my $clone = $obj->clone; + isnt( $obj, $clone, "Clone is different object" ); + is_deeply( $obj, $clone, " Clone holds same data" ); +} + +### _downgrade_to_plainfile +{ my $aref = $test_files[-1]; + my $unix_path = $aref->[0]; + my $contents = $aref->[1]; + my $attr = $aref->[2]; + + my $obj = Archive::Tar::File->new( data => $unix_path, $contents, $attr ); + + ### check if the object is as expected + isa_ok( $obj, 'Archive::Tar::File' ); + ok( $obj->is_dir, " Is a directory" ); + + ### do the downgrade + ok( $obj->_downgrade_to_plainfile, " Downgraded to plain file" ); + + ### now check if it's downgraded + ok( $obj->is_file, " Is now a file" ); + is( $obj->linkname, '', " No link entered" ); + is( $obj->mode, MODE, " Mode as expected" ); +} + +### helper subs ### +sub dir_and_file { + my $unix_path = shift; + my ($vol, $dirs, $file) = File::Spec::Unix->splitpath( $unix_path ); + my $dir = File::Spec::Unix->catdir( grep { length } $vol, + File::Spec::Unix->splitdir( $dirs ) ); + return ( $dir, $file ); +} diff --git a/lib/Archive/Tar/t/99_pod.t b/lib/Archive/Tar/t/99_pod.t new file mode 100644 index 0000000000..8e4084c680 --- /dev/null +++ b/lib/Archive/Tar/t/99_pod.t @@ -0,0 +1,18 @@ +use Test::More; +use File::Spec; +use File::Find; +use strict; + +eval 'use Test::Pod'; +plan skip_all => "Test::Pod v0.95 required for testing POD" + if $@ || $Test::Pod::VERSION < 0.95; + +my @files; +find( sub { push @files, $File::Find::name if /\.p(?:l|m|od)$/ }, + File::Spec->catfile( qw(blib lib) ) ); +plan tests => scalar @files; +for my $file ( @files ) { + pod_file_ok( $file ); +} + + diff --git a/lib/Archive/Tar/t/src/long/b b/lib/Archive/Tar/t/src/long/b new file mode 100644 index 0000000000..38f6d2d61d --- /dev/null +++ b/lib/Archive/Tar/t/src/long/b @@ -0,0 +1 @@ +bbbbbbbbbbb diff --git a/lib/Archive/Tar/t/src/long/bar.tar b/lib/Archive/Tar/t/src/long/bar.tar Binary files differnew file mode 100644 index 0000000000..d4a12bd075 --- /dev/null +++ b/lib/Archive/Tar/t/src/long/bar.tar diff --git a/lib/Archive/Tar/t/src/long/foo.tgz b/lib/Archive/Tar/t/src/long/foo.tgz Binary files differnew file mode 100644 index 0000000000..98657c01f6 --- /dev/null +++ b/lib/Archive/Tar/t/src/long/foo.tgz diff --git a/lib/Archive/Tar/t/src/short/b b/lib/Archive/Tar/t/src/short/b new file mode 100644 index 0000000000..38f6d2d61d --- /dev/null +++ b/lib/Archive/Tar/t/src/short/b @@ -0,0 +1 @@ +bbbbbbbbbbb diff --git a/lib/Archive/Tar/t/src/short/bar.tar b/lib/Archive/Tar/t/src/short/bar.tar Binary files differnew file mode 100644 index 0000000000..cf5fd276ad --- /dev/null +++ b/lib/Archive/Tar/t/src/short/bar.tar diff --git a/lib/Archive/Tar/t/src/short/foo.tgz b/lib/Archive/Tar/t/src/short/foo.tgz Binary files differnew file mode 100644 index 0000000000..de54e7d0bf --- /dev/null +++ b/lib/Archive/Tar/t/src/short/foo.tgz |