From 39713df493e0bfd95f001dd75d5926ec9fa719bf Mon Sep 17 00:00:00 2001 From: Rafael Garcia-Suarez Date: Tue, 21 Jun 2005 12:01:07 +0000 Subject: Add Archive::Tar 1.24, except ptar for now p4raw-id: //depot/perl@24922 --- MANIFEST | 13 + lib/Archive/Tar.pm | 1468 +++++++++++++++++++++++++++++++++++ lib/Archive/Tar/Constant.pm | 72 ++ lib/Archive/Tar/File.pm | 579 ++++++++++++++ lib/Archive/Tar/t/01_use.t | 7 + lib/Archive/Tar/t/02_methods.t | 770 ++++++++++++++++++ lib/Archive/Tar/t/03_file.t | 139 ++++ lib/Archive/Tar/t/99_pod.t | 18 + lib/Archive/Tar/t/src/long/b | 1 + lib/Archive/Tar/t/src/long/bar.tar | Bin 0 -> 10240 bytes lib/Archive/Tar/t/src/long/foo.tgz | Bin 0 -> 331 bytes lib/Archive/Tar/t/src/short/b | 1 + lib/Archive/Tar/t/src/short/bar.tar | Bin 0 -> 10240 bytes lib/Archive/Tar/t/src/short/foo.tgz | Bin 0 -> 145 bytes 14 files changed, 3068 insertions(+) create mode 100644 lib/Archive/Tar.pm create mode 100644 lib/Archive/Tar/Constant.pm create mode 100644 lib/Archive/Tar/File.pm create mode 100644 lib/Archive/Tar/t/01_use.t create mode 100644 lib/Archive/Tar/t/02_methods.t create mode 100644 lib/Archive/Tar/t/03_file.t create mode 100644 lib/Archive/Tar/t/99_pod.t create mode 100644 lib/Archive/Tar/t/src/long/b create mode 100644 lib/Archive/Tar/t/src/long/bar.tar create mode 100644 lib/Archive/Tar/t/src/long/foo.tgz create mode 100644 lib/Archive/Tar/t/src/short/b create mode 100644 lib/Archive/Tar/t/src/short/bar.tar create mode 100644 lib/Archive/Tar/t/src/short/foo.tgz diff --git a/MANIFEST b/MANIFEST index e6e20ac68b..692f6f7226 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1181,6 +1181,19 @@ lib/assertions/activate.pm assertions activate/deactivate lib/assertions/compat.pm assertions compatibility for earlier perls lib/assertions.pm module support for -A flag lib/assert.pl assertion and panic with stack trace +lib/Archive/Tar/Constant.pm Archive::Tar +lib/Archive/Tar/File.pm Archive::Tar +lib/Archive/Tar/t/01_use.t Archive::Tar tests +lib/Archive/Tar/t/02_methods.t Archive::Tar tests +lib/Archive/Tar/t/03_file.t Archive::Tar tests +lib/Archive/Tar/t/99_pod.t Archive::Tar tests +lib/Archive/Tar/t/src/long/b Archive::Tar tests +lib/Archive/Tar/t/src/long/bar.tar Archive::Tar tests +lib/Archive/Tar/t/src/long/foo.tgz Archive::Tar tests +lib/Archive/Tar/t/src/short/b Archive::Tar tests +lib/Archive/Tar/t/src/short/bar.tar Archive::Tar tests +lib/Archive/Tar/t/src/short/foo.tgz Archive::Tar tests +lib/Archive/Tar.pm Archive::Tar lib/Attribute/Handlers/Changes Attribute::Handlers lib/Attribute/Handlers/demo/demo2.pl Attribute::Handlers demo lib/Attribute/Handlers/demo/demo3.pl Attribute::Handlers demo diff --git a/lib/Archive/Tar.pm b/lib/Archive/Tar.pm new file mode 100644 index 0000000000..e2b16314ac --- /dev/null +++ b/lib/Archive/Tar.pm @@ -0,0 +1,1468 @@ +### the gnu tar specification: +### http://www.gnu.org/software/tar/manual/html_mono/tar.html +### +### and the pax format spec, which tar derives from: +### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html + +package Archive::Tar; +require 5.005_03; + +use strict; +use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD + $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING]; + +$DEBUG = 0; +$WARN = 1; +$FOLLOW_SYMLINK = 0; +$VERSION = "1.24"; +$CHOWN = 1; +$CHMOD = 1; +$DO_NOT_USE_PREFIX = 0; + +BEGIN { + use Config; + $HAS_PERLIO = $Config::Config{useperlio}; + + ### try and load IO::String anyway, so you can dynamically + ### switch between perlio and IO::String + eval { + require IO::String; + import IO::String; + }; + $HAS_IO_STRING = $@ ? 0 : 1; + +} + +use Cwd; +use IO::File; +use Carp qw(carp croak); +use File::Spec (); +use File::Spec::Unix (); +use File::Path (); + +use Archive::Tar::File; +use Archive::Tar::Constant; + +=head1 NAME + +Archive::Tar - module for manipulations of tar archives + +=head1 SYNOPSIS + + use Archive::Tar; + my $tar = Archive::Tar->new; + + $tar->read('origin.tgz',1); + $tar->extract(); + + $tar->add_files('file/foo.pl', 'docs/README'); + $tar->add_data('file/baz.txt', 'This is the contents now'); + + $tar->rename('oldname', 'new/file/name'); + + $tar->write('files.tar'); + +=head1 DESCRIPTION + +Archive::Tar provides an object oriented mechanism for handling tar +files. It provides class methods for quick and easy files handling +while also allowing for the creation of tar file objects for custom +manipulation. If you have the IO::Zlib module installed, +Archive::Tar will also support compressed or gzipped tar files. + +An object of class Archive::Tar represents a .tar(.gz) archive full +of files and things. + +=head1 Object Methods + +=head2 Archive::Tar->new( [$file, $compressed] ) + +Returns a new Tar object. If given any arguments, C calls the +C method automatically, passing on the arguments provided to +the C method. + +If C is invoked with arguments and the C method fails +for any reason, C returns undef. + +=cut + +my $tmpl = { + _data => [ ], + _file => 'Unknown', +}; + +### install get/set accessors for this object. +for my $key ( keys %$tmpl ) { + no strict 'refs'; + *{__PACKAGE__."::$key"} = sub { + my $self = shift; + $self->{$key} = $_[0] if @_; + return $self->{$key}; + } +} + +sub new { + my $class = shift; + $class = ref $class if ref $class; + + ### copying $tmpl here since a shallow copy makes it use the + ### same aref, causing for files to remain in memory always. + my $obj = bless { _data => [ ], _file => 'Unknown' }, $class; + + if (@_) { + return unless $obj->read( @_ ); + } + + return $obj; +} + +=head2 $tar->read ( $filename|$handle, $compressed, {opt => 'val'} ) + +Read the given tar file into memory. +The first argument can either be the name of a file or a reference to +an already open filehandle (or an IO::Zlib object if it's compressed) +The second argument indicates whether the file referenced by the first +argument is compressed. + +The C will I any previous content in C<$tar>! + +The second argument may be considered optional if IO::Zlib is +installed, since it will transparently Do The Right Thing. +Archive::Tar will warn if you try to pass a compressed file if +IO::Zlib is not available and simply return. + +The third argument can be a hash reference with options. Note that +all options are case-sensitive. + +=over 4 + +=item limit + +Do not read more than C files. This is useful if you have +very big archives, and are only interested in the first few files. + +=item extract + +If set to true, immediately extract entries when reading them. This +gives you the same memory break as the C function. +Note however that entries will not be read into memory, but written +straight to disk. + +=back + +All files are stored internally as C objects. +Please consult the L documentation for details. + +Returns the number of files read in scalar context, and a list of +C objects in list context. + +=cut + +sub read { + my $self = shift; + my $file = shift; + my $gzip = shift || 0; + my $opts = shift || {}; + + unless( defined $file ) { + $self->_error( qq[No file to read from!] ); + return; + } else { + $self->_file( $file ); + } + + my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) ) + or return; + + my $data = $self->_read_tar( $handle, $opts ) or return; + + $self->_data( $data ); + + return wantarray ? @$data : scalar @$data; +} + +sub _get_handle { + my $self = shift; + my $file = shift; return unless defined $file; + return $file if ref $file; + + my $gzip = shift || 0; + my $mode = shift || READ_ONLY->( ZLIB ); # default to read only + + my $fh; my $bin; + + ### only default to ZLIB if we're not trying to /write/ to a handle ### + if( ZLIB and $gzip || MODE_READ->( $mode ) ) { + + ### IO::Zlib will Do The Right Thing, even when passed + ### a plain file ### + $fh = new IO::Zlib; + + } else { + if( $gzip ) { + $self->_error(qq[Compression not available - Install IO::Zlib!]); + return; + + } else { + $fh = new IO::File; + $bin++; + } + } + + unless( $fh->open( $file, $mode ) ) { + $self->_error( qq[Could not create filehandle for '$file': $!!] ); + return; + } + + binmode $fh if $bin; + + return $fh; +} + +sub _read_tar { + my $self = shift; + my $handle = shift or return; + my $opts = shift || {}; + + my $count = $opts->{limit} || 0; + my $extract = $opts->{extract} || 0; + + ### set a cap on the amount of files to extract ### + my $limit = 0; + $limit = 1 if $count > 0; + + my $tarfile = [ ]; + my $chunk; + my $read = 0; + my $real_name; # to set the name of a file when + # we're encountering @longlink + my $data; + + LOOP: + while( $handle->read( $chunk, HEAD ) ) { + ### IO::Zlib doesn't support this yet + my $offset = eval { tell $handle } || 'unknown'; + + unless( $read++ ) { + my $gzip = GZIP_MAGIC_NUM; + if( $chunk =~ /$gzip/ ) { + $self->_error( qq[Cannot read compressed format in tar-mode] ); + return; + } + } + + ### if we can't read in all bytes... ### + last if length $chunk != HEAD; + + ### Apparently this should really be two blocks of 512 zeroes, + ### but GNU tar sometimes gets it wrong. See comment in the + ### source code (tar.c) to GNU cpio. + next if $chunk eq TAR_END; + + my $entry; + unless( $entry = Archive::Tar::File->new( chunk => $chunk ) ) { + $self->_error( qq[Couldn't read chunk at offset $offset] ); + next; + } + + ### ignore labels: + ### http://www.gnu.org/manual/tar/html_node/tar_139.html + next if $entry->is_label; + + if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) { + + if ( $entry->is_file && !$entry->validate ) { + ### sometimes the chunk is rather fux0r3d and a whole 512 + ### bytes ends p in the ->name area. + ### clean it up, if need be + my $name = $entry->name; + $name = substr($name, 0, 100) if length $name > 100; + $name =~ s/\n/ /g; + + $self->_error( $name . qq[: checksum error] ); + next LOOP; + } + + my $block = BLOCK_SIZE->( $entry->size ); + + $data = $entry->get_content_by_ref; + + ### just read everything into memory + ### can't do lazy loading since IO::Zlib doesn't support 'seek' + ### this is because Compress::Zlib doesn't support it =/ + ### this reads in the whole data in one read() call. + if( $handle->read( $$data, $block ) < $block ) { + $self->_error( qq[Read error on tarfile (missing data) ']. + $entry->full_path ."' at offset $offset" ); + next; + } + + ### throw away trailing garbage ### + substr ($$data, $entry->size) = ""; + + ### part II of the @LongLink munging -- need to do /after/ + ### the checksum check. + if( $entry->is_longlink ) { + ### weird thing in tarfiles -- if the file is actually a + ### @LongLink, the data part seems to have a trailing ^@ + ### (unprintable) char. to display, pipe output through less. + ### but that doesn't *always* happen.. so check if the last + ### character is a control character, and if so remove it + ### at any rate, we better remove that character here, or tests + ### like 'eq' and hashlook ups based on names will SO not work + ### remove it by calculating the proper size, and then + ### tossing out everything that's longer than that size. + + ### count number of nulls + my $nulls = $$data =~ tr/\0/\0/; + + ### cut data + size by that many bytes + $entry->size( $entry->size - $nulls ); + substr ($$data, $entry->size) = ""; + } + } + + ### clean up of the entries.. posix tar /apparently/ has some + ### weird 'feature' that allows for filenames > 255 characters + ### they'll put a header in with as name '././@LongLink' and the + ### contents will be the name of the /next/ file in the archive + ### pretty crappy and kludgy if you ask me + + ### set the name for the next entry if this is a @LongLink; + ### this is one ugly hack =/ but needed for direct extraction + if( $entry->is_longlink ) { + $real_name = $data; + next; + } elsif ( defined $real_name ) { + $entry->name( $$real_name ); + $entry->prefix(''); + undef $real_name; + } + + $self->_extract_file( $entry ) if $extract + && !$entry->is_longlink + && !$entry->is_unknown + && !$entry->is_label; + + ### Guard against tarfiles with garbage at the end + last LOOP if $entry->name eq ''; + + ### push only the name on the rv if we're extracting + ### -- for extract_archive + push @$tarfile, ($extract ? $entry->name : $entry); + + if( $limit ) { + $count-- unless $entry->is_longlink || $entry->is_dir; + last LOOP unless $count; + } + } continue { + undef $data; + } + + return $tarfile; +} + +=head2 $tar->contains_file( $filename ) + +Check if the archive contains a certain file. +It will return true if the file is in the archive, false otherwise. + +Note however, that this function does an exact match using C +on the full path. So it cannot compensate for case-insensitive file- +systems or compare 2 paths to see if they would point to the same +underlying file. + +=cut + +sub contains_file { + my $self = shift; + my $full = shift or return; + + return 1 if $self->_find_entry($full); + return; +} + +=head2 $tar->extract( [@filenames] ) + +Write files whose names are equivalent to any of the names in +C<@filenames> to disk, creating subdirectories as necessary. This +might not work too well under VMS. +Under MacPerl, the file's modification time will be converted to the +MacOS zero of time, and appropriate conversions will be done to the +path. However, the length of each element of the path is not +inspected to see whether it's longer than MacOS currently allows (32 +characters). + +If C is called without a list of file names, the entire +contents of the archive are extracted. + +Returns a list of filenames extracted. + +=cut + +sub extract { + my $self = shift; + my @files; + + ### you requested the extraction of only certian files + if( @_ ) { + for my $file (@_) { + my $found; + for my $entry ( @{$self->_data} ) { + next unless $file eq $entry->full_path; + + ### we found the file you're looking for + push @files, $entry; + $found++; + } + + unless( $found ) { + return $self->_error( qq[Could not find '$file' in archive] ); + } + } + + ### just grab all the file items + } else { + @files = $self->get_files; + } + + ### nothing found? that's an error + unless( scalar @files ) { + $self->_error( qq[No files found for ] . $self->_file ); + return; + } + + ### now extract them + for my $entry ( @files ) { + unless( $self->_extract_file( $entry ) ) { + $self->_error(q[Could not extract ']. $entry->full_path .q['] ); + return; + } + } + + return @files; +} + +=head2 $tar->extract_file( $file, [$extract_path] ) + +Write an entry, whose name is equivalent to the file name provided to +disk. Optionally takes a second parameter, which is the full (unix) +path (including filename) the entry will be written to. + +For example: + + $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' ); + +Returns true on success, false on failure. + +=cut + +sub extract_file { + my $self = shift; + my $file = shift or return; + my $alt = shift; + + my $entry = $self->_find_entry( $file ) + or $self->_error( qq[Could not find an entry for '$file'] ), return; + + return $self->_extract_file( $entry, $alt ); +} + +sub _extract_file { + my $self = shift; + my $entry = shift or return; + my $alt = shift; + my $cwd = cwd(); + + ### you wanted an alternate extraction location ### + my $name = defined $alt ? $alt : $entry->full_path; + + ### splitpath takes a bool at the end to indicate + ### that it's splitting a dir + my ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name, + $entry->is_dir ); + my $dir; + ### is $name an absolute path? ### + if( File::Spec->file_name_is_absolute( $dirs ) ) { + $dir = $dirs; + + ### it's a relative path ### + } else { + my @dirs = File::Spec::Unix->splitdir( $dirs ); + my @cwd = File::Spec->splitdir( $cwd ); + $dir = File::Spec->catdir(@cwd, @dirs); + } + + if( -e $dir && !-d _ ) { + $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] ); + return; + } + + unless ( -d _ ) { + eval { File::Path::mkpath( $dir, 0, 0777 ) }; + if( $@ ) { + $self->_error( qq[Could not create directory '$dir': $@] ); + return; + } + } + + ### we're done if we just needed to create a dir ### + return 1 if $entry->is_dir; + + my $full = File::Spec->catfile( $dir, $file ); + + if( $entry->is_unknown ) { + $self->_error( qq[Unknown file type for file '$full'] ); + return; + } + + if( length $entry->type && $entry->is_file ) { + my $fh = IO::File->new; + $fh->open( '>' . $full ) or ( + $self->_error( qq[Could not open file '$full': $!] ), + return + ); + + if( $entry->size ) { + binmode $fh; + syswrite $fh, $entry->data or ( + $self->_error( qq[Could not write data to '$full'] ), + return + ); + } + + close $fh or ( + $self->_error( qq[Could not close file '$full'] ), + return + ); + + } else { + $self->_make_special_file( $entry, $full ) or return; + } + + utime time, $entry->mtime - TIME_OFFSET, $full or + $self->_error( qq[Could not update timestamp] ); + + if( $CHOWN && CAN_CHOWN ) { + chown $entry->uid, $entry->gid, $full or + $self->_error( qq[Could not set uid/gid on '$full'] ); + } + + ### only chmod if we're allowed to, but never chmod symlinks, since they'll + ### change the perms on the file they're linking too... + if( $CHMOD and not -l $full ) { + chmod $entry->mode, $full or + $self->_error( qq[Could not chown '$full' to ] . $entry->mode ); + } + + return 1; +} + +sub _make_special_file { + my $self = shift; + my $entry = shift or return; + my $file = shift; return unless defined $file; + + my $err; + + if( $entry->is_symlink ) { + my $fail; + if( ON_UNIX ) { + symlink( $entry->linkname, $file ) or $fail++; + + } else { + $self->_extract_special_file_as_plain_file( $entry, $file ) + or $fail++; + } + + $err = qq[Making symbolink link from '] . $entry->linkname . + qq[' to '$file' failed] if $fail; + + } elsif ( $entry->is_hardlink ) { + my $fail; + if( ON_UNIX ) { + link( $entry->linkname, $file ) or $fail++; + + } else { + $self->_extract_special_file_as_plain_file( $entry, $file ) + or $fail++; + } + + $err = qq[Making hard link from '] . $entry->linkname . + qq[' to '$file' failed] if $fail; + + } elsif ( $entry->is_fifo ) { + ON_UNIX && !system('mknod', $file, 'p') or + $err = qq[Making fifo ']. $entry->name .qq[' failed]; + + } elsif ( $entry->is_blockdev or $entry->is_chardev ) { + my $mode = $entry->is_blockdev ? 'b' : 'c'; + + ON_UNIX && !system('mknod', $file, $mode, + $entry->devmajor, $entry->devminor) or + $err = qq[Making block device ']. $entry->name .qq[' (maj=] . + $entry->devmajor . qq[ min=] . $entry->devminor . + qq[) failed.]; + + } elsif ( $entry->is_socket ) { + ### the original doesn't do anything special for sockets.... ### + 1; + } + + return $err ? $self->_error( $err ) : 1; +} + +### don't know how to make symlinks, let's just extract the file as +### a plain file +sub _extract_special_file_as_plain_file { + my $self = shift; + my $entry = shift or return; + my $file = shift; return unless defined $file; + + my $err; + TRY: { + my $orig = $self->_find_entry( $entry->linkname ); + + unless( $orig ) { + $err = qq[Could not find file '] . $entry->linkname . + qq[' in memory.]; + last TRY; + } + + ### clone the entry, make it appear as a normal file ### + my $clone = $entry->clone; + $clone->_downgrade_to_plainfile; + $self->_extract_file( $clone, $file ) or last TRY; + + return 1; + } + + return $self->_error($err); +} + +=head2 $tar->list_files( [\@properties] ) + +Returns a list of the names of all the files in the archive. + +If C is passed an array reference as its first argument +it returns a list of hash references containing the requested +properties of each file. The following list of properties is +supported: name, size, mtime (last modified date), mode, uid, gid, +linkname, uname, gname, devmajor, devminor, prefix. + +Passing an array reference containing only one element, 'name', is +special cased to return a list of names rather than a list of hash +references, making it equivalent to calling C without +arguments. + +=cut + +sub list_files { + my $self = shift; + my $aref = shift || [ ]; + + unless( $self->_data ) { + $self->read() or return; + } + + if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) { + return map { $_->full_path } @{$self->_data}; + } else { + + #my @rv; + #for my $obj ( @{$self->_data} ) { + # push @rv, { map { $_ => $obj->$_() } @$aref }; + #} + #return @rv; + + ### this does the same as the above.. just needs a +{ } + ### to make sure perl doesn't confuse it for a block + return map { my $o=$_; + +{ map { $_ => $o->$_() } @$aref } + } @{$self->_data}; + } +} + +sub _find_entry { + my $self = shift; + my $file = shift; + + unless( defined $file ) { + $self->_error( qq[No file specified] ); + return; + } + + for my $entry ( @{$self->_data} ) { + my $path = $entry->full_path; + return $entry if $path eq $file; + } + + $self->_error( qq[No such file in archive: '$file'] ); + return; +} + +=head2 $tar->get_files( [@filenames] ) + +Returns the C objects matching the filenames +provided. If no filename list was passed, all C +objects in the current Tar object are returned. + +Please refer to the C documentation on how to +handle these objects. + +=cut + +sub get_files { + my $self = shift; + + return @{ $self->_data } unless @_; + + my @list; + for my $file ( @_ ) { + push @list, grep { defined } $self->_find_entry( $file ); + } + + return @list; +} + +=head2 $tar->get_content( $file ) + +Return the content of the named file. + +=cut + +sub get_content { + my $self = shift; + my $entry = $self->_find_entry( shift ) or return; + + return $entry->data; +} + +=head2 $tar->replace_content( $file, $content ) + +Make the string $content be the content for the file named $file. + +=cut + +sub replace_content { + my $self = shift; + my $entry = $self->_find_entry( shift ) or return; + + return $entry->replace_content( shift ); +} + +=head2 $tar->rename( $file, $new_name ) + +Rename the file of the in-memory archive to $new_name. + +Note that you must specify a Unix path for $new_name, since per tar +standard, all files in the archive must be Unix paths. + +Returns true on success and false on failure. + +=cut + +sub rename { + my $self = shift; + my $file = shift; return unless defined $file; + my $new = shift; return unless defined $new; + + my $entry = $self->_find_entry( $file ) or return; + + return $entry->rename( $new ); +} + +=head2 $tar->remove (@filenamelist) + +Removes any entries with names matching any of the given filenames +from the in-memory archive. Returns a list of C +objects that remain. + +=cut + +sub remove { + my $self = shift; + my @list = @_; + + my %seen = map { $_->full_path => $_ } @{$self->_data}; + delete $seen{ $_ } for @list; + + $self->_data( [values %seen] ); + + return values %seen; +} + +=head2 $tar->clear + +C clears the current in-memory archive. This effectively gives +you a 'blank' object, ready to be filled again. Note that C +only has effect on the object, not the underlying tarfile. + +=cut + +sub clear { + my $self = shift or return; + + $self->_data( [] ); + $self->_file( '' ); + + return 1; +} + + +=head2 $tar->write ( [$file, $compressed, $prefix] ) + +Write the in-memory archive to disk. The first argument can either +be the name of a file or a reference to an already open filehandle (a +GLOB reference). If the second argument is true, the module will use +IO::Zlib to write the file in a compressed format. If IO::Zlib is +not available, the C method will fail and return. + +Note that when you pass in a filehandle, the compression argument +is ignored, as all files are printed verbatim to your filehandle. +If you wish to enable compression with filehandles, use an +C filehandle instead. + +Specific levels of compression can be chosen by passing the values 2 +through 9 as the second parameter. + +The third argument is an optional prefix. All files will be tucked +away in the directory you specify as prefix. So if you have files +'a' and 'b' in your archive, and you specify 'foo' as prefix, they +will be written to the archive as 'foo/a' and 'foo/b'. + +If no arguments are given, C returns the entire formatted +archive as a string, which could be useful if you'd like to stuff the +archive into a socket or a pipe to gzip or something. + +=cut + +sub write { + my $self = shift; + my $file = shift; $file = '' unless defined $file; + my $gzip = shift || 0; + my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; + my $dummy = ''; + + ### only need a handle if we have a file to print to ### + my $handle = length($file) + ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) ) + or return ) + : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h } + : $HAS_IO_STRING ? IO::String->new + : __PACKAGE__->no_string_support(); + + + + for my $entry ( @{$self->_data} ) { + ### entries to be written to the tarfile ### + my @write_me; + + ### only now will we change the object to reflect the current state + ### of the name and prefix fields -- this needs to be limited to + ### write() only! + my $clone = $entry->clone; + + + ### so, if you don't want use to use the prefix, we'll stuff + ### everything in the name field instead + if( $DO_NOT_USE_PREFIX ) { + + ### you might have an extended prefix, if so, set it in the clone + ### XXX is ::Unix right? + $clone->name( length $ext_prefix + ? File::Spec::Unix->catdir( $ext_prefix, + $clone->full_path) + : $clone->full_path ); + $clone->prefix( '' ); + + ### otherwise, we'll have to set it properly -- prefix part in the + ### prefix and name part in the name field. + } else { + + ### split them here, not before! + my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path ); + + ### you might have an extended prefix, if so, set it in the clone + ### XXX is ::Unix right? + $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix ) + if length $ext_prefix; + + $clone->prefix( $prefix ); + $clone->name( $name ); + } + + ### names are too long, and will get truncated if we don't add a + ### '@LongLink' file... + my $make_longlink = ( length($clone->name) > NAME_LENGTH or + length($clone->prefix) > PREFIX_LENGTH + ) || 0; + + ### perhaps we need to make a longlink file? + if( $make_longlink ) { + my $longlink = Archive::Tar::File->new( + data => LONGLINK_NAME, + $clone->full_path, + { type => LONGLINK } + ); + + unless( $longlink ) { + $self->_error( qq[Could not create 'LongLink' entry for ] . + qq[oversize file '] . $clone->full_path ."'" ); + return; + }; + + push @write_me, $longlink; + } + + push @write_me, $clone; + + ### write the one, optionally 2 a::t::file objects to the handle + for my $clone (@write_me) { + + ### if the file is a symlink, there are 2 options: + ### either we leave the symlink intact, but then we don't write any + ### data OR we follow the symlink, which means we actually make a + ### copy. if we do the latter, we have to change the TYPE of the + ### clone to 'FILE' + my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK; + my $data_ok = !$clone->is_symlink && $clone->has_content; + + ### downgrade to a 'normal' file if it's a symlink we're going to + ### treat as a regular file + $clone->_downgrade_to_plainfile if $link_ok; + + ### get the header for this block + my $header = $self->_format_tar_entry( $clone ); + unless( $header ) { + $self->_error(q[Could not format header for: ] . + $clone->full_path ); + return; + } + + unless( print $handle $header ) { + $self->_error(q[Could not write header for: ] . + $clone->full_path); + return; + } + + if( $link_ok or $data_ok ) { + unless( print $handle $clone->data ) { + $self->_error(q[Could not write data for: ] . + $clone->full_path); + return; + } + + ### pad the end of the clone if required ### + print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK + } + + } ### done writing these entries + } + + ### write the end markers ### + print $handle TAR_END x 2 or + return $self->_error( qq[Could not write tar end markers] ); + ### did you want it written to a file, or returned as a string? ### + return length($file) ? 1 + : $HAS_PERLIO ? $dummy + : do { seek $handle, 0, 0; local $/; <$handle> } +} + +sub _format_tar_entry { + my $self = shift; + my $entry = shift or return; + my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; + my $no_prefix = shift || 0; + + my $file = $entry->name; + my $prefix = $entry->prefix; $prefix = '' unless defined $prefix; + + ### remove the prefix from the file name + ### not sure if this is still neeeded --kane + ### no it's not -- Archive::Tar::File->_new_from_file will take care of + ### this for us. Even worse, this would break if we tried to add a file + ### like x/x. + #if( length $prefix ) { + # $file =~ s/^$match//; + #} + + $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix) + if length $ext_prefix; + + ### not sure why this is... ### + my $l = PREFIX_LENGTH; # is ambiguous otherwise... + substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH; + + my $f1 = "%06o"; my $f2 = "%11o"; + + ### this might be optimizable with a 'changed' flag in the file objects ### + my $tar = pack ( + PACK, + $file, + + (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]), + (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]), + + "", # checksum field - space padded a bit down + + (map { $entry->$_() } qw[type linkname magic]), + + $entry->version || TAR_VERSION, + + (map { $entry->$_() } qw[uname gname]), + (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]), + + ($no_prefix ? '' : $prefix) + ); + + ### add the checksum ### + substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar)); + + return $tar; +} + +=head2 $tar->add_files( @filenamelist ) + +Takes a list of filenames and adds them to the in-memory archive. + +The path to the file is automatically converted to a Unix like +equivalent for use in the archive, and, if on MacOS, the file's +modification time is converted from the MacOS epoch to the Unix epoch. +So tar archives created on MacOS with B can be read +both with I on Unix and applications like I or +I on MacOS. + +Be aware that the file's type/creator and resource fork will be lost, +which is usually what you want in cross-platform archives. + +Returns a list of C objects that were just added. + +=cut + +sub add_files { + my $self = shift; + my @files = @_ or return; + + my @rv; + for my $file ( @files ) { + unless( -e $file ) { + $self->_error( qq[No such file: '$file'] ); + next; + } + + my $obj = Archive::Tar::File->new( file => $file ); + unless( $obj ) { + $self->_error( qq[Unable to add file: '$file'] ); + next; + } + + push @rv, $obj; + } + + push @{$self->{_data}}, @rv; + + return @rv; +} + +=head2 $tar->add_data ( $filename, $data, [$opthashref] ) + +Takes a filename, a scalar full of data and optionally a reference to +a hash with specific options. + +Will add a file to the in-memory archive, with name C<$filename> and +content C<$data>. Specific properties can be set using C<$opthashref>. +The following list of properties is supported: name, size, mtime +(last modified date), mode, uid, gid, linkname, uname, gname, +devmajor, devminor, prefix. (On MacOS, the file's path and +modification times are converted to Unix equivalents.) + +Returns the C object that was just added, or +C on failure. + +=cut + +sub add_data { + my $self = shift; + my ($file, $data, $opt) = @_; + + my $obj = Archive::Tar::File->new( data => $file, $data, $opt ); + unless( $obj ) { + $self->_error( qq[Unable to add file: '$file'] ); + return; + } + + push @{$self->{_data}}, $obj; + + return $obj; +} + +=head2 $tar->error( [$BOOL] ) + +Returns the current errorstring (usually, the last error reported). +If a true value was specified, it will give the C +equivalent of the error, in effect giving you a stacktrace. + +For backwards compatibility, this error is also available as +C<$Archive::Tar::error> although it is much recommended you use the +method call instead. + +=cut + +{ + $error = ''; + my $longmess; + + sub _error { + my $self = shift; + my $msg = $error = shift; + $longmess = Carp::longmess($error); + + ### set Archive::Tar::WARN to 0 to disable printing + ### of errors + if( $WARN ) { + carp $DEBUG ? $longmess : $msg; + } + + return; + } + + sub error { + my $self = shift; + return shift() ? $longmess : $error; + } +} + + +=head2 $bool = $tar->has_io_string + +Returns true if we currently have C support loaded. + +Either C or C support is needed to support writing +stringified archives. Currently, C is the preffered method, if +available. + +See the C section to see how to change this preference. + +=cut + +sub has_io_string { return $HAS_IO_STRING; } + +=head2 $bool = $tar->has_perlio + +Returns true if we currently have C support loaded. + +This requires C or higher, compiled with C + +Either C or C support is needed to support writing +stringified archives. Currently, C is the preffered method, if +available. + +See the C section to see how to change this preference. + +=cut + +sub has_perlio { return $HAS_PERLIO; } + + +=head1 Class Methods + +=head2 Archive::Tar->create_archive($file, $compression, @filelist) + +Creates a tar file from the list of files provided. The first +argument can either be the name of the tar file to create or a +reference to an open file handle (e.g. a GLOB reference). + +The second argument specifies the level of compression to be used, if +any. Compression of tar files requires the installation of the +IO::Zlib module. Specific levels of compression may be +requested by passing a value between 2 and 9 as the second argument. +Any other value evaluating as true will result in the default +compression level being used. + +Note that when you pass in a filehandle, the compression argument +is ignored, as all files are printed verbatim to your filehandle. +If you wish to enable compression with filehandles, use an +C filehandle instead. + +The remaining arguments list the files to be included in the tar file. +These files must all exist. Any files which don't exist or can't be +read are silently ignored. + +If the archive creation fails for any reason, C will +return false. Please use the C method to find the cause of the +failure. + +Note that this method does not write C as it were; it +still reads all the files into memory before writing out the archive. +Consult the FAQ below if this is a problem. + +=cut + +sub create_archive { + my $class = shift; + + my $file = shift; return unless defined $file; + my $gzip = shift || 0; + my @files = @_; + + unless( @files ) { + return $class->_error( qq[Cowardly refusing to create empty archive!] ); + } + + my $tar = $class->new; + $tar->add_files( @files ); + return $tar->write( $file, $gzip ); +} + +=head2 Archive::Tar->list_archive ($file, $compressed, [\@properties]) + +Returns a list of the names of all the files in the archive. The +first argument can either be the name of the tar file to list or a +reference to an open file handle (e.g. a GLOB reference). + +If C is passed an array reference as its third +argument it returns a list of hash references containing the requested +properties of each file. The following list of properties is +supported: name, size, mtime (last modified date), mode, uid, gid, +linkname, uname, gname, devmajor, devminor, prefix. + +Passing an array reference containing only one element, 'name', is +special cased to return a list of names rather than a list of hash +references. + +=cut + +sub list_archive { + my $class = shift; + my $file = shift; return unless defined $file; + my $gzip = shift || 0; + + my $tar = $class->new($file, $gzip); + return unless $tar; + + return $tar->list_files( @_ ); +} + +=head2 Archive::Tar->extract_archive ($file, $gzip) + +Extracts the contents of the tar file. The first argument can either +be the name of the tar file to create or a reference to an open file +handle (e.g. a GLOB reference). All relative paths in the tar file will +be created underneath the current working directory. + +C will return a list of files it extracted. +If the archive extraction fails for any reason, C +will return false. Please use the C method to find the cause +of the failure. + +=cut + +sub extract_archive { + my $class = shift; + my $file = shift; return unless defined $file; + my $gzip = shift || 0; + + my $tar = $class->new( ) or return; + + return $tar->read( $file, $gzip, { extract => 1 } ); +} + +=head2 Archive::Tar->can_handle_compressed_files + +A simple checking routine, which will return true if C +is able to uncompress compressed archives on the fly with C, +or false if C is not installed. + +You can use this as a shortcut to determine whether C +will do what you think before passing compressed archives to its +C method. + +=cut + +sub can_handle_compressed_files { return ZLIB ? 1 : 0 } + +sub no_string_support { + croak("You have to install IO::String to support writing archives to strings"); +} + +1; + +__END__ + +=head1 GLOBAL VARIABLES + +=head2 $Archive::Tar::FOLLOW_SYMLINK + +Set this variable to C<1> to make C effectively make a +copy of the file when extracting. Default is C<0>, which +means the symlink stays intact. Of course, you will have to pack the +file linked to as well. + +This option is checked when you write out the tarfile using C +or C. + +This works just like C's C<-h> option. + +=head2 $Archive::Tar::CHOWN + +By default, C will try to C your files if it is +able to. In some cases, this may not be desired. In that case, set +this variable to C<0> to disable C-ing, even if it were +possible. + +The default is C<1>. + +=head2 $Archive::Tar::CHMOD + +By default, C will try to C your files to +whatever mode was specified for the particular file in the archive. +In some cases, this may not be desired. In that case, set this +variable to C<0> to disable C-ing. + +The default is C<1>. + +=head2 $Archive::Tar::DO_NOT_USE_PREFIX + +By default, C will try to put paths that are over +100 characters in the C field of your tar header. However, +some older tar programs do not implement this spec. To retain +compatibility with these older versions, you can set the +C<$DO_NOT_USE_PREFIX> variable to a true value, and C +will use an alternate way of dealing with paths over 100 characters +by using the C feature. + +The default is C<0>. + +=head2 $Archive::Tar::DEBUG + +Set this variable to C<1> to always get the C output +of the warnings, instead of the regular C. This is the same +message you would get by doing: + + $tar->error(1); + +Defaults to C<0>. + +=head2 $Archive::Tar::WARN + +Set this variable to C<0> if you do not want any warnings printed. +Personally I recommend against doing this, but people asked for the +option. Also, be advised that this is of course not threadsafe. + +Defaults to C<1>. + +=head2 $Archive::Tar::error + +Holds the last reported error. Kept for historical reasons, but its +use is very much discouraged. Use the C method instead: + + warn $tar->error unless $tar->extract; + +=head2 $Archive::Tar::HAS_PERLIO + +This variable holds a boolean indicating if we currently have +C support loaded. This will be enabled for any perl +greater than C<5.8> compiled with C. + +If you feel strongly about disabling it, set this variable to +C. Note that you will then need C installed +to support writing stringified archives. + +Don't change this variable unless you B know what you're +doing. + +=head2 $Archive::Tar::HAS_IO_STRING + +This variable holds a boolean indicating if we currently have +C support loaded. This will be enabled for any perl +that has a loadable C module. + +If you feel strongly about disabling it, set this variable to +C. Note that you will then need C support from +your perl to be able to write stringified archives. + +Don't change this variable unless you B know what you're +doing. + +=head1 FAQ + +=over 4 + +=item What's the minimum perl version required to run Archive::Tar? + +You will need perl version 5.005_03 or newer. + +=item Isn't Archive::Tar slow? + +Yes it is. It's pure perl, so it's a lot slower then your C +However, it's very portable. If speed is an issue, consider using +C instead. + +=item Isn't Archive::Tar heavier on memory than /bin/tar? + +Yes it is, see previous answer. Since C and therefore +C doesn't support C on their filehandles, there is little +choice but to read the archive into memory. +This is ok if you want to do in-memory manipulation of the archive. +If you just want to extract, use the C class method +instead. It will optimize and write to disk immediately. + +=item Can't you lazy-load data instead? + +No, not easily. See previous question. + +=item How much memory will an X kb tar file need? + +Probably more than X kb, since it will all be read into memory. If +this is a problem, and you don't need to do in memory manipulation +of the archive, consider using C instead. + +=item What do you do with unsupported filetypes in an archive? + +C has a few filetypes that aren't supported on other platforms, +like C. If we encounter a C or C we'll just +try to make a copy of the original file, rather than throwing an error. + +This does require you to read the entire archive in to memory first, +since otherwise we wouldn't know what data to fill the copy with. +(This means that you cannot use the class methods on archives that +have incompatible filetypes and still expect things to work). + +For other filetypes, like C and C we'll warn that +the extraction of this particular item didn't work. + +=back + +=head1 TODO + +=over 4 + +=item Check if passed in handles are open for read/write + +Currently I don't know of any portable pure perl way to do this. +Suggestions welcome. + +=back + +=head1 AUTHOR + +This module by +Jos Boumans Ekane@cpan.orgE. + +=head1 ACKNOWLEDGEMENTS + +Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney and +especially Andrew Savige for their help and suggestions. + +=head1 COPYRIGHT + +This module is +copyright (c) 2002 Jos Boumans Ekane@cpan.orgE. +All rights reserved. + +This library is free software; +you may redistribute and/or modify it under the same +terms as Perl itself. + +=cut diff --git a/lib/Archive/Tar/Constant.pm b/lib/Archive/Tar/Constant.pm new file mode 100644 index 0000000000..fe5bb14726 --- /dev/null +++ b/lib/Archive/Tar/Constant.pm @@ -0,0 +1,72 @@ +package Archive::Tar::Constant; + +BEGIN { + require Exporter; + $VERSION= '0.02'; + @ISA = qw[Exporter]; + @EXPORT = qw[ + FILE HARDLINK SYMLINK CHARDEV BLOCKDEV DIR FIFO SOCKET UNKNOWN + BUFFER HEAD READ_ONLY WRITE_ONLY UNPACK PACK TIME_OFFSET ZLIB + BLOCK_SIZE TAR_PAD TAR_END ON_UNIX BLOCK CAN_READLINK MAGIC + TAR_VERSION UNAME GNAME CAN_CHOWN MODE CHECK_SUM UID GID + GZIP_MAGIC_NUM MODE_READ LONGLINK LONGLINK_NAME PREFIX_LENGTH + LABEL NAME_LENGTH STRIP_MODE + ]; + + require Time::Local if $^O eq "MacOS"; +} + +use constant FILE => 0; +use constant HARDLINK => 1; +use constant SYMLINK => 2; +use constant CHARDEV => 3; +use constant BLOCKDEV => 4; +use constant DIR => 5; +use constant FIFO => 6; +use constant SOCKET => 8; +use constant UNKNOWN => 9; +use constant LONGLINK => 'L'; +use constant LABEL => 'V'; + +use constant BUFFER => 4096; +use constant HEAD => 512; +use constant BLOCK => 512; + +use constant BLOCK_SIZE => sub { my $n = int($_[0]/BLOCK); $n++ if $_[0] % BLOCK; $n * BLOCK }; +use constant TAR_PAD => sub { my $x = shift || return; return "\0" x (BLOCK - ($x % BLOCK) ) }; +use constant TAR_END => "\0" x BLOCK; + +use constant READ_ONLY => sub { shift() ? 'rb' : 'r' }; +use constant WRITE_ONLY => sub { $_[0] ? 'wb' . shift : 'w' }; +use constant MODE_READ => sub { $_[0] =~ /^r/ ? 1 : 0 }; + +# Pointless assigment to make -w shut up +my $getpwuid; $getpwuid = 'unknown' unless eval { my $f = getpwuid (0); }; +my $getgrgid; $getgrgid = 'unknown' unless eval { my $f = getgrgid (0); }; +use constant UNAME => sub { $getpwuid || scalar getpwuid( shift() ) }; +use constant GNAME => sub { $getgrgid || scalar getgrgid( shift() ) }; +use constant UID => $>; +use constant GID => (split ' ', $) )[0]; + +use constant MODE => do { 0666 & (0777 & ~umask) }; +use constant STRIP_MODE => sub { shift() & 0777 }; +use constant CHECK_SUM => " "; + +use constant UNPACK => 'A100 A8 A8 A8 A12 A12 A8 A1 A100 A6 A2 A32 A32 A8 A8 A155 x12'; +use constant PACK => 'a100 a8 a8 a8 a12 a12 A8 a1 a100 a6 a2 a32 a32 a8 a8 a155 x12'; +use constant NAME_LENGTH => 100; +use constant PREFIX_LENGTH => 155; + +use constant TIME_OFFSET => ($^O eq "MacOS") ? Time::Local::timelocal(0,0,0,1,0,70) : 0; +use constant MAGIC => "ustar"; +use constant TAR_VERSION => "00"; +use constant LONGLINK_NAME => '././@LongLink'; + +use constant ZLIB => do { eval { require IO::Zlib }; $@ ? 0 : 1 }; +use constant GZIP_MAGIC_NUM => qr/^(?:\037\213|\037\235)/; + +use constant CAN_CHOWN => do { ($> == 0 and $^O ne "MacOS" and $^O ne "MSWin32") }; +use constant CAN_READLINK => ($^O ne 'MSWin32' and $^O !~ /RISC(?:[ _])?OS/i and $^O ne 'VMS'); +use constant ON_UNIX => ($^O ne 'MSWin32' and $^O ne 'MacOS' and $^O ne 'VMS'); + +1; diff --git a/lib/Archive/Tar/File.pm b/lib/Archive/Tar/File.pm new file mode 100644 index 0000000000..a310ee9de9 --- /dev/null +++ b/lib/Archive/Tar/File.pm @@ -0,0 +1,579 @@ +package Archive::Tar::File; +use strict; + +use IO::File; +use File::Spec::Unix (); +use File::Spec (); +use File::Basename (); +use Archive::Tar::Constant; + +use vars qw[@ISA $VERSION]; +@ISA = qw[Archive::Tar]; +$VERSION = '0.02'; + +### set value to 1 to oct() it during the unpack ### +my $tmpl = [ + name => 0, # string + mode => 1, # octal + uid => 1, # octal + gid => 1, # octal + size => 1, # octal + mtime => 1, # octal + chksum => 1, # octal + type => 0, # character + linkname => 0, # string + magic => 0, # string + version => 0, # 2 bytes + uname => 0, # string + gname => 0, # string + devmajor => 1, # octal + devminor => 1, # octal + prefix => 0, + +### end UNPACK items ### + raw => 0, # the raw data chunk + data => 0, # the data associated with the file -- + # This might be very memory intensive +]; + +### install get/set accessors for this object. +for ( my $i=0; $i[$i]; + no strict 'refs'; + *{__PACKAGE__."::$key"} = sub { + my $self = shift; + $self->{$key} = $_[0] if @_; + + ### just in case the key is not there or undef or something ### + { local $^W = 0; + return $self->{$key}; + } + } +} + +=head1 NAME + +Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar + +=head1 SYNOPSIS + + my @items = $tar->get_files; + + print $_->name, ' ', $_->size, "\n" for @items; + + print $object->get_content; + $object->replace_content('new content'); + + $object->rename( 'new/full/path/to/file.c' ); + +=head1 DESCRIPTION + +Archive::Tar::Files provides a neat little object layer for in-memory +extracted files. It's mostly used internally in Archive::Tar to tidy +up the code, but there's no reason users shouldn't use this API as +well. + +=head2 Accessors + +A lot of the methods in this package are accessors to the various +fields in the tar header: + +=over 4 + +=item name + +The file's name + +=item mode + +The file's mode + +=item uid + +The user id owning the file + +=item gid + +The group id owning the file + +=item size + +File size in bytes + +=item mtime + +Modification time. Adjusted to mac-time on MacOS if required + +=item chksum + +Checksum field for the tar header + +=item type + +File type -- numeric, but comparable to exported constants -- see +Archive::Tar's documentation + +=item linkname + +If the file is a symlink, the file it's pointing to + +=item magic + +Tar magic string -- not useful for most users + +=item version + +Tar version string -- not useful for most users + +=item uname + +The user name that owns the file + +=item gname + +The group name that owns the file + +=item devmajor + +Device major number in case of a special file + +=item devminor + +Device minor number in case of a special file + +=item prefix + +Any directory to prefix to the extraction path, if any + +=item raw + +Raw tar header -- not useful for most users + +=back + +=head1 Methods + +=head2 new( file => $path ) + +Returns a new Archive::Tar::File object from an existing file. + +Returns undef on failure. + +=head2 new( data => $path, $data, $opt ) + +Returns a new Archive::Tar::File object from data. + +C<$path> defines the file name (which need not exist), C<$data> the +file contents, and C<$opt> is a reference to a hash of attributes +which may be used to override the default attributes (fields in the +tar header), which are described above in the Accessors section. + +Returns undef on failure. + +=head2 new( chunk => $chunk ) + +Returns a new Archive::Tar::File object from a raw 512-byte tar +archive chunk. + +Returns undef on failure. + +=cut + +sub new { + my $class = shift; + my $what = shift; + + my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) : + ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) : + ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) : + undef; + + return $obj; +} + +### copies the data, creates a clone ### +sub clone { + my $self = shift; + return bless { %$self }, ref $self; +} + +sub _new_from_chunk { + my $class = shift; + my $chunk = shift or return; + + ### makes it start at 0 actually... :) ### + my $i = -1; + my %entry = map { + $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ + } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); + + my $obj = bless \%entry, $class; + + ### magic is a filetype string.. it should have something like 'ustar' or + ### something similar... if the chunk is garbage, skip it + return unless $obj->magic !~ /\W/; + + ### store the original chunk ### + $obj->raw( $chunk ); + + $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) ); + $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) ); + + + return $obj; + +} + +sub _new_from_file { + my $class = shift; + my $path = shift or return; + my $type = __PACKAGE__->_filetype($path); + my $data = ''; + + unless ($type == DIR) { + my $fh = IO::File->new; + $fh->open($path) or return; + + ### binmode needed to read files properly on win32 ### + binmode $fh; + $data = do { local $/; <$fh> }; + close $fh; + } + + my @items = qw[mode uid gid size mtime]; + my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9]; + + ### you *must* set size == 0 on symlinks, or the next entry will be + ### though of as the contents of the symlink, which is wrong. + ### this fixes bug #7937 + $hash{size} = 0 if ($type == DIR or $type == SYMLINK); + $hash{mtime} -= TIME_OFFSET; + + ### strip the high bits off the mode, which we don't need to store + $hash{mode} = STRIP_MODE->( $hash{mode} ); + + + ### probably requires some file path munging here ... ### + ### name and prefix are set later + my $obj = { + %hash, + name => '', + chksum => CHECK_SUM, + type => $type, + linkname => ($type == SYMLINK and CAN_READLINK) + ? readlink $path + : '', + magic => MAGIC, + version => TAR_VERSION, + uname => UNAME->( $hash{uid} ), + gname => GNAME->( $hash{gid} ), + devmajor => 0, # not handled + devminor => 0, # not handled + prefix => '', + data => $data, + }; + + bless $obj, $class; + + ### fix up the prefix and file from the path + my($prefix,$file) = $obj->_prefix_and_file( $path ); + $obj->prefix( $prefix ); + $obj->name( $file ); + + return $obj; +} + +sub _new_from_data { + my $class = shift; + my $path = shift or return; + my $data = shift; return unless defined $data; + my $opt = shift; + + my $obj = { + data => $data, + name => '', + mode => MODE, + uid => UID, + gid => GID, + size => length $data, + mtime => time - TIME_OFFSET, + chksum => CHECK_SUM, + type => FILE, + linkname => '', + magic => MAGIC, + version => TAR_VERSION, + uname => UNAME->( UID ), + gname => GNAME->( GID ), + devminor => 0, + devmajor => 0, + prefix => '', + }; + + ### overwrite with user options, if provided ### + if( $opt and ref $opt eq 'HASH' ) { + for my $key ( keys %$opt ) { + + ### don't write bogus options ### + next unless exists $obj->{$key}; + $obj->{$key} = $opt->{$key}; + } + } + + bless $obj, $class; + + ### fix up the prefix and file from the path + my($prefix,$file) = $obj->_prefix_and_file( $path ); + $obj->prefix( $prefix ); + $obj->name( $file ); + + return $obj; +} + +sub _prefix_and_file { + my $self = shift; + my $path = shift; + + my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir ); + my @dirs = File::Spec->splitdir( $dirs ); + + ### so sometimes the last element is '' -- probably when trailing + ### dir slashes are encountered... this is is of course pointless, + ### so remove it + pop @dirs while @dirs and not length $dirs[-1]; + + ### if it's a directory, then $file might be empty + $file = pop @dirs if $self->is_dir and not length $file; + + my $prefix = File::Spec::Unix->catdir( + grep { length } $vol, @dirs + ); + return( $prefix, $file ); +} + +sub _filetype { + my $self = shift; + my $file = shift or return; + + return SYMLINK if (-l $file); # Symlink + + return FILE if (-f _); # Plain file + + return DIR if (-d _); # Directory + + return FIFO if (-p _); # Named pipe + + return SOCKET if (-S _); # Socket + + return BLOCKDEV if (-b _); # Block special + + return CHARDEV if (-c _); # Character special + + ### shouldn't happen, this is when making archives, not reading ### + return LONGLINK if ( $file eq LONGLINK_NAME ); + + return UNKNOWN; # Something else (like what?) + +} + +### this method 'downgrades' a file to plain file -- this is used for +### symlinks when FOLLOW_SYMLINKS is true. +sub _downgrade_to_plainfile { + my $entry = shift; + $entry->type( FILE ); + $entry->mode( MODE ); + $entry->linkname(''); + + return 1; +} + +=head2 full_path + +Returns the full path from the tar header; this is basically a +concatenation of the C and C fields. + +=cut + +sub full_path { + my $self = shift; + + ### if prefix field is emtpy + return $self->name unless defined $self->prefix and length $self->prefix; + + ### or otherwise, catfile'd + return File::Spec::Unix->catfile( $self->prefix, $self->name ); +} + + +=head2 validate + +Done by Archive::Tar internally when reading the tar file: +validate the header against the checksum to ensure integer tar file. + +Returns true on success, false on failure + +=cut + +sub validate { + my $self = shift; + + my $raw = $self->raw; + + ### don't know why this one is different from the one we /write/ ### + substr ($raw, 148, 8) = " "; + return unpack ("%16C*", $raw) == $self->chksum ? 1 : 0; +} + +=head2 has_content + +Returns a boolean to indicate whether the current object has content. +Some special files like directories and so on never will have any +content. This method is mainly to make sure you don't get warnings +for using uninitialized values when looking at an object's content. + +=cut + +sub has_content { + my $self = shift; + return defined $self->data() && length $self->data() ? 1 : 0; +} + +=head2 get_content + +Returns the current content for the in-memory file + +=cut + +sub get_content { + my $self = shift; + $self->data( ); +} + +=head2 get_content_by_ref + +Returns the current content for the in-memory file as a scalar +reference. Normal users won't need this, but it will save memory if +you are dealing with very large data files in your tar archive, since +it will pass the contents by reference, rather than make a copy of it +first. + +=cut + +sub get_content_by_ref { + my $self = shift; + + return \$self->{data}; +} + +=head2 replace_content( $content ) + +Replace the current content of the file with the new content. This +only affects the in-memory archive, not the on-disk version until +you write it. + +Returns true on success, false on failure. + +=cut + +sub replace_content { + my $self = shift; + my $data = shift || ''; + + $self->data( $data ); + $self->size( length $data ); + return 1; +} + +=head2 rename( $new_name ) + +Rename the current file to $new_name. + +Note that you must specify a Unix path for $new_name, since per tar +standard, all files in the archive must be Unix paths. + +Returns true on success and false on failure. + +=cut + +sub rename { + my $self = shift; + my $path = shift or return; + + my ($prefix,$file) = $self->_prefix_and_file( $path ); + + $self->name( $file ); + $self->prefix( $prefix ); + + return 1; +} + +=head1 Convenience methods + +To quickly check the type of a C object, you can +use the following methods: + +=over 4 + +=item is_file + +Returns true if the file is of type C + +=item is_dir + +Returns true if the file is of type C + +=item is_hardlink + +Returns true if the file is of type C + +=item is_symlink + +Returns true if the file is of type C + +=item is_chardev + +Returns true if the file is of type C + +=item is_blockdev + +Returns true if the file is of type C + +=item is_fifo + +Returns true if the file is of type C + +=item is_socket + +Returns true if the file is of type C + +=item is_longlink + +Returns true if the file is of type C. +Should not happen after a successful C. + +=item is_label + +Returns true if the file is of type C