Move IO::Compress from ext/ to cpan/

author: Nicholas Clark <nick@ccl4.org> 2009-10-02 11:11:19 +0100
committer: Nicholas Clark <nick@ccl4.org> 2009-10-02 11:11:19 +0100
commit: 3fd969f44926f311e1c67d9470a9e936f7af2d73 (patch)
tree: ce6e701f0f80bfd0de9befe7b1bf766e37a6cfbb /cpan/IO-Compress/pod
parent: 70b2007073159a8b94a74b0b9ba406945c45917d (diff)
download: perl-3fd969f44926f311e1c67d9470a9e936f7af2d73.tar.gz
1 files changed, 512 insertions, 0 deletions
diff --git a/cpan/IO-Compress/pod/FAQ.pod b/cpan/IO-Compress/pod/FAQ.pod
new file mode 100644
index 0000000000..0fee2a9f6d
--- /dev/null
+++ b/cpan/IO-Compress/pod/FAQ.pod
@@ -0,0 +1,512 @@
+
+=head1 NAME
+
+IO::Compress::FAQ -- Frequently Asked Questions about IO::Compress
+
+=head1 DESCRIPTION
+
+Common questions answered.
+
+=head2 Compatibility with Unix compress/uncompress.
+
+Although C<Compress::Zlib> has a pair of functions called C<compress> and
+C<uncompress>, they are I<not> related to the Unix programs of the same
+name. The C<Compress::Zlib> module is not compatible with Unix
+C<compress>.
+
+If you have the C<uncompress> program available, you can use this to read
+compressed files
+
+    open F, "uncompress -c $filename |";
+    while (<F>)
+    {
+        ...
+
+Alternatively, if you have the C<gunzip> program available, you can use
+this to read compressed files
+
+    open F, "gunzip -c $filename |";
+    while (<F>)
+    {
+        ...
+
+and this to write compress files, if you have the C<compress> program
+available
+
+    open F, "| compress -c $filename ";
+    print F "data";
+    ...
+    close F ;
+
+=head2 Accessing .tar.Z files
+
+The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via the
+C<IO::Zlib> module) to access tar files that have been compressed with
+C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
+utility cannot be read by C<Compress::Zlib> and so cannot be directly
+accessed by C<Archive::Tar>.
+
+If the C<uncompress> or C<gunzip> programs are available, you can use one
+of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
+
+Firstly with C<uncompress>
+
+    use strict;
+    use warnings;
+    use Archive::Tar;
+
+    open F, "uncompress -c $filename |";
+    my $tar = Archive::Tar->new(*F);
+    ...
+
+and this with C<gunzip>
+
+    use strict;
+    use warnings;
+    use Archive::Tar;
+
+    open F, "gunzip -c $filename |";
+    my $tar = Archive::Tar->new(*F);
+    ...
+
+Similarly, if the C<compress> program is available, you can use this to
+write a C<.tar.Z> file
+
+    use strict;
+    use warnings;
+    use Archive::Tar;
+    use IO::File;
+
+    my $fh = new IO::File "| compress -c >$filename";
+    my $tar = Archive::Tar->new();
+    ...
+    $tar->write($fh);
+    $fh->close ;
+
+=head2 Accessing Zip Files
+
+This module provides support for reading/writing zip files using the
+C<IO::Compress::Zip> and C<IO::Uncompress::Unzip> modules.
+
+The primary focus of the C<IO::Compress::Zip> and C<IO::Uncompress::Unzip>
+modules is to provide an C<IO::File> compatible streaming read/write
+interface to zip files/buffers. They are not fully flegged archivers. If
+you are looking for an archiver check out the C<Archive::Zip> module. You
+can find it on CPAN at 
+
+    http://www.cpan.org/modules/by-module/Archive/Archive-Zip-*.tar.gz    
+
+=head2 Compressed files and Net::FTP
+
+The C<Net::FTP> module provides two low-level methods called C<stor> and
+C<retr> that both return filehandles. These filehandles can used with the
+C<IO::Compress/Uncompress> modules to compress or uncompress files read
+from or written to an FTP Server on the fly, without having to create a
+temporary file.
+
+Firstly, here is code that uses C<retr> to uncompressed a file as it is
+read from the FTP Server.
+
+    use Net::FTP;
+    use IO::Uncompress::Gunzip qw(:all);
+
+    my $ftp = new Net::FTP ...
+
+    my $retr_fh = $ftp->retr($compressed_filename);
+    gunzip $retr_fh => $outFilename, AutoClose => 1
+        or die "Cannot uncompress '$compressed_file': $GunzipError\n";
+
+and this to compress a file as it is written to the FTP Server 
+
+    use Net::FTP;
+    use IO::Compress::Gzip qw(:all);
+
+    my $stor_fh = $ftp->stor($filename);
+    gzip "filename" => $stor_fh, AutoClose => 1
+        or die "Cannot compress '$filename': $GzipError\n";
+
+=head2 How do I recompress using a different compression?
+
+This is easier that you might expect if you realise that all the
+C<IO::Compress::*> objects are derived from C<IO::File> and that all the
+C<IO::Uncompress::*> modules can read from an C<IO::File> filehandle.
+
+So, for example, say you have a file compressed with gzip that you want to
+recompress with bzip2. Here is all that is needed to carry out the
+recompression.
+
+    use IO::Uncompress::Gunzip ':all';
+    use IO::Compress::Bzip2 ':all';
+
+    my $gzipFile = "somefile.gz";
+    my $bzipFile = "somefile.bz2";
+
+    my $gunzip = new IO::Uncompress::Gunzip $gzipFile
+        or die "Cannot gunzip $gzipFile: $GunzipError\n" ;
+
+    bzip2 $gunzip => $bzipFile 
+        or die "Cannot bzip2 to $bzipFile: $Bzip2Error\n" ;
+
+Note, there is a limitation of this technique. Some compression file
+formats store extra information along with the compressed data payload. For
+example, gzip can optionally store the original filename and Zip stores a
+lot of information about the original file. If the original compressed file
+contains any of this extra information, it will not be transferred to the
+new compressed file usign the technique above.
+
+=head2 Apache::GZip Revisited
+
+Below is a mod_perl Apache compression module, called C<Apache::GZip>,
+taken from
+F<http://perl.apache.org/docs/tutorials/tips/mod_perl_tricks/mod_perl_tricks.html#On_the_Fly_Compression>
+
+  package Apache::GZip;
+  #File: Apache::GZip.pm
+  
+  use strict vars;
+  use Apache::Constants ':common';
+  use Compress::Zlib;
+  use IO::File;
+  use constant GZIP_MAGIC => 0x1f8b;
+  use constant OS_MAGIC => 0x03;
+  
+  sub handler {
+      my $r = shift;
+      my ($fh,$gz);
+      my $file = $r->filename;
+      return DECLINED unless $fh=IO::File->new($file);
+      $r->header_out('Content-Encoding'=>'gzip');
+      $r->send_http_header;
+      return OK if $r->header_only;
+  
+      tie *STDOUT,'Apache::GZip',$r;
+      print($_) while <$fh>;
+      untie *STDOUT;
+      return OK;
+  }
+  
+  sub TIEHANDLE {
+      my($class,$r) = @_;
+      # initialize a deflation stream
+      my $d = deflateInit(-WindowBits=>-MAX_WBITS()) || return undef;
+  
+      # gzip header -- don't ask how I found out
+      $r->print(pack("nccVcc",GZIP_MAGIC,Z_DEFLATED,0,time(),0,OS_MAGIC));
+  
+      return bless { r   => $r,
+                     crc =>  crc32(undef),
+                     d   => $d,
+                     l   =>  0 
+                   },$class;
+  }
+  
+  sub PRINT {
+      my $self = shift;
+      foreach (@_) {
+        # deflate the data
+        my $data = $self->{d}->deflate($_);
+        $self->{r}->print($data);
+        # keep track of its length and crc
+        $self->{l} += length($_);
+        $self->{crc} = crc32($_,$self->{crc});
+      }
+  }
+  
+  sub DESTROY {
+     my $self = shift;
+     
+     # flush the output buffers
+     my $data = $self->{d}->flush;
+     $self->{r}->print($data);
+     
+     # print the CRC and the total length (uncompressed)
+     $self->{r}->print(pack("LL",@{$self}{qw/crc l/}));
+  }
+   
+  1;
+
+Here's the Apache configuration entry you'll need to make use of it.  Once
+set it will result in everything in the /compressed directory will be
+compressed automagically.
+
+  <Location /compressed>
+     SetHandler  perl-script
+     PerlHandler Apache::GZip
+  </Location>
+
+Although at first sight there seems to be quite a lot going on in
+C<Apache::GZip>, you could sum up what the code was doing as follows --
+read the contents of the file in C<< $r->filename >>, compress it and write
+the compressed data to standard output. That's all.
+
+This code has to jump through a few hoops to achieve this because
+
+=over
+
+=item 1.
+
+The gzip support in C<Compress::Zlib> version 1.x can only work with a real
+filesystem filehandle. The filehandles used by Apache modules are not
+associated with the filesystem.
+
+=item 2.
+
+That means all the gzip support has to be done by hand - in this case by
+creating a tied filehandle to deal with creating the gzip header and
+trailer.
+
+=back
+
+C<IO::Compress::Gzip> doesn't have that filehandle limitation (this was one
+of the reasons for writing it in the first place). So if
+C<IO::Compress::Gzip> is used instead of C<Compress::Zlib> the whole tied
+filehandle code can be removed. Here is the rewritten code.
+
+  package Apache::GZip;
+  
+  use strict vars;
+  use Apache::Constants ':common';
+  use IO::Compress::Gzip;
+  use IO::File;
+  
+  sub handler {
+      my $r = shift;
+      my ($fh,$gz);
+      my $file = $r->filename;
+      return DECLINED unless $fh=IO::File->new($file);
+      $r->header_out('Content-Encoding'=>'gzip');
+      $r->send_http_header;
+      return OK if $r->header_only;
+
+      my $gz = new IO::Compress::Gzip '-', Minimal => 1
+          or return DECLINED ;
+
+      print $gz $_ while <$fh>;
+  
+      return OK;
+  }
+  
+or even more succinctly, like this, using a one-shot gzip
+
+  package Apache::GZip;
+  
+  use strict vars;
+  use Apache::Constants ':common';
+  use IO::Compress::Gzip qw(gzip);
+  
+  sub handler {
+      my $r = shift;
+      $r->header_out('Content-Encoding'=>'gzip');
+      $r->send_http_header;
+      return OK if $r->header_only;
+
+      gzip $r->filename => '-', Minimal => 1
+        or return DECLINED ;
+
+      return OK;
+  }
+   
+  1;
+
+The use of one-shot C<gzip> above just reads from C<< $r->filename >> and
+writes the compressed data to standard output.
+
+Note the use of the C<Minimal> option in the code above. When using gzip
+for Content-Encoding you should I<always> use this option. In the example
+above it will prevent the filename being included in the gzip header and
+make the size of the gzip data stream a slight bit smaller.
+
+=head2 Using C<InputLength> to uncompress data embedded in a larger file/buffer.
+
+A fairly common use-case is where compressed data is embedded in a larger
+file/buffer and you want to read both.
+
+As an example consider the structure of a zip file. This is a well-defined
+file format that mixes both compressed and uncompressed sections of data in
+a single file. 
+
+For the purposes of this discussion you can think of a zip file as sequence
+of compressed data streams, each of which is prefixed by an uncompressed
+local header. The local header contains information about the compressed
+data stream, including the name of the compressed file and, in particular,
+the length of the compressed data stream. 
+
+To illustrate how to use C<InputLength> here is a script that walks a zip
+file and prints out how many lines are in each compressed file (if you
+intend write code to walking through a zip file for real see
+L<IO::Uncompress::Unzip/"Walking through a zip file"> )
+
+    use strict;
+    use warnings;
+
+    use IO::File;
+    use IO::Uncompress::RawInflate qw(:all);
+
+    use constant ZIP_LOCAL_HDR_SIG  => 0x04034b50;
+    use constant ZIP_LOCAL_HDR_LENGTH => 30;
+
+    my $file = $ARGV[0] ;
+
+    my $fh = new IO::File "<$file"
+                or die "Cannot open '$file': $!\n";
+
+    while (1)
+    {
+        my $sig;
+        my $buffer;
+
+        my $x ;
+        ($x = $fh->read($buffer, ZIP_LOCAL_HDR_LENGTH)) == ZIP_LOCAL_HDR_LENGTH 
+            or die "Truncated file: $!\n";
+
+        my $signature = unpack ("V", substr($buffer, 0, 4));
+
+        last unless $signature == ZIP_LOCAL_HDR_SIG;
+
+        # Read Local Header
+        my $gpFlag             = unpack ("v", substr($buffer, 6, 2));
+        my $compressedMethod   = unpack ("v", substr($buffer, 8, 2));
+        my $compressedLength   = unpack ("V", substr($buffer, 18, 4));
+        my $uncompressedLength = unpack ("V", substr($buffer, 22, 4));
+        my $filename_length    = unpack ("v", substr($buffer, 26, 2)); 
+        my $extra_length       = unpack ("v", substr($buffer, 28, 2));
+
+        my $filename ;
+        $fh->read($filename, $filename_length) == $filename_length 
+            or die "Truncated file\n";
+
+        $fh->read($buffer, $extra_length) == $extra_length
+            or die "Truncated file\n";
+
+        if ($compressedMethod != 8 && $compressedMethod != 0)
+        {
+            warn "Skipping file '$filename' - not deflated $compressedMethod\n";
+            $fh->read($buffer, $compressedLength) == $compressedLength
+                or die "Truncated file\n";
+            next;
+        }
+
+        if ($compressedMethod == 0 && $gpFlag & 8 == 8)
+        {
+            die "Streamed Stored not supported for '$filename'\n";
+        }
+
+        next if $compressedLength == 0;
+
+        # Done reading the Local Header
+
+        my $inf = new IO::Uncompress::RawInflate $fh,
+                            Transparent => 1,
+                            InputLength => $compressedLength
+          or die "Cannot uncompress $file [$filename]: $RawInflateError\n"  ;
+
+        my $line_count = 0;
+
+        while (<$inf>)
+        {
+            ++ $line_count;
+        }
+
+        print "$filename: $line_count\n";
+    }
+
+The majority of the code above is concerned with reading the zip local
+header data. The code that I want to focus on is at the bottom. 
+
+    while (1) {
+    
+        # read local zip header data
+        # get $filename
+        # get $compressedLength
+
+        my $inf = new IO::Uncompress::RawInflate $fh,
+                            Transparent => 1,
+                            InputLength => $compressedLength
+          or die "Cannot uncompress $file [$filename]: $RawInflateError\n"  ;
+
+        my $line_count = 0;
+
+        while (<$inf>)
+        {
+            ++ $line_count;
+        }
+
+        print "$filename: $line_count\n";
+    }
+
+The call to C<IO::Uncompress::RawInflate> creates a new filehandle C<$inf>
+that can be used to read from the parent filehandle C<$fh>, uncompressing
+it as it goes. The use of the C<InputLength> option will guarantee that
+I<at most> C<$compressedLength> bytes of compressed data will be read from
+the C<$fh> filehandle (The only exception is for an error case like a
+truncated file or a corrupt data stream).
+
+This means that once RawInflate is finished C<$fh> will be left at the
+byte directly after the compressed data stream. 
+
+Now consider what the code looks like without C<InputLength> 
+
+    while (1) {
+    
+        # read local zip header data
+        # get $filename
+        # get $compressedLength
+
+        # read all the compressed data into $data
+        read($fh, $data, $compressedLength);
+
+        my $inf = new IO::Uncompress::RawInflate \$data,
+                            Transparent => 1,
+          or die "Cannot uncompress $file [$filename]: $RawInflateError\n"  ;
+
+        my $line_count = 0;
+
+        while (<$inf>)
+        {
+            ++ $line_count;
+        }
+
+        print "$filename: $line_count\n";
+    }
+
+The difference here is the addition of the temporary variable C<$data>.
+This is used to store a copy of the compressed data while it is being
+uncompressed.
+
+If you know that C<$compressedLength> isn't that big then using temporary
+storage won't be a problem. But if C<$compressedLength> is very large or
+you are writing an application that other people will use, and so have no
+idea how big C<$compressedLength> will be, it could be an issue.
+
+Using C<InputLength> avoids the use of temporary storage and means the
+application can cope with large compressed data streams.
+
+One final point -- obviously C<InputLength> can only be used whenever you
+know the length of the compressed data beforehand, like here with a zip
+file. 
+
+=head1 SEE ALSO
+
+L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
+
+L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
+
+L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
+L<Archive::Tar|Archive::Tar>,
+L<IO::Zlib|IO::Zlib>
+
+=head1 AUTHOR
+
+This module was written by Paul Marquess, F<pmqs@cpan.org>. 
+
+=head1 MODIFICATION HISTORY
+
+See the Changes file.
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2005-2009 Paul Marquess. All rights reserved.
+
+This program is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
author	Nicholas Clark <nick@ccl4.org>	2009-10-02 11:11:19 +0100
committer	Nicholas Clark <nick@ccl4.org>	2009-10-02 11:11:19 +0100
commit	3fd969f44926f311e1c67d9470a9e936f7af2d73 (patch)
tree	ce6e701f0f80bfd0de9befe7b1bf766e37a6cfbb /cpan/IO-Compress/pod
parent	70b2007073159a8b94a74b0b9ba406945c45917d (diff)
download	perl-3fd969f44926f311e1c67d9470a9e936f7af2d73.tar.gz