summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Changes167
-rw-r--r--MANIFEST39
-rw-r--r--META.yml17
-rw-r--r--Makefile.PL22
-rw-r--r--README37
-rw-r--r--TODO33
-rw-r--r--extras/FileSlurp_12.pm260
-rw-r--r--extras/slurp_article.pod743
-rwxr-xr-xextras/slurp_bench.pl587
-rwxr-xr-xlib/File/Slurp.pm1261
-rw-r--r--t/TestDriver.pm91
-rw-r--r--t/append_null.t24
-rw-r--r--t/binmode.t50
-rw-r--r--t/chomp.t53
-rw-r--r--t/data_list.t62
-rw-r--r--t/data_scalar.t62
-rw-r--r--t/edit_file.t107
-rw-r--r--t/error.t125
-rw-r--r--t/error_mode.t59
-rw-r--r--t/file_object.t75
-rw-r--r--t/handle.t222
-rw-r--r--t/inode.t44
-rw-r--r--t/large.t175
-rw-r--r--t/newline.t52
-rw-r--r--t/no_clobber.t26
-rw-r--r--t/original.t55
-rw-r--r--t/paragraph.t64
-rw-r--r--t/perms.t31
-rw-r--r--t/pod.t13
-rw-r--r--t/pod_coverage.t24
-rw-r--r--t/prepend_file.t74
-rw-r--r--t/pseudo.t34
-rw-r--r--t/read_dir.t66
-rw-r--r--t/signal.t34
-rw-r--r--t/slurp.t19
-rw-r--r--t/stdin.t23
-rw-r--r--t/stringify.t45
-rw-r--r--t/tainted.t69
-rw-r--r--t/write_file_win32.t29
39 files changed, 4973 insertions, 0 deletions
diff --git a/Changes b/Changes
new file mode 100644
index 0000000..379f9c9
--- /dev/null
+++ b/Changes
@@ -0,0 +1,167 @@
+Revision history File::Slurp
+
+9999.19 Tue Jun 7 04:06:06 EDT 2011
+ - Fixed use line in t/edit_file.t to import :edit first
+ Thanks to paul
+ - read_file and write_file work even when interrupted by signals
+ this includes a test for read_file interrupt
+ Thanks to Andrew Danforth
+ - Fixed bugs in the config synopsis example
+
+9999.18 Fri May 13 02:30:05 EDT 2011
+ - Added :std and :edit export tags
+ - Cleaned up EXPORT vars
+ - Documented importing edit_file and edit_file_lines
+ - Fixed some pod spelling
+
+9999.17 Wed Apr 27 02:20:03 EDT 2011
+ - Requiring Perl 5.6.2 (first time older Perls were dropped)
+ This is because of use of the re 'taint' pragma
+ - Added major new features: edit_file and edit_file_lines
+ - Speed up of tainted slurp with return of lines
+ - Added chomp option to read_file
+ - Added prefix option to read_dir
+ - Fixed optimization of reading small files.
+
+9999.16 Wed Apr 13 03:47:26 EDT 2011
+ - Added support for read_file options to be a hash reference.
+ - Added support for read_dir options to be a hash reference.
+ - Added new feature prepend_file
+ - Fixed bug with array_ref in list context. was introduced by .15/.14
+ Thanks to Norbert Gruener
+ - Cleaned up some pod
+
+9999.15 Thu Mar 24 16:40:19 EDT 2011
+ - Fixed error.t test so it works when run as root
+ - Removed skip lines from error.t
+ - Fixed pod about binmode option to reflect changes in .14
+
+9999.14 Sun Mar 20 16:26:47 EDT 2011
+ - Added LICENCE (same as perl) to POD
+ - Added special faster code to slurp in small text files which
+ is a common case
+ - Rewrote the extras/slurp_bench.pl script. It has a full
+ legend, better CLI options, size is selectable, benchmark
+	  entries have more consistent names and it compares the new
+ fast slurp for small files to the general slurp code.
+ Thanks to Mark Friendlich
+ - Cleaned up pod
+ - Added more Synopsis examples
+ - Added t/error.t to actually test error conditions. Previous
+ error.t was renamed to error_mode.t which better reflects its
+ tests.
+ - t/error.t uses a new test driver module. this may get used by
+ other tests in the future.
+ - Fixed check for SEEK_SET and other constant subs being defined
+ - Added support for binmode other than :raw and binmode.t test
+ Thanks to Martin J. Evans, Peter Edwards, Bryce Nesbitt
+ - Added support for perms option in write_file and perms.t test
+ Thanks to Peter Corlett and Paul Miller
+ - Added check to the rename call in atomic mode. Tested in error.t.
+ Thanks to Daniel Scott Sterling
+ - Added POD to state that using scalar_ref or buf_ref will be faster
+ and save memory due to not making a copy
+ Thanks to Peter Edwards
+ - read_file in list mode keeps data tainted
+ Thanks to Sébastien Aperghis-Tramoni
+ - read_file checks for an overloaded object to get the file
+ name.
+ Thanks to Sébastien Aperghis-Tramoni
+
+9999.13 Tue Oct 10 02:04:51 EDT 2006
+ - Refactored the extras/slurp_bench.pl script. It has options,
+	  a key to the benchmarks, help and more benchmarks.
+ - Reordered changes so recent entries are first
+ - Added error check on atomic rename and test for it
+ Thanks to Daniel Scott Sterling
+
+9999.12 Thu Feb 2 02:26:31 EST 2006
+ - Fixed bug on windows with classic slurping and File::Slurp not
+ agreeing on newline conversion.
+ - Added t/newline.t test to check for that fix.
+ - When passing text data by scalar reference to write_file under
+ windows, the buffer is copied so the newline conversion won't
+ modify the caller's data.
+ - Thanks to Johan Lodin <lodin@cpan.org> for a test script which
+ I modified into t/newline.t
+
+9999.11 Fri Jan 20 01:24:00 EDT 2005
+ - Quick release to remove code that forced the faked SEEK_*
+ values to be used. Showed up when tested on OSX which doesn't
+ need that backport.
+
+9999.10 Thu Jan 19 11:38:00 EDT 2005
+ - t/*.t modules don't use Fcntl.pm
+ - using POSIX qw( :fcntl_h ) instead of Fcntl qw( :seek ) for
+	  backwards compatibility to 5.00503
+ - added conditional definitions of SEEK_* and O_* subs as they are not
+ defined in perl 5.004
+ - File::Slurp now runs on perl 5.004 and newer (see BUGS section)
+ All of the above thanks to Smylers <Smylers@stripey.com>,
+ Piers Kent <piers.kent@bbc.co.uk> and
+ John Alden <john.alden@bbc.co.uk>
+ - Added pod.t and pod_coverage.t tests. This is to pass all
+ the CPANTS tests.
+
+9999.09 Tue Apr 19 01:21:55 EDT 2005
+ - t/original.t and read_dir.t no longer search for tempdirs. they just
+ use the current dir which should be in the build directory
+	- t/readdir.t renamed to read_dir.t for consistency
+	- write_file return values are documented
+ Thanks to Adam Kennedy <adamk@cpan.org>
+ - added no_clobber option to write_file and t/no_clobber.t test for it
+ Thanks to <pagaltzis@gmx.de>
+ - fixed bug when appending a null string to a file which then
+ truncates it. seems to be an odd way for linux and OS X to
+ handle O_APPEND mode on sysopen. they don't seek to the end of
+ the file so it gets truncated. fixed by adding a seek to the
+	  end if in append mode.
+ Thanks to Chris Dolan <cdolan@cpan.org>
+
+9999.08 Sat Apr 16 01:01:27 EDT 2005
+ - read_dir returns an array ref in scalar context
+ - read_dir keeps . and .. if keep_dot_dot option is set.
+ Thanks to John Alden <john.alden@bbc.co.uk>
+ - slurp() is an optional exported alias to read_file
+ Thanks to Damian Conway <damian@conway.org>
+
+9999.07 Tue Jan 25 01:33:11 EST 2005
+ - Slurping in pseudo files (as in /proc) which show a size of 0
+ but actually have data works. This seems to be the case on
+ linux but on Solaris those files show their proper size.
+ Thanks to Juerd Waalboer <juerd@cpan.org>
+
+9999.06 Mon Sep 20 01:57:00 EDT 2004
+ - Slurping the DATA handle now works without the workaround.
+ tests are in t/data_scalar.t and t/data_list.t
+ - Paragraph mode in read_file is supported. As with <> when $/
+ (input record separator) is set to '', then the input file is
+ split on multiple newlines (/\n\n+/).
+ Thanks to Geoffrey Leach <geoff@direcway.com>
+
+9999.05 Tue Feb 24 21:14:55 EST 2004
+ - skip handle tests where socketpair is not supported (pre 5.8
+ on windows)
+ Thanks to Mike Arms <marms@sandia.gov>
+
+9999.04 Mon Feb 23 14:20:52 EST 2004
+ - fixed DATA handle bug in t/handle.t (not seen on most OS's)
+ Thanks to James Willmore <jwillmore@adelphia.net>
+
+9999.03 Mon Dec 22 01:44:43 EST 2003
+ - fixed DATA handle bugs in t/handle.t on osx (should be fixed
+ on BSD as well)
+ - added more comments to code
+
+9999.02 Wed Dec 17 03:40:49 EST 2003
+ - skip DATA test in handle.t on OSX (bug in perl with sysread on DATA)
+ - changed checking if file handle from fileno to ref
+ from Randal Schwartz <merlyn@stonehenge.com>
+ - added support for atomic spewing
+ - added new test stdin.t for the fileno/ref change
+ - added new test inode.t to test atomic spewing
+
+9999.01 Mon Sep 1 00:20:56 2003
+ - original version; created by h2xs 1.21 with options
+ -AX -n File::FastSlurp
+
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..f4c0506
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,39 @@
+Changes
+lib/File/Slurp.pm
+Makefile.PL
+MANIFEST
+README
+TODO
+t/TestDriver.pm
+t/append_null.t
+t/binmode.t
+t/chomp.t
+t/data_list.t
+t/data_scalar.t
+t/edit_file.t
+t/error.t
+t/error_mode.t
+t/file_object.t
+t/handle.t
+t/inode.t
+t/large.t
+t/newline.t
+t/no_clobber.t
+t/original.t
+t/paragraph.t
+t/perms.t
+t/pod.t
+t/pod_coverage.t
+t/prepend_file.t
+t/pseudo.t
+t/read_dir.t
+t/signal.t
+t/slurp.t
+t/stdin.t
+t/stringify.t
+t/tainted.t
+t/write_file_win32.t
+extras/slurp_bench.pl
+extras/FileSlurp_12.pm
+extras/slurp_article.pod
+META.yml Module meta-data (added by MakeMaker)
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..2ab5ccf
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,17 @@
+--- #YAML:1.0
+name: File-Slurp
+version: 9999.19
+abstract: Simple and Efficient Reading/Writing/Modifying of Complete Files
+license: perl
+author:
+ - Uri Guttman <uri@stemsystems.com>
+generated_by: ExtUtils::MakeMaker version 6.42
+distribution_type: module
+requires:
+ Carp: 0
+ Exporter: 0
+ Fcntl: 0
+ POSIX: 0
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.3.html
+ version: 1.3
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..364dc45
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,22 @@
+use strict ;
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+ 'NAME' => 'File::Slurp',
+ 'LICENSE' => 'perl',
+ 'AUTHOR' => 'Uri Guttman <uri@stemsystems.com>',
+ 'VERSION_FROM' => 'lib/File/Slurp.pm',
+ 'ABSTRACT_FROM' => 'lib/File/Slurp.pm',
+ 'META_MERGE' => {
+ requires => {
+ perl => 5.004,
+ },
+ },
+ 'PREREQ_PM' => {
+ 'Carp' => 0,
+ 'Exporter' => 0,
+ 'Fcntl' => 0,
+ 'POSIX' => 0,
+ },
+);
diff --git a/README b/README
new file mode 100644
index 0000000..c499b17
--- /dev/null
+++ b/README
@@ -0,0 +1,37 @@
+File::Slurp.pm
+===========================
+
+This module provides subroutines to read or write entire files with a
+simple call. It also has a subroutine for reading the list of filenames
+in a directory.
+
+In the extras/ directory you can read an article (slurp_article.pod)
+about file slurping and also run a benchmark (slurp_bench.pl) that
+compares many ways of slurping/spewing files. This benchmark was
+rewritten for .14 and is much better.
+
+This module was first written and owned by David Muir Sharnoff (MUIR on
+CPAN). I checked out his module and decided to write a new version
+which would be faster and with many more features. To that end, David
+graciously transferred the namespace to me.
+
+There have been some comments about the somewhat unusual version number.
+The problem was that David used a future date (2004.0904) in his version
+number, and the only way I could get CPAN to index my new module was to
+make it have a version number higher than the old one, so I chose the
+9999 prefix and appended the real revision number to it.
+
+INSTALLATION
+
+To install this module type the following:
+
+ perl Makefile.PL
+ make
+ make test
+ make install
+
+COPYRIGHT AND LICENCE
+
+Copyright (C) 2010 Uri Guttman <uri@PerlHunter.com>
+
+Licensed the same as Perl.
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..9af7980
--- /dev/null
+++ b/TODO
@@ -0,0 +1,33 @@
+
+File::Slurp TODO
+
+NEW FEATURES
+
+prepend_file() -- prepend text to the front of a file
+
+ options: lock file? enable atomic
+
+edit_file() -- slurp into $_, call edit code block, write out $_
+
+ options: lock file?
+
+edit_file_lines() -- slurp each line into $_, call edit code block,
+ write out $_
+
+ options: lock file?
+
+read_file_lines()
+ reads lines to array ref or list
+ same as $list = read_file( $file, { array_ref => 1 }
+ or @lines = read_file()
+
+new options for read_dir
+ prepend -- prepend the dir name to each dir entry.
+ filter -- grep dir entries with qr// or code ref.
+
+BUGS:
+
+restart sysread/write after a signal (or check i/o count)
+
+FEATURE REQUESTS
+
diff --git a/extras/FileSlurp_12.pm b/extras/FileSlurp_12.pm
new file mode 100644
index 0000000..5f24792
--- /dev/null
+++ b/extras/FileSlurp_12.pm
@@ -0,0 +1,260 @@
+package FileSlurp_12;
+
+use strict;
+
+use Carp ;
+use Fcntl qw( :DEFAULT ) ;
+use POSIX qw( :fcntl_h ) ;
+use Symbol ;
+
+use base 'Exporter' ;
+use vars qw( %EXPORT_TAGS @EXPORT_OK $VERSION @EXPORT ) ;
+
+%EXPORT_TAGS = ( 'all' => [
+ qw( read_file write_file overwrite_file append_file read_dir ) ] ) ;
+
+@EXPORT = ( @{ $EXPORT_TAGS{'all'} } );
+@EXPORT_OK = qw( slurp ) ;
+
+$VERSION = '9999.13';
+
+my $is_win32 = $^O =~ /win32/i ;
+
+# Install subs for various constants that aren't set in older perls
+# (< 5.005). Fcntl on old perls uses Exporter to define subs without a
+# () prototype These can't be overridden with the constant pragma or
+# we get a prototype mismatch. Hence this less than aesthetically
+# appealing BEGIN block:
+
+BEGIN {
+ unless( eval { defined SEEK_SET() } ) {
+ *SEEK_SET = sub { 0 };
+ *SEEK_CUR = sub { 1 };
+ *SEEK_END = sub { 2 };
+ }
+
+ unless( eval { defined O_BINARY() } ) {
+ *O_BINARY = sub { 0 };
+ *O_RDONLY = sub { 0 };
+ *O_WRONLY = sub { 1 };
+ }
+
+ unless ( eval { defined O_APPEND() } ) {
+
+ if ( $^O =~ /olaris/ ) {
+ *O_APPEND = sub { 8 };
+ *O_CREAT = sub { 256 };
+ *O_EXCL = sub { 1024 };
+ }
+ elsif ( $^O =~ /inux/ ) {
+ *O_APPEND = sub { 1024 };
+ *O_CREAT = sub { 64 };
+ *O_EXCL = sub { 128 };
+ }
+ elsif ( $^O =~ /BSD/i ) {
+ *O_APPEND = sub { 8 };
+ *O_CREAT = sub { 512 };
+ *O_EXCL = sub { 2048 };
+ }
+ }
+}
+
+# print "OS [$^O]\n" ;
+
+# print "O_BINARY = ", O_BINARY(), "\n" ;
+# print "O_RDONLY = ", O_RDONLY(), "\n" ;
+# print "O_WRONLY = ", O_WRONLY(), "\n" ;
+# print "O_APPEND = ", O_APPEND(), "\n" ;
+# print "O_CREAT ", O_CREAT(), "\n" ;
+# print "O_EXCL ", O_EXCL(), "\n" ;
+
+
+*slurp = \&read_file ;
+
+sub read_file {
+
+ my( $file_name, %args ) = @_ ;
+
+# set the buffer to either the passed in one or ours and init it to the null
+# string
+
+ my $buf ;
+ my $buf_ref = $args{'buf_ref'} || \$buf ;
+ ${$buf_ref} = '' ;
+
+ my( $read_fh, $size_left, $blk_size ) ;
+
+# check if we are reading from a handle (glob ref or IO:: object)
+
+ if ( ref $file_name ) {
+
+# slurping a handle so use it and don't open anything.
+# set the block size so we know it is a handle and read that amount
+
+ $read_fh = $file_name ;
+ $blk_size = $args{'blk_size'} || 1024 * 1024 ;
+ $size_left = $blk_size ;
+
+# DEEP DARK MAGIC. this checks the UNTAINT IO flag of a
+# glob/handle. only the DATA handle is untainted (since it is from
+# trusted data in the source file). this allows us to test if this is
+# the DATA handle and then to do a sysseek to make sure it gets
+# slurped correctly. on some systems, the buffered i/o pointer is not
+# left at the same place as the fd pointer. this sysseek makes them
+# the same so slurping with sysread will work.
+
+ eval{ require B } ;
+
+ if ( $@ ) {
+
+ @_ = ( \%args, <<ERR ) ;
+Can't find B.pm with this Perl: $!.
+That module is needed to slurp the DATA handle.
+ERR
+ goto &_error ;
+ }
+
+ if ( B::svref_2object( $read_fh )->IO->IoFLAGS & 16 ) {
+
+# set the seek position to the current tell.
+
+ sysseek( $read_fh, tell( $read_fh ), SEEK_SET ) ||
+ croak "sysseek $!" ;
+ }
+ }
+ else {
+
+# a regular file. set the sysopen mode
+
+ my $mode = O_RDONLY ;
+ $mode |= O_BINARY if $args{'binmode'} ;
+
+#printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ;
+
+# open the file and handle any error
+
+ $read_fh = gensym ;
+ unless ( sysopen( $read_fh, $file_name, $mode ) ) {
+ @_ = ( \%args, "read_file '$file_name' - sysopen: $!");
+ goto &_error ;
+ }
+
+# get the size of the file for use in the read loop
+
+ $size_left = -s $read_fh ;
+
+ unless( $size_left ) {
+
+ $blk_size = $args{'blk_size'} || 1024 * 1024 ;
+ $size_left = $blk_size ;
+ }
+ }
+
+# infinite read loop. we exit when we are done slurping
+
+ while( 1 ) {
+
+# do the read and see how much we got
+
+ my $read_cnt = sysread( $read_fh, ${$buf_ref},
+ $size_left, length ${$buf_ref} ) ;
+
+ if ( defined $read_cnt ) {
+
+# good read. see if we hit EOF (nothing left to read)
+
+ last if $read_cnt == 0 ;
+
+# loop if we are slurping a handle. we don't track $size_left then.
+
+ next if $blk_size ;
+
+# count down how much we read and loop if we have more to read.
+ $size_left -= $read_cnt ;
+ last if $size_left <= 0 ;
+ next ;
+ }
+
+# handle the read error
+
+ @_ = ( \%args, "read_file '$file_name' - sysread: $!");
+ goto &_error ;
+ }
+
+# fix up cr/lf to be a newline if this is a windows text file
+
+ ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$args{'binmode'} ;
+
+# this is the 5 returns in a row. each handles one possible
+# combination of caller context and requested return type
+
+ my $sep = $/ ;
+ $sep = '\n\n+' if defined $sep && $sep eq '' ;
+
+# caller wants to get an array ref of lines
+
+# this split doesn't work since it tries to use variable length lookbehind
+# the m// line works.
+# return [ split( m|(?<=$sep)|, ${$buf_ref} ) ] if $args{'array_ref'} ;
+ return [ length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ]
+ if $args{'array_ref'} ;
+
+# caller wants a list of lines (normal list context)
+
+# same problem with this split as before.
+# return split( m|(?<=$sep)|, ${$buf_ref} ) if wantarray ;
+ return length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : ()
+ if wantarray ;
+
+# caller wants a scalar ref to the slurped text
+
+ return $buf_ref if $args{'scalar_ref'} ;
+
+# caller wants a scalar with the slurped text (normal scalar context)
+
+ return ${$buf_ref} if defined wantarray ;
+
+# caller passed in an i/o buffer by reference (normal void context)
+
+ return ;
+}
+
+
+# error handling section
+#
+# all the error handling uses magic goto so the caller will get the
+# error message as if from their code and not this module. if we just
+# did a call on the error code, the carp/croak would report it from
+# this module since the error sub is one level down on the call stack
+# from read_file/write_file/read_dir.
+
+
+my %err_func = (
+ 'carp' => \&carp,
+ 'croak' => \&croak,
+) ;
+
+sub _error {
+
+ my( $args, $err_msg ) = @_ ;
+
+# get the error function to use
+
+ my $func = $err_func{ $args->{'err_mode'} || 'croak' } ;
+
+# if we didn't find it in our error function hash, they must have set
+# it to quiet and we don't do anything.
+
+ return unless $func ;
+
+# call the carp/croak function
+
+ $func->($err_msg) ;
+
+# return a hard undef (in list context this will be a single value of
+# undef which is not a legal in-band value)
+
+ return undef ;
+}
+
+1;
diff --git a/extras/slurp_article.pod b/extras/slurp_article.pod
new file mode 100644
index 0000000..8b000f7
--- /dev/null
+++ b/extras/slurp_article.pod
@@ -0,0 +1,743 @@
+=head1 Perl Slurp Ease
+
+=head2 Introduction
+
+
+One of the common Perl idioms is processing text files line by line:
+
+ while( <FH> ) {
+ do something with $_
+ }
+
+This idiom has several variants, but the key point is that it reads in
+only one line from the file in each loop iteration. This has several
+advantages, including limiting memory use to one line, the ability to
+handle any size file (including data piped in via STDIN), and it is
+easily taught and understood to Perl newbies. In fact newbies are the
+ones who do silly things like this:
+
+ while( <FH> ) {
+ push @lines, $_ ;
+ }
+
+ foreach ( @lines ) {
+ do something with $_
+ }
+
+Line by line processing is fine, but it isn't the only way to deal with
+reading files. The other common style is reading the entire file into a
+scalar or array, and that is commonly known as slurping. Now, slurping has
+somewhat of a poor reputation, and this article is an attempt at
+rehabilitating it. Slurping files has advantages and limitations, and is
+not something you should just do when line by line processing is fine.
+It is best when you need the entire file in memory for processing all at
+once. Slurping with in memory processing can be faster and lead to
+simpler code than line by line if done properly.
+
+The biggest issue to watch for with slurping is file size. Slurping very
+large files or unknown amounts of data from STDIN can be disastrous to
+your memory usage and cause swap disk thrashing. You can slurp STDIN if
+you know that you can handle the maximum size input without
+detrimentally affecting your memory usage. So I advocate slurping only
+disk files and only when you know their size is reasonable and you have
+a real reason to process the file as a whole. Note that reasonable size
+these days is larger than the bad old days of limited RAM. Slurping in a
+megabyte is not an issue on most systems. But most of the
+files I tend to slurp in are much smaller than that. Typical files that
+work well with slurping are configuration files, (mini-)language scripts,
+some data (especially binary) files, and other files of known sizes
+which need fast processing.
+
+Another major win for slurping over line by line is speed. Perl's IO
+system (like many others) is slow. Calling C<< <> >> for each line
+requires a check for the end of line, checks for EOF, copying a line,
+munging the internal handle structure, etc. Plenty of work for each line
+read in. Whereas slurping, if done correctly, will usually involve only
+one I/O call and no extra data copying. The same is true for writing
+files to disk, and we will cover that as well (even though the term
+slurping is traditionally a read operation, I use the term ``slurp'' for
+the concept of doing I/O with an entire file in one operation).
+
+Finally, when you have slurped the entire file into memory, you can do
+operations on the data that are not possible or easily done with line by
+line processing. These include global search/replace (without regard for
+newlines), grabbing all matches with one call of C<//g>, complex parsing
+(which in many cases must ignore newlines), processing *ML (where line
+endings are just white space) and performing complex transformations
+such as template expansion.
+
+=head2 Global Operations
+
+Here are some simple global operations that can be done quickly and
+easily on an entire file that has been slurped in. They could also be
+done with line by line processing but that would be slower and require
+more code.
+
+A common problem is reading in a file with key/value pairs. There are
+modules which do this but who needs them for simple formats? Just slurp
+in the file and do a single parse to grab all the key/value pairs.
+
+ my $text = read_file( $file ) ;
+ my %config = $text =~ /^(\w+)=(.+)$/mg ;
+
+That matches a key which starts a line (anywhere inside the string
+because of the C</m> modifier), the '=' char and the text to the end of the
+line (again, C</m> makes that work). In fact the ending C<$> is not even needed
+since C<.> will not normally match a newline. Since the key and value are
+grabbed and the C<m//> is in list context with the C</g> modifier, it will
+grab all key/value pairs and return them. The C<%config> hash will be
+assigned this list and now you have the file fully parsed into a hash.
+
+Various projects I have worked on needed some simple templating and I
+wasn't in the mood to use a full module (please, no flames about your
+favorite template module :-). So I rolled my own by slurping in the
+template file, setting up a template hash and doing this one line:
+
+ $text =~ s/<%(.+?)%>/$template{$1}/g ;
+
+That only works if the entire file was slurped in. With a little
+extra work it can handle chunks of text to be expanded:
+
+ $text =~ s/<%(\w+)_START%>(.+?)<%\1_END%>/ template($1, $2)/sge ;
+
+Just supply a C<template> sub to expand the text between the markers and
+you have yourself a simple system with minimal code. Note that this will
+work and grab over multiple lines due to the C</s> modifier. This is
+something that is much trickier with line by line processing.
+
+Note that this is a very simple templating system, and it can't directly
+handle nested tags and other complex features. But even if you use one
+of the myriad of template modules on the CPAN, you will gain by having
+speedier ways to read and write files.
+
+Slurping in a file into an array also offers some useful advantages.
+One simple example is reading in a flat database where each record has
+fields separated by a character such as C<:>:
+
+ my @pw_fields = map [ split /:/ ], read_file( '/etc/passwd' ) ;
+
+Random access to any line of the slurped file is another advantage. Also
+a line index could be built to speed up searching the array of lines.
+
+
+=head2 Traditional Slurping
+
+Perl has always supported slurping files with minimal code. Slurping of
+a file to a list of lines is trivial, just call the C<< <> >> operator
+in a list context:
+
+ my @lines = <FH> ;
+
+and slurping to a scalar isn't much more work. Just set the built in
+variable C<$/> (the input record separator) to the undefined value and read
+in the file with C<< <> >>:
+
+ {
+ local( $/, *FH ) ;
+	open( FH, $file ) or die "sudden flaming death\n" ;
+ $text = <FH>
+ }
+
+Notice the use of C<local()>. It sets C<$/> to C<undef> for you and when
+the scope exits it will revert C<$/> back to its previous value (most
+likely "\n").
+
+Here is a Perl idiom that allows the C<$text> variable to be declared,
+and there is no need for a tightly nested block. The C<do> block will
+execute C<< <FH> >> in a scalar context and slurp in the file named by
+C<$text>:
+
+ local( *FH ) ;
+	open( FH, $file ) or die "sudden flaming death\n" ;
+ my $text = do { local( $/ ) ; <FH> } ;
+
+Both of those slurps used localized filehandles to be compatible with
+5.005. Here they are with 5.6.0 lexical autovivified handles:
+
+ {
+ local( $/ ) ;
+	open( my $fh, $file ) or die "sudden flaming death\n" ;
+ $text = <$fh>
+ }
+
+	open( my $fh, $file ) or die "sudden flaming death\n" ;
+ my $text = do { local( $/ ) ; <$fh> } ;
+
+And this is a variant of that idiom that removes the need for the open
+call:
+
+ my $text = do { local( @ARGV, $/ ) = $file ; <> } ;
+
+The filename in C<$file> is assigned to a localized C<@ARGV> and the
+null filehandle is used which reads the data from the files in C<@ARGV>.
+
+Instead of assigning to a scalar, all the above slurps can assign to an
+array and it will get the file but split into lines (using C<$/> as the
+end of line marker).
+
+There is one common variant of those slurps which is very slow and not
+good code. You see it around, and it is almost always cargo cult code:
+
+ my $text = join( '', <FH> ) ;
+
+That needlessly splits the input file into lines (C<join> provides a
+list context to C<< <FH> >>) and then joins up those lines again. The
+original coder of this idiom obviously never read I<perlvar> and learned
+how to use C<$/> to allow scalar slurping.
+
+=head2 Write Slurping
+
+While reading in entire files at one time is common, writing out entire
+files is also done. We call it ``slurping'' when we read in files, but
+there is no commonly accepted term for the write operation. I asked some
+Perl colleagues and got two interesting nominations. Peter Scott said to
+call it ``burping'' (rhymes with ``slurping'' and suggests movement in
+the opposite direction). Others suggested ``spewing'' which has a
+stronger visual image :-) Tell me your favorite or suggest your own. I
+will use both in this section so you can see how they work for you.
+
+Spewing a file is a much simpler operation than slurping. You don't have
+context issues to worry about and there is no efficiency problem with
+returning a buffer. Here is a simple burp subroutine:
+
+ sub burp {
+ my( $file_name ) = shift ;
+ open( my $fh, ">$file_name" ) ||
+ die "can't create $file_name $!" ;
+ print $fh @_ ;
+ }
+
+Note that it doesn't copy the input text but passes @_ directly to
+print. We will look at faster variations of that later on.
+
+=head2 Slurp on the CPAN
+
+As you would expect there are modules in the CPAN that will slurp files
+for you. The two I found are called Slurp.pm (by Rob Casey - ROBAU on
+CPAN) and File::Slurp.pm (by David Muir Sharnoff - MUIR on CPAN).
+
+Here is the code from Slurp.pm:
+
+ sub slurp {
+ local( $/, @ARGV ) = ( wantarray ? $/ : undef, @_ );
+ return <ARGV>;
+ }
+
+ sub to_array {
+ my @array = slurp( @_ );
+ return wantarray ? @array : \@array;
+ }
+
+ sub to_scalar {
+ my $scalar = slurp( @_ );
+ return $scalar;
+ }
+
+The subroutine C<slurp()> uses the magic undefined value of C<$/> and
+the magic file handle C<ARGV> to support slurping into a scalar or
+array. It also provides two wrapper subs that allow the caller to
+control the context of the slurp. And the C<to_array()> subroutine will
+return the list of slurped lines or an anonymous array of them according
+to its caller's context by checking C<wantarray>. It has 'slurp' in
+C<@EXPORT> and all three subroutines in C<@EXPORT_OK>.
+
+<Footnote: Slurp.pm is poorly named and it shouldn't be in the top level
+namespace.>
+
+The original File::Slurp.pm has this code:
+
+sub read_file
+{
+ my ($file) = @_;
+
+ local($/) = wantarray ? $/ : undef;
+ local(*F);
+ my $r;
+ my (@r);
+
+ open(F, "<$file") || croak "open $file: $!";
+ @r = <F>;
+ close(F) || croak "close $file: $!";
+
+ return $r[0] unless wantarray;
+ return @r;
+}
+
+This module provides several subroutines including C<read_file()> (more
+on the others later). C<read_file()> behaves simularly to
+C<Slurp::slurp()> in that it will slurp a list of lines or a single
+scalar depending on the caller's context. It also uses the magic
+undefined value of C<$/> for scalar slurping but it uses an explicit
+open call rather than using a localized C<@ARGV> as the other module
+did. Also it doesn't provide a way to get an anonymous array of the
+lines but that can easily be rectified by calling it inside an anonymous
+array constructor C<[]>.
+
+Both of these modules make it easier for Perl coders to slurp in
+files. They both use the magic C<$/> to slurp in scalar mode and the
+natural behavior of C<< <> >> in list context to slurp as lines. But
+neither is optimized for speed nor can they handle C<binmode()> to
+support binary or unicode files. See below for more on slurp features
+and speedups.
+
+=head2 Slurping API Design
+
+The slurp modules on CPAN have a very simple API and don't support
+C<binmode()>. This section will cover various API design issues such as
+efficient return by reference, C<binmode()> and calling variations.
+
+Let's start with the call variations. Slurped files can be returned in
+four formats: as a single scalar, as a reference to a scalar, as a list
+of lines or as an anonymous array of lines. But the caller can only
+provide two contexts: scalar or list. So we have to either provide an
+API with more than one subroutine (as Slurp.pm did) or just provide one
+subroutine which only returns a scalar or a list (not an anonymous
+array) as File::Slurp does.
+
+I have used my own C<read_file()> subroutine for years and it has the
+same API as File::Slurp: a single subroutine that returns a scalar or a
+list of lines depending on context. But I recognize the interest of
+those that want an anonymous array for line slurping. For one thing, it
+is easier to pass around to other subs and for another, it eliminates
+the extra copying of the lines via C<return>. So my module provides only
+one slurp subroutine that returns the file data based on context and any
+format options passed in. There is no need for a specific
+slurp-in-as-a-scalar or list subroutine as the general C<read_file()>
+sub will do that by default in the appropriate context. If you want
+C<read_file()> to return a scalar reference or anonymous array of lines,
+you can request those formats with options. You can even pass in a
+reference to a scalar (e.g. a previously allocated buffer) and have that
+filled with the slurped data (and that is one of the fastest slurp
+modes. see the benchmark section for more on that). If you want to
+slurp a scalar into an array, just select the desired array element and
+that will provide scalar context to the C<read_file()> subroutine.
+
+The next area to cover is what to name the slurp sub. I will go with
+C<read_file()>. It is descriptive and keeps compatibility with my
+current module, and it doesn't use the 'slurp' nickname (though that nickname
+is in the module name). Also I decided to keep the File::Slurp
+namespace which was graciously handed over to me by its current owner,
+David Muir.
+
+Another critical area when designing APIs is how to pass in
+arguments. The C<read_file()> subroutine takes one required argument
+which is the file name. To support C<binmode()> we need another optional
+argument. A third optional argument is needed to support returning a
+slurped scalar by reference. My first thought was to design the API with
+3 positional arguments - file name, buffer reference and binmode. But if
+you want to set the binmode and not pass in a buffer reference, you have
+to fill the second argument with C<undef> and that is ugly. So I decided
+to make the filename argument positional and the other two named. The
+subroutine starts off like this:
+
+ sub read_file {
+
+ my( $file_name, %args ) = @_ ;
+
+ my $buf ;
+ my $buf_ref = $args{'buf'} || \$buf ;
+
+The other sub (C<read_file_lines()>) will only take an optional binmode
+(so you can read files with binary delimiters). It doesn't need a buffer
+reference argument since it can return an anonymous array if it is called
+in a scalar context. So this subroutine could use positional arguments,
+but to keep its API similar to the API of C<read_file()>, it will also
+use pass by name for the optional arguments. This also means that new
+optional arguments can be added later without breaking any legacy
+code. A bonus with keeping the API the same for both subs will be seen
+how the two subs are optimized to work together.
+
+Write slurping (or spewing or burping :-)) needs to have its API
+designed as well. The biggest issue is not only needing to support
+optional arguments but a list of arguments to be written is needed. Perl
+6 will be able to handle that with optional named arguments and a final
+slurp argument. Since this is Perl 5 we have to do it using some
+cleverness. The first argument is the file name and it will be
+positional as with the C<read_file> subroutine. But how can we pass in
+the optional arguments and also a list of data? The solution lies in the
+fact that the data list should never contain a reference.
+Burping/spewing works only on plain data. So if the next argument is a
+hash reference, we can assume it contains the optional arguments and
+the rest of the arguments is the data list. So the C<write_file()>
+subroutine will start off like this:
+
+ sub write_file {
+
+ my $file_name = shift ;
+
+ my $args = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
+
+Whether or not optional arguments are passed in, we leave the data list
+in C<@_> to minimize any more copying. You call C<write_file()> like this:
+
+ write_file( 'foo', { binmode => ':raw' }, @data ) ;
+ write_file( 'junk', { append => 1 }, @more_junk ) ;
+ write_file( 'bar', @spew ) ;
+
+=head2 Fast Slurping
+
+Somewhere along the line, I learned about a way to slurp files faster
+than by setting $/ to undef. The method is very simple, you do a single
+read call with the size of the file (which the -s operator provides).
+This bypasses the I/O loop inside perl that checks for EOF and does all
+sorts of processing. I then decided to experiment and found that
+sysread is even faster as you would expect. sysread bypasses all of
+Perl's stdio and reads the file from the kernel buffers directly into a
+Perl scalar. This is why the slurp code in File::Slurp uses
+sysopen/sysread/syswrite. All the rest of the code is just to support
+the various options and data passing techniques.
+
+
+=head2 Benchmarks
+
+Benchmarks can be enlightening, informative, frustrating and
+deceiving. It would make no sense to create a new and more complex slurp
+module unless it also gained significantly in speed. So I created a
+benchmark script which compares various slurp methods with differing
+file sizes and calling contexts. This script can be run from the main
+directory of the tarball like this:
+
+ perl -Ilib extras/slurp_bench.pl
+
+If you pass in an argument on the command line, it will be passed to
+timethese() and it will control the duration. It defaults to -2 which
+makes each benchmark run to at least 2 seconds of cpu time.
+
+The following numbers are from a run I did on my 300Mhz sparc. You will
+most likely get much faster counts on your boxes but the relative speeds
+shouldn't change by much. If you see major differences on your
+benchmarks, please send me the results and your Perl and OS
+versions. Also you can play with the benchmark script and add more slurp
+variations or data files.
+
+The rest of this section will be discussing the results of the
+benchmarks. You can refer to extras/slurp_bench.pl to see the code for
+the individual benchmarks. If the benchmark name starts with cpan_, it
+is either from Slurp.pm or File::Slurp.pm. Those starting with new_ are
+from the new File::Slurp.pm. Those that start with file_contents_ are
+from a client's code base. The rest are variations I created to
+highlight certain aspects of the benchmarks.
+
+The short and long file data is made like this:
+
+ my @lines = ( 'abc' x 30 . "\n") x 100 ;
+ my $text = join( '', @lines ) ;
+
+ @lines = ( 'abc' x 40 . "\n") x 1000 ;
+ $text = join( '', @lines ) ;
+
+So the short file is 9,100 bytes and the long file is 121,000
+bytes.
+
+=head3 Scalar Slurp of Short File
+
+ file_contents 651/s
+ file_contents_no_OO 828/s
+ cpan_read_file 1866/s
+ cpan_slurp 1934/s
+ read_file 2079/s
+ new 2270/s
+ new_buf_ref 2403/s
+ new_scalar_ref 2415/s
+ sysread_file 2572/s
+
+=head3 Scalar Slurp of Long File
+
+ file_contents_no_OO 82.9/s
+ file_contents 85.4/s
+ cpan_read_file 250/s
+ cpan_slurp 257/s
+ read_file 323/s
+ new 468/s
+ sysread_file 489/s
+ new_scalar_ref 766/s
+ new_buf_ref 767/s
+
+The primary inference you get from looking at the numbers above is that
+when slurping a file into a scalar, the longer the file, the more time
+you save by returning the result via a scalar reference. The time for
+the extra buffer copy can add up. The new module came out on top overall
+except for the very simple sysread_file entry which was added to
+highlight the overhead of the more flexible new module which isn't that
+much. The file_contents entries are always the worst since they do a
+list slurp and then a join, which is a classic newbie and cargo culted
+style which is extremely slow. Also the OO code in file_contents slows
+it down even more (I added the file_contents_no_OO entry to show this).
+The two CPAN modules are decent with small files but they are laggards
+compared to the new module when the file gets much larger.
+
+=head3 List Slurp of Short File
+
+ cpan_read_file 589/s
+ cpan_slurp_to_array 620/s
+ read_file 824/s
+ new_array_ref 824/s
+ sysread_file 828/s
+ new 829/s
+ new_in_anon_array 833/s
+ cpan_slurp_to_array_ref 836/s
+
+=head3 List Slurp of Long File
+
+ cpan_read_file 62.4/s
+ cpan_slurp_to_array 62.7/s
+ read_file 92.9/s
+ sysread_file 94.8/s
+ new_array_ref 95.5/s
+ new 96.2/s
+ cpan_slurp_to_array_ref 96.3/s
+ new_in_anon_array 97.2/s
+
+This is perhaps the most interesting result of this benchmark. Five
+different entries have effectively tied for the lead. The logical
+conclusion is that splitting the input into lines is the bounding
+operation, no matter how the file gets slurped. This is the only
+benchmark where the new module isn't the clear winner (in the long file
+entries - it is no worse than a close second in the short file
+entries).
+
+
+Note: In the benchmark information for all the spew entries, the extra
+number at the end of each line is how many wallclock seconds the whole
+entry took. The benchmarks were run for at least 2 CPU seconds per
+entry. The unusually large wallclock times will be discussed below.
+
+=head3 Scalar Spew of Short File
+
+ cpan_write_file 1035/s 38
+ print_file 1055/s 41
+ syswrite_file 1135/s 44
+ new 1519/s 2
+ print_join_file 1766/s 2
+ new_ref 1900/s 2
+ syswrite_file2 2138/s 2
+
+=head3 Scalar Spew of Long File
+
+ cpan_write_file 164/s 20
+ print_file 211/s 26
+ syswrite_file 236/s 25
+ print_join_file 277/s 2
+ new 295/s 2
+ syswrite_file2 428/s 2
+ new_ref 608/s 2
+
+In the scalar spew entries, the new module API wins when it is passed a
+reference to the scalar buffer. The C<syswrite_file2> entry beats it
+with the shorter file due to its simpler code. The old CPAN module is
+the slowest due to its extra copying of the data and its use of print.
+
+=head3 List Spew of Short File
+
+ cpan_write_file 794/s 29
+ syswrite_file 1000/s 38
+ print_file 1013/s 42
+ new 1399/s 2
+ print_join_file 1557/s 2
+
+=head3 List Spew of Long File
+
+ cpan_write_file 112/s 12
+ print_file 179/s 21
+ syswrite_file 181/s 19
+ print_join_file 205/s 2
+ new 228/s 2
+
+Again, the simple C<print_join_file> entry beats the new module when
+spewing a short list of lines to a file. But it loses to the new module
+when the file size gets longer. The old CPAN module lags behind the
+others since it first makes an extra copy of the lines and then it calls
+C<print> on the output list and that is much slower than passing to
+C<print> a single scalar generated by join. The C<print_file> entry
+shows the advantage of directly printing C<@_> and the
+C<print_join_file> adds the join optimization.
+
+Now about those long wallclock times. If you look carefully at the
+benchmark code of all the spew entries, you will find that some always
+write to new files and some overwrite existing files. When I asked David
+Muir why the old File::Slurp module had an C<overwrite> subroutine, he
+answered that by overwriting a file, you always guarantee something
+readable is in the file. If you create a new file, there is a moment
+when the new file is created but has no data in it. I feel this is not a
+good enough answer. Even when overwriting, you can write a shorter file
+than the existing file and then you have to truncate the file to the new
+size. There is a small race window there where another process can slurp
+in the file with the new data followed by leftover junk from the
+previous version of the file. This reinforces the point that the only
+way to ensure consistent file data is the proper use of file locks.
+
+But what about those long times? Well it is all about the difference
+between creating files and overwriting existing ones. The former have to
+allocate new inodes (or the equivalent on other file systems) and the
+latter can reuse the existing inode. This means the overwrite will save on
+disk seeks as well as on cpu time. In fact when running this benchmark,
+I could hear my disk going crazy allocating inodes during the spew
+operations. This speedup in both cpu and wallclock is why the new module
+always does overwriting when spewing files. It also does the proper
+truncate (and this is checked in the tests by spewing shorter files
+after longer ones had previously been written). The C<overwrite>
+subroutine is just a typeglob alias to C<write_file> and is there for
+backwards compatibility with the old File::Slurp module.
+
+=head3 Benchmark Conclusion
+
+Other than a few cases where a simpler entry beat it out, the new
+File::Slurp module is either the speed leader or among the leaders. Its
+special APIs for passing buffers by reference prove to be very useful
+speedups. Also it uses all the other optimizations including using
+C<sysread/syswrite> and joining output lines. I expect many projects
+that extensively use slurping will notice the speed improvements,
+especially if they rewrite their code to take advantage of the new API
+features. Even if they don't touch their code and use the simple API
+they will get a significant speedup.
+
+=head2 Error Handling
+
+Slurp subroutines are subject to conditions such as not being able to
+open the file, or I/O errors. How these errors are handled, and what the
+caller will see, are important aspects of the design of an API. The
+classic error handling for slurping has been to call C<die()> or even
+better, C<croak()>. But sometimes you want the slurp to either
+C<warn()>/C<carp()> or allow your code to handle the error. Sure, this
+can be done by wrapping the slurp in a C<eval> block to catch a fatal
+error, but not everyone wants all that extra code. So I have added
+another option to all the subroutines which selects the error
+handling. If the 'err_mode' option is 'croak' (which is also the
+default), the called subroutine will croak. If set to 'carp' then carp
+will be called. Set to any other string (use 'quiet' when you want to
+be explicit) and no error handler is called. Then the caller can use the
+error status from the call.
+
+C<write_file()> doesn't use the return value for data so it can return a
+false status value in-band to mark an error. C<read_file()> does use its
+return value for data, but we can still make it pass back the error
+status. A successful read in any scalar mode will return either a
+defined data string or a reference to a scalar or array. So a bare
+return would work here. But if you slurp in lines by calling it in a
+list context, a bare C<return> will return an empty list, which is the
+same value it would get from an existing but empty file. So now,
+C<read_file()> will do something I normally strongly advocate against,
+i.e., returning an explicit C<undef> value. In the scalar context this
+still returns an error, and in list context, the returned first value
+will be C<undef>, and that is not legal data for the first element. So
+the list context also gets an error status it can detect:
+
+ my @lines = read_file( $file_name, err_mode => 'quiet' ) ;
+ your_handle_error( "$file_name can't be read\n" ) unless
+ @lines && defined $lines[0] ;
+
+
+=head2 File::FastSlurp
+
+ sub read_file {
+
+ my( $file_name, %args ) = @_ ;
+
+ my $buf ;
+ my $buf_ref = $args{'buf_ref'} || \$buf ;
+
+ my $mode = O_RDONLY ;
+ $mode |= O_BINARY if $args{'binmode'} ;
+
+ local( *FH ) ;
+ sysopen( FH, $file_name, $mode ) or
+ carp "Can't open $file_name: $!" ;
+
+ my $size_left = -s FH ;
+
+ while( $size_left > 0 ) {
+
+ my $read_cnt = sysread( FH, ${$buf_ref},
+ $size_left, length ${$buf_ref} ) ;
+
+ unless( $read_cnt ) {
+
+ carp "read error in file $file_name: $!" ;
+ last ;
+ }
+
+ $size_left -= $read_cnt ;
+ }
+
+ # handle void context (return scalar by buffer reference)
+
+ return unless defined wantarray ;
+
+ # handle list context
+
+	return split( m|(?<=$/)|, ${$buf_ref} ) if wantarray ;
+
+ # handle scalar context
+
+ return ${$buf_ref} ;
+ }
+
+ sub write_file {
+
+ my $file_name = shift ;
+
+ my $args = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
+ my $buf = join '', @_ ;
+
+
+ my $mode = O_WRONLY ;
+ $mode |= O_BINARY if $args->{'binmode'} ;
+ $mode |= O_APPEND if $args->{'append'} ;
+
+ local( *FH ) ;
+ sysopen( FH, $file_name, $mode ) or
+ carp "Can't open $file_name: $!" ;
+
+ my $size_left = length( $buf ) ;
+ my $offset = 0 ;
+
+ while( $size_left > 0 ) {
+
+ my $write_cnt = syswrite( FH, $buf,
+ $size_left, $offset ) ;
+
+ unless( $write_cnt ) {
+
+ carp "write error in file $file_name: $!" ;
+ last ;
+ }
+
+ $size_left -= $write_cnt ;
+ $offset += $write_cnt ;
+ }
+
+ return ;
+ }
+
+=head2 Slurping in Perl 6
+
+As usual with Perl 6, much of the work in this article will be put to
+pasture. Perl 6 will allow you to set a 'slurp' property on file handles
+and when you read from such a handle, the file is slurped. List and
+scalar context will still be supported so you can slurp into lines or a
+scalar. I would expect that support for slurping in Perl 6 will be
+optimized and bypass the stdio subsystem since it can use the slurp
+property to trigger a call to special code. Otherwise some enterprising
+individual will just create a File::FastSlurp module for Perl 6. The
+code in the Perl 5 module could easily be modified to Perl 6 syntax and
+semantics. Any volunteers?
+
+=head2 In Summary
+
+We have compared classic line by line processing with munging a whole
+file in memory. Slurping files can speed up your programs and simplify
+your code if done properly. You must still be aware to not slurp
+humongous files (logs, DNA sequences, etc.) or STDIN where you don't
+know how much data you will read in. But slurping megabyte sized files
+is not a major issue on today's systems with the typical amount of RAM
+installed. When Perl was first being used in depth (Perl 4), slurping
+was limited by the smaller RAM size of 10 years ago. Now, you should be
+able to slurp almost any reasonably sized file, whether it contains
+configuration, source code, data, etc.
+
+=head2 Acknowledgements
+
+
+
+
+
diff --git a/extras/slurp_bench.pl b/extras/slurp_bench.pl
new file mode 100755
index 0000000..68eb5fd
--- /dev/null
+++ b/extras/slurp_bench.pl
@@ -0,0 +1,587 @@
+#!/usr/local/bin/perl
+
+use strict ;
+use warnings ;
+
+use Getopt::Long ;
+use Benchmark qw( timethese cmpthese ) ;
+use Carp ;
+use FileHandle ;
+use Fcntl qw( :DEFAULT :seek );
+
+use File::Slurp () ;
+use FileSlurp_12 () ;
+
+my $file_name = 'slurp_data' ;
+my( @lines, $text ) ;
+
+my %opts ;
+
+parse_options() ;
+
+run_benchmarks() ;
+
+unlink $file_name ;
+
+exit ;
+
+sub run_benchmarks {
+
+ foreach my $size ( @{$opts{size_list}} ) {
+
+ @lines = ( 'a' x 80 . "\n") x ( $size / 81 + 1 ) ;
+ $text = join( '', @lines ) ;
+
+ my $overage = length($text) - $size ;
+ substr( $text, -$overage, $overage, '' ) ;
+ substr( $lines[-1], -$overage, $overage, '' ) ;
+
+ if ( $opts{slurp} ) {
+
+ File::Slurp::write_file( $file_name, $text ) ;
+
+ bench_list_slurp( $size ) if $opts{list} ;
+ bench_scalar_slurp( $size ) if $opts{scalar} ;
+ }
+
+ if ( $opts{spew} ) {
+
+ bench_spew_list( $size ) if $opts{list} ;
+ bench_scalar_spew( $size ) if $opts{scalar} ;
+ }
+ }
+}
+
+##########################################
+##########################################
+sub bench_scalar_slurp {
+
+ my ( $size ) = @_ ;
+
+ print "\n\nReading (Slurp) into a scalar: Size = $size bytes\n\n" ;
+
+ my $buffer ;
+
+ my $result = timethese( $opts{iterations}, {
+
+ 'FS::read_file' =>
+ sub { my $text = File::Slurp::read_file( $file_name ) },
+
+ 'FS12::read_file' =>
+ sub { my $text = FileSlurp_12::read_file( $file_name ) },
+
+ 'FS::read_file_buf_ref' =>
+ sub { my $text ;
+ File::Slurp::read_file( $file_name, buf_ref => \$text ) },
+ 'FS::read_file_buf_ref2' =>
+ sub {
+ File::Slurp::read_file( $file_name, buf_ref => \$buffer ) },
+ 'FS::read_file_scalar_ref' =>
+ sub { my $text =
+ File::Slurp::read_file( $file_name, scalar_ref => 1 ) },
+
+ old_sysread_file =>
+ sub { my $text = old_sysread_file( $file_name ) },
+
+ old_read_file =>
+ sub { my $text = old_read_file( $file_name ) },
+
+ orig_read_file =>
+ sub { my $text = orig_read_file( $file_name ) },
+
+ orig_slurp =>
+ sub { my $text = orig_slurp_scalar( $file_name ) },
+
+ file_contents =>
+ sub { my $text = file_contents( $file_name ) },
+
+ file_contents_no_OO =>
+ sub { my $text = file_contents_no_OO( $file_name ) },
+ } ) ;
+
+ cmpthese( $result ) ;
+}
+
+##########################################
+
+sub bench_list_slurp {
+
+ my ( $size ) = @_ ;
+
+ print "\n\nReading (Slurp) into a list: Size = $size bytes\n\n" ;
+
+ my $result = timethese( $opts{iterations}, {
+
+ 'FS::read_file' =>
+ sub { my @lines = File::Slurp::read_file( $file_name ) },
+
+ 'FS::read_file_array_ref' =>
+ sub { my $lines_ref =
+ File::Slurp::read_file( $file_name, array_ref => 1 ) },
+
+ 'FS::read_file_scalar' =>
+ sub { my $lines_ref =
+ [ File::Slurp::read_file( $file_name ) ] },
+
+ old_sysread_file =>
+ sub { my @lines = old_sysread_file( $file_name ) },
+
+ old_read_file =>
+ sub { my @lines = old_read_file( $file_name ) },
+
+ orig_read_file =>
+ sub { my @lines = orig_read_file( $file_name ) },
+
+ orig_slurp_array =>
+ sub { my @lines = orig_slurp_array( $file_name ) },
+
+ orig_slurp_array_ref =>
+ sub { my $lines_ref = orig_slurp_array( $file_name ) },
+ } ) ;
+
+ cmpthese( $result ) ;
+}
+
+######################################
+# uri's old fast slurp
+
+sub old_read_file {
+
+ my( $file_name ) = shift ;
+
+ local( *FH ) ;
+ open( FH, $file_name ) || carp "can't open $file_name $!" ;
+
+ return <FH> if wantarray ;
+
+ my $buf ;
+
+ read( FH, $buf, -s FH ) ;
+ return $buf ;
+}
+
+sub old_sysread_file {
+
+ my( $file_name ) = shift ;
+
+ local( *FH ) ;
+ open( FH, $file_name ) || carp "can't open $file_name $!" ;
+
+ return <FH> if wantarray ;
+
+ my $buf ;
+
+ sysread( FH, $buf, -s FH ) ;
+ return $buf ;
+}
+
+######################################
+# from File::Slurp.pm on cpan
+
+sub orig_read_file
+{
+ my ($file) = @_;
+
+ local($/) = wantarray ? $/ : undef;
+ local(*F);
+ my $r;
+ my (@r);
+
+ open(F, "<$file") || croak "open $file: $!";
+ @r = <F>;
+ close(F) || croak "close $file: $!";
+
+ return $r[0] unless wantarray;
+ return @r;
+}
+
+
+######################################
+# from Slurp.pm on cpan
+
+sub orig_slurp {
+ local( $/, @ARGV ) = ( wantarray ? $/ : undef, @_ );
+ return <ARGV>;
+}
+
+sub orig_slurp_array {
+ my @array = orig_slurp( @_ );
+ return wantarray ? @array : \@array;
+}
+
+sub orig_slurp_scalar {
+ my $scalar = orig_slurp( @_ );
+ return $scalar;
+}
+
+######################################
+# very slow slurp code used by a client
+
+sub file_contents {
+ my $file = shift;
+ my $fh = new FileHandle $file or
+ warn("Util::file_contents:Can't open file $file"), return '';
+ return join '', <$fh>;
+}
+
+# same code but doesn't use FileHandle.pm
+
+sub file_contents_no_OO {
+ my $file = shift;
+
+ local( *FH ) ;
+ open( FH, $file ) || carp "can't open $file $!" ;
+
+ return join '', <FH>;
+}
+
+##########################################
+##########################################
+
+sub bench_spew_list {
+
+ my( $size ) = @_ ;
+
+ print "\n\nWriting (Spew) a list of lines: Size = $size bytes\n\n" ;
+
+ my $result = timethese( $opts{iterations}, {
+ 'FS::write_file' => sub { unlink $file_name if $opts{unlink} ;
+ File::Slurp::write_file( $file_name, @lines ) },
+ 'FS::write_file Aref' => sub { unlink $file_name if $opts{unlink} ;
+ File::Slurp::write_file( $file_name, \@lines ) },
+ 'print' => sub { unlink $file_name if $opts{unlink} ;
+ print_file( $file_name, @lines ) },
+ 'print/join' => sub { unlink $file_name if $opts{unlink} ;
+ print_join_file( $file_name, @lines ) },
+ 'syswrite/join' => sub { unlink $file_name if $opts{unlink} ;
+ syswrite_join_file( $file_name, @lines ) },
+ 'original write_file' => sub { unlink $file_name if $opts{unlink} ;
+ orig_write_file( $file_name, @lines ) },
+ } ) ;
+
+ cmpthese( $result ) ;
+}
+
+sub print_file {
+
+ my( $file_name ) = shift ;
+
+ local( *FH ) ;
+ open( FH, ">$file_name" ) || carp "can't create $file_name $!" ;
+
+ print FH @_ ;
+}
+
+sub print_join_file {
+
+ my( $file_name ) = shift ;
+
+ local( *FH ) ;
+ open( FH, ">$file_name" ) || carp "can't create $file_name $!" ;
+
+ print FH join( '', @_ ) ;
+}
+
+sub syswrite_join_file {
+
+ my( $file_name ) = shift ;
+
+ local( *FH ) ;
+ open( FH, ">$file_name" ) || carp "can't create $file_name $!" ;
+
+ syswrite( FH, join( '', @_ ) ) ;
+}
+
+sub sysopen_syswrite_join_file {
+
+ my( $file_name ) = shift ;
+
+ local( *FH ) ;
+ sysopen( FH, $file_name, O_WRONLY | O_CREAT ) ||
+ carp "can't create $file_name $!" ;
+
+ syswrite( FH, join( '', @_ ) ) ;
+}
+
+sub orig_write_file
+{
+ my ($f, @data) = @_;
+
+ local(*F);
+
+ open(F, ">$f") || croak "open >$f: $!";
+ (print F @data) || croak "write $f: $!";
+ close(F) || croak "close $f: $!";
+ return 1;
+}
+
+##########################################
+
+sub bench_scalar_spew {
+
+ my ( $size ) = @_ ;
+
+ print "\n\nWriting (Spew) a scalar: Size = $size bytes\n\n" ;
+
+ my $result = timethese( $opts{iterations}, {
+ 'FS::write_file' => sub { unlink $file_name if $opts{unlink} ;
+ File::Slurp::write_file( $file_name, $text ) },
+ 'FS::write_file Sref' => sub { unlink $file_name if $opts{unlink} ;
+ File::Slurp::write_file( $file_name, \$text ) },
+ 'print' => sub { unlink $file_name if $opts{unlink} ;
+ print_file( $file_name, $text ) },
+ 'syswrite_file' => sub { unlink $file_name if $opts{unlink} ;
+ syswrite_file( $file_name, $text ) },
+ 'syswrite_file_ref' => sub { unlink $file_name if $opts{unlink} ;
+ syswrite_file_ref( $file_name, \$text ) },
+ 'orig_write_file' => sub { unlink $file_name if $opts{unlink} ;
+ orig_write_file( $file_name, $text ) },
+ } ) ;
+
+ cmpthese( $result ) ;
+}
+
+sub syswrite_file {
+
+ my( $file_name, $text ) = @_ ;
+
+ local( *FH ) ;
+ open( FH, ">$file_name" ) || carp "can't create $file_name $!" ;
+
+ syswrite( FH, $text ) ;
+}
+
+sub syswrite_file_ref {
+
+ my( $file_name, $text_ref ) = @_ ;
+
+ local( *FH ) ;
+ open( FH, ">$file_name" ) || carp "can't create $file_name $!" ;
+
+ syswrite( FH, ${$text_ref} ) ;
+}
+
+sub parse_options {
+
+ my $result = GetOptions (\%opts, qw(
+ iterations|i=s
+ direction|d=s
+ context|c=s
+ sizes|s=s
+ unlink|u
+ legend|key|l|k
+ help|usage
+ ) ) ;
+
+ usage() unless $result ;
+
+ usage() if $opts{help} ;
+
+ legend() if $opts{legend} ;
+
+# set defaults
+
+ $opts{direction} ||= 'both' ;
+ $opts{context} ||= 'both' ;
+ $opts{iterations} ||= -2 ;
+ $opts{sizes} ||= '512,10k,1m' ;
+
+ if ( $opts{direction} eq 'both' ) {
+
+ $opts{spew} = 1 ;
+ $opts{slurp} = 1 ;
+ }
+ elsif ( $opts{direction} eq 'in' ) {
+
+ $opts{slurp} = 1 ;
+
+ }
+ elsif ( $opts{direction} eq 'out' ) {
+
+ $opts{spew} = 1 ;
+ }
+ else {
+
+ usage( "Unknown direction: $opts{direction}" ) ;
+ }
+
+ if ( $opts{context} eq 'both' ) {
+
+ $opts{list} = 1 ;
+ $opts{scalar} = 1 ;
+ }
+ elsif ( $opts{context} eq 'scalar' ) {
+
+ $opts{scalar} = 1 ;
+
+ }
+ elsif ( $opts{context} eq 'list' ) {
+
+ $opts{list} = 1 ;
+ }
+ else {
+
+ usage( "Unknown context: $opts{context}" ) ;
+ }
+
+ if ( $opts{context} eq 'both' ) {
+
+ $opts{list} = 1 ;
+ $opts{scalar} = 1 ;
+ }
+ elsif ( $opts{context} eq 'scalar' ) {
+
+ $opts{scalar} = 1 ;
+
+ }
+ elsif ( $opts{context} eq 'list' ) {
+
+ $opts{list} = 1 ;
+ }
+ else {
+
+ usage( "Unknown context: $opts{context}" ) ;
+ }
+
+ foreach my $size ( split ',', ( $opts{sizes} ) ) {
+
+
+# check for valid size and suffix. grab both.
+
+ usage( "Illegal size: $size") unless $size =~ /^(\d+)([km])?$/ ;
+
+# handle suffix multipliers
+
+ $size = $1 * (( $2 eq 'k' ) ? 1024 : 1024*1024) if $2 ;
+
+ push( @{$opts{size_list}}, $size ) ;
+ }
+
+#use Data::Dumper ;
+#print Dumper \%opts ;
+}
+
+sub legend {
+
+ die <<'LEGEND' ;
+--------------------------------------------------------------------------
+Legend for the Slurp Benchmark Entries
+
+In all cases below 'FS' or 'F::S' means the current File::Slurp module
+is being used in the benchmark. The full name and description will say
+which options are being used.
+--------------------------------------------------------------------------
+These benchmarks write a list of lines to a file. Use the direction option
+of 'out' or 'both' and the context option is 'list' or 'both'.
+
+ Key Description/Source
+ ----- --------------------------
+ FS::write_file Current F::S write_file
+ FS::write_file Aref Current F::S write_file on array ref of data
+ print Open a file and call print() on the list data
+ print/join Open a file and call print() on the joined list
+ data
+ syswrite/join Open a file, call syswrite on joined list data
+ sysopen/syswrite Sysopen a file, call syswrite on joined list
+ data
+ original write_file write_file code from original File::Slurp
+ (pre-version 9999.*)
+--------------------------------------------------------------------------
+These benchmarks write a scalar to a file. Use the direction option
+of 'out' or 'both' and the context option is 'scalar' or 'both'.
+
+ Key Description/Source
+ ----- --------------------------
+ FS::write_file Current F::S write_file
+ FS::write_file Sref Current F::S write_file of scalar ref of data
+ print Open a file and call print() on the scalar data
+ syswrite_file Open a file, call syswrite on scalar data
+ syswrite_file_ref Open a file, call syswrite on scalar ref of
+ data
+ orig_write_file write_file code from original File::Slurp
+ (pre-version 9999.*)
+--------------------------------------------------------------------------
+These benchmarks slurp a file into an array. Use the direction option
+of 'in' or 'both' and the context option is 'list' or 'both'.
+
+ Key Description/Source
+ ----- --------------------------
+ FS::read_file Current F::S read_file - returns array
+ FS::read_file_array_ref Current F::S read_file - returns array
+ ref in any context
+ FS::read_file_scalar Current F::S read_file - returns array
+ ref in scalar context
+ old_sysread_file My old fast slurp - calls sysread
+ old_read_file My old fast slurp - calls read
+ orig_read_file Original File::Slurp on CPAN
+ orig_slurp_array Slurp.pm on CPAN - returns array
+ orig_slurp_array_ref Slurp.pm on CPAN - returns array ref
+--------------------------------------------------------------------------
+These benchmarks slurp a file into a scalar. Use the direction option
+of 'in' or 'both' and the context option is 'scalar' or 'both'.
+
+ Key Description/Source
+ ----- --------------------------
+ FS::read_file Current F::S read_file - returns scalar
+ FS12::read_file F::S .12 slower read_file -
+ returns scalar
+ FS::read_file_buf_ref Current F::S read_file - returns
+ via buf_ref argument - new buffer
+ FS::read_file_buf_ref2 Current F::S read_file - returns
+ via buf_ref argument - uses
+ existing buffer
+ FS::read_file_scalar_ref Current F::S read_file - returns a
+ scalar ref
+ old_sysread_file My old fast slurp - calls sysread
+ old_read_file My old fast slurp - calls read
+ orig_read_file Original File::Slurp on CPAN
+ orig_slurp Slurp.pm on CPAN
+ file_contents Very slow slurp code done by a client
+ file_contents_no_OO Same code but doesn't use FileHandle.pm
+--------------------------------------------------------------------------
+LEGEND
+}
+
+sub usage {
+
+ my( $err ) = @_ ;
+
+ $err ||= '' ;
+
+ die <<DIE ;
+$err
+Usage: $0 [--iterations=<iter>] [--direction=<dir>] [--context=<con>]
+ [--sizes=<size_list>] [--legend] [--help]
+
+ --iterations=<iter> Run the benchmarks this many iterations
+ -i=<iter> A positive number is iteration count,
+ a negative number is minimum CPU time in
+ seconds. Default is -2 (run for 2 CPU seconds).
+
+ --direction=<dir> Which direction to slurp: 'in', 'out' or 'both'.
+ -d=<dir> Default is 'both'.
+
+ --context=<con> Which context is used for slurping: 'list',
+ -c=<con> 'scalar' or 'both'. Default is 'both'.
+
+ --sizes=<size_list> What sizes will be used in slurping (either
+ -s=<size_list> direction). This is a comma separated list of
+ integers. You can use 'k' or 'm' as suffixes
+ for 1024 and 1024**2. Default is '512,10k,1m'.
+
+ --unlink Unlink the written file before each time
+ -u a file is written
+
+ --legend Print out a legend of all the benchmark entries.
+ --key
+ -l
+ -k
+
+ --help Print this help text
+ --usage
+DIE
+
+}
+
+__END__
+
diff --git a/lib/File/Slurp.pm b/lib/File/Slurp.pm
new file mode 100755
index 0000000..b0d040b
--- /dev/null
+++ b/lib/File/Slurp.pm
@@ -0,0 +1,1261 @@
+package File::Slurp;
+
+use 5.6.2 ;
+
+use strict;
+use warnings ;
+
+use Carp ;
+use Exporter ;
+use Fcntl qw( :DEFAULT ) ;
+use POSIX qw( :fcntl_h ) ;
+use Errno ;
+#use Symbol ;
+
+use vars qw( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION ) ;
+@ISA = qw( Exporter ) ;
+
+$VERSION = '9999.19';
+
+my @std_export = qw(
+ read_file
+ write_file
+ overwrite_file
+ append_file
+ read_dir
+) ;
+
+my @edit_export = qw(
+ edit_file
+ edit_file_lines
+) ;
+
+my @ok_export = qw(
+) ;
+
+@EXPORT_OK = (
+ @edit_export,
+ qw(
+ slurp
+ prepend_file
+ ),
+) ;
+
+%EXPORT_TAGS = (
+ 'all' => [ @std_export, @edit_export, @EXPORT_OK ],
+ 'edit' => [ @edit_export ],
+ 'std' => [ @std_export ],
+) ;
+
+@EXPORT = @std_export ;
+
+my $max_fast_slurp_size = 1024 * 100 ;
+
+my $is_win32 = $^O =~ /win32/i ;
+
+# Install subs for various constants that aren't set in older perls
+# (< 5.005). Fcntl on old perls uses Exporter to define subs without a
+# () prototype These can't be overridden with the constant pragma or
+# we get a prototype mismatch. Hence this less than aesthetically
+# appealing BEGIN block:
+
+BEGIN {
+
+# SEEK_* may be missing on pre-5.005 perls; define the standard values
+
+	unless( defined &SEEK_SET ) {
+		*SEEK_SET = sub { 0 };
+		*SEEK_CUR = sub { 1 };
+		*SEEK_END = sub { 2 };
+	}
+
+# basic open flags; O_BINARY is a no-op everywhere it isn't defined
+
+	unless( defined &O_BINARY ) {
+		*O_BINARY = sub { 0 };
+		*O_RDONLY = sub { 0 };
+		*O_WRONLY = sub { 1 };
+	}
+
+# append/create/excl flags are platform specific, so pick the values
+# for the OSes we know about (matched loosely against $^O)
+
+	unless ( defined &O_APPEND ) {
+
+		if ( $^O =~ /olaris/ ) {
+			*O_APPEND = sub { 8 };
+			*O_CREAT = sub { 256 };
+			*O_EXCL = sub { 1024 };
+		}
+		elsif ( $^O =~ /inux/ ) {
+			*O_APPEND = sub { 1024 };
+			*O_CREAT = sub { 64 };
+			*O_EXCL = sub { 128 };
+		}
+		elsif ( $^O =~ /BSD/i ) {
+			*O_APPEND = sub { 8 };
+			*O_CREAT = sub { 512 };
+			*O_EXCL = sub { 2048 };
+		}
+	}
+}
+
+# print "OS [$^O]\n" ;
+
+# print "O_BINARY = ", O_BINARY(), "\n" ;
+# print "O_RDONLY = ", O_RDONLY(), "\n" ;
+# print "O_WRONLY = ", O_WRONLY(), "\n" ;
+# print "O_APPEND = ", O_APPEND(), "\n" ;
+# print "O_CREAT ", O_CREAT(), "\n" ;
+# print "O_EXCL ", O_EXCL(), "\n" ;
+
+
+# slurp() is an exportable alias for read_file (forward compatible with
+# the Perl 6 style built-in of the same name)
+*slurp = \&read_file ;
+
+# read_file - slurp a whole file and return its contents.
+# the file argument may be a file name, an open handle (GLOB/IO
+# object), or an object with string overloading (its stringified value
+# is used as the file name). in scalar context the whole file is
+# returned as one string; in list context as lines split on $/ (with
+# paragraph mode support when $/ is ''). options may be passed as a
+# hash ref or key/value list: binmode, array_ref, scalar_ref, buf_ref,
+# blk_size, chomp, err_mode. errors are dispatched to _error, which
+# croaks/carps/returns undef according to err_mode.
+
+sub read_file {
+
+	my $file_name = shift ;
+	my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
+
+# this is the optimized read_file for shorter files.
+# the test for -s > 0 is to allow pseudo files to be read with the
+# regular loop since they return a size of 0.
+
+	if ( !ref $file_name && -e $file_name && -s _ > 0 &&
+	     -s _ < $max_fast_slurp_size && !%{$opts} && !wantarray ) {
+
+		my $fh ;
+		unless( sysopen( $fh, $file_name, O_RDONLY ) ) {
+
+			@_ = ( $opts, "read_file '$file_name' - sysopen: $!");
+			goto &_error ;
+		}
+
+		my $read_cnt = sysread( $fh, my $buf, -s _ ) ;
+
+		unless ( defined $read_cnt ) {
+
+			@_ = ( $opts,
+				"read_file '$file_name' - small sysread: $!");
+			goto &_error ;
+		}
+
+# normalize cr/lf to newline for windows text reads
+
+		$buf =~ s/\015\012/\n/g if $is_win32 ;
+		return $buf ;
+	}
+
+# set the buffer to either the passed in one or ours and init it to the null
+# string
+
+	my $buf ;
+	my $buf_ref = $opts->{'buf_ref'} || \$buf ;
+	${$buf_ref} = '' ;
+
+	my( $read_fh, $size_left, $blk_size ) ;
+
+# deal with ref for a file name
+# it could be an open handle or an overloaded object
+
+	if ( ref $file_name ) {
+
+		my $ref_result = _check_ref( $file_name ) ;
+
+		if ( ref $ref_result ) {
+
+# we got an error, deal with it
+
+			@_ = ( $opts, $ref_result ) ;
+			goto &_error ;
+		}
+
+		if ( $ref_result ) {
+
+# we got an overloaded object and the result is the stringified value
+# use it as the file name
+
+			$file_name = $ref_result ;
+		}
+		else {
+
+# here we have just an open handle. set $read_fh so we don't do a sysopen
+
+			$read_fh = $file_name ;
+			$blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
+			$size_left = $blk_size ;
+		}
+	}
+
+# see if we have a path we need to open
+
+	unless ( $read_fh ) {
+
+# a regular file. set the sysopen mode
+
+		my $mode = O_RDONLY ;
+
+		$read_fh = local( *FH ) ;
+		unless ( sysopen( $read_fh, $file_name, $mode ) ) {
+			@_ = ( $opts, "read_file '$file_name' - sysopen: $!");
+			goto &_error ;
+		}
+
+		if ( my $binmode = $opts->{'binmode'} ) {
+			binmode( $read_fh, $binmode ) ;
+		}
+
+# get the size of the file for use in the read loop
+
+		$size_left = -s $read_fh ;
+
+# we need a blk_size if the size is 0 so we can handle pseudofiles like in
+# /proc. these show as 0 size but have data to be slurped.
+
+		unless( $size_left ) {
+
+			$blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
+			$size_left = $blk_size ;
+		}
+	}
+
+# infinite read loop. we exit when we are done slurping
+
+	while( 1 ) {
+
+# do the read and see how much we got
+
+		my $read_cnt = sysread( $read_fh, ${$buf_ref},
+				$size_left, length ${$buf_ref} ) ;
+
+		unless ( defined $read_cnt ) {
+
+# sysread is not automatically restarted when interrupted by a signal,
+# so retry on EINTR. note that $! is only meaningful when sysread
+# actually failed - testing $!{EINTR} on every pass (as this code
+# previously did) can see a stale EINTR left over from an earlier
+# syscall and skip the EOF/size accounting below.
+
+			next if $!{EINTR} ;
+
+			@_ = ( $opts, "read_file '$file_name' - loop sysread: $!");
+			goto &_error ;
+		}
+
+# good read. see if we hit EOF (nothing left to read)
+
+		last if $read_cnt == 0 ;
+
+# loop if we are slurping a handle. we don't track $size_left then.
+
+		next if $blk_size ;
+
+# count down how much we read and loop if we have more to read.
+
+		$size_left -= $read_cnt ;
+		last if $size_left <= 0 ;
+	}
+
+# fix up cr/lf to be a newline if this is a windows text file
+
+	${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$opts->{'binmode'} ;
+
+# line separator - paragraph mode ($/ eq '') splits on runs of newlines
+
+	my $sep = $/ ;
+	$sep = '\n\n+' if defined $sep && $sep eq '' ;
+
+# see if caller wants lines
+
+	if( wantarray || $opts->{'array_ref'} ) {
+
+		use re 'taint' ;
+
+		my @lines = length(${$buf_ref}) ?
+			${$buf_ref} =~ /(.*?$sep|.+)/sg : () ;
+
+		chomp @lines if $opts->{'chomp'} ;
+
+# caller wants an array ref
+
+		return \@lines if $opts->{'array_ref'} ;
+
+# caller wants list of lines
+
+		return @lines ;
+	}
+
+# caller wants a scalar ref to the slurped text
+
+	return $buf_ref if $opts->{'scalar_ref'} ;
+
+# caller wants a scalar with the slurped text (normal scalar context)
+
+	return ${$buf_ref} if defined wantarray ;
+
+# caller passed in an i/o buffer by reference (normal void context)
+
+	return ;
+}
+
+# errors in this sub are returned as scalar refs
+# a normal IO/GLOB handle is an empty return
+# an overloaded object returns its stringified as a scalarfilename
+
+# _check_ref - classify a reference that was passed as a "file name".
+# returns nothing for a plain GLOB/IO handle (ready to use), a scalar
+# ref holding an error message on failure, or the stringified value of
+# an overloaded object (to be used as the actual file name).
+
+sub _check_ref {
+
+	my( $handle ) = @_ ;
+
+# is it a real handle (GLOB or IO object)? isa() can die on
+# non-objects so guard the calls with eval.
+
+	my $is_handle =
+		eval { $handle->isa( 'GLOB' ) || $handle->isa( 'IO' ) } ;
+
+	if ( $is_handle ) {
+
+# make sure the DATA handle (if that is what we were given) is
+# seekable for sysread. any problem comes back as an error string.
+
+		if ( my $err = _seek_data_handle( $handle ) ) {
+
+			return \$err ;
+		}
+
+# a good plain handle - signal that with an empty return
+
+		return ;
+	}
+
+# not a handle - it may be an object with string overloading. load the
+# overload pragma so we can ask it.
+
+	eval { require overload } ;
+
+	return \"Bad handle '$handle' is not a GLOB or IO object or overloaded"
+		if $@ || !overload::Overloaded( $handle ) ;
+
+# it is overloaded - hand back its stringified value as the file name
+
+	return "$handle" ;
+}
+
+# _seek_data_handle - if the handle is the DATA handle, sync the fd
+# level seek position to the buffered i/o position (tell) so a later
+# sysread starts where buffered reads left off. returns an error
+# string on failure or nothing on success.
+
+sub _seek_data_handle {
+
+	my( $handle ) = @_ ;
+
+# DEEP DARK MAGIC. this checks the UNTAINT IO flag of a
+# glob/handle. only the DATA handle is untainted (since it is from
+# trusted data in the source file). this allows us to test if this is
+# the DATA handle and then to do a sysseek to make sure it gets
+# slurped correctly. on some systems, the buffered i/o pointer is not
+# left at the same place as the fd pointer. this sysseek makes them
+# the same so slurping with sysread will work.
+
+	eval{ require B } ;
+
+	if ( $@ ) {
+
+		return <<ERR ;
+Can't find B.pm with this Perl: $!.
+That module is needed to properly slurp the DATA handle.
+ERR
+	}
+
+# the 16 bit in IoFLAGS is the UNTAINT flag described above
+
+	if ( B::svref_2object( $handle )->IO->IoFLAGS & 16 ) {
+
+# set the seek position to the current tell.
+
+		unless( sysseek( $handle, tell( $handle ), SEEK_SET ) ) {
+			return "read_file '$handle' - sysseek: $!" ;
+		}
+	}
+
+# seek was successful, return no error string
+
+	return ;
+}
+
+
+# write_file - spew data to a file in one call.
+# the file argument may be a file name, an open handle, or a string
+# overloaded object. the data may come from plain scalars in @_, a
+# scalar ref, an array ref, or the buf_ref option. options (hash ref
+# as second argument): binmode, append, no_clobber, atomic, perms,
+# buf_ref, err_mode. returns 1 on success; errors are dispatched to
+# _error according to err_mode.
+
+sub write_file {
+
+	my $file_name = shift ;
+
+# get the optional argument hash ref from @_ or an empty hash ref.
+
+	my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
+
+	my( $buf_ref, $write_fh, $no_truncate, $orig_file_name, $data_is_ref ) ;
+
+# get the buffer ref - it depends on how the data is passed into write_file
+# after this if/else $buf_ref will have a scalar ref to the data.
+
+	if ( ref $opts->{'buf_ref'} eq 'SCALAR' ) {
+
+# a scalar ref passed in %opts has the data
+# note that the data was passed by ref
+
+		$buf_ref = $opts->{'buf_ref'} ;
+		$data_is_ref = 1 ;
+	}
+	elsif ( ref $_[0] eq 'SCALAR' ) {
+
+# the first value in @_ is the scalar ref to the data
+# note that the data was passed by ref
+
+		$buf_ref = shift ;
+		$data_is_ref = 1 ;
+	}
+	elsif ( ref $_[0] eq 'ARRAY' ) {
+
+# the first value in @_ is the array ref to the data so join it.
+
+		${$buf_ref} = join '', @{$_[0]} ;
+	}
+	else {
+
+# good old @_ has all the data so join it.
+
+		${$buf_ref} = join '', @_ ;
+	}
+
+# deal with ref for a file name
+
+	if ( ref $file_name ) {
+
+		my $ref_result = _check_ref( $file_name ) ;
+
+		if ( ref $ref_result ) {
+
+# we got an error, deal with it
+
+			@_ = ( $opts, $ref_result ) ;
+			goto &_error ;
+		}
+
+		if ( $ref_result ) {
+
+# we got an overloaded object and the result is the stringified value
+# use it as the file name
+
+			$file_name = $ref_result ;
+		}
+		else {
+
+# we now have a proper handle ref.
+# make sure we don't call truncate on it.
+
+			$write_fh = $file_name ;
+			$no_truncate = 1 ;
+		}
+	}
+
+# see if we have a path we need to open
+
+	unless( $write_fh ) {
+
+# spew to regular file.
+
+		if ( $opts->{'atomic'} ) {
+
+# in atomic mode, we spew to a temp file so make one and save the original
+# file name.
+			$orig_file_name = $file_name ;
+			$file_name .= ".$$" ;
+		}
+
+# set the mode for the sysopen
+
+		my $mode = O_WRONLY | O_CREAT ;
+		$mode |= O_APPEND if $opts->{'append'} ;
+		$mode |= O_EXCL if $opts->{'no_clobber'} ;
+
+		my $perms = $opts->{perms} ;
+		$perms = 0666 unless defined $perms ;
+
+# open the file and handle any error.
+
+		$write_fh = local( *FH ) ;
+		unless ( sysopen( $write_fh, $file_name, $mode, $perms ) ) {
+
+			@_ = ( $opts, "write_file '$file_name' - sysopen: $!");
+			goto &_error ;
+		}
+	}
+
+	if ( my $binmode = $opts->{'binmode'} ) {
+		binmode( $write_fh, $binmode ) ;
+	}
+
+	sysseek( $write_fh, 0, SEEK_END ) if $opts->{'append'} ;
+
+# fix up newline to write cr/lf if this is a windows text file
+
+	if ( $is_win32 && !$opts->{'binmode'} ) {
+
+# copy the write data if it was passed by ref so we don't clobber the
+# caller's data
+		$buf_ref = \do{ my $copy = ${$buf_ref}; } if $data_is_ref ;
+		${$buf_ref} =~ s/\n/\015\012/g ;
+	}
+
+# get the size of how much we are writing and init the offset into that buffer
+
+	my $size_left = length( ${$buf_ref} ) ;
+	my $offset = 0 ;
+
+# loop until we have no more data left to write. NOTE: this was a
+# do BLOCK while loop, but do BLOCK while is not a real loop in Perl
+# so the next below could never legally restart it - a plain while
+# fixes the EINTR retry.
+
+	while ( $size_left > 0 ) {
+
+# do the write and track how much we just wrote
+
+		my $write_cnt = syswrite( $write_fh, ${$buf_ref},
+				$size_left, $offset ) ;
+
+		unless ( defined $write_cnt ) {
+
+# syswrite is not automatically restarted when interrupted by a
+# signal, so retry on EINTR. $! is only meaningful after a failed
+# call, which is why the test lives inside this branch.
+
+			next if $!{EINTR} ;
+
+			@_ = ( $opts, "write_file '$file_name' - syswrite: $!");
+			goto &_error ;
+		}
+
+# track how much left to write and where to write from in the buffer
+
+		$size_left -= $write_cnt ;
+		$offset += $write_cnt ;
+	}
+
+# we truncate regular files in case we overwrite a long file with a shorter file
+# so seek to the current position to get it (same as tell()).
+
+	truncate( $write_fh,
+		  sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ;
+
+	close( $write_fh ) ;
+
+# handle the atomic mode - move the temp file to the original filename.
+
+	if ( $opts->{'atomic'} && !rename( $file_name, $orig_file_name ) ) {
+
+		@_ = ( $opts, "write_file '$file_name' - rename: $!" ) ;
+		goto &_error ;
+	}
+
+	return 1 ;
+}
+
+# this is for backwards compatibility with the previous File::Slurp module.
+# write_file always overwrites an existing file
+
+*overwrite_file = \&write_file ;
+
+# the current write_file has an append mode so we use that. this
+# supports the same API with an optional second argument which is a
+# hash ref of options.
+
+# append_file - same API as write_file but always appends to the file.
+# implemented by forcing the append option and then tail-calling
+# write_file via magic goto so any carp/croak is reported from the
+# caller's frame, not from here.
+
+sub append_file {
+
+# either flag append in the caller's opts hash, or inject a fresh opts
+# hash right after the file name argument
+
+	if ( ref $_[1] eq 'HASH' ) {
+
+		$_[1]->{append} = 1 ;
+	}
+	else {
+
+		splice( @_, 1, 0, { append => 1 } ) ;
+	}
+
+# magic goto replaces this frame with write_file, keeping @_ intact
+
+	goto &write_file
+}
+
+# prepend data to the beginning of a file
+
+# prepend_file - insert data at the beginning of a file, keeping the
+# existing contents after it. works by slurping the current contents
+# with read_file and then atomically rewriting the file with the new
+# data followed by the old. only the binmode and err_mode options are
+# honored. returns write_file's result on success.
+
+sub prepend_file {
+
+	my $file_name = shift ;
+
+#print "FILE $file_name\n" ;
+
+	my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
+
+# delete unsupported options
+
+	my @bad_opts =
+		grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
+
+	delete @{$opts}{@bad_opts} ;
+
+# the data to prepend may be a plain scalar or a scalar ref
+
+	my $prepend_data = shift ;
+	$prepend_data = '' unless defined $prepend_data ;
+	$prepend_data = ${$prepend_data} if ref $prepend_data eq 'SCALAR' ;
+
+#print "PRE [$prepend_data]\n" ;
+
+# keep the user's err_mode for our own reporting and force croak
+# internally so read_file/write_file failures land in the evals below
+
+	my $err_mode = delete $opts->{err_mode} ;
+	$opts->{ err_mode } = 'croak' ;
+	$opts->{ scalar_ref } = 1 ;
+
+	my $existing_data = eval { read_file( $file_name, $opts ) } ;
+
+	if ( $@ ) {
+
+# NOTE(review): the message interpolates $! but the caught error is in
+# $@ - $! may be stale at this point. verify before relying on the
+# errno text in this message.
+
+		@_ = ( { err_mode => $err_mode },
+			"prepend_file '$file_name' - read_file: $!" ) ;
+		goto &_error ;
+	}
+
+#print "EXIST [$$existing_data]\n" ;
+
+# write new data then old contents, atomically so readers never see a
+# partially prepended file
+
+	$opts->{atomic} = 1 ;
+	my $write_result =
+		eval { write_file( $file_name, $opts,
+			$prepend_data, $$existing_data ) ;
+	} ;
+
+	if ( $@ ) {
+
+		@_ = ( { err_mode => $err_mode },
+			"prepend_file '$file_name' - write_file: $!" ) ;
+		goto &_error ;
+	}
+
+	return $write_result ;
+}
+
+# edit a file as a scalar in $_
+
+# edit_file - slurp a whole file into $_, run the caller's code block
+# on it, and write $_ back out atomically. the in-process equivalent
+# of perl -0777 -pi -e '...'. args: code block/ref, file name, optional
+# opts hash (only binmode and err_mode are honored). the (&$;$)
+# prototype lets callers pass a bare block as with map/grep. returns
+# write_file's result on success.
+
+sub edit_file(&$;$) {
+
+	my( $edit_code, $file_name, $opts ) = @_ ;
+	$opts = {} unless ref $opts eq 'HASH' ;
+
+# my $edit_code = shift ;
+# my $file_name = shift ;
+# my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
+
+#print "FILE $file_name\n" ;
+
+# delete unsupported options
+
+	my @bad_opts =
+		grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
+
+	delete @{$opts}{@bad_opts} ;
+
+# keep the user err_mode and force croaking on internal errors
+
+	my $err_mode = delete $opts->{err_mode} ;
+	$opts->{ err_mode } = 'croak' ;
+
+# get a scalar ref for speed and slurp the file into a scalar
+
+	$opts->{ scalar_ref } = 1 ;
+	my $existing_data = eval { read_file( $file_name, $opts ) } ;
+
+	if ( $@ ) {
+
+		@_ = ( { err_mode => $err_mode },
+			"edit_file '$file_name' - read_file: $!" ) ;
+		goto &_error ;
+	}
+
+#print "EXIST [$$existing_data]\n" ;
+
+# map aliases $_ to the file contents so the edit code can modify it
+# in place; whatever it leaves in $_ is what gets written back
+
+	my( $edited_data ) = map { $edit_code->(); $_ } $$existing_data ;
+
+	$opts->{atomic} = 1 ;
+	my $write_result =
+		eval { write_file( $file_name, $opts, $edited_data ) } ;
+
+	if ( $@ ) {
+
+		@_ = ( { err_mode => $err_mode },
+			"edit_file '$file_name' - write_file: $!" ) ;
+		goto &_error ;
+	}
+
+	return $write_result ;
+}
+
+# edit_file_lines - like edit_file but the caller's code block is run
+# once per line, with each line in $_, then the (possibly modified)
+# lines are written back out atomically. the in-process equivalent of
+# perl -pi -e '...'. args: code block/ref, file name, optional opts
+# hash (only binmode and err_mode are honored). returns write_file's
+# result on success.
+
+sub edit_file_lines(&$;$) {
+
+	my( $edit_code, $file_name, $opts ) = @_ ;
+	$opts = {} unless ref $opts eq 'HASH' ;
+
+# my $edit_code = shift ;
+# my $file_name = shift ;
+# my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
+
+#print "FILE $file_name\n" ;
+
+# delete unsupported options
+
+	my @bad_opts =
+		grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
+
+	delete @{$opts}{@bad_opts} ;
+
+# keep the user err_mode and force croaking on internal errors
+
+	my $err_mode = delete $opts->{err_mode} ;
+	$opts->{ err_mode } = 'croak' ;
+
+# get an array ref for speed and slurp the file into lines
+
+	$opts->{ array_ref } = 1 ;
+	my $existing_data = eval { read_file( $file_name, $opts ) } ;
+
+	if ( $@ ) {
+
+		@_ = ( { err_mode => $err_mode },
+			"edit_file_lines '$file_name' - read_file: $!" ) ;
+		goto &_error ;
+	}
+
+#print "EXIST [$$existing_data]\n" ;
+
+# map aliases $_ to each line so the edit code can modify it in place
+
+	my @edited_data = map { $edit_code->(); $_ } @$existing_data ;
+
+	$opts->{atomic} = 1 ;
+	my $write_result =
+		eval { write_file( $file_name, $opts, @edited_data ) } ;
+
+	if ( $@ ) {
+
+		@_ = ( { err_mode => $err_mode },
+			"edit_file_lines '$file_name' - write_file: $!" ) ;
+		goto &_error ;
+	}
+
+	return $write_result ;
+}
+
+# basic wrapper around opendir/readdir
+
+# read_dir - return the entries of a directory. '.' and '..' are
+# filtered out unless the keep_dot_dot option is set; the prefix
+# option prepends "$dir/" to every entry. list context gets the list
+# of entries, scalar context an array ref. errors go through _error.
+
+sub read_dir {
+
+	my $dir = shift ;
+	my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
+
+# a lexical handle is closed automatically when it goes out of scope,
+# but close it explicitly once we have read the entries
+
+	my $dir_handle ;
+
+	unless ( opendir( $dir_handle, $dir ) ) {
+
+		@_ = ( $opts, "read_dir '$dir' - opendir: $!" ) ;
+		goto &_error ;
+	}
+
+	my @dir_entries = readdir( $dir_handle ) ;
+	closedir( $dir_handle ) ;
+
+	unless ( $opts->{'keep_dot_dot'} ) {
+
+		@dir_entries = grep { $_ ne '.' && $_ ne '..' } @dir_entries ;
+	}
+
+	if ( $opts->{'prefix'} ) {
+
+		$_ = "$dir/$_" for @dir_entries ;
+	}
+
+	return wantarray ? @dir_entries : \@dir_entries ;
+}
+
+# error handling section
+#
+# all the error handling uses magic goto so the caller will get the
+# error message as if from their code and not this module. if we just
+# did a call on the error code, the carp/croak would report it from
+# this module since the error sub is one level down on the call stack
+# from read_file/write_file/read_dir.
+
+
+# dispatch table mapping err_mode values to their reporting functions.
+# any other err_mode value (e.g. 'quiet') has no entry here and so
+# produces no report at all.
+
+my %err_func = (
+	'carp'	=> \&carp,
+	'croak'	=> \&croak,
+) ;
+
+# _error - central error handler. always reached via goto &_error so
+# carp/croak report from the caller of read_file/write_file/read_dir,
+# not from this module. args: the options hash ref (consulted for
+# err_mode) and the error message string.
+
+sub _error {
+
+	my( $opts, $err_msg ) = @_ ;
+
+# get the error function to use
+
+	my $func = $err_func{ $opts->{'err_mode'} || 'croak' } ;
+
+# if we didn't find it in our error function hash, they must have set
+# it to quiet and we don't do anything.
+
+	return unless $func ;
+
+# call the carp/croak function
+
+	$func->($err_msg) if $func ;
+
+# return a hard undef (in list context this will be a single value of
+# undef which is not a legal in-band value)
+
+	return undef ;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+File::Slurp - Simple and Efficient Reading/Writing/Modifying of Complete Files
+
+=head1 SYNOPSIS
+
+ use File::Slurp;
+
+# read in a whole file into a scalar
+ my $text = read_file( 'filename' ) ;
+
+# read in a whole file into an array of lines
+ my @lines = read_file( 'filename' ) ;
+
+# write out a whole file from a scalar
+ write_file( 'filename', $text ) ;
+
+# write out a whole file from an array of lines
+ write_file( 'filename', @lines ) ;
+
+# Here is a simple and fast way to load and save a simple config file
+# made of key=value lines.
+ my %conf = read_file( $file_name ) =~ /^(\w+)=(.*)$/mg ;
+ write_file( $file_name, {atomic => 1}, map "$_=$conf{$_}\n", keys %conf ) ;
+
+# insert text at the beginning of a file
+ prepend_file( 'filename', $text ) ;
+
+# in-place edit to replace all 'foo' with 'bar' in file
+ edit_file { s/foo/bar/g } 'filename' ;
+
+# in-place edit to delete all lines with 'foo' from file
+ edit_file_lines sub { $_ = '' if /foo/ }, 'filename' ;
+
+# read in a whole directory of file names (skipping . and ..)
+ my @files = read_dir( '/path/to/dir' ) ;
+
+=head1 DESCRIPTION
+
+This module provides subs that allow you to read or write entire files
+with one simple call. They are designed to be simple to use, have
+flexible ways to pass in or get the file contents and to be very
+efficient. There is also a sub to read in all the files in a
+directory other than C<.> and C<..>
+
+These slurp/spew subs work for files, pipes and sockets, stdio,
+pseudo-files, and the DATA handle. Read more about why slurping files is
+a good thing in the file 'slurp_article.pod' in the extras/ directory.
+
+If you are interested in how fast these calls work, check out the
+slurp_bench.pl program in the extras/ directory. It compares many
+different forms of slurping. You can select the I/O direction, context
+and file sizes. Use the --help option to see how to run it.
+
+=head2 B<read_file>
+
+This sub reads in an entire file and returns its contents to the
+caller. In scalar context it returns the entire file as a single
+scalar. In list context it will return a list of lines (using the
+current value of $/ as the separator including support for paragraph
+mode when it is set to '').
+
+ my $text = read_file( 'filename' ) ;
+	my $bin = read_file( 'filename', { binmode => ':raw' } ) ;
+ my @lines = read_file( 'filename' ) ;
+ my $lines = read_file( 'filename', array_ref => 1 ) ;
+
+The first argument is the file to slurp in. If the next argument is a
+hash reference, then it is used as the options. Otherwise the rest of
+the argument list is used as key/value options.
+
+If the file argument is a handle (if it is a ref and is an IO or GLOB
+object), then that handle is slurped in. This mode is supported so you
+slurp handles such as C<DATA> and C<STDIN>. See the test handle.t for
+an example that does C<open( '-|' )> and the child process spews data
+to the parent which slurps it in. All of the options that control how
+the data is returned to the caller still work in this case.
+
+If the first argument is an overloaded object then its stringified value
+is used for the filename and that file is opened. This is a new feature
+in 9999.14. See the stringify.t test for an example.
+
+By default C<read_file> returns an undef in scalar context or a single
+undef in list context if it encounters an error. Those are both
+impossible to get with a clean read_file call which means you can check
+the return value and always know if you had an error. You can change how
+errors are handled with the C<err_mode> option.
+
+Speed Note: If you call read_file and just get a scalar return value
+it is now optimized to handle shorter files. This is only used if no
+options are used, the file is shorter than 100k bytes, the filename is
+a plain scalar and a scalar file is returned. If you want the fastest
+slurping, use the C<buf_ref> or C<scalar_ref> options (see below)
+
+NOTE: as of version 9999.06, read_file works correctly on the C<DATA>
+handle. It used to need a sysseek workaround but that is now handled
+when needed by the module itself.
+
+You can optionally request that C<slurp()> is exported to your code. This
+is an alias for read_file and is meant to be forward compatible with
+Perl 6 (which will have slurp() built-in).
+
+The options for C<read_file> are:
+
+=head3 binmode
+
+If you set the binmode option, then its value is passed to a call to
+binmode on the opened handle. You can use this to set the file to be
+read in binary mode, utf8, etc. See perldoc -f binmode for more.
+
+ my $bin_data = read_file( $bin_file, binmode => ':raw' ) ;
+ my $utf_text = read_file( $bin_file, binmode => ':utf8' ) ;
+
+=head3 array_ref
+
+If this boolean option is set, the return value (only in scalar
+context) will be an array reference which contains the lines of the
+slurped file. The following two calls are equivalent:
+
+ my $lines_ref = read_file( $bin_file, array_ref => 1 ) ;
+ my $lines_ref = [ read_file( $bin_file ) ] ;
+
+=head3 chomp
+
+If this boolean option is set, the lines are chomped. This only
+happens if you are slurping in a list context or using the
+C<array_ref> option.
+
+=head3 scalar_ref
+
+If this boolean option is set, the return value (only in scalar
+context) will be an scalar reference to a string which is the contents
+of the slurped file. This will usually be faster than returning the
+plain scalar. It will also save memory as it will not make a copy of
+the file to return. Run the extras/slurp_bench.pl script to see speed
+comparisons.
+
+ my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ;
+
+=head3 buf_ref
+
+You can use this option to pass in a scalar reference and the slurped
+file contents will be stored in the scalar. This can be used in
+conjunction with any of the other options. This saves an extra copy of
+the slurped file and can lower ram usage vs returning the file. It is
+usually the fastest way to read a file into a scalar. Run the
+extras/slurp_bench.pl script to see speed comparisons.
+
+
+ read_file( $bin_file, buf_ref => \$buffer ) ;
+
+=head3 blk_size
+
+You can use this option to set the block size used when slurping from
+an already open handle (like \*STDIN). It defaults to 1MB.
+
+ my $text_ref = read_file( $bin_file, blk_size => 10_000_000,
+ array_ref => 1 ) ;
+
+=head3 err_mode
+
+You can use this option to control how read_file behaves when an error
+occurs. This option defaults to 'croak'. You can set it to 'carp' or to
+'quiet' to have no special error handling. This code wants to carp and
+then read another file if it fails.
+
+ my $text_ref = read_file( $file, err_mode => 'carp' ) ;
+ unless ( $text_ref ) {
+
+ # read a different file but croak if not found
+ $text_ref = read_file( $another_file ) ;
+ }
+
+ # process ${$text_ref}
+
+=head2 B<write_file>
+
+This sub writes out an entire file in one call.
+
+ write_file( 'filename', @data ) ;
+
+The first argument to C<write_file> is the filename. The next argument
+is an optional hash reference and it contains key/values that can
+modify the behavior of C<write_file>. The rest of the argument list is
+the data to be written to the file.
+
+ write_file( 'filename', {append => 1 }, @data ) ;
+ write_file( 'filename', {binmode => ':raw'}, $buffer ) ;
+
+As a shortcut if the first data argument is a scalar or array reference,
+it is used as the only data to be written to the file. Any following
+arguments in @_ are ignored. This is a faster way to pass in the output
+to be written to the file and is equivalent to the C<buf_ref> option of
+C<read_file>. These following pairs are equivalent but the pass by
+reference call will be faster in most cases (especially with larger
+files).
+
+ write_file( 'filename', \$buffer ) ;
+ write_file( 'filename', $buffer ) ;
+
+ write_file( 'filename', \@lines ) ;
+ write_file( 'filename', @lines ) ;
+
+If the first argument is a handle (if it is a ref and is an IO or GLOB
+object), then that handle is written to. This mode is supported so you
+spew to handles such as \*STDOUT. See the test handle.t for an example
+that does C<open( '-|' )> and child process spews data to the parent
+which slurps it in. All of the options that control how the data are
+passed into C<write_file> still work in this case.
+
+If the first argument is an overloaded object then its stringified value
+is used for the filename and that file is opened. This is a new feature
+in 9999.14. See the stringify.t test for an example.
+
+By default C<write_file> returns 1 upon successfully writing the file or
+undef if it encountered an error. You can change how errors are handled
+with the C<err_mode> option.
+
+The options are:
+
+=head3 binmode
+
+If you set the binmode option, then its value is passed to a call to
+binmode on the opened handle. You can use this to set the file to be
+read in binary mode, utf8, etc. See perldoc -f binmode for more.
+
+ write_file( $bin_file, {binmode => ':raw'}, @data ) ;
+ write_file( $bin_file, {binmode => ':utf8'}, $utf_text ) ;
+
+=head3 perms
+
+The perms option sets the permissions of newly-created files. This value
+is modified by your process's umask and defaults to 0666 (same as
+sysopen).
+
+NOTE: this option is new as of File::Slurp version 9999.14;
+
+=head3 buf_ref
+
+You can use this option to pass in a scalar reference which has the
+data to be written. If this is set then any data arguments (including
+the scalar reference shortcut) in @_ will be ignored. These are
+equivalent:
+
+ write_file( $bin_file, { buf_ref => \$buffer } ) ;
+ write_file( $bin_file, \$buffer ) ;
+ write_file( $bin_file, $buffer ) ;
+
+=head3 atomic
+
+If you set this boolean option, the file will be written to in an
+atomic fashion. A temporary file name is created by appending the pid
+($$) to the file name argument and that file is spewed to. After the
+file is closed it is renamed to the original file name (and rename is
+an atomic operation on most OS's). If the program using this were to
+crash in the middle of this, then the file with the pid suffix could
+be left behind.
+
+=head3 append
+
+If you set this boolean option, the data will be written at the end of
+the current file. Internally this sets the sysopen mode flag O_APPEND.
+
+ write_file( $file, {append => 1}, @data ) ;
+
+You can import append_file and it does the same thing.
+
+=head3 no_clobber
+
+If you set this boolean option, an existing file will not be overwritten.
+
+ write_file( $file, {no_clobber => 1}, @data ) ;
+
+=head3 err_mode
+
+You can use this option to control how C<write_file> behaves when an
+error occurs. This option defaults to 'croak'. You can set it to
+'carp' or to 'quiet' to have no error handling other than the return
+value. If the first call to C<write_file> fails it will carp and then
+write to another file. If the second call to C<write_file> fails, it
+will croak.
+
+	unless ( write_file( $file, { err_mode => 'carp' }, \$data ) ) {
+
+ # write a different file but croak if not found
+ write_file( $other_file, \$data ) ;
+ }
+
+=head2 overwrite_file
+
+This sub is just a typeglob alias to write_file since write_file
+always overwrites an existing file. This sub is supported for
+backwards compatibility with the original version of this module. See
+write_file for its API and behavior.
+
+=head2 append_file
+
+This sub will write its data to the end of the file. It is a wrapper
+around write_file and it has the same API so see that for the full
+documentation. These calls are equivalent:
+
+ append_file( $file, @data ) ;
+ write_file( $file, {append => 1}, @data ) ;
+
+
+=head2 prepend_file
+
+This sub writes data to the beginning of a file. The previously existing
+data is written after that so the effect is prepending data in front of
+a file. It is a counterpart to the append_file sub in this module. It
+works by first using C<read_file> to slurp in the file and then calling
+C<write_file> with the new data and the existing file data.
+
+The first argument to C<prepend_file> is the filename. The next argument
+is an optional hash reference and it contains key/values that can modify
+the behavior of C<prepend_file>. The rest of the argument list is the
+data to be written to the file and that is passed to C<write_file> as is
+(see that for allowed data).
+
+Only the C<binmode> and C<err_mode> options are supported. The
+C<write_file> call has the C<atomic> option set so you will always have
+a consistent file. See above for more about those options.
+
+C<prepend_file> is not exported by default, you need to import it
+explicitly.
+
+ use File::Slurp qw( prepend_file ) ;
+ prepend_file( $file, $header ) ;
+ prepend_file( $file, \@lines ) ;
+ prepend_file( $file, { binmode => 'raw:'}, $bin_data ) ;
+
+
+=head2 edit_file, edit_file_lines
+
+These subs read in a file into $_, execute a code block which should
+modify $_ and then write $_ back to the file. The difference between
+them is that C<edit_file> reads the whole file into $_ and calls the
+code block one time. With C<edit_file_lines> each line is read into $_
+and the code is called for each line. In both cases the code should
+modify $_ if desired and it will be written back out. These subs are
+the equivalent of the -pi command line options of Perl but you can
+call them from inside your program and not fork out a process. They
+are in @EXPORT_OK so you need to request them to be imported on the
+use line or you can import both of them with:
+
+ use File::Slurp qw( :edit ) ;
+
+The first argument to C<edit_file> and C<edit_file_lines> is a code
+block or a code reference. The code block is not followed by a comma
+(as with grep and map) but a code reference is followed by a
+comma. See the examples below for both styles. The next argument is
+the filename. The last argument is an optional hash reference and it
+contains key/values that can modify the behavior of
+C<edit_file> and C<edit_file_lines>.
+
+Only the C<binmode> and C<err_mode> options are supported. The
+C<write_file> call has the C<atomic> option set so you will always
+have a consistent file. See above for more about those options.
+
+Each group of calls below show a Perl command line instance and the
+equivalent calls to C<edit_file> and C<edit_file_lines>.
+
+ perl -0777 -pi -e 's/foo/bar/g' filename
+ use File::Slurp qw( edit_file ) ;
+ edit_file { s/foo/bar/g } 'filename' ;
+ edit_file sub { s/foo/bar/g }, 'filename' ;
+ edit_file \&replace_foo, 'filename' ;
+ sub replace_foo { s/foo/bar/g }
+
+ perl -pi -e '$_ = "" if /foo/' filename
+ use File::Slurp qw( edit_file_lines ) ;
+ use File::Slurp ;
+ edit_file_lines { $_ = '' if /foo/ } 'filename' ;
+ edit_file_lines sub { $_ = '' if /foo/ }, 'filename' ;
+	edit_file_lines \&delete_foo, 'filename' ;
+ sub delete_foo { $_ = '' if /foo/ }
+
+=head2 read_dir
+
+This sub reads all the file names from directory and returns them to
+the caller but C<.> and C<..> are removed by default.
+
+ my @files = read_dir( '/path/to/dir' ) ;
+
+The first argument is the path to the directory to read. If the next
+argument is a hash reference, then it is used as the options.
+Otherwise the rest of the argument list is used as key/value
+options.
+
+In list context C<read_dir> returns a list of the entries in the
+directory. In a scalar context it returns an array reference which has
+the entries.
+
+=head3 err_mode
+
+If the C<err_mode> option is set, it selects how errors are handled (see
+C<err_mode> in C<read_file> or C<write_file>).
+
+=head3 keep_dot_dot
+
+If this boolean option is set, C<.> and C<..> are not removed from the
+list of files.
+
+ my @all_files = read_dir( '/path/to/dir', keep_dot_dot => 1 ) ;
+
+=head3 prefix
+
+If this boolean option is set, the string "$dir/" is prefixed to each
+dir entry. This means you can directly use the results to open
+files. A common newbie mistake is not putting the directory in front
+of entries when opening them.
+
+ my @paths = read_dir( '/path/to/dir', prefix => 1 ) ;
+
+=head2 EXPORT
+
+ These are exported by default or with
+ use File::Slurp qw( :std ) ;
+
+ read_file write_file overwrite_file append_file read_dir
+
+ These are exported with
+ use File::Slurp qw( :edit ) ;
+
+ edit_file edit_file_lines
+
+ You can get all subs in the module exported with
+ use File::Slurp qw( :all ) ;
+
+=head2 LICENSE
+
+ Same as Perl.
+
+=head2 SEE ALSO
+
+An article on file slurping in extras/slurp_article.pod. There is
+also a benchmarking script in extras/slurp_bench.pl.
+
+=head2 BUGS
+
+If run under Perl 5.004, slurping from the DATA handle will fail as
+that requires B.pm which didn't get into core until 5.005.
+
+=head1 AUTHOR
+
+Uri Guttman, E<lt>uri AT stemsystems DOT comE<gt>
+
+=cut
diff --git a/t/TestDriver.pm b/t/TestDriver.pm
new file mode 100644
index 0000000..274e5d3
--- /dev/null
+++ b/t/TestDriver.pm
@@ -0,0 +1,91 @@
+# driver.pm - common test driver code
+
+use Test::More ;
+
+BEGIN {
+ *CORE::GLOBAL::syswrite =
+ sub($$$;$) { my( $h, $b, $s, $o ) = @_; CORE::syswrite $h, $b, $s, $o} ;
+# sub(*\$$;$) { my( $h, $b, $s, $o ) = @_; CORE::syswrite $h, $b, $s, $o } ;
+
+ *CORE::GLOBAL::sysread =
+ sub($$$;$) { my( $h, $b, $s, $o ) = @_; CORE::sysread $h, $b, $s, $o } ;
+# sub(*\$$;$) { my( $h, $b, $s, $o ) = @_; CORE::sysread $h, $b, $s, $o } ;
+
+ *CORE::GLOBAL::rename =
+ sub($$) { my( $old, $new ) = @_; CORE::rename $old, $new } ;
+
+ *CORE::GLOBAL::sysopen =
+ sub($$$;$) { my( $h, $n, $m, $p ) = @_; CORE::sysopen $h, $n, $m, $p } ;
+# sub(*$$;$) { my( $h, $n, $m, $p ) = @_; CORE::sysopen $h, $n, $m, $p } ;
+}
+
+sub test_driver {
+
+ my( $tests ) = @_ ;
+
+use Data::Dumper ;
+
+# plan for one expected ok() call per test
+
+ plan( tests => scalar @{$tests} ) ;
+
+# loop over all the tests
+
+ foreach my $test ( @{$tests} ) {
+
+#print Dumper $test ;
+
+ if ( $test->{skip} ) {
+ ok( 1, "SKIPPING $test->{name}" ) ;
+ next ;
+ }
+
+ my $override = $test->{override} ;
+
+# run any setup sub before this test. this can is used to modify the
+# object for this test or create test files and data.
+
+ if( my $pretest = $test->{pretest} ) {
+
+ $pretest->($test) ;
+ }
+
+ if( my $sub = $test->{sub} ) {
+
+ my $args = $test->{args} ;
+
+ local( $^W ) ;
+ local *{"CORE::GLOBAL::$override"} = sub {}
+ if $override ;
+
+ $test->{result} = eval { $sub->( @{$args} ) } ;
+
+ if ( $@ ) {
+
+# if we had an error and expected it, we pass this test
+
+ if ( $test->{error} &&
+ $@ =~ /$test->{error}/ ) {
+
+ $test->{ok} = 1 ;
+ }
+ else {
+ print "unexpected error: $@\n" ;
+ $test->{ok} = 0 ;
+ }
+ }
+ }
+
+ if( my $posttest = $test->{posttest} ) {
+
+ $posttest->($test) ;
+ }
+
+ ok( $test->{ok}, $test->{name} ) if exists $test->{ok} ;
+ is( $test->{result}, $test->{expected}, $test->{name} ) if
+ exists $test->{expected} ;
+
+ }
+}
+
+1 ;
diff --git a/t/append_null.t b/t/append_null.t
new file mode 100644
index 0000000..3c8b924
--- /dev/null
+++ b/t/append_null.t
@@ -0,0 +1,24 @@
+#!/usr/local/bin/perl -w
+
+use strict ;
+use File::Slurp ;
+
+use Test::More tests => 1 ;
+
+my $data = <<TEXT ;
+line 1
+more text
+TEXT
+
+my $file = 'xxx' ;
+
+unlink $file ;
+
+my $err = write_file( $file, $data ) ;
+append_file( $file, '' ) ;
+
+my $read_data = read_file( $file ) ;
+
+is( $data, $read_data ) ;
+
+unlink $file ;
diff --git a/t/binmode.t b/t/binmode.t
new file mode 100644
index 0000000..03534b8
--- /dev/null
+++ b/t/binmode.t
@@ -0,0 +1,50 @@
+#!/usr/local/bin/perl -w
+
+use strict ;
+use Test::More ;
+use Carp ;
+use File::Slurp ;
+
+BEGIN {
+ plan skip_all => 'Older Perl lacking unicode support'
+ if $] < 5.008001 ;
+}
+
+plan tests => 2 ;
+
+my $suf = 'utf8' ;
+my $mode = ":$suf" ;
+
+my $is_win32 = $^O =~ /win32/i ;
+
+my $orig_text = "\x{20ac}\n" ;
+( my $win32_text = $orig_text ) =~ s/\n/\015\012/ ;
+my $unicode_length = length $orig_text ;
+
+my $control_file = "control.$suf" ;
+my $slurp_file = "slurp.$suf" ;
+
+open( my $fh, ">$mode", $control_file ) or
+ die "cannot create control unicode file '$control_file' $!" ;
+print $fh $orig_text ;
+close $fh ;
+
+my $slurp_utf = read_file( $control_file, binmode => $mode ) ;
+my $written_text = $is_win32 ? $win32_text : $orig_text ;
+is( $slurp_utf, $written_text, "read_file of $mode file" ) ;
+
+# my $slurp_utf_length = length $slurp_utf ;
+# my $slurp_text = read_file( $control_file ) ;
+# my $slurp_text_length = length $slurp_text ;
+# print "LEN UTF $slurp_utf_length TXT $slurp_text_length\n" ;
+
+write_file( $slurp_file, {binmode => $mode}, $orig_text ) ;
+
+open( $fh, "<$mode", $slurp_file ) or
+ die "cannot open slurp test file '$slurp_file' $!" ;
+my $read_length = read( $fh, my $utf_text, $unicode_length ) ;
+close $fh ;
+
+is( $utf_text, $orig_text, "write_file of $mode file" ) ;
+
+unlink( $control_file, $slurp_file ) ;
diff --git a/t/chomp.t b/t/chomp.t
new file mode 100644
index 0000000..e14319b
--- /dev/null
+++ b/t/chomp.t
@@ -0,0 +1,53 @@
+
+use strict ;
+use warnings ;
+
+use lib qw(t) ;
+
+use File::Slurp qw( read_file write_file ) ;
+use Test::More ;
+
+use TestDriver ;
+
+my $file = 'edit_file_data' ;
+
+my $existing_data = <<PRE ;
+line 1
+line 2
+more
+foo
+bar
+junk here and foo
+last line
+PRE
+
+my $tests = [
+ {
+ name => 'read_file - chomp',
+ sub => \&read_file,
+ args => [
+ $file,
+ {
+ 'chomp' => 1,
+ array_ref => 1
+ },
+ ],
+ pretest => sub {
+ my( $test ) = @_ ;
+ write_file( $file, $existing_data ) ;
+ },
+ posttest => sub {
+ my( $test ) = @_ ;
+ $test->{ok} = eq_array(
+ $test->{result},
+ [$existing_data =~ /^(.+)\n/gm]
+ ) ;
+ },
+ },
+] ;
+
+test_driver( $tests ) ;
+
+unlink $file ;
+
+exit ;
diff --git a/t/data_list.t b/t/data_list.t
new file mode 100644
index 0000000..ac85b2e
--- /dev/null
+++ b/t/data_list.t
@@ -0,0 +1,62 @@
+#!/usr/local/bin/perl -w
+
+use strict ;
+use File::Slurp ;
+
+use Carp ;
+use POSIX qw( :fcntl_h ) ;
+use Test::More tests => 1 ;
+
+# in case SEEK_SET isn't defined in older perls. it seems to always be 0
+
+BEGIN {
+
+ *SEEK_SET = sub { 0 } unless defined \&SEEK_SET ;
+}
+
+SKIP: {
+
+ eval { require B } ;
+
+ skip <<TEXT, 1 if $@ ;
+B.pm not found in this Perl. This will cause slurping of
+the DATA handle to fail.
+TEXT
+
+ test_data_list_slurp() ;
+}
+
+exit ;
+
+
+sub test_data_list_slurp {
+
+ my $data_seek = tell( \*DATA );
+
+# first slurp in the lines
+
+ my @slurp_lines = read_file( \*DATA ) ;
+
+# now seek back and read all the lines with the <> op and we make
+# golden data sets
+
+ seek( \*DATA, $data_seek, SEEK_SET ) || die "seek $!" ;
+ my @data_lines = <DATA> ;
+
+# test the array slurp
+
+ ok( eq_array( \@data_lines, \@slurp_lines ), 'list slurp of DATA' ) ;
+}
+
+__DATA__
+line one
+second line
+more lines
+still more
+
+enough lines
+
+we can't test long handle slurps from DATA since i would have to type
+too much stuff
+
+so we will stop here
diff --git a/t/data_scalar.t b/t/data_scalar.t
new file mode 100644
index 0000000..eb24337
--- /dev/null
+++ b/t/data_scalar.t
@@ -0,0 +1,62 @@
+#!/usr/local/bin/perl -w
+
+use strict ;
+use File::Slurp ;
+
+use Carp ;
+use POSIX qw( :fcntl_h ) ;
+use Test::More tests => 1 ;
+
+# in case SEEK_SET isn't defined in older perls. it seems to always be 0
+
+BEGIN {
+
+ *SEEK_SET = sub { 0 } unless defined \&SEEK_SET ;
+}
+
+eval { require B } ;
+
+SKIP: {
+
+ skip <<TEXT, 1 if $@ ;
+B.pm not found in this Perl. Note this will cause slurping of
+the DATA handle to fail.
+TEXT
+
+ test_data_scalar_slurp() ;
+}
+
+exit ;
+
+
+
+exit ;
+
+sub test_data_scalar_slurp {
+
+ my $data_seek = tell( \*DATA );
+
+# first slurp in the text
+
+ my $slurp_text = read_file( \*DATA ) ;
+
+# now we need to get the golden data
+
+ seek( \*DATA, $data_seek, SEEK_SET ) || die "seek $!" ;
+ my $data_text = join( '', <DATA> ) ;
+
+ is( $slurp_text, $data_text, 'scalar slurp of DATA' ) ;
+}
+
+__DATA__
+line one
+second line
+more lines
+still more
+
+enough lines
+
+we can't test long handle slurps from DATA since i would have to type
+too much stuff
+
+so we will stop here
diff --git a/t/edit_file.t b/t/edit_file.t
new file mode 100644
index 0000000..240103a
--- /dev/null
+++ b/t/edit_file.t
@@ -0,0 +1,107 @@
+
+use strict ;
+use warnings ;
+
+use lib qw(t) ;
+
+use File::Slurp qw( :edit read_file write_file ) ;
+use Test::More ;
+
+use TestDriver ;
+
+my $file = 'edit_file_data' ;
+
+my $existing_data = <<PRE ;
+line 1
+line 2
+more
+foo
+bar
+junk here and foo
+last line
+PRE
+
+my $tests = [
+ {
+ name => 'edit_file - no-op',
+ sub => \&edit_file,
+ pretest => sub {
+ my( $test ) = @_ ;
+ write_file( $file, $existing_data ) ;
+ $test->{args} = [
+ sub {},
+ $file
+ ] ;
+ $test->{expected} = $existing_data ;
+ },
+ posttest => sub { $_[0]->{result} = read_file( $file ) },
+ },
+ {
+
+ name => 'edit_file - s/foo/bar/',
+ sub => \&edit_file,
+ pretest => sub {
+ my( $test ) = @_ ;
+ write_file( $file, $existing_data ) ;
+ $test->{args} = [
+ sub { s/foo/bar/g },
+ $file
+ ] ;
+ ( $test->{expected} = $existing_data )
+ =~ s/foo/bar/g ;
+ },
+ posttest => sub { $_[0]->{result} = read_file( $file ) },
+ },
+ {
+
+ name => 'edit_file - upper first words',
+ sub => \&edit_file,
+ pretest => sub {
+ my( $test ) = @_ ;
+ write_file( $file, $existing_data ) ;
+ $test->{args} = [
+ sub { s/^(\w+)/\U$1/gm },
+ $file
+ ] ;
+ ( $test->{expected} = $existing_data )
+ =~ s/^(\w+)/\U$1/gm ;
+ },
+ posttest => sub { $_[0]->{result} = read_file( $file ) },
+ },
+ {
+ name => 'edit_file_lines - no-op',
+ sub => \&edit_file_lines,
+ pretest => sub {
+ my( $test ) = @_ ;
+ write_file( $file, $existing_data ) ;
+ $test->{args} = [
+ sub {},
+ $file
+ ] ;
+ $test->{expected} = $existing_data ;
+ },
+ posttest => sub { $_[0]->{result} = read_file( $file ) },
+ },
+ {
+
+ name => 'edit_file - delete foo lines',
+ sub => \&edit_file_lines,
+ pretest => sub {
+ my( $test ) = @_ ;
+ write_file( $file, $existing_data ) ;
+ $test->{args} = [
+ sub { $_ = '' if /foo/ },
+ $file
+ ] ;
+ ( $test->{expected} = $existing_data )
+ =~ s/^.*foo.*\n//gm ;
+ },
+ posttest => sub { $_[0]->{result} = read_file( $file ) },
+ },
+] ;
+
+test_driver( $tests ) ;
+
+unlink $file ;
+
+exit ;
diff --git a/t/error.t b/t/error.t
new file mode 100644
index 0000000..a241ee5
--- /dev/null
+++ b/t/error.t
@@ -0,0 +1,125 @@
+##!/usr/local/bin/perl -w
+
+use lib qw(t) ;
+use strict ;
+use Test::More ;
+
+BEGIN {
+ plan skip_all => "these tests need Perl 5.5" if $] < 5.005 ;
+}
+
+use TestDriver ;
+use File::Slurp qw( :all prepend_file edit_file ) ;
+
+my $is_win32 = $^O =~ /cygwin|win32/i ;
+
+my $file_name = 'test_file' ;
+my $dir_name = 'test_dir' ;
+
+my $tests = [
+ {
+ name => 'read_file open error',
+ sub => \&read_file,
+ args => [ $file_name ],
+ error => qr/open/,
+ },
+ {
+ name => 'write_file open error',
+ sub => \&write_file,
+ args => [ $file_name, '' ],
+ override => 'sysopen',
+ error => qr/open/,
+ },
+ {
+ name => 'write_file syswrite error',
+ sub => \&write_file,
+ args => [ $file_name, '' ],
+ override => 'syswrite',
+ posttest => sub { unlink( $file_name ) },
+ error => qr/write/,
+ },
+ {
+ name => 'read_file small sysread error',
+ sub => \&read_file,
+ args => [ $file_name ],
+ override => 'sysread',
+ pretest => sub { write_file( $file_name, '' ) },
+ posttest => sub { unlink( $file_name ) },
+ error => qr/read/,
+ },
+ {
+ name => 'read_file loop sysread error',
+ sub => \&read_file,
+ args => [ $file_name ],
+ override => 'sysread',
+ pretest => sub { write_file( $file_name, 'x' x 100_000 ) },
+ posttest => sub { unlink( $file_name ) },
+ error => qr/read/,
+ },
+ {
+ name => 'atomic rename error',
+# this test is meaningless on Win32
+ skip => $is_win32,
+ sub => \&write_file,
+ args => [ $file_name, { atomic => 1 }, '' ],
+ override => 'rename',
+ posttest => sub { "$file_name.$$" },
+ error => qr/rename/,
+ },
+ {
+ name => 'read_dir opendir error',
+ sub => \&read_dir,
+ args => [ $dir_name ],
+ error => qr/open/,
+ },
+ {
+ name => 'prepend_file read error',
+ sub => \&prepend_file,
+ args => [ $file_name ],
+ error => qr/read_file/,
+ },
+ {
+ name => 'prepend_file write error',
+ sub => \&prepend_file,
+ pretest => sub { write_file( $file_name, '' ) },
+ args => [ $file_name, '' ],
+ override => 'syswrite',
+ error => qr/write_file/,
+ posttest => sub { unlink $file_name, "$file_name.$$" },
+ },
+ {
+ name => 'edit_file read error',
+ sub => \&edit_file,
+ args => [ sub{}, $file_name ],
+ error => qr/read_file/,
+ },
+ {
+ name => 'edit_file write error',
+ sub => \&edit_file,
+ pretest => sub { write_file( $file_name, '' ) },
+ args => [ sub{}, $file_name ],
+ override => 'syswrite',
+ error => qr/write_file/,
+ posttest => sub { unlink $file_name, "$file_name.$$" },
+ },
+ {
+ name => 'edit_file_lines read error',
+ sub => \&edit_file_lines,
+ args => [ sub{}, $file_name ],
+ error => qr/read_file/,
+ },
+ {
+ name => 'edit_file_lines write error',
+ sub => \&edit_file_lines,
+ pretest => sub { write_file( $file_name, '' ) },
+ args => [ sub{}, $file_name ],
+ override => 'syswrite',
+ error => qr/write_file/,
+ posttest => sub { unlink $file_name, "$file_name.$$" },
+ },
+] ;
+
+test_driver( $tests ) ;
+
+exit ;
+
diff --git a/t/error_mode.t b/t/error_mode.t
new file mode 100644
index 0000000..915c184
--- /dev/null
+++ b/t/error_mode.t
@@ -0,0 +1,59 @@
+##!/usr/local/bin/perl -w
+
+use strict ;
+use File::Slurp ;
+
+use Carp ;
+use Test::More tests => 9 ;
+
+my $file = 'missing/file' ;
+#unlink $file ;
+
+
+my %modes = (
+ 'croak' => \&test_croak,
+ 'carp' => \&test_carp,
+ 'quiet' => \&test_quiet,
+) ;
+
+while( my( $mode, $sub ) = each %modes ) {
+
+ $sub->( 'read_file', \&read_file, $file, err_mode => $mode ) ;
+ $sub->( 'write_file', \&write_file, $file,
+ { err_mode => $mode }, 'junk' ) ;
+ $sub->( 'read_dir', \&read_dir, $file, err_mode => $mode ) ;
+}
+
+
+sub test_croak {
+
+ my ( $name, $sub, @args ) = @_ ;
+
+ eval {
+ $sub->( @args ) ;
+ } ;
+
+ ok( $@, "$name can croak" ) ;
+}
+
+sub test_carp {
+
+ my ( $name, $sub, @args ) = @_ ;
+
+ local $SIG{__WARN__} = sub { ok( 1, "$name can carp" ) } ;
+
+ $sub->( @args ) ;
+}
+
+sub test_quiet {
+
+ my ( $name, $sub, @args ) = @_ ;
+
+ local $SIG{__WARN__} = sub { ok( 0, "$name can be quiet" ) } ;
+
+ eval {
+ $sub->( @args ) ;
+ } ;
+
+ ok( !$@, "$name can be quiet" ) ;
+}
diff --git a/t/file_object.t b/t/file_object.t
new file mode 100644
index 0000000..1a6f242
--- /dev/null
+++ b/t/file_object.t
@@ -0,0 +1,75 @@
+#!perl
+use strict;
+use Test::More;
+use File::Slurp;
+
+use IO::Handle ;
+
+use UNIVERSAL ;
+
+plan tests => 4;
+
+my $path = "data.txt";
+my $data = "random junk\n";
+
+# create an object
+my $obj = FileObject->new($path);
+isa_ok( $obj, 'FileObject' );
+is( "$obj", $path, "check that the object correctly stringifies" );
+
+my $is_glob = eval{ $obj->isa( 'GLOB' ) } ;
+#print "GLOB $is_glob\n" ;
+
+my $is_io = eval{ $obj->isa( 'IO' ) } ;
+#print "IO $is_io\n" ;
+
+my $io = IO::Handle->new() ;
+#print "IO2: $io\n" ;
+
+my $is_io2 = eval{ $io->isa( 'GLOB' ) } ;
+#print "IO2 $is_io2\n" ;
+
+open( FH, "<$0" ) or die "can't open $0: $!" ;
+
+my $io3 = *FH{IO} ;
+#print "IO3: $io3\n" ;
+
+my $is_io3 = eval{ $io3->isa( 'IO' ) } ;
+#print "IO3 $is_io3\n" ;
+
+my $io4 = *FH{GLOB} ;
+#print "IO4: $io4\n" ;
+
+my $is_io4 = eval{ $io4->isa( 'GLOB' ) } ;
+#print "IO4 $is_io4\n" ;
+
+
+SKIP: {
+ # write something to that file
+ open(FILE, ">$obj") or skip 4, "can't write to '$path': $!";
+ print FILE $data;
+ close(FILE);
+
+ # pass it to read_file()
+ my $content = eval { read_file($obj) };
+ is( $@, '', "passing an object to read_file()" );
+ is( $content, $data, "checking that the content matches the data" );
+}
+
+unlink $path;
+
+
+# the following mimics the parts from Path::Class causing
+# problems with File::Slurp
+package FileObject;
+use overload
+ q[""] => \&stringify, fallback => 1;
+
+sub new {
+ return bless { path => $_[1] }, $_[0]
+}
+
+sub stringify {
+ return $_[0]->{path}
+}
+
diff --git a/t/handle.t b/t/handle.t
new file mode 100644
index 0000000..4f26847
--- /dev/null
+++ b/t/handle.t
@@ -0,0 +1,222 @@
+#!/usr/local/bin/perl -w
+
+use strict ;
+use File::Slurp ;
+
+use Carp ;
+use POSIX qw( :fcntl_h ) ;
+use Socket ;
+use Symbol ;
+use Test::More ;
+
+# in case SEEK_SET isn't defined in older perls. it seems to always be 0
+
+BEGIN {
+ *SEEK_SET = sub() { 0 } unless defined \&SEEK_SET ;
+}
+
+my @pipe_data = (
+ '',
+ 'abc',
+ 'abc' x 100_000,
+ 'abc' x 1_000_000,
+) ;
+
+plan( tests => scalar @pipe_data ) ;
+
+#test_data_slurp() ;
+
+#test_fork_pipe_slurp() ;
+
+SKIP: {
+
+ eval { test_socketpair_slurp() } ;
+
+ skip "socketpair not found in this Perl", scalar( @pipe_data ) if $@ ;
+}
+
+sub test_socketpair_slurp {
+
+ foreach my $data ( @pipe_data ) {
+
+ my $size = length( $data ) ;
+
+ my $read_fh = gensym ;
+ my $write_fh = gensym ;
+
+ socketpair( $read_fh, $write_fh,
+ AF_UNIX, SOCK_STREAM, PF_UNSPEC);
+
+ if ( fork() ) {
+
+#warn "PARENT SOCKET\n" ;
+ close( $write_fh ) ;
+ my $read_buf = read_file( $read_fh ) ;
+
+ is( $read_buf, $data,
+ "socket slurp/spew of $size bytes" ) ;
+
+ }
+ else {
+
+#child
+#warn "CHILD SOCKET\n" ;
+ close( $read_fh ) ;
+ eval { write_file( $write_fh, $data ) } ;
+ exit() ;
+ }
+ }
+}
+
+sub test_data_slurp {
+
+ my $data_seek = tell( \*DATA );
+
+# first slurp in the lines
+ my @slurp_lines = read_file( \*DATA ) ;
+
+# now seek back and read all the lines with the <> op and we make
+# golden data sets
+
+ seek( \*DATA, $data_seek, SEEK_SET ) || die "seek $!" ;
+ my @data_lines = <DATA> ;
+ my $data_text = join( '', @data_lines ) ;
+
+# now slurp in as one string and test
+
+ sysseek( \*DATA, $data_seek, SEEK_SET ) || die "seek $!" ;
+ my $slurp_text = read_file( \*DATA ) ;
+ is( $slurp_text, $data_text, 'scalar slurp DATA' ) ;
+
+# test the array slurp
+
+ ok( eq_array( \@data_lines, \@slurp_lines ), 'list slurp of DATA' ) ;
+}
+
+sub test_fork_pipe_slurp {
+
+ foreach my $data ( @pipe_data ) {
+
+ test_to_pipe( $data ) ;
+ test_from_pipe( $data ) ;
+ }
+}
+
+
+sub test_from_pipe {
+
+ my( $data ) = @_ ;
+
+ my $size = length( $data ) ;
+
+ if ( pipe_from_fork( \*READ_FH ) ) {
+
+# parent
+ my $read_buf = read_file( \*READ_FH ) ;
+warn "PARENT read\n" ;
+
+ is( $read_buf, $data, "pipe slurp/spew of $size bytes" ) ;
+
+ close \*READ_FH ;
+# return ;
+ }
+ else {
+# child
+warn "CHILD write\n" ;
+ # write_file( \*STDOUT, $data ) ;
+ syswrite( \*STDOUT, $data, length( $data ) ) ;
+
+ close \*STDOUT;
+ exit(0);
+ }
+}
+
+
+sub pipe_from_fork {
+
+ my ( $parent_fh ) = @_ ;
+
+ my $child = gensym ;
+
+ pipe( $parent_fh, $child ) or die;
+
+ my $pid = fork();
+ die "fork() failed: $!" unless defined $pid;
+
+ if ($pid) {
+
+warn "PARENT\n" ;
+ close $child;
+ return $pid ;
+ }
+
+warn "CHILD FILENO ", fileno($child), "\n" ;
+ close $parent_fh ;
+ open(STDOUT, ">&=" . fileno($child)) or die "no fileno" ;
+
+ return ;
+}
+
+
+sub test_to_pipe {
+
+ my( $data ) = @_ ;
+
+ my $size = length( $data ) ;
+
+ if ( pipe_to_fork( \*WRITE_FH ) ) {
+
+# parent
+ syswrite( \*WRITE_FH, $data, length( $data ) ) ;
+# write_file( \*WRITE_FH, $data ) ;
+warn "PARENT write\n" ;
+
+# is( $read_buf, $data, "pipe slurp/spew of $size bytes" ) ;
+
+ close \*WRITE_FH ;
+# return ;
+ }
+ else {
+# child
+warn "CHILD read FILENO ", fileno(\*STDIN), "\n" ;
+
+ my $read_buf = read_file( \*STDIN ) ;
+ is( $read_buf, $data, "pipe slurp/spew of $size bytes" ) ;
+ close \*STDIN;
+ exit(0);
+ }
+}
+
+sub pipe_to_fork {
+ my ( $parent_fh ) = @_ ;
+
+ my $child = gensym ;
+
+ pipe( $child, $parent_fh ) or die ;
+
+ my $pid = fork();
+ die "fork() failed: $!" unless defined $pid;
+
+ if ( $pid ) {
+ close $child;
+ return $pid ;
+ }
+
+ close $parent_fh ;
+ open(STDIN, "<&=" . fileno($child)) or die;
+
+ return ;
+}
+
+__DATA__
+line one
+second line
+more lines
+still more
+
+enough lines
+
+we don't test long handle slurps from DATA since i would have to type
+too much stuff :-)
+
+so we will stop here
diff --git a/t/inode.t b/t/inode.t
new file mode 100644
index 0000000..c477baf
--- /dev/null
+++ b/t/inode.t
@@ -0,0 +1,44 @@
+#!/usr/local/bin/perl -w
+
+use strict ;
+
+use File::Slurp ;
+
+use Carp ;
+use Socket ;
+use Symbol ;
+use Test::More ;
+
+BEGIN {
+ if( $^O =~ '32' ) {
+ plan skip_all => 'skip inode test on windows';
+ exit ;
+ }
+
+ plan tests => 2 ;
+}
+
+my $data = <<TEXT ;
+line 1
+more text
+TEXT
+
+my $file = 'inode' ;
+
+write_file( $file, $data ) ;
+my $inode_num = (stat $file)[1] ;
+write_file( $file, $data ) ;
+my $inode_num2 = (stat $file)[1] ;
+
+#print "I1 $inode_num I2 $inode_num2\n" ;
+
+ok( $inode_num == $inode_num2, 'same inode' ) ;
+
+write_file( $file, {atomic => 1}, $data ) ;
+$inode_num2 = (stat $file)[1] ;
+
+#print "I1 $inode_num I2 $inode_num2\n" ;
+
+ok( $inode_num != $inode_num2, 'different inode' ) ;
+
+unlink $file ;
diff --git a/t/large.t b/t/large.t
new file mode 100644
index 0000000..3bd78b7
--- /dev/null
+++ b/t/large.t
@@ -0,0 +1,175 @@
+#!/usr/local/bin/perl -w
+
+use strict ;
+
+use Test::More ;
+use Carp ;
+use File::Slurp ;
+
+my $file = 'slurp.data' ;
+unlink $file ;
+
+my @text_data = (
+ [],
+ [ 'a' x 8 ],
+ [ ("\n") x 5 ],
+ [ map( "aaaaaaaa\n", 1 .. 3 ) ],
+ [ map( "aaaaaaaa\n", 1 .. 3 ), 'aaaaaaaa' ],
+ [ map ( 'a' x 100 . "\n", 1 .. 1024 ) ],
+ [ map ( 'a' x 100 . "\n", 1 .. 1024 ), 'a' x 100 ],
+ [ map ( 'a' x 1024 . "\n", 1 .. 1024 ) ],
+ [ map ( 'a' x 1024 . "\n", 1 .. 1024 ), 'a' x 10240 ],
+ [],
+) ;
+
+my @bin_sizes = ( 1000, 1024 * 1024 ) ;
+
+my @bin_stuff = ( "\012", "\015", "\012\015", "\015\012",
+ map chr, 0 .. 32 ) ;
+
+my @bin_data ;
+
+foreach my $size ( @bin_sizes ) {
+
+ my $data = '' ;
+
+ while ( length( $data ) < $size ) {
+
+ $data .= $bin_stuff[ rand @bin_stuff ] ;
+ }
+
+ push @bin_data, $data ;
+}
+
+plan( tests => 17 * @text_data + 8 * @bin_data ) ;
+
+#print "# text slurp\n" ;
+
+foreach my $data ( @text_data ) {
+
+ test_text_slurp( $data ) ;
+}
+
+#print "# BIN slurp\n" ;
+
+SKIP: {
+ skip "binmode not available in this version of Perl", 8 * @bin_data
+ if $] < 5.006 ;
+
+ foreach my $data ( @bin_data ) {
+
+ test_bin_slurp( $data ) ;
+ }
+}
+
+unlink $file ;
+
+exit ;
+
+sub test_text_slurp {
+
+ my( $data_ref ) = @_ ;
+
+ my @data_lines = @{$data_ref} ;
+ my $data_text = join( '', @data_lines ) ;
+
+
+ my $err = write_file( $file, $data_text ) ;
+ ok( $err, 'write_file - ' . length $data_text ) ;
+ my $text = read_file( $file ) ;
+ ok( $text eq $data_text, 'scalar read_file - ' . length $data_text ) ;
+
+ $err = write_file( $file, \$data_text ) ;
+ ok( $err, 'write_file ref arg - ' . length $data_text ) ;
+ $text = read_file( $file ) ;
+ ok( $text eq $data_text, 'scalar read_file - ' . length $data_text ) ;
+
+ $err = write_file( $file, { buf_ref => \$data_text } ) ;
+ ok( $err, 'write_file buf ref opt - ' . length $data_text ) ;
+ $text = read_file( $file ) ;
+ ok( $text eq $data_text, 'scalar read_file - ' . length $data_text ) ;
+
+ my $text_ref = read_file( $file, scalar_ref => 1 ) ;
+ ok( ${$text_ref} eq $data_text,
+ 'scalar ref read_file - ' . length $data_text ) ;
+
+ read_file( $file, buf_ref => \my $buffer ) ;
+ ok( $buffer eq $data_text,
+ 'buf_ref read_file - ' . length $data_text ) ;
+
+# my @data_lines = split( m|(?<=$/)|, $data_text ) ;
+
+ $err = write_file( $file, \@data_lines ) ;
+ ok( $err, 'write_file list ref arg - ' . length $data_text ) ;
+ $text = read_file( $file ) ;
+ ok( $text eq $data_text, 'scalar read_file - ' . length $data_text ) ;
+
+#print map "[$_]\n", @data_lines ;
+#print "DATA <@data_lines>\n" ;
+
+ my @array = read_file( $file ) ;
+
+#print map "{$_}\n", @array ;
+#print "ARRAY <@array>\n" ;
+
+ ok( eq_array( \@array, \@data_lines ),
+ 'array read_file - ' . length $data_text ) ;
+
+ print "READ:\n", map( "[$_]\n", @array ),
+ "EXP:\n", map( "[$_]\n", @data_lines )
+ unless eq_array( \@array, \@data_lines ) ;
+
+ my $array_ref = read_file( $file, array_ref => 1 ) ;
+ ok( eq_array( $array_ref, \@data_lines ),
+ 'array ref read_file - ' . length $data_text ) ;
+
+ ($array_ref) = read_file( $file, {array_ref => 1} ) ;
+ ok( eq_array( $array_ref, \@data_lines ),
+ 'array ref list context args ref read_file - ' . length $data_text ) ;
+
+ $err = write_file( $file, { append => 1 }, $data_text ) ;
+ ok( $err, 'write_file append - ' . length $data_text ) ;
+
+ my $text2 = read_file( $file ) ;
+ ok( $text2 eq $data_text x 2, 'read_file append - ' . length $data_text ) ;
+
+ $err = append_file( $file, $data_text ) ;
+ ok( $err, 'append_file - ' . length $data_text ) ;
+
+ my $bin3 = read_file( $file ) ;
+ ok( $bin3 eq $data_text x 3, 'read_file append_file - ' . length $data_text ) ;
+
+ return ;
+}
+
+sub test_bin_slurp {
+
+ my( $data ) = @_ ;
+
+ my $err = write_file( $file, {'binmode' => ':raw'}, $data ) ;
+ ok( $err, 'write_file bin - ' . length $data ) ;
+
+ my $bin = read_file( $file, 'binmode' => ':raw' ) ;
+ ok( $bin eq $data, 'scalar read_file bin - ' . length $data ) ;
+
+ my $bin_ref = read_file( $file, scalar_ref => 1, 'binmode' => ':raw' ) ;
+ ok( ${$bin_ref} eq $data,
+ 'scalar ref read_file bin - ' . length $data ) ;
+
+ read_file( $file, buf_ref => \(my $buffer), 'binmode' => ':raw' ) ;
+ ok( $buffer eq $data, 'buf_ref read_file bin - ' . length $data ) ;
+
+ $err = write_file( $file, { append => 1, 'binmode' => ':raw' }, $data ) ;
+ ok( $err, 'write_file append bin - ' . length $data ) ;
+
+ my $bin2 = read_file( $file, 'binmode' => ':raw' ) ;
+ ok( $bin2 eq $data x 2, 'read_file append bin - ' . length $data ) ;
+
+ $err = append_file( $file, { 'binmode' => ':raw' }, $data ) ;
+ ok( $err, 'append_file bin - ' . length $data ) ;
+
+ my $bin3 = read_file( $file, 'binmode' => ':raw' ) ;
+ ok( $bin3 eq $data x 3, 'read_file bin - ' . length $data ) ;
+
+ return ;
+}
diff --git a/t/newline.t b/t/newline.t
new file mode 100644
index 0000000..70e09d6
--- /dev/null
+++ b/t/newline.t
@@ -0,0 +1,52 @@
+use Test::More tests => 2 ;
+
+use strict;
+use File::Slurp ;
+
+my $data = "\r\n\r\n\r\n" ;
+my $file_name = 'newline.txt' ;
+
+stdio_write_file( $file_name, $data ) ;
+my $slurped_data = read_file( $file_name ) ;
+
+my $stdio_slurped_data = stdio_read_file( $file_name ) ;
+
+
+print 'data ', unpack( 'H*', $data), "\n",
+'slurp ', unpack('H*', $slurped_data), "\n",
+'stdio slurp ', unpack('H*', $stdio_slurped_data), "\n";
+
+is( $data, $slurped_data, 'slurp' ) ;
+
+write_file( $file_name, $data ) ;
+$slurped_data = stdio_read_file( $file_name ) ;
+
+is( $data, $slurped_data, 'spew' ) ;
+
+unlink $file_name ;
+
+sub stdio_write_file {
+
+ my( $file_name, $data ) = @_ ;
+
+ local( *FH ) ;
+
+ open( FH, ">$file_name" ) || die "Couldn't create $file_name: $!";
+
+ print FH $data ;
+}
+
+sub stdio_read_file {
+
+ my( $file_name ) = @_ ;
+
+ open( FH, $file_name ) || die "Couldn't open $file_name: $!";
+
+ local( $/ ) ;
+
+ my $data = <FH> ;
+
+ return $data ;
+}
+
+
diff --git a/t/no_clobber.t b/t/no_clobber.t
new file mode 100644
index 0000000..0251a1c
--- /dev/null
+++ b/t/no_clobber.t
@@ -0,0 +1,26 @@
+#!/usr/local/bin/perl -w
+
+use strict ;
+use File::Slurp ;
+
+use Test::More tests => 2 ;
+
+
+my $data = <<TEXT ;
+line 1
+more text
+TEXT
+
+my $file = 'xxx' ;
+
+unlink $file ;
+
+
+my $err = write_file( $file, { no_clobber => 1 }, $data ) ;
+ok( $err, 'new write_file' ) ;
+
+$err = write_file( $file, { no_clobber => 1, err_mode => 'quiet' }, $data ) ;
+
+ok( !$err, 'no_clobber write_file' ) ;
+
+unlink $file ;
diff --git a/t/original.t b/t/original.t
new file mode 100644
index 0000000..aa2a98f
--- /dev/null
+++ b/t/original.t
@@ -0,0 +1,55 @@
+#!/usr/bin/perl -I.
+
+# try to honor possible tempdirs
+$tmp = "file_$$";
+
+$short = <<END;
+small
+file
+END
+
+$long = <<END;
+This is a much longer bit of contents
+to store in a file.
+END
+
+print "1..7\n";
+
+use File::Slurp;
+
+&write_file($tmp, $long);
+if (&read_file($tmp) eq $long) {print "ok 1\n";} else {print "not ok 1\n";}
+
+@x = &read_file($tmp);
+@y = grep( $_ ne '', split(/(.*?\n)/, $long));
+while (@x && @y) {
+ last unless $x[0] eq $y[0];
+ shift @x;
+ shift @y;
+}
+if (@x == @y && (@x ? $x[0] eq $y[0] : 1)) { print "ok 2\n";} else {print "not ok 2\n"}
+
+&append_file($tmp, $short);
+if (&read_file($tmp) eq "$long$short") {print "ok 3\n";} else {print "not ok 3\n";}
+
+$iold = (stat($tmp))[1];
+&overwrite_file($tmp, $short);
+$inew = (stat($tmp))[1];
+
+if (&read_file($tmp) eq $short) {print "ok 4\n";} else {print "not ok 4\n";}
+
+if ($inew == $iold) {print "ok 5\n";} else {print "not ok 5\n";}
+
+unlink($tmp);
+
+&overwrite_file($tmp, $long);
+if (&read_file($tmp) eq $long) {print "ok 6\n";} else {print "not ok 6\n";}
+
+unlink($tmp);
+
+&append_file($tmp, $short);
+if (&read_file($tmp) eq $short) {print "ok 7\n";} else {print "not ok 7\n";}
+
+unlink($tmp);
+
+
diff --git a/t/paragraph.t b/t/paragraph.t
new file mode 100644
index 0000000..62cbad7
--- /dev/null
+++ b/t/paragraph.t
@@ -0,0 +1,64 @@
+#!/usr/local/bin/perl -w
+
+use strict ;
+
+use File::Slurp ;
+use Test::More ;
+use Carp ;
+
+
+my $file = 'slurp.data' ;
+unlink $file ;
+
+my @text_data = (
+ [],
+ [ 'a' x 8 ],
+ [ "\n" x 5 ],
+ [ map( "aaaaaaaa\n\n", 1 .. 3 ) ],
+ [ map( "aaaaaaaa\n\n", 1 .. 3 ), 'aaaaaaaa' ],
+ [ map( "aaaaaaaa" . ( "\n" x (2 + rand 3) ), 1 .. 100 ) ],
+ [ map( "aaaaaaaa" . ( "\n" x (2 + rand 3) ), 1 .. 100 ), 'aaaaaaaa' ],
+ [],
+) ;
+
+plan( tests => 3 * @text_data ) ;
+
+#print "# text slurp\n" ;
+
+foreach my $data ( @text_data ) {
+
+ test_text_slurp( $data ) ;
+}
+
+
+unlink $file ;
+
+exit ;
+
+sub test_text_slurp {
+
+ my( $data_ref ) = @_ ;
+
+ my @data_lines = @{$data_ref} ;
+ my $data_text = join( '', @data_lines ) ;
+
+ local( $/ ) = '' ;
+
+ my $err = write_file( $file, $data_text ) ;
+ ok( $err, 'write_file - ' . length $data_text ) ;
+
+
+ my @array = read_file( $file ) ;
+ ok( eq_array( \@array, \@data_lines ),
+ 'array read_file - ' . length $data_text ) ;
+
+ print "READ:\n", map( "[$_]\n", @array ),
+ "EXP:\n", map( "[$_]\n", @data_lines )
+ unless eq_array( \@array, \@data_lines ) ;
+
+ my $array_ref = read_file( $file, array_ref => 1 ) ;
+ ok( eq_array( $array_ref, \@data_lines ),
+ 'array ref read_file - ' . length $data_text ) ;
+
+ return ;
+}
diff --git a/t/perms.t b/t/perms.t
new file mode 100644
index 0000000..4cd01fa
--- /dev/null
+++ b/t/perms.t
@@ -0,0 +1,31 @@
+#!/usr/local/bin/perl -w
+
+use strict ;
+use Test::More ;
+use File::Slurp ;
+
+plan skip_all => "meaningless on Win32" if $^O =~ /win32/i ;
+plan tests => 2 ;
+
+my $file = "perms.$$" ;
+
+my $text = <<END ;
+This is a bit of contents
+to store in a file.
+END
+
+umask 027 ;
+
+write_file( $file, $text ) ;
+is( getmode( $file ), 0640, 'default perms works' ) ;
+unlink $file ;
+
+write_file( $file, { perms => 0777 }, $text ) ;
+is( getmode( $file ), 0750, 'set perms works' ) ;
+unlink $file ;
+
+exit ;
+
+sub getmode {
+ return 07777 & (stat $_[0])[2] ;
+}
diff --git a/t/pod.t b/t/pod.t
new file mode 100644
index 0000000..37fdead
--- /dev/null
+++ b/t/pod.t
@@ -0,0 +1,13 @@
+#!/usr/local/bin/perl
+
+use Test::More;
+
+eval 'use Test::Pod 1.14' ;
+plan skip_all =>
+ 'Test::Pod 1.14 required for testing POD' if $@ ;
+
+all_pod_files_ok(
+# {
+# trustme => [ qr/slurp/ ]
+# }
+) ;
diff --git a/t/pod_coverage.t b/t/pod_coverage.t
new file mode 100644
index 0000000..0026d96
--- /dev/null
+++ b/t/pod_coverage.t
@@ -0,0 +1,24 @@
+#!/usr/local/bin/perl
+
+use Test::More;
+
+eval 'use Test::Pod::Coverage 1.04' ;
+plan skip_all =>
+ 'Test::Pod::Coverage 1.04 required for testing POD coverage' if $@ ;
+
+all_pod_coverage_ok(
+ {
+ trustme => [
+ 'slurp',
+ 'O_APPEND',
+ 'O_BINARY',
+ 'O_CREAT',
+ 'O_EXCL',
+ 'O_RDONLY',
+ 'O_WRONLY',
+ 'SEEK_CUR',
+ 'SEEK_END',
+ 'SEEK_SET',
+ ],
+ }
+) ;
diff --git a/t/prepend_file.t b/t/prepend_file.t
new file mode 100644
index 0000000..ec3c8bb
--- /dev/null
+++ b/t/prepend_file.t
@@ -0,0 +1,74 @@
+
+use strict ;
+use warnings ;
+
+use lib qw(t) ;
+
+use File::Slurp qw( read_file write_file prepend_file ) ;
+use Test::More ;
+
+use TestDriver ;
+
+my $file = 'prepend_file' ;
+my $existing_data = <<PRE ;
+line 1
+line 2
+more
+PRE
+
+my $tests = [
+ {
+ name => 'prepend null',
+ sub => \&prepend_file,
+ prepend_data => '',
+ pretest => sub {
+ my( $test ) = @_ ;
+ write_file( $file, $existing_data ) ;
+ my $prepend_data = $test->{prepend_data} ;
+ $test->{args} = [
+ $file,
+ $prepend_data,
+ ] ;
+ $test->{expected} = "$prepend_data$existing_data" ;
+ },
+ posttest => sub { $_[0]->{result} = read_file( $file ) },
+ },
+ {
+ name => 'prepend line',
+ sub => \&prepend_file,
+ prepend_data => "line 0\n",
+ pretest => sub {
+ my( $test ) = @_ ;
+ write_file( $file, $existing_data ) ;
+ my $prepend_data = $test->{prepend_data} ;
+ $test->{args} = [
+ $file,
+ $prepend_data,
+ ] ;
+ $test->{expected} = "$prepend_data$existing_data" ;
+ },
+ posttest => sub { $_[0]->{result} = read_file( $file ) },
+ },
+ {
+ name => 'prepend partial line',
+ sub => \&prepend_file,
+ prepend_data => 'partial line',
+ pretest => sub {
+ my( $test ) = @_ ;
+ write_file( $file, $existing_data ) ;
+ my $prepend_data = $test->{prepend_data} ;
+ $test->{args} = [
+ $file,
+ $prepend_data,
+ ] ;
+ $test->{expected} = "$prepend_data$existing_data" ;
+ },
+ posttest => sub { $_[0]->{result} = read_file( $file ) },
+ },
+] ;
+
+test_driver( $tests ) ;
+
+unlink $file ;
+
+exit ;
diff --git a/t/pseudo.t b/t/pseudo.t
new file mode 100644
index 0000000..5deda84
--- /dev/null
+++ b/t/pseudo.t
@@ -0,0 +1,34 @@
+#!/usr/local/bin/perl -w
+
+use strict ;
+
+use File::Slurp ;
+use Carp ;
+use Test::More ;
+
+plan( tests => 1 ) ;
+
+my $proc_file = "/proc/$$/auxv" ;
+
+SKIP: {
+
+ unless ( -r $proc_file ) {
+
+ skip "can't find pseudo file $proc_file", 1 ;
+ }
+
+ test_pseudo_file() ;
+}
+
+sub test_pseudo_file {
+
+ my $data_do = do{ local( @ARGV, $/ ) = $proc_file; <> } ;
+
+#print "LEN: ", length $data_do, "\n" ;
+
+ my $data_slurp = read_file( $proc_file ) ;
+#print "LEN2: ", length $data_slurp, "\n" ;
+#print "LEN3: ", -s $proc_file, "\n" ;
+
+ is( $data_do, $data_slurp, 'pseudo' ) ;
+}
diff --git a/t/read_dir.t b/t/read_dir.t
new file mode 100644
index 0000000..d04351f
--- /dev/null
+++ b/t/read_dir.t
@@ -0,0 +1,66 @@
+#!/usr/bin/perl -w -I.
+
+use strict ;
+use Test::More tests => 9 ;
+
+use File::Slurp ;
+
+# try to honor possible tempdirs
+
+my $test_dir = "read_dir_$$" ;
+
+mkdir( $test_dir, 0700) || die "mkdir $test_dir: $!" ;
+
+my @dir_entries = read_dir( $test_dir );
+
+ok( @dir_entries == 0, 'empty dir' ) ;
+
+@dir_entries = read_dir( $test_dir, keep_dot_dot => 1 ) ;
+
+ok( @dir_entries == 2, 'empty dir with . ..' ) ;
+
+@dir_entries = read_dir( $test_dir, { keep_dot_dot => 1 } ) ;
+
+ok( @dir_entries == 2, 'empty dir with . .. - args ref' ) ;
+
+write_file( "$test_dir/x", "foo\n" ) ;
+
+@dir_entries = read_dir( $test_dir ) ;
+
+ok( @dir_entries == 1, 'dir with 1 file' ) ;
+
+ok( $dir_entries[0] eq 'x', 'dir with file x' ) ;
+
+my $file_cnt = 23 ;
+
+my @expected_entries = sort( 'x', 1 .. $file_cnt ) ;
+
+for ( 1 .. $file_cnt ) {
+
+ write_file( "$test_dir/$_", "foo\n" ) ;
+}
+
+@dir_entries = read_dir( $test_dir ) ;
+@dir_entries = sort @dir_entries ;
+
+ok( eq_array( \@dir_entries, \@expected_entries ),
+ "dir with $file_cnt files" ) ;
+
+my $dir_entries_ref = read_dir( $test_dir ) ;
+@{$dir_entries_ref} = sort @{$dir_entries_ref} ;
+
+ok( eq_array( $dir_entries_ref, \@expected_entries ),
+ "dir in array ref" ) ;
+
+my @prefixed_entries = read_dir( $test_dir, {prefix => 1} ) ;
+@prefixed_entries = sort @prefixed_entries ;
+ok( eq_array( \@prefixed_entries, [map "$test_dir/$_", @dir_entries] ),
+ 'prefix option' ) ;
+
+# clean up
+
+unlink map "$test_dir/$_", @dir_entries ;
+rmdir( $test_dir ) || die "rmdir $test_dir: $!";
+ok( 1, 'cleanup' ) ;
+
+__END__
diff --git a/t/signal.t b/t/signal.t
new file mode 100644
index 0000000..2c692d0
--- /dev/null
+++ b/t/signal.t
@@ -0,0 +1,34 @@
+#!/usr/local/bin/perl -w
+
+use strict ;
+use File::Slurp qw(read_file);
+
+use Carp ;
+use Test::More ;
+
+BEGIN {
+	if( $^O =~ /MSWin32/ ) {
+ plan skip_all => 'skip signal test on windows';
+ exit ;
+ }
+
+ plan tests => 1 ;
+}
+
+$SIG{CHLD} = sub {};
+
+pipe(IN, OUT);
+
+print "forking\n";
+if (!fork) {
+ sleep 1;
+ exit;
+}
+if (!fork) {
+ sleep 2;
+ print OUT "success";
+ exit;
+}
+close OUT;
+my $data = read_file(\*IN);
+is ($data, "success", "handle EINTR failed");
diff --git a/t/slurp.t b/t/slurp.t
new file mode 100644
index 0000000..3ba53e3
--- /dev/null
+++ b/t/slurp.t
@@ -0,0 +1,19 @@
+#!/usr/local/bin/perl -w -T
+
+use strict ;
+use File::Slurp qw( write_file slurp ) ;
+
+use Test::More tests => 1 ;
+
+my $data = <<TEXT ;
+line 1
+more text
+TEXT
+
+my $file = 'xxx' ;
+
+write_file( $file, $data ) ;
+my $read_buf = slurp( $file ) ;
+is( $read_buf, $data, 'slurp alias' ) ;
+
+unlink $file ;
diff --git a/t/stdin.t b/t/stdin.t
new file mode 100644
index 0000000..071e0d3
--- /dev/null
+++ b/t/stdin.t
@@ -0,0 +1,23 @@
+#!/usr/local/bin/perl -w
+
+use strict ;
+use File::Slurp ;
+
+use Carp ;
+use Socket ;
+use Symbol ;
+use Test::More tests => 6 ;
+
+my $data = <<TEXT ;
+line 1
+more text
+TEXT
+
+foreach my $file ( qw( stdin STDIN stdout STDOUT stderr STDERR ) ) {
+
+ write_file( $file, $data ) ;
+ my $read_buf = read_file( $file ) ;
+	is( $read_buf, $data, "read/write of file [$file]" ) ;
+
+ unlink $file ;
+}
diff --git a/t/stringify.t b/t/stringify.t
new file mode 100644
index 0000000..c3809cb
--- /dev/null
+++ b/t/stringify.t
@@ -0,0 +1,45 @@
+#!perl -T
+
+use strict;
+
+use Test::More;
+use File::Slurp;
+use IO::Handle ;
+use UNIVERSAL ;
+
+plan tests => 3 ;
+
+my $path = "data.txt";
+my $data = "random junk\n";
+
+# create an object with an overloaded path
+
+my $obj = FileObject->new( $path ) ;
+
+isa_ok( $obj, 'FileObject' ) ;
+is( "$obj", $path, "object stringifies to path" );
+
+write_file( $obj, $data ) ;
+
+my $read_text = read_file( $obj ) ;
+is( $read_text, $data, 'read_file of stringified object' ) ;
+
+unlink $path ;
+
+exit ;
+
+# this code creates the object which has a stringified path
+
+package FileObject;
+
+use overload
+ q[""] => \&stringify,
+ fallback => 1 ;
+
+sub new {
+ return bless { path => $_[1] }, $_[0]
+}
+
+sub stringify {
+ return $_[0]->{path}
+}
diff --git a/t/tainted.t b/t/tainted.t
new file mode 100644
index 0000000..6805d48
--- /dev/null
+++ b/t/tainted.t
@@ -0,0 +1,69 @@
+#!perl -T
+
+use strict;
+use Test::More;
+use File::Slurp;
+
+plan 'skip_all', "Scalar::Util not available" unless
+ eval 'use Scalar::Util qw(tainted) ; tainted($0) ; 1';
+
+plan 'tests', 5;
+
+my $path = "data.txt";
+my $data = "random junk\nline2";
+
+SKIP: {
+ # write something to that file
+    open(FILE, ">$path") or skip 5, "can't write to '$path': $!";
+ print FILE $data;
+ close(FILE);
+
+ # read the file using File::Slurp in scalar context
+ my $content = eval { read_file($path) };
+ is( $@, '', "read_file() in scalar context" );
+ ok( tainted($content), " => returned content should be tainted" );
+
+
+# # reconstruct the full lines by merging items by pairs
+# for my $k (0..int($#lines/2)) {
+# my $i = $k * 2;
+# $lines[$k] = (defined $lines[$i] ? $lines[$i] : '')
+# . (defined $lines[$i+1] ? $lines[$i+1] : '');
+# }
+
+# # remove the rest of the items
+# splice(@lines, int($#lines/2)+1);
+# pop @lines unless $lines[-1];
+
+# $_ .= $/ for @lines ;
+
+# my @lines = split m{$/}, $content, -1;
+# my @parts = split m{($/)}, $content, -1;
+
+# # my @parts = $content =~ m{.+?(?:$/)?}g ;
+
+# my @lines ;
+# while( @parts > 2 ) {
+
+# my( $line, $sep ) = splice( @parts, 0, 2 ) ;
+# push @lines, "$line$sep" ;
+# }
+
+# push @lines, shift @parts if @parts ;
+
+# # ok( tainted($lines[0]), " text => returned content should be tainted" );
+
+ # read the file using File::Slurp in list context
+ my @content = eval { read_file($path) };
+ is( $@, '', "read_file() in list context" );
+ ok( tainted($content[0]), " => returned content should be tainted" );
+
+ my $text = join( '', @content ) ;
+
+ is( $text, $content, "list eq scalar" );
+
+
+# ok( tainted($lines[0]), " => returned content should be tainted" );
+}
+
+unlink $path;
diff --git a/t/write_file_win32.t b/t/write_file_win32.t
new file mode 100644
index 0000000..1e42456
--- /dev/null
+++ b/t/write_file_win32.t
@@ -0,0 +1,29 @@
+use strict;
+use File::Slurp ;
+
+use Test::More tests => 1;
+
+BEGIN { $^W = 1 }
+
+sub simple_write_file {
+ open FH, ">$_[0]" or die "Couldn't open $_[0] for write: $!";
+ print FH $_[1];
+ close FH ;
+}
+
+sub newline_size {
+ my ($code) = @_;
+
+ my $file = __FILE__ . '.tmp';
+
+ local $\ = '';
+ $code->($file, "\n" x 3);
+
+ my $size = -s $file;
+
+ unlink $file;
+
+ return $size;
+}
+
+is(newline_size(\&write_file), newline_size(\&simple_write_file), 'newline');