diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2011-06-07 08:06:16 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2011-06-07 08:06:16 +0000 |
commit | 9d4173f2716c2f9a2d26f8f9ab0f47b351b87de7 (patch) | |
tree | 98ae3fb82096d8bb96686512ada27fa72126b09a | |
download | File-Slurp-tarball-9d4173f2716c2f9a2d26f8f9ab0f47b351b87de7.tar.gz |
File-Slurp-9999.19HEADFile-Slurp-9999.19master
-rw-r--r-- | Changes | 167 | ||||
-rw-r--r-- | MANIFEST | 39 | ||||
-rw-r--r-- | META.yml | 17 | ||||
-rw-r--r-- | Makefile.PL | 22 | ||||
-rw-r--r-- | README | 37 | ||||
-rw-r--r-- | TODO | 33 | ||||
-rw-r--r-- | extras/FileSlurp_12.pm | 260 | ||||
-rw-r--r-- | extras/slurp_article.pod | 743 | ||||
-rwxr-xr-x | extras/slurp_bench.pl | 587 | ||||
-rwxr-xr-x | lib/File/Slurp.pm | 1261 | ||||
-rw-r--r-- | t/TestDriver.pm | 91 | ||||
-rw-r--r-- | t/append_null.t | 24 | ||||
-rw-r--r-- | t/binmode.t | 50 | ||||
-rw-r--r-- | t/chomp.t | 53 | ||||
-rw-r--r-- | t/data_list.t | 62 | ||||
-rw-r--r-- | t/data_scalar.t | 62 | ||||
-rw-r--r-- | t/edit_file.t | 107 | ||||
-rw-r--r-- | t/error.t | 125 | ||||
-rw-r--r-- | t/error_mode.t | 59 | ||||
-rw-r--r-- | t/file_object.t | 75 | ||||
-rw-r--r-- | t/handle.t | 222 | ||||
-rw-r--r-- | t/inode.t | 44 | ||||
-rw-r--r-- | t/large.t | 175 | ||||
-rw-r--r-- | t/newline.t | 52 | ||||
-rw-r--r-- | t/no_clobber.t | 26 | ||||
-rw-r--r-- | t/original.t | 55 | ||||
-rw-r--r-- | t/paragraph.t | 64 | ||||
-rw-r--r-- | t/perms.t | 31 | ||||
-rw-r--r-- | t/pod.t | 13 | ||||
-rw-r--r-- | t/pod_coverage.t | 24 | ||||
-rw-r--r-- | t/prepend_file.t | 74 | ||||
-rw-r--r-- | t/pseudo.t | 34 | ||||
-rw-r--r-- | t/read_dir.t | 66 | ||||
-rw-r--r-- | t/signal.t | 34 | ||||
-rw-r--r-- | t/slurp.t | 19 | ||||
-rw-r--r-- | t/stdin.t | 23 | ||||
-rw-r--r-- | t/stringify.t | 45 | ||||
-rw-r--r-- | t/tainted.t | 69 | ||||
-rw-r--r-- | t/write_file_win32.t | 29 |
39 files changed, 4973 insertions, 0 deletions
@@ -0,0 +1,167 @@ +Revision history File::Slurp + +9999.19 Tue Jun 7 04:06:06 EDT 2011 + - Fixed use line in t/edit_file.t to import :edit first + Thanks to paul + - read_file and write_file work even when interrupted by signals + this includes a test for read_file interrupt + Thanks to Andrew Danforth + - Fixed bugs in the config synopsis example + +9999.18 Fri May 13 02:30:05 EDT 2011 + - Added :std and :edit export tags + - Cleaned up EXPORT vars + - Documented importing edit_file and edit_file_lines + - Fixed some pod spelling + +9999.17 Wed Apr 27 02:20:03 EDT 2011 + - Requiring Perl 5.6.2 (first time older Perls were dropped) + This is because of use of the re 'taint' pragma + - Added major new features: edit_file and edit_file_lines + - Speed up of tainted slurp with return of lines + - Added chomp option to read_file + - Added prefix option to read_dir + - Fixed optimization of reading small files. + +9999.16 Wed Apr 13 03:47:26 EDT 2011 + - Added support for read_file options to be a hash reference. + - Added support for read_dir options to be a hash reference. + - Added new feature prepend_file + - Fixed bug with array_ref in list context. was introduced by .15/.14 + Thanks to Norbert Gruener + - Cleaned up some pod + +9999.15 Thu Mar 24 16:40:19 EDT 2011 + - Fixed error.t test so it works when run as root + - Removed skip lines from error.t + - Fixed pod about binmode option to reflect changes in .14 + +9999.14 Sun Mar 20 16:26:47 EDT 2011 + - Added LICENCE (same as perl) to POD + - Added special faster code to slurp in small text files which + is a common case + - Rewrote the extras/slurp_bench.pl script. It has a full + legend, better CLI options, size is selectable, benchmark + entries have more consistant names and it compares the new + fast slurp for small files to the general slurp code. + Thanks to Mark Friendlich + - Cleaned up pod + - Added more Synopsis examples + - Added t/error.t to actually test error conditions. 
Previous + error.t was renamed to error_mode.t which better reflects its + tests. + - t/error.t uses a new test driver module. this may get used by + other tests in the future. + - Fixed check for SEEK_SET and other constant subs being defined + - Added support for binmode other than :raw and binmode.t test + Thanks to Martin J. Evans, Peter Edwards, Bryce Nesbitt + - Added support for perms option in write_file and perms.t test + Thanks to Peter Corlett and Paul Miller + - Added check to the rename call in atomic mode. Tested in error.t. + Thanks to Daniel Scott Sterling + - Added POD to state that using scalar_ref or buf_ref will be faster + and save memory due to not making a copy + Thanks to Peter Edwards + - read_file in list mode keeps data tainted + Thanks to Sébastien Aperghis-Tramoni + - read_file checks for an overloaded object to get the file + name. + Thanks to Sébastien Aperghis-Tramoni + +9999.13 Tue Oct 10 02:04:51 EDT 2006 + - Refactored the extras/slurp_bench.pl script. It has options, + a key the benchmarks, help and more benchmarks. + - Reordered changes so recent entries are first + - Added error check on atomic rename and test for it + Thanks to Daniel Scott Sterling + +9999.12 Thu Feb 2 02:26:31 EST 2006 + - Fixed bug on windows with classic slurping and File::Slurp not + agreeing on newline conversion. + - Added t/newline.t test to check for that fix. + - When passing text data by scalar reference to write_file under + windows, the buffer is copied so the newline conversion won't + modify the caller's data. + - Thanks to Johan Lodin <lodin@cpan.org> for a test script which + I modified into t/newline.t + +9999.11 Fri Jan 20 01:24:00 EDT 2005 + - Quick release to remove code that forced the faked SEEK_* + values to be used. Showed up when tested on OSX which doesn't + need that backport. 
+ +9999.10 Thu Jan 19 11:38:00 EDT 2005 + - t/*.t modules don't use Fcntl.pm + - using POSIX qw( :fcntl_h ) instead of Fcntl qw( :seek ) for + backwards compatibility to 5.00503 + - added conditional definitions of SEEK_* and O_* subs as they are not + defined in perl 5.004 + - File::Slurp now runs on perl 5.004 and newer (see BUGS section) + All of the above thanks to Smylers <Smylers@stripey.com>, + Piers Kent <piers.kent@bbc.co.uk> and + John Alden <john.alden@bbc.co.uk> + - Added pod.t and pod_coverage.t tests. This is to pass all + the CPANTS tests. + +9999.09 Tue Apr 19 01:21:55 EDT 2005 + - t/original.t and read_dir.t no longer search for tempdirs. they just + use the current dir which should be in the build directory + - t/readdir.t renamed to read_dir.t for consistency + - write_file return values are documented + Thanks to Adam Kennedy <adamk@cpan.org> + - added no_clobber option to write_file and t/no_clobber.t test for it + Thanks to <pagaltzis@gmx.de> + - fixed bug when appending a null string to a file which then + truncates it. seems to be an odd way for linux and OS X to + handle O_APPEND mode on sysopen. they don't seek to the end of + the file so it gets truncated. fixed by adding a seek to the + end if in append mode. + Thanks to Chris Dolan <cdolan@cpan.org> + +9999.08 Sat Apr 16 01:01:27 EDT 2005 + - read_dir returns an array ref in scalar context + - read_dir keeps . and .. if keep_dot_dot option is set. + Thanks to John Alden <john.alden@bbc.co.uk> + - slurp() is an optional exported alias to read_file + Thanks to Damian Conway <damian@conway.org> + +9999.07 Tue Jan 25 01:33:11 EST 2005 + - Slurping in pseudo files (as in /proc) which show a size of 0 + but actually have data works. This seems to be the case on + linux but on Solaris those files show their proper size. + Thanks to Juerd Waalboer <juerd@cpan.org> + +9999.06 Mon Sep 20 01:57:00 EDT 2004 + - Slurping the DATA handle now works without the workaround. 
+ tests are in t/data_scalar.t and t/data_list.t + - Paragraph mode in read_file is supported. As with <> when $/ + (input record separator) is set to '', then the input file is + split on multiple newlines (/\n\n+/). + Thanks to Geoffrey Leach <geoff@direcway.com> + +9999.05 Tue Feb 24 21:14:55 EST 2004 + - skip handle tests where socketpair is not supported (pre 5.8 + on windows) + Thanks to Mike Arms <marms@sandia.gov> + +9999.04 Mon Feb 23 14:20:52 EST 2004 + - fixed DATA handle bug in t/handle.t (not seen on most OS's) + Thanks to James Willmore <jwillmore@adelphia.net> + +9999.03 Mon Dec 22 01:44:43 EST 2003 + - fixed DATA handle bugs in t/handle.t on osx (should be fixed + on BSD as well) + - added more comments to code + +9999.02 Wed Dec 17 03:40:49 EST 2003 + - skip DATA test in handle.t on OSX (bug in perl with sysread on DATA) + - changed checking if file handle from fileno to ref + from Randal Schwartz <merlyn@stonehenge.com> + - added support for atomic spewing + - added new test stdin.t for the fileno/ref change + - added new test inode.t to test atomic spewing + +9999.01 Mon Sep 1 00:20:56 2003 + - original version; created by h2xs 1.21 with options + -AX -n File::FastSlurp + diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..f4c0506 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,39 @@ +Changes +lib/File/Slurp.pm +Makefile.PL +MANIFEST +README +TODO +t/TestDriver.pm +t/append_null.t +t/binmode.t +t/chomp.t +t/data_list.t +t/data_scalar.t +t/edit_file.t +t/error.t +t/error_mode.t +t/file_object.t +t/handle.t +t/inode.t +t/large.t +t/newline.t +t/no_clobber.t +t/original.t +t/paragraph.t +t/perms.t +t/pod.t +t/pod_coverage.t +t/prepend_file.t +t/pseudo.t +t/read_dir.t +t/signal.t +t/slurp.t +t/stdin.t +t/stringify.t +t/tainted.t +t/write_file_win32.t +extras/slurp_bench.pl +extras/FileSlurp_12.pm +extras/slurp_article.pod +META.yml Module meta-data (added by MakeMaker) diff --git a/META.yml b/META.yml new file mode 100644 index 
0000000..2ab5ccf --- /dev/null +++ b/META.yml @@ -0,0 +1,17 @@ +--- #YAML:1.0 +name: File-Slurp +version: 9999.19 +abstract: Simple and Efficient Reading/Writing/Modifying of Complete Files +license: perl +author: + - Uri Guttman <uri@stemsystems.com> +generated_by: ExtUtils::MakeMaker version 6.42 +distribution_type: module +requires: + Carp: 0 + Exporter: 0 + Fcntl: 0 + POSIX: 0 +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.3.html + version: 1.3 diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..364dc45 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,22 @@ +use strict ; +use ExtUtils::MakeMaker; +# See lib/ExtUtils/MakeMaker.pm for details of how to influence +# the contents of the Makefile that is written. +WriteMakefile( + 'NAME' => 'File::Slurp', + 'LICENSE' => 'perl', + 'AUTHOR' => 'Uri Guttman <uri@stemsystems.com>', + 'VERSION_FROM' => 'lib/File/Slurp.pm', + 'ABSTRACT_FROM' => 'lib/File/Slurp.pm', + 'META_MERGE' => { + requires => { + perl => 5.004, + }, + }, + 'PREREQ_PM' => { + 'Carp' => 0, + 'Exporter' => 0, + 'Fcntl' => 0, + 'POSIX' => 0, + }, +); @@ -0,0 +1,37 @@ +File::Slurp.pm +=========================== + +This module provides subroutines to read or write entire files with a +simple call. It also has a subroutine for reading the list of filenames +in a directory. + +In the extras/ directory you can read an article (slurp_article.pod) +about file slurping and also run a benchmark (slurp_bench.pl) that +compares many ways of slurping/spewing files. This benchmark was +rewritten for .14 and is much better. + +This module was first written and owned by David Muir Sharnoff (MUIR on +CPAN). I checked out his module and decided to write a new version +which would be faster and with many more features. To that end, David +graciously transfered the namespace to me. + +There have been some comments about the somewhat unusual version number. 
+The problem was that David used a future date (2004.0904) in his version +number, and the only way I could get CPAN to index my new module was to +make it have a version number higher than the old one, so I chose the +9999 prefix and appended the real revision number to it. + +INSTALLATION + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +COPYRIGHT AND LICENCE + +Copyright (C) 2010 Uri Guttman <uri@PerlHunter.com> + +Licensed the same as Perl. @@ -0,0 +1,33 @@ + +File::Slurp TODO + +NEW FEATURES + +prepend_file() -- prepend text to the front of a file + + options: lock file? enable atomic + +edit_file() -- slurp into $_, call edit code block, write out $_ + + options: lock file? + +edit_file_lines() -- slurp each line into $_, call edit code block, + write out $_ + + options: lock file? + +read_file_lines() + reads lines to array ref or list + same as $list = read_file( $file, { array_ref => 1 } + or @lines = read_file() + +new options for read_dir + prepend -- prepend the dir name to each dir entry. + filter -- grep dir entries with qr// or code ref. + +BUGS: + +restart sysread/write after a signal (or check i/o count) + +FEATURE REQUESTS + diff --git a/extras/FileSlurp_12.pm b/extras/FileSlurp_12.pm new file mode 100644 index 0000000..5f24792 --- /dev/null +++ b/extras/FileSlurp_12.pm @@ -0,0 +1,260 @@ +package FileSlurp_12; + +use strict; + +use Carp ; +use Fcntl qw( :DEFAULT ) ; +use POSIX qw( :fcntl_h ) ; +use Symbol ; + +use base 'Exporter' ; +use vars qw( %EXPORT_TAGS @EXPORT_OK $VERSION @EXPORT ) ; + +%EXPORT_TAGS = ( 'all' => [ + qw( read_file write_file overwrite_file append_file read_dir ) ] ) ; + +@EXPORT = ( @{ $EXPORT_TAGS{'all'} } ); +@EXPORT_OK = qw( slurp ) ; + +$VERSION = '9999.13'; + +my $is_win32 = $^O =~ /win32/i ; + +# Install subs for various constants that aren't set in older perls +# (< 5.005). 
Fcntl on old perls uses Exporter to define subs without a +# () prototype These can't be overridden with the constant pragma or +# we get a prototype mismatch. Hence this less than aesthetically +# appealing BEGIN block: + +BEGIN { + unless( eval { defined SEEK_SET() } ) { + *SEEK_SET = sub { 0 }; + *SEEK_CUR = sub { 1 }; + *SEEK_END = sub { 2 }; + } + + unless( eval { defined O_BINARY() } ) { + *O_BINARY = sub { 0 }; + *O_RDONLY = sub { 0 }; + *O_WRONLY = sub { 1 }; + } + + unless ( eval { defined O_APPEND() } ) { + + if ( $^O =~ /olaris/ ) { + *O_APPEND = sub { 8 }; + *O_CREAT = sub { 256 }; + *O_EXCL = sub { 1024 }; + } + elsif ( $^O =~ /inux/ ) { + *O_APPEND = sub { 1024 }; + *O_CREAT = sub { 64 }; + *O_EXCL = sub { 128 }; + } + elsif ( $^O =~ /BSD/i ) { + *O_APPEND = sub { 8 }; + *O_CREAT = sub { 512 }; + *O_EXCL = sub { 2048 }; + } + } +} + +# print "OS [$^O]\n" ; + +# print "O_BINARY = ", O_BINARY(), "\n" ; +# print "O_RDONLY = ", O_RDONLY(), "\n" ; +# print "O_WRONLY = ", O_WRONLY(), "\n" ; +# print "O_APPEND = ", O_APPEND(), "\n" ; +# print "O_CREAT ", O_CREAT(), "\n" ; +# print "O_EXCL ", O_EXCL(), "\n" ; + + +*slurp = \&read_file ; + +sub read_file { + + my( $file_name, %args ) = @_ ; + +# set the buffer to either the passed in one or ours and init it to the null +# string + + my $buf ; + my $buf_ref = $args{'buf_ref'} || \$buf ; + ${$buf_ref} = '' ; + + my( $read_fh, $size_left, $blk_size ) ; + +# check if we are reading from a handle (glob ref or IO:: object) + + if ( ref $file_name ) { + +# slurping a handle so use it and don't open anything. +# set the block size so we know it is a handle and read that amount + + $read_fh = $file_name ; + $blk_size = $args{'blk_size'} || 1024 * 1024 ; + $size_left = $blk_size ; + +# DEEP DARK MAGIC. this checks the UNTAINT IO flag of a +# glob/handle. only the DATA handle is untainted (since it is from +# trusted data in the source file). 
this allows us to test if this is +# the DATA handle and then to do a sysseek to make sure it gets +# slurped correctly. on some systems, the buffered i/o pointer is not +# left at the same place as the fd pointer. this sysseek makes them +# the same so slurping with sysread will work. + + eval{ require B } ; + + if ( $@ ) { + + @_ = ( \%args, <<ERR ) ; +Can't find B.pm with this Perl: $!. +That module is needed to slurp the DATA handle. +ERR + goto &_error ; + } + + if ( B::svref_2object( $read_fh )->IO->IoFLAGS & 16 ) { + +# set the seek position to the current tell. + + sysseek( $read_fh, tell( $read_fh ), SEEK_SET ) || + croak "sysseek $!" ; + } + } + else { + +# a regular file. set the sysopen mode + + my $mode = O_RDONLY ; + $mode |= O_BINARY if $args{'binmode'} ; + +#printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ; + +# open the file and handle any error + + $read_fh = gensym ; + unless ( sysopen( $read_fh, $file_name, $mode ) ) { + @_ = ( \%args, "read_file '$file_name' - sysopen: $!"); + goto &_error ; + } + +# get the size of the file for use in the read loop + + $size_left = -s $read_fh ; + + unless( $size_left ) { + + $blk_size = $args{'blk_size'} || 1024 * 1024 ; + $size_left = $blk_size ; + } + } + +# infinite read loop. we exit when we are done slurping + + while( 1 ) { + +# do the read and see how much we got + + my $read_cnt = sysread( $read_fh, ${$buf_ref}, + $size_left, length ${$buf_ref} ) ; + + if ( defined $read_cnt ) { + +# good read. see if we hit EOF (nothing left to read) + + last if $read_cnt == 0 ; + +# loop if we are slurping a handle. we don't track $size_left then. + + next if $blk_size ; + +# count down how much we read and loop if we have more to read. 
+ $size_left -= $read_cnt ; + last if $size_left <= 0 ; + next ; + } + +# handle the read error + + @_ = ( \%args, "read_file '$file_name' - sysread: $!"); + goto &_error ; + } + +# fix up cr/lf to be a newline if this is a windows text file + + ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$args{'binmode'} ; + +# this is the 5 returns in a row. each handles one possible +# combination of caller context and requested return type + + my $sep = $/ ; + $sep = '\n\n+' if defined $sep && $sep eq '' ; + +# caller wants to get an array ref of lines + +# this split doesn't work since it tries to use variable length lookbehind +# the m// line works. +# return [ split( m|(?<=$sep)|, ${$buf_ref} ) ] if $args{'array_ref'} ; + return [ length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ] + if $args{'array_ref'} ; + +# caller wants a list of lines (normal list context) + +# same problem with this split as before. +# return split( m|(?<=$sep)|, ${$buf_ref} ) if wantarray ; + return length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : () + if wantarray ; + +# caller wants a scalar ref to the slurped text + + return $buf_ref if $args{'scalar_ref'} ; + +# caller wants a scalar with the slurped text (normal scalar context) + + return ${$buf_ref} if defined wantarray ; + +# caller passed in an i/o buffer by reference (normal void context) + + return ; +} + + +# error handling section +# +# all the error handling uses magic goto so the caller will get the +# error message as if from their code and not this module. if we just +# did a call on the error code, the carp/croak would report it from +# this module since the error sub is one level down on the call stack +# from read_file/write_file/read_dir. 
+ + +my %err_func = ( + 'carp' => \&carp, + 'croak' => \&croak, +) ; + +sub _error { + + my( $args, $err_msg ) = @_ ; + +# get the error function to use + + my $func = $err_func{ $args->{'err_mode'} || 'croak' } ; + +# if we didn't find it in our error function hash, they must have set +# it to quiet and we don't do anything. + + return unless $func ; + +# call the carp/croak function + + $func->($err_msg) ; + +# return a hard undef (in list context this will be a single value of +# undef which is not a legal in-band value) + + return undef ; +} + +1; diff --git a/extras/slurp_article.pod b/extras/slurp_article.pod new file mode 100644 index 0000000..8b000f7 --- /dev/null +++ b/extras/slurp_article.pod @@ -0,0 +1,743 @@ +=head1 Perl Slurp Ease + +=head2 Introduction + + +One of the common Perl idioms is processing text files line by line: + + while( <FH> ) { + do something with $_ + } + +This idiom has several variants, but the key point is that it reads in +only one line from the file in each loop iteration. This has several +advantages, including limiting memory use to one line, the ability to +handle any size file (including data piped in via STDIN), and it is +easily taught and understood to Perl newbies. In fact newbies are the +ones who do silly things like this: + + while( <FH> ) { + push @lines, $_ ; + } + + foreach ( @lines ) { + do something with $_ + } + +Line by line processing is fine, but it isn't the only way to deal with +reading files. The other common style is reading the entire file into a +scalar or array, and that is commonly known as slurping. Now, slurping has +somewhat of a poor reputation, and this article is an attempt at +rehabilitating it. Slurping files has advantages and limitations, and is +not something you should just do when line by line processing is fine. +It is best when you need the entire file in memory for processing all at +once. 
Slurping with in memory processing can be faster and lead to +simpler code than line by line if done properly. + +The biggest issue to watch for with slurping is file size. Slurping very +large files or unknown amounts of data from STDIN can be disastrous to +your memory usage and cause swap disk thrashing. You can slurp STDIN if +you know that you can handle the maximum size input without +detrimentally affecting your memory usage. So I advocate slurping only +disk files and only when you know their size is reasonable and you have +a real reason to process the file as a whole. Note that reasonable size +these days is larger than the bad old days of limited RAM. Slurping in a +megabyte is not an issue on most systems. But most of the +files I tend to slurp in are much smaller than that. Typical files that +work well with slurping are configuration files, (mini-)language scripts, +some data (especially binary) files, and other files of known sizes +which need fast processing. + +Another major win for slurping over line by line is speed. Perl's IO +system (like many others) is slow. Calling C<< <> >> for each line +requires a check for the end of line, checks for EOF, copying a line, +munging the internal handle structure, etc. Plenty of work for each line +read in. Whereas slurping, if done correctly, will usually involve only +one I/O call and no extra data copying. The same is true for writing +files to disk, and we will cover that as well (even though the term +slurping is traditionally a read operation, I use the term ``slurp'' for +the concept of doing I/O with an entire file in one operation). + +Finally, when you have slurped the entire file into memory, you can do +operations on the data that are not possible or easily done with line by +line processing. 
These include global search/replace (without regard for +newlines), grabbing all matches with one call of C<//g>, complex parsing +(which in many cases must ignore newlines), processing *ML (where line +endings are just white space) and performing complex transformations +such as template expansion. + +=head2 Global Operations + +Here are some simple global operations that can be done quickly and +easily on an entire file that has been slurped in. They could also be +done with line by line processing but that would be slower and require +more code. + +A common problem is reading in a file with key/value pairs. There are +modules which do this but who needs them for simple formats? Just slurp +in the file and do a single parse to grab all the key/value pairs. + + my $text = read_file( $file ) ; + my %config = $text =~ /^(\w+)=(.+)$/mg ; + +That matches a key which starts a line (anywhere inside the string +because of the C</m> modifier), the '=' char and the text to the end of the +line (again, C</m> makes that work). In fact the ending C<$> is not even needed +since C<.> will not normally match a newline. Since the key and value are +grabbed and the C<m//> is in list context with the C</g> modifier, it will +grab all key/value pairs and return them. The C<%config>hash will be +assigned this list and now you have the file fully parsed into a hash. + +Various projects I have worked on needed some simple templating and I +wasn't in the mood to use a full module (please, no flames about your +favorite template module :-). So I rolled my own by slurping in the +template file, setting up a template hash and doing this one line: + + $text =~ s/<%(.+?)%>/$template{$1}/g ; + +That only works if the entire file was slurped in. 
With a little +extra work it can handle chunks of text to be expanded: + + $text =~ s/<%(\w+)_START%>(.+?)<%\1_END%>/ template($1, $2)/sge ; + +Just supply a C<template> sub to expand the text between the markers and +you have yourself a simple system with minimal code. Note that this will +work and grab over multiple lines due to the C</s> modifier. This is +something that is much trickier with line by line processing. + +Note that this is a very simple templating system, and it can't directly +handle nested tags and other complex features. But even if you use one +of the myriad of template modules on the CPAN, you will gain by having +speedier ways to read and write files. + +Slurping in a file into an array also offers some useful advantages. +One simple example is reading in a flat database where each record has +fields separated by a character such as C<:>: + + my @pw_fields = map [ split /:/ ], read_file( '/etc/passwd' ) ; + +Random access to any line of the slurped file is another advantage. Also +a line index could be built to speed up searching the array of lines. + + +=head2 Traditional Slurping + +Perl has always supported slurping files with minimal code. Slurping of +a file to a list of lines is trivial, just call the C<< <> >> operator +in a list context: + + my @lines = <FH> ; + +and slurping to a scalar isn't much more work. Just set the built in +variable C<$/> (the input record separator) to the undefined value and read +in the file with C<< <> >>: + + { + local( $/, *FH ) ; + open( FH, $file ) or die "sudden flaming death\n" + $text = <FH> + } + +Notice the use of C<local()>. It sets C<$/> to C<undef> for you and when +the scope exits it will revert C<$/> back to its previous value (most +likely "\n"). + +Here is a Perl idiom that allows the C<$text> variable to be declared, +and there is no need for a tightly nested block. 
The C<do> block will +execute C<< <FH> >> in a scalar context and slurp in the file named by +C<$file>: + + local( *FH ) ; + open( FH, $file ) or die "sudden flaming death\n" + my $text = do { local( $/ ) ; <FH> } ; + +Both of those slurps used localized filehandles to be compatible with +5.005. Here they are with 5.6.0 lexical autovivified handles: + + { + local( $/ ) ; + open( my $fh, $file ) or die "sudden flaming death\n" + $text = <$fh> + } + + open( my $fh, $file ) or die "sudden flaming death\n" + my $text = do { local( $/ ) ; <$fh> } ; + +And this is a variant of that idiom that removes the need for the open +call: + + my $text = do { local( @ARGV, $/ ) = $file ; <> } ; + +The filename in C<$file> is assigned to a localized C<@ARGV> and the +null filehandle is used which reads the data from the files in C<@ARGV>. + +Instead of assigning to a scalar, all the above slurps can assign to an +array and it will get the file but split into lines (using C<$/> as the +end of line marker). + +There is one common variant of those slurps which is very slow and not +good code. You see it around, and it is almost always cargo cult code: + + my $text = join( '', <FH> ) ; + +That needlessly splits the input file into lines (C<join> provides a +list context to C<< <FH> >>) and then joins up those lines again. The +original coder of this idiom obviously never read I<perlvar> and learned +how to use C<$/> to allow scalar slurping. + +=head2 Write Slurping + +While reading in entire files at one time is common, writing out entire +files is also done. We call it ``slurping'' when we read in files, but +there is no commonly accepted term for the write operation. I asked some +Perl colleagues and got two interesting nominations. Peter Scott said to +call it ``burping'' (rhymes with ``slurping'' and suggests movement in +the opposite direction). Others suggested ``spewing'' which has a +stronger visual image :-) Tell me your favorite or suggest your own. 
I +will use both in this section so you can see how they work for you. + +Spewing a file is a much simpler operation than slurping. You don't have +context issues to worry about and there is no efficiency problem with +returning a buffer. Here is a simple burp subroutine: + + sub burp { + my( $file_name ) = shift ; + open( my $fh, ">$file_name" ) || + die "can't create $file_name $!" ; + print $fh @_ ; + } + +Note that it doesn't copy the input text but passes @_ directly to +print. We will look at faster variations of that later on. + +=head2 Slurp on the CPAN + +As you would expect there are modules in the CPAN that will slurp files +for you. The two I found are called Slurp.pm (by Rob Casey - ROBAU on +CPAN) and File::Slurp.pm (by David Muir Sharnoff - MUIR on CPAN). + +Here is the code from Slurp.pm: + + sub slurp { + local( $/, @ARGV ) = ( wantarray ? $/ : undef, @_ ); + return <ARGV>; + } + + sub to_array { + my @array = slurp( @_ ); + return wantarray ? @array : \@array; + } + + sub to_scalar { + my $scalar = slurp( @_ ); + return $scalar; + } + +The subroutine C<slurp()> uses the magic undefined value of C<$/> and +the magic file +handle C<ARGV> to support slurping into a scalar or +array. It also provides two wrapper subs that allow the caller to +control the context of the slurp. And the C<to_array()> subroutine will +return the list of slurped lines or an anonymous array of them according +to its caller's context by checking C<wantarray>. It has 'slurp' in +C<@EXPORT> and all three subroutines in C<@EXPORT_OK>. + +<Footnote: Slurp.pm is poorly named and it shouldn't be in the top level +namespace.> + +The original File::Slurp.pm has this code: + +sub read_file +{ + my ($file) = @_; + + local($/) = wantarray ? 
$/ : undef; + local(*F); + my $r; + my (@r); + + open(F, "<$file") || croak "open $file: $!"; + @r = <F>; + close(F) || croak "close $file: $!"; + + return $r[0] unless wantarray; + return @r; +} + +This module provides several subroutines including C<read_file()> (more +on the others later). C<read_file()> behaves similarly to +C<Slurp::slurp()> in that it will slurp a list of lines or a single +scalar depending on the caller's context. It also uses the magic +undefined value of C<$/> for scalar slurping but it uses an explicit +open call rather than using a localized C<@ARGV> as the other module +did. Also it doesn't provide a way to get an anonymous array of the +lines but that can easily be rectified by calling it inside an anonymous +array constructor C<[]>. + +Both of these modules make it easier for Perl coders to slurp in +files. They both use the magic C<$/> to slurp in scalar mode and the +natural behavior of C<< <> >> in list context to slurp as lines. But +neither is optimized for speed nor can they handle C<binmode()> to +support binary or unicode files. See below for more on slurp features +and speedups. + +=head2 Slurping API Design + +The slurp modules on CPAN have a very simple API and don't support +C<binmode()>. This section will cover various API design issues such as +efficient return by reference, C<binmode()> and calling variations. + +Let's start with the call variations. Slurped files can be returned in +four formats: as a single scalar, as a reference to a scalar, as a list +of lines or as an anonymous array of lines. But the caller can only +provide two contexts: scalar or list. So we have to either provide an +API with more than one subroutine (as Slurp.pm did) or just provide one +subroutine which only returns a scalar or a list (not an anonymous +array) as File::Slurp does. 
+ +I have used my own C<read_file()> subroutine for years and it has the +same API as File::Slurp: a single subroutine that returns a scalar or a +list of lines depending on context. But I recognize the interest of +those that want an anonymous array for line slurping. For one thing, it +is easier to pass around to other subs and for another, it eliminates +the extra copying of the lines via C<return>. So my module provides only +one slurp subroutine that returns the file data based on context and any +format options passed in. There is no need for a specific +slurp-in-as-a-scalar or list subroutine as the general C<read_file()> +sub will do that by default in the appropriate context. If you want +C<read_file()> to return a scalar reference or anonymous array of lines, +you can request those formats with options. You can even pass in a +reference to a scalar (e.g. a previously allocated buffer) and have that +filled with the slurped data (and that is one of the fastest slurp +modes. see the benchmark section for more on that). If you want to +slurp a scalar into an array, just select the desired array element and +that will provide scalar context to the C<read_file()> subroutine. + +The next area to cover is what to name the slurp sub. I will go with +C<read_file()>. It is descriptive and keeps compatibilty with the +current simple and don't use the 'slurp' nickname (though that nickname +is in the module name). Also I decided to keep the File::Slurp +namespace which was graciously handed over to me by its current owner, +David Muir. + +Another critical area when designing APIs is how to pass in +arguments. The C<read_file()> subroutine takes one required argument +which is the file name. To support C<binmode()> we need another optional +argument. A third optional argument is needed to support returning a +slurped scalar by reference. My first thought was to design the API with +3 positional arguments - file name, buffer reference and binmode. 
But if +you want to set the binmode and not pass in a buffer reference, you have +to fill the second argument with C<undef> and that is ugly. So I decided +to make the filename argument positional and the other two named. The +subroutine starts off like this: + + sub read_file { + + my( $file_name, %args ) = @_ ; + + my $buf ; + my $buf_ref = $args{'buf'} || \$buf ; + +The other sub (C<read_file_lines()>) will only take an optional binmode +(so you can read files with binary delimiters). It doesn't need a buffer +reference argument since it can return an anonymous array if the called +in a scalar context. So this subroutine could use positional arguments, +but to keep its API similar to the API of C<read_file()>, it will also +use pass by name for the optional arguments. This also means that new +optional arguments can be added later without breaking any legacy +code. A bonus with keeping the API the same for both subs will be seen +how the two subs are optimized to work together. + +Write slurping (or spewing or burping :-)) needs to have its API +designed as well. The biggest issue is not only needing to support +optional arguments but a list of arguments to be written is needed. Perl +6 will be able to handle that with optional named arguments and a final +slurp argument. Since this is Perl 5 we have to do it using some +cleverness. The first argument is the file name and it will be +positional as with the C<read_file> subroutine. But how can we pass in +the optional arguments and also a list of data? The solution lies in the +fact that the data list should never contain a reference. +Burping/spewing works only on plain data. So if the next argument is a +hash reference, we can assume it cointains the optional arguments and +the rest of the arguments is the data list. So the C<write_file()> +subroutine will start off like this: + + sub write_file { + + my $file_name = shift ; + + my $args = ( ref $_[0] eq 'HASH' ) ? 
shift : {} ; + +Whether or not optional arguments are passed in, we leave the data list +in C<@_> to minimize any more copying. You call C<write_file()> like this: + + write_file( 'foo', { binmode => ':raw' }, @data ) ; + write_file( 'junk', { append => 1 }, @more_junk ) ; + write_file( 'bar', @spew ) ; + +=head2 Fast Slurping + +Somewhere along the line, I learned about a way to slurp files faster +than by setting $/ to undef. The method is very simple, you do a single +read call with the size of the file (which the -s operator provides). +This bypasses the I/O loop inside perl that checks for EOF and does all +sorts of processing. I then decided to experiment and found that +sysread is even faster as you would expect. sysread bypasses all of +Perl's stdio and reads the file from the kernel buffers directly into a +Perl scalar. This is why the slurp code in File::Slurp uses +sysopen/sysread/syswrite. All the rest of the code is just to support +the various options and data passing techniques. + + +=head2 Benchmarks + +Benchmarks can be enlightening, informative, frustrating and +deceiving. It would make no sense to create a new and more complex slurp +module unless it also gained signifigantly in speed. So I created a +benchmark script which compares various slurp methods with differing +file sizes and calling contexts. This script can be run from the main +directory of the tarball like this: + + perl -Ilib extras/slurp_bench.pl + +If you pass in an argument on the command line, it will be passed to +timethese() and it will control the duration. It defaults to -2 which +makes each benchmark run to at least 2 seconds of cpu time. + +The following numbers are from a run I did on my 300Mhz sparc. You will +most likely get much faster counts on your boxes but the relative speeds +shouldn't change by much. If you see major differences on your +benchmarks, please send me the results and your Perl and OS +versions. 
Also you can play with the benchmark script and add more slurp +variations or data files. + +The rest of this section will be discussing the results of the +benchmarks. You can refer to extras/slurp_bench.pl to see the code for +the individual benchmarks. If the benchmark name starts with cpan_, it +is either from Slurp.pm or File::Slurp.pm. Those starting with new_ are +from the new File::Slurp.pm. Those that start with file_contents_ are +from a client's code base. The rest are variations I created to +highlight certain aspects of the benchmarks. + +The short and long file data is made like this: + + my @lines = ( 'abc' x 30 . "\n") x 100 ; + my $text = join( '', @lines ) ; + + @lines = ( 'abc' x 40 . "\n") x 1000 ; + $text = join( '', @lines ) ; + +So the short file is 9,100 bytes and the long file is 121,000 +bytes. + +=head3 Scalar Slurp of Short File + + file_contents 651/s + file_contents_no_OO 828/s + cpan_read_file 1866/s + cpan_slurp 1934/s + read_file 2079/s + new 2270/s + new_buf_ref 2403/s + new_scalar_ref 2415/s + sysread_file 2572/s + +=head3 Scalar Slurp of Long File + + file_contents_no_OO 82.9/s + file_contents 85.4/s + cpan_read_file 250/s + cpan_slurp 257/s + read_file 323/s + new 468/s + sysread_file 489/s + new_scalar_ref 766/s + new_buf_ref 767/s + +The primary inference you get from looking at the mumbers above is that +when slurping a file into a scalar, the longer the file, the more time +you save by returning the result via a scalar reference. The time for +the extra buffer copy can add up. The new module came out on top overall +except for the very simple sysread_file entry which was added to +highlight the overhead of the more flexible new module which isn't that +much. The file_contents entries are always the worst since they do a +list slurp and then a join, which is a classic newbie and cargo culted +style which is extremely slow. 
Also the OO code in file_contents slows +it down even more (I added the file_contents_no_OO entry to show this). +The two CPAN modules are decent with small files but they are laggards +compared to the new module when the file gets much larger. + +=head3 List Slurp of Short File + + cpan_read_file 589/s + cpan_slurp_to_array 620/s + read_file 824/s + new_array_ref 824/s + sysread_file 828/s + new 829/s + new_in_anon_array 833/s + cpan_slurp_to_array_ref 836/s + +=head3 List Slurp of Long File + + cpan_read_file 62.4/s + cpan_slurp_to_array 62.7/s + read_file 92.9/s + sysread_file 94.8/s + new_array_ref 95.5/s + new 96.2/s + cpan_slurp_to_array_ref 96.3/s + new_in_anon_array 97.2/s + +This is perhaps the most interesting result of this benchmark. Five +different entries have effectively tied for the lead. The logical +conclusion is that splitting the input into lines is the bounding +operation, no matter how the file gets slurped. This is the only +benchmark where the new module isn't the clear winner (in the long file +entries - it is no worse than a close second in the short file +entries). + + +Note: In the benchmark information for all the spew entries, the extra +number at the end of each line is how many wallclock seconds the whole +entry took. The benchmarks were run for at least 2 CPU seconds per +entry. The unusually large wallclock times will be discussed below. + +=head3 Scalar Spew of Short File + + cpan_write_file 1035/s 38 + print_file 1055/s 41 + syswrite_file 1135/s 44 + new 1519/s 2 + print_join_file 1766/s 2 + new_ref 1900/s 2 + syswrite_file2 2138/s 2 + +=head3 Scalar Spew of Long File + + cpan_write_file 164/s 20 + print_file 211/s 26 + syswrite_file 236/s 25 + print_join_file 277/s 2 + new 295/s 2 + syswrite_file2 428/s 2 + new_ref 608/s 2 + +In the scalar spew entries, the new module API wins when it is passed a +reference to the scalar buffer. The C<syswrite_file2> entry beats it +with the shorter file due to its simpler code. 
The old CPAN module is +the slowest due to its extra copying of the data and its use of print. + +=head3 List Spew of Short File + + cpan_write_file 794/s 29 + syswrite_file 1000/s 38 + print_file 1013/s 42 + new 1399/s 2 + print_join_file 1557/s 2 + +=head3 List Spew of Long File + + cpan_write_file 112/s 12 + print_file 179/s 21 + syswrite_file 181/s 19 + print_join_file 205/s 2 + new 228/s 2 + +Again, the simple C<print_join_file> entry beats the new module when +spewing a short list of lines to a file. But is loses to the new module +when the file size gets longer. The old CPAN module lags behind the +others since it first makes an extra copy of the lines and then it calls +C<print> on the output list and that is much slower than passing to +C<print> a single scalar generated by join. The C<print_file> entry +shows the advantage of directly printing C<@_> and the +C<print_join_file> adds the join optimization. + +Now about those long wallclock times. If you look carefully at the +benchmark code of all the spew entries, you will find that some always +write to new files and some overwrite existing files. When I asked David +Muir why the old File::Slurp module had an C<overwrite> subroutine, he +answered that by overwriting a file, you always guarantee something +readable is in the file. If you create a new file, there is a moment +when the new file is created but has no data in it. I feel this is not a +good enough answer. Even when overwriting, you can write a shorter file +than the existing file and then you have to truncate the file to the new +size. There is a small race window there where another process can slurp +in the file with the new data followed by leftover junk from the +previous version of the file. This reinforces the point that the only +way to ensure consistant file data is the proper use of file locks. + +But what about those long times? Well it is all about the difference +between creating files and overwriting existing ones. 
The former have to +allocate new inodes (or the equivalent on other file systems) and the +latter can reuse the existing inode. This means the overwrite will save on +disk seeks as well as on cpu time. In fact when running this benchmark, +I could hear my disk going crazy allocating inodes during the spew +operations. This speedup in both cpu and wallclock is why the new module +always does overwriting when spewing files. It also does the proper +truncate (and this is checked in the tests by spewing shorter files +after longer ones had previously been written). The C<overwrite> +subroutine is just a typeglob alias to C<write_file> and is there for +backwards compatibility with the old File::Slurp module. + +=head3 Benchmark Conclusion + +Other than a few cases where a simpler entry beat it out, the new +File::Slurp module is either the speed leader or among the leaders. Its +special APIs for passing buffers by reference prove to be very useful +speedups. Also it uses all the other optimizations including using +C<sysread/syswrite> and joining output lines. I expect many projects +that extensively use slurping will notice the speed improvements, +especially if they rewrite their code to take advantage of the new API +features. Even if they don't touch their code and use the simple API +they will get a significant speedup. + +=head2 Error Handling + +Slurp subroutines are subject to conditions such as not being able to +open the file, or I/O errors. How these errors are handled, and what the +caller will see, are important aspects of the design of an API. The +classic error handling for slurping has been to call C<die()> or even +better, C<croak()>. But sometimes you want the slurp to either +C<warn()>/C<carp()> or allow your code to handle the error. Sure, this +can be done by wrapping the slurp in an C<eval> block to catch a fatal +error, but not everyone wants all that extra code. 
So I have added +another option to all the subroutines which selects the error +handling. If the 'err_mode' option is 'croak' (which is also the +default), the called subroutine will croak. If set to 'carp' then carp +will be called. Set to any other string (use 'quiet' when you want to +be explicit) and no error handler is called. Then the caller can use the +error status from the call. + +C<write_file()> doesn't use the return value for data so it can return a +false status value in-band to mark an error. C<read_file()> does use its +return value for data, but we can still make it pass back the error +status. A successful read in any scalar mode will return either a +defined data string or a reference to a scalar or array. So a bare +return would work here. But if you slurp in lines by calling it in a +list context, a bare C<return> will return an empty list, which is the +same value it would get from an existing but empty file. So now, +C<read_file()> will do something I normally strongly advocate against, +i.e., returning an explicit C<undef> value. In the scalar context this +still returns a error, and in list context, the returned first value +will be C<undef>, and that is not legal data for the first element. So +the list context also gets a error status it can detect: + + my @lines = read_file( $file_name, err_mode => 'quiet' ) ; + your_handle_error( "$file_name can't be read\n" ) unless + @lines && defined $lines[0] ; + + +=head2 File::FastSlurp + + sub read_file { + + my( $file_name, %args ) = @_ ; + + my $buf ; + my $buf_ref = $args{'buf_ref'} || \$buf ; + + my $mode = O_RDONLY ; + $mode |= O_BINARY if $args{'binmode'} ; + + local( *FH ) ; + sysopen( FH, $file_name, $mode ) or + carp "Can't open $file_name: $!" ; + + my $size_left = -s FH ; + + while( $size_left > 0 ) { + + my $read_cnt = sysread( FH, ${$buf_ref}, + $size_left, length ${$buf_ref} ) ; + + unless( $read_cnt ) { + + carp "read error in file $file_name: $!" 
; + last ; + } + + $size_left -= $read_cnt ; + } + + # handle void context (return scalar by buffer reference) + + return unless defined wantarray ; + + # handle list context + + return split m|(?<=$/)|, ${$buf_ref} if wantarray ; + + # handle scalar context + + return ${$buf_ref} ; + } + + sub write_file { + + my $file_name = shift ; + + my $args = ( ref $_[0] eq 'HASH' ) ? shift : {} ; + my $buf = join '', @_ ; + + + my $mode = O_WRONLY ; + $mode |= O_BINARY if $args->{'binmode'} ; + $mode |= O_APPEND if $args->{'append'} ; + + local( *FH ) ; + sysopen( FH, $file_name, $mode ) or + carp "Can't open $file_name: $!" ; + + my $size_left = length( $buf ) ; + my $offset = 0 ; + + while( $size_left > 0 ) { + + my $write_cnt = syswrite( FH, $buf, + $size_left, $offset ) ; + + unless( $write_cnt ) { + + carp "write error in file $file_name: $!" ; + last ; + } + + $size_left -= $write_cnt ; + $offset += $write_cnt ; + } + + return ; + } + +=head2 Slurping in Perl 6 + +As usual with Perl 6, much of the work in this article will be put to +pasture. Perl 6 will allow you to set a 'slurp' property on file handles +and when you read from such a handle, the file is slurped. List and +scalar context will still be supported so you can slurp into lines or a +scalar. I would expect that support for slurping in Perl 6 will be +optimized and bypass the stdio subsystem since it can use the slurp +property to trigger a call to special code. Otherwise some enterprising +individual will just create a File::FastSlurp module for Perl 6. The +code in the Perl 5 module could easily be modified to Perl 6 syntax and +semantics. Any volunteers? + +=head2 In Summary + +We have compared classic line by line processing with munging a whole +file in memory. Slurping files can speed up your programs and simplify +your code if done properly. You must still be aware to not slurp +humongous files (logs, DNA sequences, etc.) or STDIN where you don't +know how much data you will read in. 
But slurping megabyte sized files +is not an major issue on today's systems with the typical amount of RAM +installed. When Perl was first being used in depth (Perl 4), slurping +was limited by the smaller RAM size of 10 years ago. Now, you should be +able to slurp almost any reasonably sized file, whether it contains +configuration, source code, data, etc. + +=head2 Acknowledgements + + + + + diff --git a/extras/slurp_bench.pl b/extras/slurp_bench.pl new file mode 100755 index 0000000..68eb5fd --- /dev/null +++ b/extras/slurp_bench.pl @@ -0,0 +1,587 @@ +#!/usr/local/bin/perl + +use strict ; +use warnings ; + +use Getopt::Long ; +use Benchmark qw( timethese cmpthese ) ; +use Carp ; +use FileHandle ; +use Fcntl qw( :DEFAULT :seek ); + +use File::Slurp () ; +use FileSlurp_12 () ; + +my $file_name = 'slurp_data' ; +my( @lines, $text ) ; + +my %opts ; + +parse_options() ; + +run_benchmarks() ; + +unlink $file_name ; + +exit ; + +sub run_benchmarks { + + foreach my $size ( @{$opts{size_list}} ) { + + @lines = ( 'a' x 80 . 
"\n") x ( $size / 81 + 1 ) ; + $text = join( '', @lines ) ; + + my $overage = length($text) - $size ; + substr( $text, -$overage, $overage, '' ) ; + substr( $lines[-1], -$overage, $overage, '' ) ; + + if ( $opts{slurp} ) { + + File::Slurp::write_file( $file_name, $text ) ; + + bench_list_slurp( $size ) if $opts{list} ; + bench_scalar_slurp( $size ) if $opts{scalar} ; + } + + if ( $opts{spew} ) { + + bench_spew_list( $size ) if $opts{list} ; + bench_scalar_spew( $size ) if $opts{scalar} ; + } + } +} + +########################################## +########################################## +sub bench_scalar_slurp { + + my ( $size ) = @_ ; + + print "\n\nReading (Slurp) into a scalar: Size = $size bytes\n\n" ; + + my $buffer ; + + my $result = timethese( $opts{iterations}, { + + 'FS::read_file' => + sub { my $text = File::Slurp::read_file( $file_name ) }, + + 'FS12::read_file' => + sub { my $text = FileSlurp_12::read_file( $file_name ) }, + + 'FS::read_file_buf_ref' => + sub { my $text ; + File::Slurp::read_file( $file_name, buf_ref => \$text ) }, + 'FS::read_file_buf_ref2' => + sub { + File::Slurp::read_file( $file_name, buf_ref => \$buffer ) }, + 'FS::read_file_scalar_ref' => + sub { my $text = + File::Slurp::read_file( $file_name, scalar_ref => 1 ) }, + + old_sysread_file => + sub { my $text = old_sysread_file( $file_name ) }, + + old_read_file => + sub { my $text = old_read_file( $file_name ) }, + + orig_read_file => + sub { my $text = orig_read_file( $file_name ) }, + + orig_slurp => + sub { my $text = orig_slurp_scalar( $file_name ) }, + + file_contents => + sub { my $text = file_contents( $file_name ) }, + + file_contents_no_OO => + sub { my $text = file_contents_no_OO( $file_name ) }, + } ) ; + + cmpthese( $result ) ; +} + +########################################## + +sub bench_list_slurp { + + my ( $size ) = @_ ; + + print "\n\nReading (Slurp) into a list: Size = $size bytes\n\n" ; + + my $result = timethese( $opts{iterations}, { + + 'FS::read_file' => + sub 
{ my @lines = File::Slurp::read_file( $file_name ) }, + + 'FS::read_file_array_ref' => + sub { my $lines_ref = + File::Slurp::read_file( $file_name, array_ref => 1 ) }, + + 'FS::read_file_scalar' => + sub { my $lines_ref = + [ File::Slurp::read_file( $file_name ) ] }, + + old_sysread_file => + sub { my @lines = old_sysread_file( $file_name ) }, + + old_read_file => + sub { my @lines = old_read_file( $file_name ) }, + + orig_read_file => + sub { my @lines = orig_read_file( $file_name ) }, + + orig_slurp_array => + sub { my @lines = orig_slurp_array( $file_name ) }, + + orig_slurp_array_ref => + sub { my $lines_ref = orig_slurp_array( $file_name ) }, + } ) ; + + cmpthese( $result ) ; +} + +###################################### +# uri's old fast slurp + +sub old_read_file { + + my( $file_name ) = shift ; + + local( *FH ) ; + open( FH, $file_name ) || carp "can't open $file_name $!" ; + + return <FH> if wantarray ; + + my $buf ; + + read( FH, $buf, -s FH ) ; + return $buf ; +} + +sub old_sysread_file { + + my( $file_name ) = shift ; + + local( *FH ) ; + open( FH, $file_name ) || carp "can't open $file_name $!" ; + + return <FH> if wantarray ; + + my $buf ; + + sysread( FH, $buf, -s FH ) ; + return $buf ; +} + +###################################### +# from File::Slurp.pm on cpan + +sub orig_read_file +{ + my ($file) = @_; + + local($/) = wantarray ? $/ : undef; + local(*F); + my $r; + my (@r); + + open(F, "<$file") || croak "open $file: $!"; + @r = <F>; + close(F) || croak "close $file: $!"; + + return $r[0] unless wantarray; + return @r; +} + + +###################################### +# from Slurp.pm on cpan + +sub orig_slurp { + local( $/, @ARGV ) = ( wantarray ? $/ : undef, @_ ); + return <ARGV>; +} + +sub orig_slurp_array { + my @array = orig_slurp( @_ ); + return wantarray ? 
@array : \@array; +} + +sub orig_slurp_scalar { + my $scalar = orig_slurp( @_ ); + return $scalar; +} + +###################################### +# very slow slurp code used by a client + +sub file_contents { + my $file = shift; + my $fh = new FileHandle $file or + warn("Util::file_contents:Can't open file $file"), return ''; + return join '', <$fh>; +} + +# same code but doesn't use FileHandle.pm + +sub file_contents_no_OO { + my $file = shift; + + local( *FH ) ; + open( FH, $file ) || carp "can't open $file $!" ; + + return join '', <FH>; +} + +########################################## +########################################## + +sub bench_spew_list { + + my( $size ) = @_ ; + + print "\n\nWriting (Spew) a list of lines: Size = $size bytes\n\n" ; + + my $result = timethese( $opts{iterations}, { + 'FS::write_file' => sub { unlink $file_name if $opts{unlink} ; + File::Slurp::write_file( $file_name, @lines ) }, + 'FS::write_file Aref' => sub { unlink $file_name if $opts{unlink} ; + File::Slurp::write_file( $file_name, \@lines ) }, + 'print' => sub { unlink $file_name if $opts{unlink} ; + print_file( $file_name, @lines ) }, + 'print/join' => sub { unlink $file_name if $opts{unlink} ; + print_join_file( $file_name, @lines ) }, + 'syswrite/join' => sub { unlink $file_name if $opts{unlink} ; + syswrite_join_file( $file_name, @lines ) }, + 'original write_file' => sub { unlink $file_name if $opts{unlink} ; + orig_write_file( $file_name, @lines ) }, + } ) ; + + cmpthese( $result ) ; +} + +sub print_file { + + my( $file_name ) = shift ; + + local( *FH ) ; + open( FH, ">$file_name" ) || carp "can't create $file_name $!" ; + + print FH @_ ; +} + +sub print_join_file { + + my( $file_name ) = shift ; + + local( *FH ) ; + open( FH, ">$file_name" ) || carp "can't create $file_name $!" ; + + print FH join( '', @_ ) ; +} + +sub syswrite_join_file { + + my( $file_name ) = shift ; + + local( *FH ) ; + open( FH, ">$file_name" ) || carp "can't create $file_name $!" 
; + + syswrite( FH, join( '', @_ ) ) ; +} + +sub sysopen_syswrite_join_file { + + my( $file_name ) = shift ; + + local( *FH ) ; + sysopen( FH, $file_name, O_WRONLY | O_CREAT ) || + carp "can't create $file_name $!" ; + + syswrite( FH, join( '', @_ ) ) ; +} + +sub orig_write_file +{ + my ($f, @data) = @_; + + local(*F); + + open(F, ">$f") || croak "open >$f: $!"; + (print F @data) || croak "write $f: $!"; + close(F) || croak "close $f: $!"; + return 1; +} + +########################################## + +sub bench_scalar_spew { + + my ( $size ) = @_ ; + + print "\n\nWriting (Spew) a scalar: Size = $size bytes\n\n" ; + + my $result = timethese( $opts{iterations}, { + 'FS::write_file' => sub { unlink $file_name if $opts{unlink} ; + File::Slurp::write_file( $file_name, $text ) }, + 'FS::write_file Sref' => sub { unlink $file_name if $opts{unlink} ; + File::Slurp::write_file( $file_name, \$text ) }, + 'print' => sub { unlink $file_name if $opts{unlink} ; + print_file( $file_name, $text ) }, + 'syswrite_file' => sub { unlink $file_name if $opts{unlink} ; + syswrite_file( $file_name, $text ) }, + 'syswrite_file_ref' => sub { unlink $file_name if $opts{unlink} ; + syswrite_file_ref( $file_name, \$text ) }, + 'orig_write_file' => sub { unlink $file_name if $opts{unlink} ; + orig_write_file( $file_name, $text ) }, + } ) ; + + cmpthese( $result ) ; +} + +sub syswrite_file { + + my( $file_name, $text ) = @_ ; + + local( *FH ) ; + open( FH, ">$file_name" ) || carp "can't create $file_name $!" ; + + syswrite( FH, $text ) ; +} + +sub syswrite_file_ref { + + my( $file_name, $text_ref ) = @_ ; + + local( *FH ) ; + open( FH, ">$file_name" ) || carp "can't create $file_name $!" 
; + + syswrite( FH, ${$text_ref} ) ; +} + +sub parse_options { + + my $result = GetOptions (\%opts, qw( + iterations|i=s + direction|d=s + context|c=s + sizes|s=s + unlink|u + legend|key|l|k + help|usage + ) ) ; + + usage() unless $result ; + + usage() if $opts{help} ; + + legend() if $opts{legend} ; + +# set defaults + + $opts{direction} ||= 'both' ; + $opts{context} ||= 'both' ; + $opts{iterations} ||= -2 ; + $opts{sizes} ||= '512,10k,1m' ; + + if ( $opts{direction} eq 'both' ) { + + $opts{spew} = 1 ; + $opts{slurp} = 1 ; + } + elsif ( $opts{direction} eq 'in' ) { + + $opts{slurp} = 1 ; + + } + elsif ( $opts{direction} eq 'out' ) { + + $opts{spew} = 1 ; + } + else { + + usage( "Unknown direction: $opts{direction}" ) ; + } + + if ( $opts{context} eq 'both' ) { + + $opts{list} = 1 ; + $opts{scalar} = 1 ; + } + elsif ( $opts{context} eq 'scalar' ) { + + $opts{scalar} = 1 ; + + } + elsif ( $opts{context} eq 'list' ) { + + $opts{list} = 1 ; + } + else { + + usage( "Unknown context: $opts{context}" ) ; + } + + if ( $opts{context} eq 'both' ) { + + $opts{list} = 1 ; + $opts{scalar} = 1 ; + } + elsif ( $opts{context} eq 'scalar' ) { + + $opts{scalar} = 1 ; + + } + elsif ( $opts{context} eq 'list' ) { + + $opts{list} = 1 ; + } + else { + + usage( "Unknown context: $opts{context}" ) ; + } + + foreach my $size ( split ',', ( $opts{sizes} ) ) { + + +# check for valid size and suffix. grab both. + + usage( "Illegal size: $size") unless $size =~ /^(\d+)([km])?$/ ; + +# handle suffix multipliers + + $size = $1 * (( $2 eq 'k' ) ? 1024 : 1024*1024) if $2 ; + + push( @{$opts{size_list}}, $size ) ; + } + +#use Data::Dumper ; +#print Dumper \%opts ; +} + +sub legend { + + die <<'LEGEND' ; +-------------------------------------------------------------------------- +Legend for the Slurp Benchmark Entries + +In all cases below 'FS' or 'F::S' means the current File::Slurp module +is being used in the benchmark. The full name and description will say +which options are being used. 
+-------------------------------------------------------------------------- +These benchmarks write a list of lines to a file. Use the direction option +of 'out' or 'both' and the context option is 'list' or 'both'. + + Key Description/Source + ----- -------------------------- + FS::write_file Current F::S write_file + FS::write_file Aref Current F::S write_file on array ref of data + print Open a file and call print() on the list data + print/join Open a file and call print() on the joined list + data + syswrite/join Open a file, call syswrite on joined list data + sysopen/syswrite Sysopen a file, call syswrite on joined list + data + original write_file write_file code from original File::Slurp + (pre-version 9999.*) +-------------------------------------------------------------------------- +These benchmarks write a scalar to a file. Use the direction option +of 'out' or 'both' and the context option is 'scalar' or 'both'. + + Key Description/Source + ----- -------------------------- + FS::write_file Current F::S write_file + FS::write_file Sref Current F::S write_file of scalar ref of data + print Open a file and call print() on the scalar data + syswrite_file Open a file, call syswrite on scalar data + syswrite_file_ref Open a file, call syswrite on scalar ref of + data + orig_write_file write_file code from original File::Slurp + (pre-version 9999.*) +-------------------------------------------------------------------------- +These benchmarks slurp a file into an array. Use the direction option +of 'in' or 'both' and the context option is 'list' or 'both'. 
+ + Key Description/Source + ----- -------------------------- + FS::read_file Current F::S read_file - returns array + FS::read_file_array_ref Current F::S read_file - returns array + ref in any context + FS::read_file_scalar Current F::S read_file - returns array + ref in scalar context + old_sysread_file My old fast slurp - calls sysread + old_read_file My old fast slurp - calls read + orig_read_file Original File::Slurp on CPAN + orig_slurp_array Slurp.pm on CPAN - returns array + orig_slurp_array_ref Slurp.pm on CPAN - returns array ref +-------------------------------------------------------------------------- +These benchmarks slurp a file into a scalar. Use the direction option +of 'in' or 'both' and the context option is 'scalar' or 'both'. + + Key Description/Source + ----- -------------------------- + FS::read_file Current F::S read_file - returns scalar + FS12::read_file F::S .12 slower read_file - + returns scalar + FS::read_file_buf_ref Current F::S read_file - returns + via buf_ref argument - new buffer + FS::read_file_buf_ref2 Current F::S read_file - returns + via buf_ref argument - uses + existing buffer + FS::read_file_scalar_ref Current F::S read_file - returns a + scalar ref + old_sysread_file My old fast slurp - calls sysread + old_read_file My old fast slurp - calls read + orig_read_file Original File::Slurp on CPAN + orig_slurp Slurp.pm on CPAN + file_contents Very slow slurp code done by a client + file_contents_no_OO Same code but doesn't use FileHandle.pm +-------------------------------------------------------------------------- +LEGEND +} + +sub usage { + + my( $err ) = @_ ; + + $err ||= '' ; + + die <<DIE ; +$err +Usage: $0 [--iterations=<iter>] [--direction=<dir>] [--context=<con>] + [--sizes=<size_list>] [--legend] [--help] + + --iterations=<iter> Run the benchmarks this many iterations + -i=<iter> A positive number is iteration count, + a negative number is minimum CPU time in + seconds. Default is -2 (run for 2 CPU seconds). 
+ + --direction=<dir> Which direction to slurp: 'in', 'out' or 'both'. + -d=<dir> Default is 'both'. + + --context=<con> Which context is used for slurping: 'list', + -c=<con> 'scalar' or 'both'. Default is 'both'. + + --sizes=<size_list> What sizes will be used in slurping (either + -s=<size_list> direction). This is a comma separated list of + integers. You can use 'k' or 'm' as suffixes + for 1024 and 1024**2. Default is '512,10k,1m'. + + --unlink Unlink the written file before each time + -u a file is written + + --legend Print out a legend of all the benchmark entries. + --key + -l + -k + + --help Print this help text + --usage +DIE + +} + +__END__ + diff --git a/lib/File/Slurp.pm b/lib/File/Slurp.pm new file mode 100755 index 0000000..b0d040b --- /dev/null +++ b/lib/File/Slurp.pm @@ -0,0 +1,1261 @@ +package File::Slurp; + +use 5.6.2 ; + +use strict; +use warnings ; + +use Carp ; +use Exporter ; +use Fcntl qw( :DEFAULT ) ; +use POSIX qw( :fcntl_h ) ; +use Errno ; +#use Symbol ; + +use vars qw( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION ) ; +@ISA = qw( Exporter ) ; + +$VERSION = '9999.19'; + +my @std_export = qw( + read_file + write_file + overwrite_file + append_file + read_dir +) ; + +my @edit_export = qw( + edit_file + edit_file_lines +) ; + +my @ok_export = qw( +) ; + +@EXPORT_OK = ( + @edit_export, + qw( + slurp + prepend_file + ), +) ; + +%EXPORT_TAGS = ( + 'all' => [ @std_export, @edit_export, @EXPORT_OK ], + 'edit' => [ @edit_export ], + 'std' => [ @std_export ], +) ; + +@EXPORT = @std_export ; + +my $max_fast_slurp_size = 1024 * 100 ; + +my $is_win32 = $^O =~ /win32/i ; + +# Install subs for various constants that aren't set in older perls +# (< 5.005). Fcntl on old perls uses Exporter to define subs without a +# () prototype These can't be overridden with the constant pragma or +# we get a prototype mismatch. 
Hence this less than aesthetically +# appealing BEGIN block: + +BEGIN { + unless( defined &SEEK_SET ) { + *SEEK_SET = sub { 0 }; + *SEEK_CUR = sub { 1 }; + *SEEK_END = sub { 2 }; + } + + unless( defined &O_BINARY ) { + *O_BINARY = sub { 0 }; + *O_RDONLY = sub { 0 }; + *O_WRONLY = sub { 1 }; + } + + unless ( defined &O_APPEND ) { + + if ( $^O =~ /olaris/ ) { + *O_APPEND = sub { 8 }; + *O_CREAT = sub { 256 }; + *O_EXCL = sub { 1024 }; + } + elsif ( $^O =~ /inux/ ) { + *O_APPEND = sub { 1024 }; + *O_CREAT = sub { 64 }; + *O_EXCL = sub { 128 }; + } + elsif ( $^O =~ /BSD/i ) { + *O_APPEND = sub { 8 }; + *O_CREAT = sub { 512 }; + *O_EXCL = sub { 2048 }; + } + } +} + +# print "OS [$^O]\n" ; + +# print "O_BINARY = ", O_BINARY(), "\n" ; +# print "O_RDONLY = ", O_RDONLY(), "\n" ; +# print "O_WRONLY = ", O_WRONLY(), "\n" ; +# print "O_APPEND = ", O_APPEND(), "\n" ; +# print "O_CREAT ", O_CREAT(), "\n" ; +# print "O_EXCL ", O_EXCL(), "\n" ; + + +*slurp = \&read_file ; + +sub read_file { + + my $file_name = shift ; + my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ; + +# this is the optimized read_file for shorter files. +# the test for -s > 0 is to allow pseudo files to be read with the +# regular loop since they return a size of 0. 
+ + if ( !ref $file_name && -e $file_name && -s _ > 0 && + -s _ < $max_fast_slurp_size && !%{$opts} && !wantarray ) { + + + my $fh ; + unless( sysopen( $fh, $file_name, O_RDONLY ) ) { + + @_ = ( $opts, "read_file '$file_name' - sysopen: $!"); + goto &_error ; + } + + my $read_cnt = sysread( $fh, my $buf, -s _ ) ; + + unless ( defined $read_cnt ) { + + @_ = ( $opts, + "read_file '$file_name' - small sysread: $!"); + goto &_error ; + } + + $buf =~ s/\015\012/\n/g if $is_win32 ; + return $buf ; + } + +# set the buffer to either the passed in one or ours and init it to the null +# string + + my $buf ; + my $buf_ref = $opts->{'buf_ref'} || \$buf ; + ${$buf_ref} = '' ; + + my( $read_fh, $size_left, $blk_size ) ; + +# deal with ref for a file name +# it could be an open handle or an overloaded object + + if ( ref $file_name ) { + + my $ref_result = _check_ref( $file_name ) ; + + if ( ref $ref_result ) { + +# we got an error, deal with it + + @_ = ( $opts, $ref_result ) ; + goto &_error ; + } + + if ( $ref_result ) { + +# we got an overloaded object and the result is the stringified value +# use it as the file name + + $file_name = $ref_result ; + } + else { + +# here we have just an open handle. set $read_fh so we don't do a sysopen + + $read_fh = $file_name ; + $blk_size = $opts->{'blk_size'} || 1024 * 1024 ; + $size_left = $blk_size ; + } + } + +# see if we have a path we need to open + + unless ( $read_fh ) { + +# a regular file. 
set the sysopen mode + + my $mode = O_RDONLY ; + +#printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ; + + $read_fh = local( *FH ) ; +# $read_fh = gensym ; + unless ( sysopen( $read_fh, $file_name, $mode ) ) { + @_ = ( $opts, "read_file '$file_name' - sysopen: $!"); + goto &_error ; + } + + if ( my $binmode = $opts->{'binmode'} ) { + binmode( $read_fh, $binmode ) ; + } + +# get the size of the file for use in the read loop + + $size_left = -s $read_fh ; + +#print "SIZE $size_left\n" ; + +# we need a blk_size if the size is 0 so we can handle pseudofiles like in +# /proc. these show as 0 size but have data to be slurped. + + unless( $size_left ) { + + $blk_size = $opts->{'blk_size'} || 1024 * 1024 ; + $size_left = $blk_size ; + } + } + +# infinite read loop. we exit when we are done slurping + + while( 1 ) { + +# do the read and see how much we got + + my $read_cnt = sysread( $read_fh, ${$buf_ref}, + $size_left, length ${$buf_ref} ) ; + +# since we're using sysread Perl won't automatically restart the call +# when interrupted by a signal. + + next if $!{EINTR}; + + unless ( defined $read_cnt ) { + + @_ = ( $opts, "read_file '$file_name' - loop sysread: $!"); + goto &_error ; + } + +# good read. see if we hit EOF (nothing left to read) + + last if $read_cnt == 0 ; + +# loop if we are slurping a handle. we don't track $size_left then. + + next if $blk_size ; + +# count down how much we read and loop if we have more to read. + + $size_left -= $read_cnt ; + last if $size_left <= 0 ; + } + +# fix up cr/lf to be a newline if this is a windows text file + + ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$opts->{'binmode'} ; + + my $sep = $/ ; + $sep = '\n\n+' if defined $sep && $sep eq '' ; + +# see if caller wants lines + + if( wantarray || $opts->{'array_ref'} ) { + + use re 'taint' ; + + my @lines = length(${$buf_ref}) ? 
+ ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ; + + chomp @lines if $opts->{'chomp'} ; + +# caller wants an array ref + + return \@lines if $opts->{'array_ref'} ; + +# caller wants list of lines + + return @lines ; + } + +# caller wants a scalar ref to the slurped text + + return $buf_ref if $opts->{'scalar_ref'} ; + +# caller wants a scalar with the slurped text (normal scalar context) + + return ${$buf_ref} if defined wantarray ; + +# caller passed in an i/o buffer by reference (normal void context) + + return ; +} + +# errors in this sub are returned as scalar refs +# a normal IO/GLOB handle is an empty return +# an overloaded object returns its stringified as a scalarfilename + +sub _check_ref { + + my( $handle ) = @_ ; + +# check if we are reading from a handle (GLOB or IO object) + + if ( eval { $handle->isa( 'GLOB' ) || $handle->isa( 'IO' ) } ) { + +# we have a handle. deal with seeking to it if it is DATA + + my $err = _seek_data_handle( $handle ) ; + +# return the error string if any + + return \$err if $err ; + +# we have good handle + return ; + } + + eval { require overload } ; + +# return an error if we can't load the overload pragma +# or if the object isn't overloaded + + return \"Bad handle '$handle' is not a GLOB or IO object or overloaded" + if $@ || !overload::Overloaded( $handle ) ; + +# must be overloaded so return its stringified value + + return "$handle" ; +} + +sub _seek_data_handle { + + my( $handle ) = @_ ; + +# DEEP DARK MAGIC. this checks the UNTAINT IO flag of a +# glob/handle. only the DATA handle is untainted (since it is from +# trusted data in the source file). this allows us to test if this is +# the DATA handle and then to do a sysseek to make sure it gets +# slurped correctly. on some systems, the buffered i/o pointer is not +# left at the same place as the fd pointer. this sysseek makes them +# the same so slurping with sysread will work. + + eval{ require B } ; + + if ( $@ ) { + + return <<ERR ; +Can't find B.pm with this Perl: $!. 
+That module is needed to properly slurp the DATA handle. +ERR + } + + if ( B::svref_2object( $handle )->IO->IoFLAGS & 16 ) { + +# set the seek position to the current tell. + + unless( sysseek( $handle, tell( $handle ), SEEK_SET ) ) { + return "read_file '$handle' - sysseek: $!" ; + } + } + +# seek was successful, return no error string + + return ; +} + + +sub write_file { + + my $file_name = shift ; + +# get the optional argument hash ref from @_ or an empty hash ref. + + my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ; + + my( $buf_ref, $write_fh, $no_truncate, $orig_file_name, $data_is_ref ) ; + +# get the buffer ref - it depends on how the data is passed into write_file +# after this if/else $buf_ref will have a scalar ref to the data. + + if ( ref $opts->{'buf_ref'} eq 'SCALAR' ) { + +# a scalar ref passed in %opts has the data +# note that the data was passed by ref + + $buf_ref = $opts->{'buf_ref'} ; + $data_is_ref = 1 ; + } + elsif ( ref $_[0] eq 'SCALAR' ) { + +# the first value in @_ is the scalar ref to the data +# note that the data was passed by ref + + $buf_ref = shift ; + $data_is_ref = 1 ; + } + elsif ( ref $_[0] eq 'ARRAY' ) { + +# the first value in @_ is the array ref to the data so join it. + + ${$buf_ref} = join '', @{$_[0]} ; + } + else { + +# good old @_ has all the data so join it. + + ${$buf_ref} = join '', @_ ; + } + +# deal with ref for a file name + + if ( ref $file_name ) { + + my $ref_result = _check_ref( $file_name ) ; + + if ( ref $ref_result ) { + +# we got an error, deal with it + + @_ = ( $opts, $ref_result ) ; + goto &_error ; + } + + if ( $ref_result ) { + +# we got an overloaded object and the result is the stringified value +# use it as the file name + + $file_name = $ref_result ; + } + else { + +# we now have a proper handle ref. +# make sure we don't call truncate on it. + + $write_fh = $file_name ; + $no_truncate = 1 ; + } + } + +# see if we have a path we need to open + + unless( $write_fh ) { + +# spew to regular file. 
+ + if ( $opts->{'atomic'} ) { + +# in atomic mode, we spew to a temp file so make one and save the original +# file name. + $orig_file_name = $file_name ; + $file_name .= ".$$" ; + } + +# set the mode for the sysopen + + my $mode = O_WRONLY | O_CREAT ; + $mode |= O_APPEND if $opts->{'append'} ; + $mode |= O_EXCL if $opts->{'no_clobber'} ; + + my $perms = $opts->{perms} ; + $perms = 0666 unless defined $perms ; + +#printf "WR: BINARY %x MODE %x\n", O_BINARY, $mode ; + +# open the file and handle any error. + + $write_fh = local( *FH ) ; +# $write_fh = gensym ; + unless ( sysopen( $write_fh, $file_name, $mode, $perms ) ) { + + @_ = ( $opts, "write_file '$file_name' - sysopen: $!"); + goto &_error ; + } + } + + if ( my $binmode = $opts->{'binmode'} ) { + binmode( $write_fh, $binmode ) ; + } + + sysseek( $write_fh, 0, SEEK_END ) if $opts->{'append'} ; + +#print 'WR before data ', unpack( 'H*', ${$buf_ref}), "\n" ; + +# fix up newline to write cr/lf if this is a windows text file + + if ( $is_win32 && !$opts->{'binmode'} ) { + +# copy the write data if it was passed by ref so we don't clobber the +# caller's data + $buf_ref = \do{ my $copy = ${$buf_ref}; } if $data_is_ref ; + ${$buf_ref} =~ s/\n/\015\012/g ; + } + +#print 'after data ', unpack( 'H*', ${$buf_ref}), "\n" ; + +# get the size of how much we are writing and init the offset into that buffer + + my $size_left = length( ${$buf_ref} ) ; + my $offset = 0 ; + +# loop until we have no more data left to write + + do { + +# do the write and track how much we just wrote + + my $write_cnt = syswrite( $write_fh, ${$buf_ref}, + $size_left, $offset ) ; + +# since we're using syswrite Perl won't automatically restart the call +# when interrupted by a signal. 
+ + next if $!{EINTR}; + + unless ( defined $write_cnt ) { + + @_ = ( $opts, "write_file '$file_name' - syswrite: $!"); + goto &_error ; + } + +# track how much left to write and where to write from in the buffer + + $size_left -= $write_cnt ; + $offset += $write_cnt ; + + } while( $size_left > 0 ) ; + +# we truncate regular files in case we overwrite a long file with a shorter file +# so seek to the current position to get it (same as tell()). + + truncate( $write_fh, + sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ; + + close( $write_fh ) ; + +# handle the atomic mode - move the temp file to the original filename. + + if ( $opts->{'atomic'} && !rename( $file_name, $orig_file_name ) ) { + + @_ = ( $opts, "write_file '$file_name' - rename: $!" ) ; + goto &_error ; + } + + return 1 ; +} + +# this is for backwards compatibility with the previous File::Slurp module. +# write_file always overwrites an existing file + +*overwrite_file = \&write_file ; + +# the current write_file has an append mode so we use that. this +# supports the same API with an optional second argument which is a +# hash ref of options. + +sub append_file { + +# get the optional opts hash ref + my $opts = $_[1] ; + if ( ref $opts eq 'HASH' ) { + +# we were passed an opts ref so just mark the append mode + + $opts->{append} = 1 ; + } + else { + +# no opts hash so insert one with the append mode + + splice( @_, 1, 0, { append => 1 } ) ; + } + +# magic goto the main write_file sub. this overlays the sub without touching +# the stack or @_ + + goto &write_file +} + +# prepend data to the beginning of a file + +sub prepend_file { + + my $file_name = shift ; + +#print "FILE $file_name\n" ; + + my $opts = ( ref $_[0] eq 'HASH' ) ? 
shift : {} ; + +# delete unsupported options + + my @bad_opts = + grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ; + + delete @{$opts}{@bad_opts} ; + + my $prepend_data = shift ; + $prepend_data = '' unless defined $prepend_data ; + $prepend_data = ${$prepend_data} if ref $prepend_data eq 'SCALAR' ; + +#print "PRE [$prepend_data]\n" ; + + my $err_mode = delete $opts->{err_mode} ; + $opts->{ err_mode } = 'croak' ; + $opts->{ scalar_ref } = 1 ; + + my $existing_data = eval { read_file( $file_name, $opts ) } ; + + if ( $@ ) { + + @_ = ( { err_mode => $err_mode }, + "prepend_file '$file_name' - read_file: $!" ) ; + goto &_error ; + } + +#print "EXIST [$$existing_data]\n" ; + + $opts->{atomic} = 1 ; + my $write_result = + eval { write_file( $file_name, $opts, + $prepend_data, $$existing_data ) ; + } ; + + if ( $@ ) { + + @_ = ( { err_mode => $err_mode }, + "prepend_file '$file_name' - write_file: $!" ) ; + goto &_error ; + } + + return $write_result ; +} + +# edit a file as a scalar in $_ + +sub edit_file(&$;$) { + + my( $edit_code, $file_name, $opts ) = @_ ; + $opts = {} unless ref $opts eq 'HASH' ; + +# my $edit_code = shift ; +# my $file_name = shift ; +# my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ; + +#print "FILE $file_name\n" ; + +# delete unsupported options + + my @bad_opts = + grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ; + + delete @{$opts}{@bad_opts} ; + +# keep the user err_mode and force croaking on internal errors + + my $err_mode = delete $opts->{err_mode} ; + $opts->{ err_mode } = 'croak' ; + +# get a scalar ref for speed and slurp the file into a scalar + + $opts->{ scalar_ref } = 1 ; + my $existing_data = eval { read_file( $file_name, $opts ) } ; + + if ( $@ ) { + + @_ = ( { err_mode => $err_mode }, + "edit_file '$file_name' - read_file: $!" 
) ; + goto &_error ; + } + +#print "EXIST [$$existing_data]\n" ; + + my( $edited_data ) = map { $edit_code->(); $_ } $$existing_data ; + + $opts->{atomic} = 1 ; + my $write_result = + eval { write_file( $file_name, $opts, $edited_data ) } ; + + if ( $@ ) { + + @_ = ( { err_mode => $err_mode }, + "edit_file '$file_name' - write_file: $!" ) ; + goto &_error ; + } + + return $write_result ; +} + +sub edit_file_lines(&$;$) { + + my( $edit_code, $file_name, $opts ) = @_ ; + $opts = {} unless ref $opts eq 'HASH' ; + +# my $edit_code = shift ; +# my $file_name = shift ; +# my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ; + +#print "FILE $file_name\n" ; + +# delete unsupported options + + my @bad_opts = + grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ; + + delete @{$opts}{@bad_opts} ; + +# keep the user err_mode and force croaking on internal errors + + my $err_mode = delete $opts->{err_mode} ; + $opts->{ err_mode } = 'croak' ; + +# get an array ref for speed and slurp the file into lines + + $opts->{ array_ref } = 1 ; + my $existing_data = eval { read_file( $file_name, $opts ) } ; + + if ( $@ ) { + + @_ = ( { err_mode => $err_mode }, + "edit_file_lines '$file_name' - read_file: $!" ) ; + goto &_error ; + } + +#print "EXIST [$$existing_data]\n" ; + + my @edited_data = map { $edit_code->(); $_ } @$existing_data ; + + $opts->{atomic} = 1 ; + my $write_result = + eval { write_file( $file_name, $opts, @edited_data ) } ; + + if ( $@ ) { + + @_ = ( { err_mode => $err_mode }, + "edit_file_lines '$file_name' - write_file: $!" ) ; + goto &_error ; + } + + return $write_result ; +} + +# basic wrapper around opendir/readdir + +sub read_dir { + + my $dir = shift ; + my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ; + +# this handle will be destroyed upon return + + local(*DIRH); + +# open the dir and handle any errors + + unless ( opendir( DIRH, $dir ) ) { + + @_ = ( $opts, "read_dir '$dir' - opendir: $!" 
) ; + goto &_error ; + } + + my @dir_entries = readdir(DIRH) ; + + @dir_entries = grep( $_ ne "." && $_ ne "..", @dir_entries ) + unless $opts->{'keep_dot_dot'} ; + + if ( $opts->{'prefix'} ) { + + substr( $_, 0, 0, "$dir/" ) for @dir_entries ; + } + + return @dir_entries if wantarray ; + return \@dir_entries ; +} + +# error handling section +# +# all the error handling uses magic goto so the caller will get the +# error message as if from their code and not this module. if we just +# did a call on the error code, the carp/croak would report it from +# this module since the error sub is one level down on the call stack +# from read_file/write_file/read_dir. + + +my %err_func = ( + 'carp' => \&carp, + 'croak' => \&croak, +) ; + +sub _error { + + my( $opts, $err_msg ) = @_ ; + +# get the error function to use + + my $func = $err_func{ $opts->{'err_mode'} || 'croak' } ; + +# if we didn't find it in our error function hash, they must have set +# it to quiet and we don't do anything. + + return unless $func ; + +# call the carp/croak function + + $func->($err_msg) if $func ; + +# return a hard undef (in list context this will be a single value of +# undef which is not a legal in-band value) + + return undef ; +} + +1; +__END__ + +=head1 NAME + +File::Slurp - Simple and Efficient Reading/Writing/Modifying of Complete Files + +=head1 SYNOPSIS + + use File::Slurp; + +# read in a whole file into a scalar + my $text = read_file( 'filename' ) ; + +# read in a whole file into an array of lines + my @lines = read_file( 'filename' ) ; + +# write out a whole file from a scalar + write_file( 'filename', $text ) ; + +# write out a whole file from an array of lines + write_file( 'filename', @lines ) ; + +# Here is a simple and fast way to load and save a simple config file +# made of key=value lines. 
+ my %conf = read_file( $file_name ) =~ /^(\w+)=(.*)$/mg ;
+ write_file( $file_name, {atomic => 1}, map "$_=$conf{$_}\n", keys %conf ) ;
+
+# insert text at the beginning of a file
+ prepend_file( 'filename', $text ) ;
+
+# in-place edit to replace all 'foo' with 'bar' in file
+ edit_file { s/foo/bar/g } 'filename' ;
+
+# in-place edit to delete all lines with 'foo' from file
+ edit_file_lines sub { $_ = '' if /foo/ }, 'filename' ;
+
+# read in a whole directory of file names (skipping . and ..)
+ my @files = read_dir( '/path/to/dir' ) ;
+
+=head1 DESCRIPTION
+
+This module provides subs that allow you to read or write entire files
+with one simple call. They are designed to be simple to use, have
+flexible ways to pass in or get the file contents and to be very
+efficient. There is also a sub to read in all the files in a
+directory other than C<.> and C<..>
+
+These slurp/spew subs work for files, pipes and sockets, stdio,
+pseudo-files, and the DATA handle. Read more about why slurping files is
+a good thing in the file 'slurp_article.pod' in the extras/ directory.
+
+If you are interested in how fast these calls work, check out the
+slurp_bench.pl program in the extras/ directory. It compares many
+different forms of slurping. You can select the I/O direction, context
+and file sizes. Use the --help option to see how to run it.
+
+=head2 B<read_file>
+
+This sub reads in an entire file and returns its contents to the
+caller. In scalar context it returns the entire file as a single
+scalar. In list context it will return a list of lines (using the
+current value of $/ as the separator including support for paragraph
+mode when it is set to '').
+
+ my $text = read_file( 'filename' ) ;
+ my $bin = read_file( 'filename', { binmode => ':raw' } ) ;
+ my @lines = read_file( 'filename' ) ;
+ my $lines = read_file( 'filename', array_ref => 1 ) ;
+
+The first argument is the file to slurp in. If the next argument is a
+hash reference, then it is used as the options. 
Otherwise the rest of
+the argument list is used as key/value options.
+
+If the file argument is a handle (if it is a ref and is an IO or GLOB
+object), then that handle is slurped in. This mode is supported so you
+slurp handles such as C<DATA> and C<STDIN>. See the test handle.t for
+an example that does C<open( '-|' )> and the child process spews data
+to the parent which slurps it in. All of the options that control how
+the data is returned to the caller still work in this case.
+
+If the first argument is an overloaded object then its stringified value
+is used for the filename and that file is opened. This is a new feature
+in 9999.14. See the stringify.t test for an example.
+
+By default C<read_file> returns an undef in scalar context or a single
+undef in list context if it encounters an error. Those are both
+impossible to get with a clean read_file call which means you can check
+the return value and always know if you had an error. You can change how
+errors are handled with the C<err_mode> option.
+
+Speed Note: If you call read_file and just get a scalar return value
+it is now optimized to handle shorter files. This is only used if no
+options are used, the file is shorter than 100k bytes, the filename is
+a plain scalar and a scalar file is returned. If you want the fastest
+slurping, use the C<buf_ref> or C<scalar_ref> options (see below)
+
+NOTE: as of version 9999.06, read_file works correctly on the C<DATA>
+handle. It used to need a sysseek workaround but that is now handled
+when needed by the module itself.
+
+You can optionally request that C<slurp()> is exported to your code. This
+is an alias for read_file and is meant to be forward compatible with
+Perl 6 (which will have slurp() built-in).
+
+The options for C<read_file> are:
+
+=head3 binmode
+
+If you set the binmode option, then its value is passed to a call to
+binmode on the opened handle. You can use this to set the file to be
+read in binary mode, utf8, etc. 
See perldoc -f binmode for more. + + my $bin_data = read_file( $bin_file, binmode => ':raw' ) ; + my $utf_text = read_file( $bin_file, binmode => ':utf8' ) ; + +=head3 array_ref + +If this boolean option is set, the return value (only in scalar +context) will be an array reference which contains the lines of the +slurped file. The following two calls are equivalent: + + my $lines_ref = read_file( $bin_file, array_ref => 1 ) ; + my $lines_ref = [ read_file( $bin_file ) ] ; + +=head3 chomp + +If this boolean option is set, the lines are chomped. This only +happens if you are slurping in a list context or using the +C<array_ref> option. + +=head3 scalar_ref + +If this boolean option is set, the return value (only in scalar +context) will be an scalar reference to a string which is the contents +of the slurped file. This will usually be faster than returning the +plain scalar. It will also save memory as it will not make a copy of +the file to return. Run the extras/slurp_bench.pl script to see speed +comparisons. + + my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ; + +=head3 buf_ref + +You can use this option to pass in a scalar reference and the slurped +file contents will be stored in the scalar. This can be used in +conjunction with any of the other options. This saves an extra copy of +the slurped file and can lower ram usage vs returning the file. It is +usually the fastest way to read a file into a scalar. Run the +extras/slurp_bench.pl script to see speed comparisons. + + + read_file( $bin_file, buf_ref => \$buffer ) ; + +=head3 blk_size + +You can use this option to set the block size used when slurping from +an already open handle (like \*STDIN). It defaults to 1MB. + + my $text_ref = read_file( $bin_file, blk_size => 10_000_000, + array_ref => 1 ) ; + +=head3 err_mode + +You can use this option to control how read_file behaves when an error +occurs. This option defaults to 'croak'. 
You can set it to 'carp' or to +'quiet to have no special error handling. This code wants to carp and +then read another file if it fails. + + my $text_ref = read_file( $file, err_mode => 'carp' ) ; + unless ( $text_ref ) { + + # read a different file but croak if not found + $text_ref = read_file( $another_file ) ; + } + + # process ${$text_ref} + +=head2 B<write_file> + +This sub writes out an entire file in one call. + + write_file( 'filename', @data ) ; + +The first argument to C<write_file> is the filename. The next argument +is an optional hash reference and it contains key/values that can +modify the behavior of C<write_file>. The rest of the argument list is +the data to be written to the file. + + write_file( 'filename', {append => 1 }, @data ) ; + write_file( 'filename', {binmode => ':raw'}, $buffer ) ; + +As a shortcut if the first data argument is a scalar or array reference, +it is used as the only data to be written to the file. Any following +arguments in @_ are ignored. This is a faster way to pass in the output +to be written to the file and is equivalent to the C<buf_ref> option of +C<read_file>. These following pairs are equivalent but the pass by +reference call will be faster in most cases (especially with larger +files). + + write_file( 'filename', \$buffer ) ; + write_file( 'filename', $buffer ) ; + + write_file( 'filename', \@lines ) ; + write_file( 'filename', @lines ) ; + +If the first argument is a handle (if it is a ref and is an IO or GLOB +object), then that handle is written to. This mode is supported so you +spew to handles such as \*STDOUT. See the test handle.t for an example +that does C<open( '-|' )> and child process spews data to the parent +which slurps it in. All of the options that control how the data are +passed into C<write_file> still work in this case. + +If the first argument is an overloaded object then its stringified value +is used for the filename and that file is opened. This is new feature +in 9999.14. 
See the stringify.t test for an example. + +By default C<write_file> returns 1 upon successfully writing the file or +undef if it encountered an error. You can change how errors are handled +with the C<err_mode> option. + +The options are: + +=head3 binmode + +If you set the binmode option, then its value is passed to a call to +binmode on the opened handle. You can use this to set the file to be +read in binary mode, utf8, etc. See perldoc -f binmode for more. + + write_file( $bin_file, {binmode => ':raw'}, @data ) ; + write_file( $bin_file, {binmode => ':utf8'}, $utf_text ) ; + +=head3 perms + +The perms option sets the permissions of newly-created files. This value +is modified by your process's umask and defaults to 0666 (same as +sysopen). + +NOTE: this option is new as of File::Slurp version 9999.14; + +=head3 buf_ref + +You can use this option to pass in a scalar reference which has the +data to be written. If this is set then any data arguments (including +the scalar reference shortcut) in @_ will be ignored. These are +equivalent: + + write_file( $bin_file, { buf_ref => \$buffer } ) ; + write_file( $bin_file, \$buffer ) ; + write_file( $bin_file, $buffer ) ; + +=head3 atomic + +If you set this boolean option, the file will be written to in an +atomic fashion. A temporary file name is created by appending the pid +($$) to the file name argument and that file is spewed to. After the +file is closed it is renamed to the original file name (and rename is +an atomic operation on most OS's). If the program using this were to +crash in the middle of this, then the file with the pid suffix could +be left behind. + +=head3 append + +If you set this boolean option, the data will be written at the end of +the current file. Internally this sets the sysopen mode flag O_APPEND. + + write_file( $file, {append => 1}, @data ) ; + + You +can import append_file and it does the same thing. 
+
+=head3 no_clobber
+
+If you set this boolean option, an existing file will not be overwritten.
+
+ write_file( $file, {no_clobber => 1}, @data ) ;
+
+=head3 err_mode
+
+You can use this option to control how C<write_file> behaves when an
+error occurs. This option defaults to 'croak'. You can set it to
+'carp' or to 'quiet' to have no error handling other than the return
+value. If the first call to C<write_file> fails it will carp and then
+write to another file. If the second call to C<write_file> fails, it
+will croak.
+
+ unless ( write_file( $file, { err_mode => 'carp' }, \$data ) ) {
+
+ # write a different file but croak if not found
+ write_file( $other_file, \$data ) ;
+ }
+
+=head2 overwrite_file
+
+This sub is just a typeglob alias to write_file since write_file
+always overwrites an existing file. This sub is supported for
+backwards compatibility with the original version of this module. See
+write_file for its API and behavior.
+
+=head2 append_file
+
+This sub will write its data to the end of the file. It is a wrapper
+around write_file and it has the same API so see that for the full
+documentation. These calls are equivalent:
+
+ append_file( $file, @data ) ;
+ write_file( $file, {append => 1}, @data ) ;
+
+
+=head2 prepend_file
+
+This sub writes data to the beginning of a file. The previously existing
+data is written after that so the effect is prepending data in front of
+a file. It is a counterpart to the append_file sub in this module. It
+works by first using C<read_file> to slurp in the file and then calling
+C<write_file> with the new data and the existing file data.
+
+The first argument to C<prepend_file> is the filename. The next argument
+is an optional hash reference and it contains key/values that can modify
+the behavior of C<prepend_file>. The rest of the argument list is the
+data to be written to the file and that is passed to C<write_file> as is
+(see that for allowed data). 
+ +Only the C<binmode> and C<err_mode> options are supported. The +C<write_file> call has the C<atomic> option set so you will always have +a consistant file. See above for more about those options. + +C<prepend_file> is not exported by default, you need to import it +explicitly. + + use File::Slurp qw( prepend_file ) ; + prepend_file( $file, $header ) ; + prepend_file( $file, \@lines ) ; + prepend_file( $file, { binmode => 'raw:'}, $bin_data ) ; + + +=head2 edit_file, edit_file_lines + +These subs read in a file into $_, execute a code block which should +modify $_ and then write $_ back to the file. The difference between +them is that C<edit_file> reads the whole file into $_ and calls the +code block one time. With C<edit_file_lines> each line is read into $_ +and the code is called for each line. In both cases the code should +modify $_ if desired and it will be written back out. These subs are +the equivalent of the -pi command line options of Perl but you can +call them from inside your program and not fork out a process. They +are in @EXPORT_OK so you need to request them to be imported on the +use line or you can import both of them with: + + use File::Slurp qw( :edit ) ; + +The first argument to C<edit_file> and C<edit_file_lines> is a code +block or a code reference. The code block is not followed by a comma +(as with grep and map) but a code reference is followed by a +comma. See the examples below for both styles. The next argument is +the filename. The last argument is an optional hash reference and it +contains key/values that can modify the behavior of +C<prepend_file>. + +Only the C<binmode> and C<err_mode> options are supported. The +C<write_file> call has the C<atomic> option set so you will always +have a consistant file. See above for more about those options. + +Each group of calls below show a Perl command line instance and the +equivalent calls to C<edit_file> and C<edit_file_lines>. 
+
+ perl -0777 -pi -e 's/foo/bar/g' filename
+ use File::Slurp qw( edit_file ) ;
+ edit_file { s/foo/bar/g } 'filename' ;
+ edit_file sub { s/foo/bar/g }, 'filename' ;
+ edit_file \&replace_foo, 'filename' ;
+ sub replace_foo { s/foo/bar/g }
+
+ perl -pi -e '$_ = "" if /foo/' filename
+ use File::Slurp qw( edit_file_lines ) ;
+ use File::Slurp ;
+ edit_file_lines { $_ = '' if /foo/ } 'filename' ;
+ edit_file_lines sub { $_ = '' if /foo/ }, 'filename' ;
+ edit_file_lines \&delete_foo, 'filename' ;
+ sub delete_foo { $_ = '' if /foo/ }
+
+=head2 read_dir
+
+This sub reads all the file names from a directory and returns them to
+the caller but C<.> and C<..> are removed by default.
+
+ my @files = read_dir( '/path/to/dir' ) ;
+
+The first argument is the path to the directory to read. If the next
+argument is a hash reference, then it is used as the options.
+Otherwise the rest of the argument list is used as key/value
+options.
+
+In list context C<read_dir> returns a list of the entries in the
+directory. In a scalar context it returns an array reference which has
+the entries.
+
+=head3 err_mode
+
+If the C<err_mode> option is set, it selects how errors are handled (see
+C<err_mode> in C<read_file> or C<write_file>).
+
+=head3 keep_dot_dot
+
+If this boolean option is set, C<.> and C<..> are not removed from the
+list of files.
+
+ my @all_files = read_dir( '/path/to/dir', keep_dot_dot => 1 ) ;
+
+=head3 prefix
+
+If this boolean option is set, the string "$dir/" is prefixed to each
+dir entry. This means you can directly use the results to open
+files. A common newbie mistake is not putting the directory in front
+of entries when opening them. 
+ + my @paths = read_dir( '/path/to/dir', prefix => 1 ) ; + +=head2 EXPORT + + These are exported by default or with + use File::Slurp qw( :std ) ; + + read_file write_file overwrite_file append_file read_dir + + These are exported with + use File::Slurp qw( :edit ) ; + + edit_file edit_file_lines + + You can get all subs in the module exported with + use File::Slurp qw( :all ) ; + +=head2 LICENSE + + Same as Perl. + +=head2 SEE ALSO + +An article on file slurping in extras/slurp_article.pod. There is +also a benchmarking script in extras/slurp_bench.pl. + +=head2 BUGS + +If run under Perl 5.004, slurping from the DATA handle will fail as +that requires B.pm which didn't get into core until 5.005. + +=head1 AUTHOR + +Uri Guttman, E<lt>uri AT stemsystems DOT comE<gt> + +=cut diff --git a/t/TestDriver.pm b/t/TestDriver.pm new file mode 100644 index 0000000..274e5d3 --- /dev/null +++ b/t/TestDriver.pm @@ -0,0 +1,91 @@ +# driver.pm - common test driver code + +use Test::More ; + +BEGIN { + *CORE::GLOBAL::syswrite = + sub($$$;$) { my( $h, $b, $s, $o ) = @_; CORE::syswrite $h, $b, $s, $o} ; +# sub(*\$$;$) { my( $h, $b, $s, $o ) = @_; CORE::syswrite $h, $b, $s, $o } ; + + *CORE::GLOBAL::sysread = + sub($$$;$) { my( $h, $b, $s, $o ) = @_; CORE::sysread $h, $b, $s, $o } ; +# sub(*\$$;$) { my( $h, $b, $s, $o ) = @_; CORE::sysread $h, $b, $s, $o } ; + + *CORE::GLOBAL::rename = + sub($$) { my( $old, $new ) = @_; CORE::rename $old, $new } ; + + *CORE::GLOBAL::sysopen = + sub($$$;$) { my( $h, $n, $m, $p ) = @_; CORE::sysopen $h, $n, $m, $p } ; +# sub(*$$;$) { my( $h, $n, $m, $p ) = @_; CORE::sysopen $h, $n, $m, $p } ; +} + +sub test_driver { + + my( $tests ) = @_ ; + +use Data::Dumper ; + +# plan for one expected ok() call per test + + plan( tests => scalar @{$tests} ) ; + +# loop over all the tests + + foreach my $test ( @{$tests} ) { + +#print Dumper $test ; + + if ( $test->{skip} ) { + ok( 1, "SKIPPING $test->{name}" ) ; + next ; + } + + my $override = $test->{override} ; + 
+# run any setup sub before this test. this is used to modify the
$control_file ) or + die "cannot create control unicode file '$control_file' $!" ; +print $fh $orig_text ; +close $fh ; + +my $slurp_utf = read_file( $control_file, binmode => $mode ) ; +my $written_text = $is_win32 ? $win32_text : $orig_text ; +is( $slurp_utf, $written_text, "read_file of $mode file" ) ; + +# my $slurp_utf_length = length $slurp_utf ; +# my $slurp_text = read_file( $control_file ) ; +# my $slurp_text_length = length $slurp_text ; +# print "LEN UTF $slurp_utf_length TXT $slurp_text_length\n" ; + +write_file( $slurp_file, {binmode => $mode}, $orig_text ) ; + +open( $fh, "<$mode", $slurp_file ) or + die "cannot open slurp test file '$slurp_file' $!" ; +my $read_length = read( $fh, my $utf_text, $unicode_length ) ; +close $fh ; + +is( $utf_text, $orig_text, "write_file of $mode file" ) ; + +unlink( $control_file, $slurp_file ) ; diff --git a/t/chomp.t b/t/chomp.t new file mode 100644 index 0000000..e14319b --- /dev/null +++ b/t/chomp.t @@ -0,0 +1,53 @@ + +use strict ; +use warnings ; + +use lib qw(t) ; + +use File::Slurp qw( read_file write_file ) ; +use Test::More ; + +use TestDriver ; + +my $file = 'edit_file_data' ; + +my $existing_data = <<PRE ; +line 1 +line 2 +more +foo +bar +junk here and foo +last line +PRE + +my $tests = [ + { + name => 'read_file - chomp', + sub => \&read_file, + args => [ + $file, + { + 'chomp' => 1, + array_ref => 1 + }, + ], + pretest => sub { + my( $test ) = @_ ; + write_file( $file, $existing_data ) ; + }, + posttest => sub { + my( $test ) = @_ ; + $test->{ok} = eq_array( + $test->{result}, + [$existing_data =~ /^(.+)\n/gm] + ) ; + }, + }, +] ; + +test_driver( $tests ) ; + +unlink $file ; + +exit ; diff --git a/t/data_list.t b/t/data_list.t new file mode 100644 index 0000000..ac85b2e --- /dev/null +++ b/t/data_list.t @@ -0,0 +1,62 @@ +#!/usr/local/bin/perl -w + +use strict ; +use File::Slurp ; + +use Carp ; +use POSIX qw( :fcntl_h ) ; +use Test::More tests => 1 ; + +# in case SEEK_SET isn't defined in older perls. 
it seems to always be 0 + +BEGIN { + + *SEEK_SET = sub { 0 } unless defined \&SEEK_SET ; +} + +SKIP: { + + eval { require B } ; + + skip <<TEXT, 1 if $@ ; +B.pm not found in this Perl. This will cause slurping of +the DATA handle to fail. +TEXT + + test_data_list_slurp() ; +} + +exit ; + + +sub test_data_list_slurp { + + my $data_seek = tell( \*DATA ); + +# first slurp in the lines + + my @slurp_lines = read_file( \*DATA ) ; + +# now seek back and read all the lines with the <> op and we make +# golden data sets + + seek( \*DATA, $data_seek, SEEK_SET ) || die "seek $!" ; + my @data_lines = <DATA> ; + +# test the array slurp + + ok( eq_array( \@data_lines, \@slurp_lines ), 'list slurp of DATA' ) ; +} + +__DATA__ +line one +second line +more lines +still more + +enough lines + +we can't test long handle slurps from DATA since i would have to type +too much stuff + +so we will stop here diff --git a/t/data_scalar.t b/t/data_scalar.t new file mode 100644 index 0000000..eb24337 --- /dev/null +++ b/t/data_scalar.t @@ -0,0 +1,62 @@ +#!/usr/local/bin/perl -w + +use strict ; +use File::Slurp ; + +use Carp ; +use POSIX qw( :fcntl_h ) ; +use Test::More tests => 1 ; + +# in case SEEK_SET isn't defined in older perls. it seems to always be 0 + +BEGIN { + + *SEEK_SET = sub { 0 } unless defined \&SEEK_SET ; +} + +eval { require B } ; + +SKIP: { + + skip <<TEXT, 1 if $@ ; +B.pm not found in this Perl. Note this will cause slurping of +the DATA handle to fail. +TEXT + + test_data_scalar_slurp() ; +} + +exit ; + + + +exit ; + +sub test_data_scalar_slurp { + + my $data_seek = tell( \*DATA ); + +# first slurp in the text + + my $slurp_text = read_file( \*DATA ) ; + +# now we need to get the golden data + + seek( \*DATA, $data_seek, SEEK_SET ) || die "seek $!" 
; + my $data_text = join( '', <DATA> ) ; + + is( $slurp_text, $data_text, 'scalar slurp of DATA' ) ; +} + +__DATA__ +line one +second line +more lines +still more + +enough lines + +we can't test long handle slurps from DATA since i would have to type +too much stuff + +so we will stop here diff --git a/t/edit_file.t b/t/edit_file.t new file mode 100644 index 0000000..240103a --- /dev/null +++ b/t/edit_file.t @@ -0,0 +1,107 @@ + +use strict ; +use warnings ; + +use lib qw(t) ; + +use File::Slurp qw( :edit read_file write_file ) ; +use Test::More ; + +use TestDriver ; + +my $file = 'edit_file_data' ; + +my $existing_data = <<PRE ; +line 1 +line 2 +more +foo +bar +junk here and foo +last line +PRE + +my $tests = [ + { + name => 'edit_file - no-op', + sub => \&edit_file, + pretest => sub { + my( $test ) = @_ ; + write_file( $file, $existing_data ) ; + $test->{args} = [ + sub {}, + $file + ] ; + $test->{expected} = $existing_data ; + }, + posttest => sub { $_[0]->{result} = read_file( $file ) }, + }, + { + + name => 'edit_file - s/foo/bar/', + sub => \&edit_file, + pretest => sub { + my( $test ) = @_ ; + write_file( $file, $existing_data ) ; + $test->{args} = [ + sub { s/foo/bar/g }, + $file + ] ; + ( $test->{expected} = $existing_data ) + =~ s/foo/bar/g ; + }, + posttest => sub { $_[0]->{result} = read_file( $file ) }, + }, + { + + name => 'edit_file - upper first words', + sub => \&edit_file, + pretest => sub { + my( $test ) = @_ ; + write_file( $file, $existing_data ) ; + $test->{args} = [ + sub { s/^(\w+)/\U$1/gm }, + $file + ] ; + ( $test->{expected} = $existing_data ) + =~ s/^(\w+)/\U$1/gm ; + }, + posttest => sub { $_[0]->{result} = read_file( $file ) }, + }, + { + name => 'edit_file_lines - no-op', + sub => \&edit_file_lines, + pretest => sub { + my( $test ) = @_ ; + write_file( $file, $existing_data ) ; + $test->{args} = [ + sub {}, + $file + ] ; + $test->{expected} = $existing_data ; + }, + posttest => sub { $_[0]->{result} = read_file( $file ) }, + }, + { + 
+ name => 'edit_file - delete foo lines', + sub => \&edit_file_lines, + pretest => sub { + my( $test ) = @_ ; + write_file( $file, $existing_data ) ; + $test->{args} = [ + sub { $_ = '' if /foo/ }, + $file + ] ; + ( $test->{expected} = $existing_data ) + =~ s/^.*foo.*\n//gm ; + }, + posttest => sub { $_[0]->{result} = read_file( $file ) }, + }, +] ; + +test_driver( $tests ) ; + +unlink $file ; + +exit ; diff --git a/t/error.t b/t/error.t new file mode 100644 index 0000000..a241ee5 --- /dev/null +++ b/t/error.t @@ -0,0 +1,125 @@ +##!/usr/local/bin/perl -w + +use lib qw(t) ; +use strict ; +use Test::More ; + +BEGIN { + plan skip_all => "these tests need Perl 5.5" if $] < 5.005 ; +} + +use TestDriver ; +use File::Slurp qw( :all prepend_file edit_file ) ; + +my $is_win32 = $^O =~ /cygwin|win32/i ; + +my $file_name = 'test_file' ; +my $dir_name = 'test_dir' ; + +my $tests = [ + { + name => 'read_file open error', + sub => \&read_file, + args => [ $file_name ], + error => qr/open/, + }, + { + name => 'write_file open error', + sub => \&write_file, + args => [ $file_name, '' ], + override => 'sysopen', + error => qr/open/, + }, + { + name => 'write_file syswrite error', + sub => \&write_file, + args => [ $file_name, '' ], + override => 'syswrite', + posttest => sub { unlink( $file_name ) }, + error => qr/write/, + }, + { + name => 'read_file small sysread error', + sub => \&read_file, + args => [ $file_name ], + override => 'sysread', + pretest => sub { write_file( $file_name, '' ) }, + posttest => sub { unlink( $file_name ) }, + error => qr/read/, + }, + { + name => 'read_file loop sysread error', + sub => \&read_file, + args => [ $file_name ], + override => 'sysread', + pretest => sub { write_file( $file_name, 'x' x 100_000 ) }, + posttest => sub { unlink( $file_name ) }, + error => qr/read/, + }, + { + name => 'atomic rename error', +# this test is meaningless on Win32 + skip => $is_win32, + sub => \&write_file, + args => [ $file_name, { atomic => 1 }, '' ], + override 
=> 'rename', + posttest => sub { "$file_name.$$" }, + error => qr/rename/, + }, + { + name => 'read_dir opendir error', + sub => \&read_dir, + args => [ $dir_name ], + error => qr/open/, + }, + { + name => 'prepend_file read error', + sub => \&prepend_file, + args => [ $file_name ], + error => qr/read_file/, + }, + { + name => 'prepend_file write error', + sub => \&prepend_file, + pretest => sub { write_file( $file_name, '' ) }, + args => [ $file_name, '' ], + override => 'syswrite', + error => qr/write_file/, + posttest => sub { unlink $file_name, "$file_name.$$" }, + }, + { + name => 'edit_file read error', + sub => \&edit_file, + args => [ sub{}, $file_name ], + error => qr/read_file/, + }, + { + name => 'edit_file write error', + sub => \&edit_file, + pretest => sub { write_file( $file_name, '' ) }, + args => [ sub{}, $file_name ], + override => 'syswrite', + error => qr/write_file/, + posttest => sub { unlink $file_name, "$file_name.$$" }, + }, + { + name => 'edit_file_lines read error', + sub => \&edit_file_lines, + args => [ sub{}, $file_name ], + error => qr/read_file/, + }, + { + name => 'edit_file_lines write error', + sub => \&edit_file_lines, + pretest => sub { write_file( $file_name, '' ) }, + args => [ sub{}, $file_name ], + override => 'syswrite', + error => qr/write_file/, + posttest => sub { unlink $file_name, "$file_name.$$" }, + }, +] ; + +test_driver( $tests ) ; + +exit ; + diff --git a/t/error_mode.t b/t/error_mode.t new file mode 100644 index 0000000..915c184 --- /dev/null +++ b/t/error_mode.t @@ -0,0 +1,59 @@ +##!/usr/local/bin/perl -w + +use strict ; +use File::Slurp ; + +use Carp ; +use Test::More tests => 9 ; + +my $file = 'missing/file' ; +#unlink $file ; + + +my %modes = ( + 'croak' => \&test_croak, + 'carp' => \&test_carp, + 'quiet' => \&test_quiet, +) ; + +while( my( $mode, $sub ) = each %modes ) { + + $sub->( 'read_file', \&read_file, $file, err_mode => $mode ) ; + $sub->( 'write_file', \&write_file, $file, + { err_mode => $mode }, 
'junk' ) ; + $sub->( 'read_dir', \&read_dir, $file, err_mode => $mode ) ; +} + + +sub test_croak { + + my ( $name, $sub, @args ) = @_ ; + + eval { + $sub->( @args ) ; + } ; + + ok( $@, "$name can croak" ) ; +} + +sub test_carp { + + my ( $name, $sub, @args ) = @_ ; + + local $SIG{__WARN__} = sub { ok( 1, "$name can carp" ) } ; + + $sub->( @args ) ; +} + +sub test_quiet { + + my ( $name, $sub, @args ) = @_ ; + + local $SIG{__WARN__} = sub { ok( 0, "$name can be quiet" ) } ; + + eval { + $sub->( @args ) ; + } ; + + ok( !$@, "$name can be quiet" ) ; +} diff --git a/t/file_object.t b/t/file_object.t new file mode 100644 index 0000000..1a6f242 --- /dev/null +++ b/t/file_object.t @@ -0,0 +1,75 @@ +#!perl +use strict; +use Test::More; +use File::Slurp; + +use IO::Handle ; + +use UNIVERSAL ; + +plan tests => 4; + +my $path = "data.txt"; +my $data = "random junk\n"; + +# create an object +my $obj = FileObject->new($path); +isa_ok( $obj, 'FileObject' ); +is( "$obj", $path, "check that the object correctly stringifies" ); + +my $is_glob = eval{ $obj->isa( 'GLOB' ) } ; +#print "GLOB $is_glob\n" ; + +my $is_io = eval{ $obj->isa( 'IO' ) } ; +#print "IO $is_io\n" ; + +my $io = IO::Handle->new() ; +#print "IO2: $io\n" ; + +my $is_io2 = eval{ $io->isa( 'GLOB' ) } ; +#print "IO2 $is_io2\n" ; + +open( FH, "<$0" ) or die "can't open $0: $!" 
; + +my $io3 = *FH{IO} ; +#print "IO3: $io3\n" ; + +my $is_io3 = eval{ $io3->isa( 'IO' ) } ; +#print "IO3 $is_io3\n" ; + +my $io4 = *FH{GLOB} ; +#print "IO4: $io4\n" ; + +my $is_io4 = eval{ $io4->isa( 'GLOB' ) } ; +#print "IO4 $is_io4\n" ; + + +SKIP: { + # write something to that file + open(FILE, ">$obj") or skip 4, "can't write to '$path': $!"; + print FILE $data; + close(FILE); + + # pass it to read_file() + my $content = eval { read_file($obj) }; + is( $@, '', "passing an object to read_file()" ); + is( $content, $data, "checking that the content matches the data" ); +} + +unlink $path; + + +# the following mimics the parts from Path::Class causing +# problems with File::Slurp +package FileObject; +use overload + q[""] => \&stringify, fallback => 1; + +sub new { + return bless { path => $_[1] }, $_[0] +} + +sub stringify { + return $_[0]->{path} +} + diff --git a/t/handle.t b/t/handle.t new file mode 100644 index 0000000..4f26847 --- /dev/null +++ b/t/handle.t @@ -0,0 +1,222 @@ +#!/usr/local/bin/perl -w + +use strict ; +use File::Slurp ; + +use Carp ; +use POSIX qw( :fcntl_h ) ; +use Socket ; +use Symbol ; +use Test::More ; + +# in case SEEK_SET isn't defined in older perls. 
it seems to always be 0 + +BEGIN { + *SEEK_SET = sub() { 0 } unless defined \&SEEK_SET ; +} + +my @pipe_data = ( + '', + 'abc', + 'abc' x 100_000, + 'abc' x 1_000_000, +) ; + +plan( tests => scalar @pipe_data ) ; + +#test_data_slurp() ; + +#test_fork_pipe_slurp() ; + +SKIP: { + + eval { test_socketpair_slurp() } ; + + skip "socketpair not found in this Perl", scalar( @pipe_data ) if $@ ; +} + +sub test_socketpair_slurp { + + foreach my $data ( @pipe_data ) { + + my $size = length( $data ) ; + + my $read_fh = gensym ; + my $write_fh = gensym ; + + socketpair( $read_fh, $write_fh, + AF_UNIX, SOCK_STREAM, PF_UNSPEC); + + if ( fork() ) { + +#warn "PARENT SOCKET\n" ; + close( $write_fh ) ; + my $read_buf = read_file( $read_fh ) ; + + is( $read_buf, $data, + "socket slurp/spew of $size bytes" ) ; + + } + else { + +#child +#warn "CHILD SOCKET\n" ; + close( $read_fh ) ; + eval { write_file( $write_fh, $data ) } ; + exit() ; + } + } +} + +sub test_data_slurp { + + my $data_seek = tell( \*DATA ); + +# first slurp in the lines + my @slurp_lines = read_file( \*DATA ) ; + +# now seek back and read all the lines with the <> op and we make +# golden data sets + + seek( \*DATA, $data_seek, SEEK_SET ) || die "seek $!" ; + my @data_lines = <DATA> ; + my $data_text = join( '', @data_lines ) ; + +# now slurp in as one string and test + + sysseek( \*DATA, $data_seek, SEEK_SET ) || die "seek $!" 
; + my $slurp_text = read_file( \*DATA ) ; + is( $slurp_text, $data_text, 'scalar slurp DATA' ) ; + +# test the array slurp + + ok( eq_array( \@data_lines, \@slurp_lines ), 'list slurp of DATA' ) ; +} + +sub test_fork_pipe_slurp { + + foreach my $data ( @pipe_data ) { + + test_to_pipe( $data ) ; + test_from_pipe( $data ) ; + } +} + + +sub test_from_pipe { + + my( $data ) = @_ ; + + my $size = length( $data ) ; + + if ( pipe_from_fork( \*READ_FH ) ) { + +# parent + my $read_buf = read_file( \*READ_FH ) ; +warn "PARENT read\n" ; + + is( $read_buf, $data, "pipe slurp/spew of $size bytes" ) ; + + close \*READ_FH ; +# return ; + } + else { +# child +warn "CHILD write\n" ; + # write_file( \*STDOUT, $data ) ; + syswrite( \*STDOUT, $data, length( $data ) ) ; + + close \*STDOUT; + exit(0); + } +} + + +sub pipe_from_fork { + + my ( $parent_fh ) = @_ ; + + my $child = gensym ; + + pipe( $parent_fh, $child ) or die; + + my $pid = fork(); + die "fork() failed: $!" unless defined $pid; + + if ($pid) { + +warn "PARENT\n" ; + close $child; + return $pid ; + } + +warn "CHILD FILENO ", fileno($child), "\n" ; + close $parent_fh ; + open(STDOUT, ">&=" . fileno($child)) or die "no fileno" ; + + return ; +} + + +sub test_to_pipe { + + my( $data ) = @_ ; + + my $size = length( $data ) ; + + if ( pipe_to_fork( \*WRITE_FH ) ) { + +# parent + syswrite( \*WRITE_FH, $data, length( $data ) ) ; +# write_file( \*WRITE_FH, $data ) ; +warn "PARENT write\n" ; + +# is( $read_buf, $data, "pipe slurp/spew of $size bytes" ) ; + + close \*WRITE_FH ; +# return ; + } + else { +# child +warn "CHILD read FILENO ", fileno(\*STDIN), "\n" ; + + my $read_buf = read_file( \*STDIN ) ; + is( $read_buf, $data, "pipe slurp/spew of $size bytes" ) ; + close \*STDIN; + exit(0); + } +} + +sub pipe_to_fork { + my ( $parent_fh ) = @_ ; + + my $child = gensym ; + + pipe( $child, $parent_fh ) or die ; + + my $pid = fork(); + die "fork() failed: $!" 
unless defined $pid; + + if ( $pid ) { + close $child; + return $pid ; + } + + close $parent_fh ; + open(STDIN, "<&=" . fileno($child)) or die; + + return ; +} + +__DATA__ +line one +second line +more lines +still more + +enough lines + +we don't test long handle slurps from DATA since i would have to type +too much stuff :-) + +so we will stop here diff --git a/t/inode.t b/t/inode.t new file mode 100644 index 0000000..c477baf --- /dev/null +++ b/t/inode.t @@ -0,0 +1,44 @@ +#!/usr/local/bin/perl -w + +use strict ; + +use File::Slurp ; + +use Carp ; +use Socket ; +use Symbol ; +use Test::More ; + +BEGIN { + if( $^O =~ '32' ) { + plan skip_all => 'skip inode test on windows'; + exit ; + } + + plan tests => 2 ; +} + +my $data = <<TEXT ; +line 1 +more text +TEXT + +my $file = 'inode' ; + +write_file( $file, $data ) ; +my $inode_num = (stat $file)[1] ; +write_file( $file, $data ) ; +my $inode_num2 = (stat $file)[1] ; + +#print "I1 $inode_num I2 $inode_num2\n" ; + +ok( $inode_num == $inode_num2, 'same inode' ) ; + +write_file( $file, {atomic => 1}, $data ) ; +$inode_num2 = (stat $file)[1] ; + +#print "I1 $inode_num I2 $inode_num2\n" ; + +ok( $inode_num != $inode_num2, 'different inode' ) ; + +unlink $file ; diff --git a/t/large.t b/t/large.t new file mode 100644 index 0000000..3bd78b7 --- /dev/null +++ b/t/large.t @@ -0,0 +1,175 @@ +#!/usr/local/bin/perl -w + +use strict ; + +use Test::More ; +use Carp ; +use File::Slurp ; + +my $file = 'slurp.data' ; +unlink $file ; + +my @text_data = ( + [], + [ 'a' x 8 ], + [ ("\n") x 5 ], + [ map( "aaaaaaaa\n", 1 .. 3 ) ], + [ map( "aaaaaaaa\n", 1 .. 3 ), 'aaaaaaaa' ], + [ map ( 'a' x 100 . "\n", 1 .. 1024 ) ], + [ map ( 'a' x 100 . "\n", 1 .. 1024 ), 'a' x 100 ], + [ map ( 'a' x 1024 . "\n", 1 .. 1024 ) ], + [ map ( 'a' x 1024 . "\n", 1 .. 1024 ), 'a' x 10240 ], + [], +) ; + +my @bin_sizes = ( 1000, 1024 * 1024 ) ; + +my @bin_stuff = ( "\012", "\015", "\012\015", "\015\012", + map chr, 0 .. 
32 ) ; + +my @bin_data ; + +foreach my $size ( @bin_sizes ) { + + my $data = '' ; + + while ( length( $data ) < $size ) { + + $data .= $bin_stuff[ rand @bin_stuff ] ; + } + + push @bin_data, $data ; +} + +plan( tests => 17 * @text_data + 8 * @bin_data ) ; + +#print "# text slurp\n" ; + +foreach my $data ( @text_data ) { + + test_text_slurp( $data ) ; +} + +#print "# BIN slurp\n" ; + +SKIP: { + skip "binmode not available in this version of Perl", 8 * @bin_data + if $] < 5.006 ; + + foreach my $data ( @bin_data ) { + + test_bin_slurp( $data ) ; + } +} + +unlink $file ; + +exit ; + +sub test_text_slurp { + + my( $data_ref ) = @_ ; + + my @data_lines = @{$data_ref} ; + my $data_text = join( '', @data_lines ) ; + + + my $err = write_file( $file, $data_text ) ; + ok( $err, 'write_file - ' . length $data_text ) ; + my $text = read_file( $file ) ; + ok( $text eq $data_text, 'scalar read_file - ' . length $data_text ) ; + + $err = write_file( $file, \$data_text ) ; + ok( $err, 'write_file ref arg - ' . length $data_text ) ; + $text = read_file( $file ) ; + ok( $text eq $data_text, 'scalar read_file - ' . length $data_text ) ; + + $err = write_file( $file, { buf_ref => \$data_text } ) ; + ok( $err, 'write_file buf ref opt - ' . length $data_text ) ; + $text = read_file( $file ) ; + ok( $text eq $data_text, 'scalar read_file - ' . length $data_text ) ; + + my $text_ref = read_file( $file, scalar_ref => 1 ) ; + ok( ${$text_ref} eq $data_text, + 'scalar ref read_file - ' . length $data_text ) ; + + read_file( $file, buf_ref => \my $buffer ) ; + ok( $buffer eq $data_text, + 'buf_ref read_file - ' . length $data_text ) ; + +# my @data_lines = split( m|(?<=$/)|, $data_text ) ; + + $err = write_file( $file, \@data_lines ) ; + ok( $err, 'write_file list ref arg - ' . length $data_text ) ; + $text = read_file( $file ) ; + ok( $text eq $data_text, 'scalar read_file - ' . 
length $data_text ) ; + +#print map "[$_]\n", @data_lines ; +#print "DATA <@data_lines>\n" ; + + my @array = read_file( $file ) ; + +#print map "{$_}\n", @array ; +#print "ARRAY <@array>\n" ; + + ok( eq_array( \@array, \@data_lines ), + 'array read_file - ' . length $data_text ) ; + + print "READ:\n", map( "[$_]\n", @array ), + "EXP:\n", map( "[$_]\n", @data_lines ) + unless eq_array( \@array, \@data_lines ) ; + + my $array_ref = read_file( $file, array_ref => 1 ) ; + ok( eq_array( $array_ref, \@data_lines ), + 'array ref read_file - ' . length $data_text ) ; + + ($array_ref) = read_file( $file, {array_ref => 1} ) ; + ok( eq_array( $array_ref, \@data_lines ), + 'array ref list context args ref read_file - ' . length $data_text ) ; + + $err = write_file( $file, { append => 1 }, $data_text ) ; + ok( $err, 'write_file append - ' . length $data_text ) ; + + my $text2 = read_file( $file ) ; + ok( $text2 eq $data_text x 2, 'read_file append - ' . length $data_text ) ; + + $err = append_file( $file, $data_text ) ; + ok( $err, 'append_file - ' . length $data_text ) ; + + my $bin3 = read_file( $file ) ; + ok( $bin3 eq $data_text x 3, 'read_file append_file - ' . length $data_text ) ; + + return ; +} + +sub test_bin_slurp { + + my( $data ) = @_ ; + + my $err = write_file( $file, {'binmode' => ':raw'}, $data ) ; + ok( $err, 'write_file bin - ' . length $data ) ; + + my $bin = read_file( $file, 'binmode' => ':raw' ) ; + ok( $bin eq $data, 'scalar read_file bin - ' . length $data ) ; + + my $bin_ref = read_file( $file, scalar_ref => 1, 'binmode' => ':raw' ) ; + ok( ${$bin_ref} eq $data, + 'scalar ref read_file bin - ' . length $data ) ; + + read_file( $file, buf_ref => \(my $buffer), 'binmode' => ':raw' ) ; + ok( $buffer eq $data, 'buf_ref read_file bin - ' . length $data ) ; + + $err = write_file( $file, { append => 1, 'binmode' => ':raw' }, $data ) ; + ok( $err, 'write_file append bin - ' . 
length $data ) ; + + my $bin2 = read_file( $file, 'binmode' => ':raw' ) ; + ok( $bin2 eq $data x 2, 'read_file append bin - ' . length $data ) ; + + $err = append_file( $file, { 'binmode' => ':raw' }, $data ) ; + ok( $err, 'append_file bin - ' . length $data ) ; + + my $bin3 = read_file( $file, 'binmode' => ':raw' ) ; + ok( $bin3 eq $data x 3, 'read_file bin - ' . length $data ) ; + + return ; +} diff --git a/t/newline.t b/t/newline.t new file mode 100644 index 0000000..70e09d6 --- /dev/null +++ b/t/newline.t @@ -0,0 +1,52 @@ +use Test::More tests => 2 ; + +use strict; +use File::Slurp ; + +my $data = "\r\n\r\n\r\n" ; +my $file_name = 'newline.txt' ; + +stdio_write_file( $file_name, $data ) ; +my $slurped_data = read_file( $file_name ) ; + +my $stdio_slurped_data = stdio_read_file( $file_name ) ; + + +print 'data ', unpack( 'H*', $data), "\n", +'slurp ', unpack('H*', $slurped_data), "\n", +'stdio slurp ', unpack('H*', $stdio_slurped_data), "\n"; + +is( $data, $slurped_data, 'slurp' ) ; + +write_file( $file_name, $data ) ; +$slurped_data = stdio_read_file( $file_name ) ; + +is( $data, $slurped_data, 'spew' ) ; + +unlink $file_name ; + +sub stdio_write_file { + + my( $file_name, $data ) = @_ ; + + local( *FH ) ; + + open( FH, ">$file_name" ) || die "Couldn't create $file_name: $!"; + + print FH $data ; +} + +sub stdio_read_file { + + my( $file_name ) = @_ ; + + open( FH, $file_name ) || die "Couldn't open $file_name: $!"; + + local( $/ ) ; + + my $data = <FH> ; + + return $data ; +} + + diff --git a/t/no_clobber.t b/t/no_clobber.t new file mode 100644 index 0000000..0251a1c --- /dev/null +++ b/t/no_clobber.t @@ -0,0 +1,26 @@ +#!/usr/local/bin/perl -w + +use strict ; +use File::Slurp ; + +use Test::More tests => 2 ; + + +my $data = <<TEXT ; +line 1 +more text +TEXT + +my $file = 'xxx' ; + +unlink $file ; + + +my $err = write_file( $file, { no_clobber => 1 }, $data ) ; +ok( $err, 'new write_file' ) ; + +$err = write_file( $file, { no_clobber => 1, err_mode => 'quiet' 
}, $data ) ; + +ok( !$err, 'no_clobber write_file' ) ; + +unlink $file ; diff --git a/t/original.t b/t/original.t new file mode 100644 index 0000000..aa2a98f --- /dev/null +++ b/t/original.t @@ -0,0 +1,55 @@ +#!/usr/bin/perl -I. + +# try to honor possible tempdirs +$tmp = "file_$$"; + +$short = <<END; +small +file +END + +$long = <<END; +This is a much longer bit of contents +to store in a file. +END + +print "1..7\n"; + +use File::Slurp; + +&write_file($tmp, $long); +if (&read_file($tmp) eq $long) {print "ok 1\n";} else {print "not ok 1\n";} + +@x = &read_file($tmp); +@y = grep( $_ ne '', split(/(.*?\n)/, $long)); +while (@x && @y) { + last unless $x[0] eq $y[0]; + shift @x; + shift @y; +} +if (@x == @y && (@x ? $x[0] eq $y[0] : 1)) { print "ok 2\n";} else {print "not ok 2\n"} + +&append_file($tmp, $short); +if (&read_file($tmp) eq "$long$short") {print "ok 3\n";} else {print "not ok 3\n";} + +$iold = (stat($tmp))[1]; +&overwrite_file($tmp, $short); +$inew = (stat($tmp))[1]; + +if (&read_file($tmp) eq $short) {print "ok 4\n";} else {print "not ok 4\n";} + +if ($inew == $iold) {print "ok 5\n";} else {print "not ok 5\n";} + +unlink($tmp); + +&overwrite_file($tmp, $long); +if (&read_file($tmp) eq $long) {print "ok 6\n";} else {print "not ok 6\n";} + +unlink($tmp); + +&append_file($tmp, $short); +if (&read_file($tmp) eq $short) {print "ok 7\n";} else {print "not ok 7\n";} + +unlink($tmp); + + diff --git a/t/paragraph.t b/t/paragraph.t new file mode 100644 index 0000000..62cbad7 --- /dev/null +++ b/t/paragraph.t @@ -0,0 +1,64 @@ +#!/usr/local/bin/perl -w + +use strict ; + +use File::Slurp ; +use Test::More ; +use Carp ; + + +my $file = 'slurp.data' ; +unlink $file ; + +my @text_data = ( + [], + [ 'a' x 8 ], + [ "\n" x 5 ], + [ map( "aaaaaaaa\n\n", 1 .. 3 ) ], + [ map( "aaaaaaaa\n\n", 1 .. 3 ), 'aaaaaaaa' ], + [ map( "aaaaaaaa" . ( "\n" x (2 + rand 3) ), 1 .. 100 ) ], + [ map( "aaaaaaaa" . ( "\n" x (2 + rand 3) ), 1 .. 
100 ), 'aaaaaaaa' ], + [], +) ; + +plan( tests => 3 * @text_data ) ; + +#print "# text slurp\n" ; + +foreach my $data ( @text_data ) { + + test_text_slurp( $data ) ; +} + + +unlink $file ; + +exit ; + +sub test_text_slurp { + + my( $data_ref ) = @_ ; + + my @data_lines = @{$data_ref} ; + my $data_text = join( '', @data_lines ) ; + + local( $/ ) = '' ; + + my $err = write_file( $file, $data_text ) ; + ok( $err, 'write_file - ' . length $data_text ) ; + + + my @array = read_file( $file ) ; + ok( eq_array( \@array, \@data_lines ), + 'array read_file - ' . length $data_text ) ; + + print "READ:\n", map( "[$_]\n", @array ), + "EXP:\n", map( "[$_]\n", @data_lines ) + unless eq_array( \@array, \@data_lines ) ; + + my $array_ref = read_file( $file, array_ref => 1 ) ; + ok( eq_array( $array_ref, \@data_lines ), + 'array ref read_file - ' . length $data_text ) ; + + return ; +} diff --git a/t/perms.t b/t/perms.t new file mode 100644 index 0000000..4cd01fa --- /dev/null +++ b/t/perms.t @@ -0,0 +1,31 @@ +#!/usr/local/bin/perl -w + +use strict ; +use Test::More ; +use File::Slurp ; + +plan skip_all => "meaningless on Win32" if $^O =~ /win32/i ; +plan tests => 2 ; + +my $file = "perms.$$" ; + +my $text = <<END ; +This is a bit of contents +to store in a file. 
+END + +umask 027 ; + +write_file( $file, $text ) ; +is( getmode( $file ), 0640, 'default perms works' ) ; +unlink $file ; + +write_file( $file, { perms => 0777 }, $text ) ; +is( getmode( $file ), 0750, 'set perms works' ) ; +unlink $file ; + +exit ; + +sub getmode { + return 07777 & (stat $_[0])[2] ; +} @@ -0,0 +1,13 @@ +#!/usr/local/bin/perl + +use Test::More; + +eval 'use Test::Pod 1.14' ; +plan skip_all => + 'Test::Pod 1.14 required for testing POD' if $@ ; + +all_pod_files_ok( +# { +# trustme => [ qr/slurp/ ] +# } +) ; diff --git a/t/pod_coverage.t b/t/pod_coverage.t new file mode 100644 index 0000000..0026d96 --- /dev/null +++ b/t/pod_coverage.t @@ -0,0 +1,24 @@ +#!/usr/local/bin/perl + +use Test::More; + +eval 'use Test::Pod::Coverage 1.04' ; +plan skip_all => + 'Test::Pod::Coverage 1.04 required for testing POD coverage' if $@ ; + +all_pod_coverage_ok( + { + trustme => [ + 'slurp', + 'O_APPEND', + 'O_BINARY', + 'O_CREAT', + 'O_EXCL', + 'O_RDONLY', + 'O_WRONLY', + 'SEEK_CUR', + 'SEEK_END', + 'SEEK_SET', + ], + } +) ; diff --git a/t/prepend_file.t b/t/prepend_file.t new file mode 100644 index 0000000..ec3c8bb --- /dev/null +++ b/t/prepend_file.t @@ -0,0 +1,74 @@ + +use strict ; +use warnings ; + +use lib qw(t) ; + +use File::Slurp qw( read_file write_file prepend_file ) ; +use Test::More ; + +use TestDriver ; + +my $file = 'prepend_file' ; +my $existing_data = <<PRE ; +line 1 +line 2 +more +PRE + +my $tests = [ + { + name => 'prepend null', + sub => \&prepend_file, + prepend_data => '', + pretest => sub { + my( $test ) = @_ ; + write_file( $file, $existing_data ) ; + my $prepend_data = $test->{prepend_data} ; + $test->{args} = [ + $file, + $prepend_data, + ] ; + $test->{expected} = "$prepend_data$existing_data" ; + }, + posttest => sub { $_[0]->{result} = read_file( $file ) }, + }, + { + name => 'prepend line', + sub => \&prepend_file, + prepend_data => "line 0\n", + pretest => sub { + my( $test ) = @_ ; + write_file( $file, $existing_data ) ; + my 
$prepend_data = $test->{prepend_data} ; + $test->{args} = [ + $file, + $prepend_data, + ] ; + $test->{expected} = "$prepend_data$existing_data" ; + }, + posttest => sub { $_[0]->{result} = read_file( $file ) }, + }, + { + name => 'prepend partial line', + sub => \&prepend_file, + prepend_data => 'partial line', + pretest => sub { + my( $test ) = @_ ; + write_file( $file, $existing_data ) ; + my $prepend_data = $test->{prepend_data} ; + $test->{args} = [ + $file, + $prepend_data, + ] ; + $test->{expected} = "$prepend_data$existing_data" ; + }, + posttest => sub { $_[0]->{result} = read_file( $file ) }, + }, +] ; + +test_driver( $tests ) ; + +unlink $file ; + +exit ; diff --git a/t/pseudo.t b/t/pseudo.t new file mode 100644 index 0000000..5deda84 --- /dev/null +++ b/t/pseudo.t @@ -0,0 +1,34 @@ +#!/usr/local/bin/perl -w + +use strict ; + +use File::Slurp ; +use Carp ; +use Test::More ; + +plan( tests => 1 ) ; + +my $proc_file = "/proc/$$/auxv" ; + +SKIP: { + + unless ( -r $proc_file ) { + + skip "can't find pseudo file $proc_file", 1 ; + } + + test_pseudo_file() ; +} + +sub test_pseudo_file { + + my $data_do = do{ local( @ARGV, $/ ) = $proc_file; <> } ; + +#print "LEN: ", length $data_do, "\n" ; + + my $data_slurp = read_file( $proc_file ) ; +#print "LEN2: ", length $data_slurp, "\n" ; +#print "LEN3: ", -s $proc_file, "\n" ; + + is( $data_do, $data_slurp, 'pseudo' ) ; +} diff --git a/t/read_dir.t b/t/read_dir.t new file mode 100644 index 0000000..d04351f --- /dev/null +++ b/t/read_dir.t @@ -0,0 +1,66 @@ +#!/usr/bin/perl -w -I. + +use strict ; +use Test::More tests => 9 ; + +use File::Slurp ; + +# try to honor possible tempdirs + +my $test_dir = "read_dir_$$" ; + +mkdir( $test_dir, 0700) || die "mkdir $test_dir: $!" ; + +my @dir_entries = read_dir( $test_dir ); + +ok( @dir_entries == 0, 'empty dir' ) ; + +@dir_entries = read_dir( $test_dir, keep_dot_dot => 1 ) ; + +ok( @dir_entries == 2, 'empty dir with . ..' 
) ;
+
+@dir_entries = read_dir( $test_dir, { keep_dot_dot => 1 } ) ;
+
+ok( @dir_entries == 2, 'empty dir with . .. - args ref' ) ;
+
+write_file( "$test_dir/x", "foo\n" ) ;
+
+@dir_entries = read_dir( $test_dir ) ;
+
+ok( @dir_entries == 1, 'dir with 1 file' ) ;
+
+ok( $dir_entries[0] eq 'x', 'dir with file x' ) ;
+
+my $file_cnt = 23 ;
+
+my @expected_entries = sort( 'x', 1 .. $file_cnt ) ;
+
+for ( 1 .. $file_cnt ) {
+
+	write_file( "$test_dir/$_", "foo\n" ) ;
+}
+
+@dir_entries = read_dir( $test_dir ) ;
+@dir_entries = sort @dir_entries ;
+
+ok( eq_array( \@dir_entries, \@expected_entries ),
+	"dir with $file_cnt files" ) ;
+
+my $dir_entries_ref = read_dir( $test_dir ) ;
+@{$dir_entries_ref} = sort @{$dir_entries_ref} ;
+
+ok( eq_array( $dir_entries_ref, \@expected_entries ),
+	"dir in array ref" ) ;
+
+my @prefixed_entries = read_dir( $test_dir, {prefix => 1} ) ;
+@prefixed_entries = sort @prefixed_entries ;
+ok( eq_array( \@prefixed_entries, [map "$test_dir/$_", @dir_entries] ),
+	'prefix option' ) ;
+
+# clean up
+
+unlink map "$test_dir/$_", @dir_entries ;
+rmdir( $test_dir ) || die "rmdir $test_dir: $!";
+ok( 1, 'cleanup' ) ;
+
+__END__
diff --git a/t/signal.t b/t/signal.t
new file mode 100644
index 0000000..2c692d0
--- /dev/null
+++ b/t/signal.t
@@ -0,0 +1,34 @@
+#!/usr/local/bin/perl -w
+
+use strict ;
+use File::Slurp qw(read_file);
+
+use Carp ;
+use Test::More ;
+
+BEGIN {
+	if( $^O =~ /Win32/ ) {
+		plan skip_all => 'skip signal test on windows';
+		exit ;
+	}
+
+	plan tests => 1 ;
+}
+
+$SIG{CHLD} = sub {};
+
+pipe(IN, OUT);
+
+print "forking\n";
+if (!fork) {
+	sleep 1;
+	exit;
+}
+if (!fork) {
+	sleep 2;
+	print OUT "success";
+	exit;
+}
+close OUT;
+my $data = read_file(\*IN);
+is ($data, "success", "read_file retries across EINTR");
diff --git a/t/slurp.t b/t/slurp.t
new file mode 100644
index 0000000..3ba53e3
--- /dev/null
+++ b/t/slurp.t
@@ -0,0 +1,19 @@
+#!/usr/local/bin/perl -w -T
+
+use strict ;
+use File::Slurp qw( write_file slurp ) ;
+
+use Test::More 
tests => 1 ;
+
+my $data = <<TEXT ;
+line 1
+more text
+TEXT
+
+my $file = 'xxx' ;
+
+write_file( $file, $data ) ;
+my $read_buf = slurp( $file ) ;
+is( $read_buf, $data, 'slurp alias' ) ;
+
+unlink $file ;
diff --git a/t/stdin.t b/t/stdin.t
new file mode 100644
index 0000000..071e0d3
--- /dev/null
+++ b/t/stdin.t
@@ -0,0 +1,23 @@
+#!/usr/local/bin/perl -w
+
+use strict ;
+use File::Slurp ;
+
+use Carp ;
+use Socket ;
+use Symbol ;
+use Test::More tests => 6 ;
+
+my $data = <<TEXT ;
+line 1
+more text
+TEXT
+
+foreach my $file ( qw( stdin STDIN stdout STDOUT stderr STDERR ) ) {
+
+	write_file( $file, $data ) ;
+	my $read_buf = read_file( $file ) ;
+	is( $read_buf, $data, "read/write of file [$file]" ) ;
+
+	unlink $file ;
+}
diff --git a/t/stringify.t b/t/stringify.t
new file mode 100644
index 0000000..c3809cb
--- /dev/null
+++ b/t/stringify.t
@@ -0,0 +1,45 @@
+#!perl -T
+
+use strict;
+
+use Test::More;
+use File::Slurp;
+use IO::Handle ;
+use UNIVERSAL ;
+
+plan tests => 3 ;
+
+my $path = "data.txt";
+my $data = "random junk\n";
+
+# create an object with an overloaded path
+
+my $obj = FileObject->new( $path ) ;
+
+isa_ok( $obj, 'FileObject' ) ;
+is( "$obj", $path, "object stringifies to path" );
+
+write_file( $obj, $data ) ;
+
+my $read_text = read_file( $obj ) ;
+is( $read_text, $data, 'read_file of stringified object' ) ;
+
+unlink $path ;
+
+exit ;
+
+# this code creates the object which has a stringified path
+
+package FileObject;
+
+use overload
+	q[""] => \&stringify,
+	fallback => 1 ;
+
+sub new {
+	return bless { path => $_[1] }, $_[0]
+}
+
+sub stringify {
+	return $_[0]->{path}
+}
diff --git a/t/tainted.t b/t/tainted.t
new file mode 100644
index 0000000..6805d48
--- /dev/null
+++ b/t/tainted.t
@@ -0,0 +1,69 @@
+#!perl -T
+
+use strict;
+use Test::More;
+use File::Slurp;
+
+plan 'skip_all', "Scalar::Util not available" unless
+	eval 'use Scalar::Util qw(tainted) ; tainted($0) ; 1';
+
+plan 'tests', 5;
+
+my $path = "data.txt";
+my $data = "random 
junk\nline2"; + +SKIP: { + # write something to that file + open(FILE, ">$path") or skip 4, "can't write to '$path': $!"; + print FILE $data; + close(FILE); + + # read the file using File::Slurp in scalar context + my $content = eval { read_file($path) }; + is( $@, '', "read_file() in scalar context" ); + ok( tainted($content), " => returned content should be tainted" ); + + +# # reconstruct the full lines by merging items by pairs +# for my $k (0..int($#lines/2)) { +# my $i = $k * 2; +# $lines[$k] = (defined $lines[$i] ? $lines[$i] : '') +# . (defined $lines[$i+1] ? $lines[$i+1] : ''); +# } + +# # remove the rest of the items +# splice(@lines, int($#lines/2)+1); +# pop @lines unless $lines[-1]; + +# $_ .= $/ for @lines ; + +# my @lines = split m{$/}, $content, -1; +# my @parts = split m{($/)}, $content, -1; + +# # my @parts = $content =~ m{.+?(?:$/)?}g ; + +# my @lines ; +# while( @parts > 2 ) { + +# my( $line, $sep ) = splice( @parts, 0, 2 ) ; +# push @lines, "$line$sep" ; +# } + +# push @lines, shift @parts if @parts ; + +# # ok( tainted($lines[0]), " text => returned content should be tainted" ); + + # read the file using File::Slurp in list context + my @content = eval { read_file($path) }; + is( $@, '', "read_file() in list context" ); + ok( tainted($content[0]), " => returned content should be tainted" ); + + my $text = join( '', @content ) ; + + is( $text, $content, "list eq scalar" ); + + +# ok( tainted($lines[0]), " => returned content should be tainted" ); +} + +unlink $path; diff --git a/t/write_file_win32.t b/t/write_file_win32.t new file mode 100644 index 0000000..1e42456 --- /dev/null +++ b/t/write_file_win32.t @@ -0,0 +1,29 @@ +use strict; +use File::Slurp ; + +use Test::More tests => 1; + +BEGIN { $^W = 1 } + +sub simple_write_file { + open FH, ">$_[0]" or die "Couldn't open $_[0] for write: $!"; + print FH $_[1]; + close FH ; +} + +sub newline_size { + my ($code) = @_; + + my $file = __FILE__ . 
'.tmp'; + + local $\ = ''; + $code->($file, "\n" x 3); + + my $size = -s $file; + + unlink $file; + + return $size; +} + +is(newline_size(\&write_file), newline_size(\&simple_write_file), 'newline'); |