diff options
author | Abhijit Menon-Sen <ams@wiw.org> | 2002-03-17 23:41:35 +0000 |
---|---|---|
committer | Abhijit Menon-Sen <ams@wiw.org> | 2002-03-17 23:41:35 +0000 |
commit | 57c7bc0807db52907ce13229410345ba8f983f8a (patch) | |
tree | 140441f114c2fa095ee54e3de6cea0c985fd1d74 /lib | |
parent | 7d3e948e24bb3d2cd32830e4d6a913f9068a0d8c (diff) | |
download | perl-57c7bc0807db52907ce13229410345ba8f983f8a.tar.gz |
Upgrade to Tie::File 0.21.
p4raw-id: //depot/perl@15277
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Tie/File.pm | 496 | ||||
-rw-r--r-- | lib/Tie/File/t/00_version.t | 10 | ||||
-rw-r--r-- | lib/Tie/File/t/15_pushpop.t | 1 | ||||
-rw-r--r-- | lib/Tie/File/t/17_misc_meth.t | 68 | ||||
-rw-r--r-- | lib/Tie/File/t/22_autochomp.t | 12 | ||||
-rw-r--r-- | lib/Tie/File/t/30_defer.t | 319 | ||||
-rw-r--r-- | lib/Tie/File/t/31_autodefer.t | 65 | ||||
-rw-r--r-- | lib/Tie/File/t/32_defer_misc.t | 230 |
8 files changed, 1071 insertions, 130 deletions
diff --git a/lib/Tie/File.pm b/lib/Tie/File.pm index 5b545aa3dc..ec9a8207c7 100644 --- a/lib/Tie/File.pm +++ b/lib/Tie/File.pm @@ -5,20 +5,7 @@ use POSIX 'SEEK_SET'; use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX'; require 5.005; -$VERSION = "0.20"; - -# Idea: The object will always contain an array of byte offsets -# this will be filled in as is necessary and convenient. -# fetch will do seek-read. -# There will be a cache parameter that controls the amount of cached *data* -# Also an LRU queue of cached records -# store will read the relevant record into the cache -# If it's the same length as what is being written, it will overwrite it in -# place; if not, it will do a from-to copying write. -# The record separator string is also a parameter - -# Record numbers start at ZERO. - +$VERSION = "0.21"; my $DEFAULT_MEMORY_SIZE = 1<<21; # 2 megabytes my %good_opt = map {$_ => 1, "-$_" => 1} @@ -47,12 +34,15 @@ sub TIEARRAY { $opts{memory} = $DEFAULT_MEMORY_SIZE; $opts{memory} = $opts{dw_size} if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE; + # Dora Winifred Read } $opts{dw_size} = $opts{memory} unless defined $opts{dw_size}; if ($opts{dw_size} > $opts{memory}) { croak("$pack: dw_size may not be larger than total memory allocation\n"); } - $opts{deferred} = {}; # no records presently deferred + # are we in deferred-write mode? + $opts{defer} = 0 unless defined $opts{defer}; + $opts{deferred} = {}; # no records are presently deferred $opts{deferred_s} = 0; # count of total bytes in ->{deferred} # the cache is a hash instead of an array because it is likely to be @@ -77,10 +67,15 @@ sub TIEARRAY { my $fh; if (UNIVERSAL::isa($file, 'GLOB')) { - unless (seek $file, 0, SEEK_SET) { + # We use 1 here on the theory that some systems + # may not indicate failure if we use 0. + # MSWin32 does not indicate failure with 0, but I don't know if + # it will indicate failure with 1 or not. + unless (seek $file, 1, SEEK_SET) { croak "$pack: your filehandle does not appear to be seekable"; } - $fh = $file; + seek $file, 0, SEEK_SET # put it back + $fh = $file; # setting binmode is the user's problem } elsif (ref $file) { croak "usage: tie \@array, $pack, filename, [option => value]..."; } else { @@ -102,7 +97,9 @@ sub TIEARRAY { sub FETCH { my ($self, $n) = @_; - $self->_chomp1($self->_fetch($n)); + my $rec = exists $self->{deferred}{$n} + ? $self->{deferred}{$n} : $self->_fetch($n); + $self->_chomp1($rec); } # Chomp many records in-place; return nothing useful @@ -172,14 +169,11 @@ sub STORE { # Note we have to do this before we alter the cache my $oldrec = $self->_fetch($n); - # _check_cache promotes record $n to MRU. Is this correct behavior? if (my $cached = $self->_check_cache($n)) { my $len_diff = length($rec) - length($cached); $self->{cache}{$n} = $rec; $self->{cached} += $len_diff; - $self->_cache_flush - if $len_diff > 0 - && $self->{deferred_s} + $self->{cached} > $self->{memory}; + $self->_cache_flush if $len_diff > 0 && $self->_cache_too_full; } if (not defined $oldrec) { @@ -208,19 +202,31 @@ sub _store_deferred { $self->{deferred_s} += length($rec); $self->{deferred_s} -= length($old_deferred) if defined $old_deferred; if ($self->{deferred_s} > $self->{dw_size}) { - $self->flush; - $self->defer; # flush clears the 'defer' flag - } elsif ($self->{deferred_s} + $self->{cached} > $self->{memory}) { + $self->_flush; + } elsif ($self->_cache_too_full) { $self->_cache_flush; } } +# Remove a single record from the deferred-write buffer without writing it +# The record need not be present +sub _delete_deferred { + my ($self, $n) = @_; + my $rec = delete $self->{deferred}{$n}; + return unless defined $rec; + $self->{deferred_s} -= length $rec; +} + sub FETCHSIZE { my $self = shift; my $n = $#{$self->{offsets}}; + # 20020317 Change this to binary search while (defined ($self->_fill_offsets_to($n+1))) { ++$n; } + for my $k (keys %{$self->{deferred}}) { + $n = $k+1 if $n < $k+1; + } $n; } @@ -231,11 +237,23 @@ sub STORESIZE { # file gets longer if ($len > $olen) { - $self->_extend_file_to($len); + if ($self->{defer}) { + for ($olen .. $len-1) { + $self->_store_deferred($_, $self->{recsep}); + } + } else { + $self->_extend_file_to($len); + } return; } # file gets shorter + if ($self->{defer}) { + for (grep $_ >= $len, keys %{$self->{deferred}}) { + $self->_delete_deferred($_); + } + } + $self->_seek($len); $self->_chop_file; $#{$self->{offsets}} = $len; @@ -247,7 +265,7 @@ sub STORESIZE { sub PUSH { my $self = shift; $self->SPLICE($self->FETCHSIZE, scalar(@_), @_); - $self->FETCHSIZE; +# $self->FETCHSIZE; # av.c takes care of this for me } sub POP { @@ -266,12 +284,17 @@ sub SHIFT { sub UNSHIFT { my $self = shift; $self->SPLICE(0, 0, @_); - $self->FETCHSIZE; + # $self->FETCHSIZE; # av.c takes care of this for me } sub CLEAR { # And enable auto-defer mode, since it's likely that they just - # did @a = (...); + # did @a = (...); + # + # 20020316 + # Maybe that's too much dwimmery. But stuffing a fake '-1' into the + # autodefer history might not be too much. If you did that, you + # could also special-case [ -1, 0 ], which might not be too much. my $self = shift; $self->_seekb(0); $self->_chop_file; @@ -279,32 +302,46 @@ sub CLEAR { $self->{cached} = 0; @{$self->{lru}} = (); @{$self->{offsets}} = (0); + %{$self->{deferred}}= (); + $self->{deferred_s} = 0; } sub EXTEND { my ($self, $n) = @_; + + # No need to pre-extend anything in this case + return if $self->{defer}; + $self->_fill_offsets_to($n); $self->_extend_file_to($n); } sub DELETE { my ($self, $n) = @_; + $self->_delete_deferred($n) if $self->{defer}; my $lastrec = $self->FETCHSIZE-1; + my $rec = $self->FETCH($n); if ($n == $lastrec) { $self->_seek($n); $self->_chop_file; $#{$self->{offsets}}--; $self->_uncache($n); # perhaps in this case I should also remove trailing null records? - } else { + # 20020316 + # Note that delete @a[-3..-1] deletes the records in the wrong order, + # so we only chop the very last one out of the file. We could repair this + # by tracking deleted records inside the object. + } elsif ($n < $lastrec) { $self->STORE($n, ""); } + $rec; } sub EXISTS { my ($self, $n) = @_; - $self->_fill_offsets_to($n); - 0 <= $n && $n < $self->FETCHSIZE; + return 1 if exists $self->{deferred}{$n}; + $self->_fill_offsets_to($n); # I think this is unnecessary + $n < $self->FETCHSIZE; } sub SPLICE { @@ -319,6 +356,7 @@ sub SPLICE { } sub DESTROY { + my $self = shift; $self->flush if $self->{defer}; } @@ -505,6 +543,12 @@ sub _fixrecs { } } + +################################################################ +# +# Basic read, write, and seek +# + # seek to the beginning of record #$n # Assumes that the offsets table is already correctly populated # @@ -570,6 +614,12 @@ sub _read_record { $rec; } +################################################################ +# +# Read cache management + +# Insert a record into the cache at position $n +# Only appropriate when no data is cached for $n already sub _cache_insert { my ($self, $n, $rec) = @_; @@ -580,9 +630,11 @@ sub _cache_insert { $self->{cached} += length $rec; push @{$self->{lru}}, $n; # most-recently-used is at the END - $self->_cache_flush if $self->{cached} > $self->{memory}; + $self->_cache_flush if $self->_cache_too_full; } +# Remove cached data for record $n, if there is any +# (It is OK if $n is not in the cache at all) sub _uncache { my $self = shift; for my $n (@_) { @@ -593,6 +645,7 @@ sub _uncache { } } +# _check_cache promotes record $n to MRU. Is this correct behavior? sub _check_cache { my ($self, $n) = @_; my $rec; @@ -600,19 +653,31 @@ sub _check_cache { # cache hit; update LRU queue and return $rec # replace this with a heap in a later version + # 20020317 This should be a separate method @{$self->{lru}} = ((grep $_ ne $n, @{$self->{lru}}), $n); $rec; } +sub _cache_too_full { + my $self = shift; + $self->{cached} + $self->{deferred_s} > $self->{memory}; +} + sub _cache_flush { my ($self) = @_; - while ($self->{cached} + $self->{deferred_s} > $self->{memory}) { + while ($self->_cache_too_full) { my $lru = shift @{$self->{lru}}; my $rec = delete $self->{cache}{$lru}; $self->{cached} -= length $rec; } } +################################################################ +# +# File custodial services +# + + # We have read to the end of the file and have the offsets table # entirely populated. Now we need to write a new record beyond # the end of the file. We prepare for this by writing @@ -641,6 +706,7 @@ sub _chop_file { truncate $self->{fh}, tell($self->{fh}); } + # compute the size of a buffer suitable for moving # all the data in a file forward $n bytes # ($n may be negative) @@ -653,6 +719,11 @@ sub _bufsize { $b; } +################################################################ +# +# Miscellaneous public methods +# + # Lock the file sub flock { my ($self, $op) = @_; @@ -665,12 +736,6 @@ sub flock { flock $fh, $op; } -# Defer writes -sub defer { - my $self = shift; - $self->{defer} = 1; -} - # Get/set autochomp option sub autochomp { my $self = shift; @@ -683,6 +748,17 @@ sub autochomp { } } +################################################################ +# +# Matters related to deferred writing +# + +# Defer writes +sub defer { + my $self = shift; + $self->{defer} = 1; +} + # Flush deferred writes # # This could be better optimized to write the file in one pass, instead @@ -711,23 +787,33 @@ sub _flush { @{$self->{deferred}}{$first_rec .. $last_rec}); } - $self->discard; # clear out defered-write-cache + $self->_discard; # clear out defered-write-cache } -# Discard deferred writes +# Discard deferred writes and disable future deferred writes sub discard { my $self = shift; - undef $self->{deferred}; - $self->{deferred_s} = 0; + $self->_discard; $self->{defer} = 0; } +# Discard deferred writes, but retain old deferred writing mode +sub _discard { + my $self = shift; + $self->{deferred} = {}; + $self->{deferred_s} = 0; +} + # Not yet implemented sub autodefer { } +# This is NOT a method. It is here for two reasons: +# 1. To factor a fairly complicated block out of the constructor +# 2. To provide access for the test suite, which need to be sure +# files are being written properly. sub _default_recsep { my $recsep = $/; - if ($^O eq 'MSWin32') { + if ($^O eq 'MSWin32') { # Dos too? # Windows users expect files to be terminated with \r\n # But $/ is set to \n instead # Note that this also transforms \n\n into \r\n\r\n. @@ -737,23 +823,36 @@ sub _default_recsep { $recsep; } +# Utility function for _check_integrity +sub _ci_warn { + my $msg = shift; + $msg =~ s/\n/\\n/g; + $msg =~ s/\r/\\r/g; + print "# $msg\n"; +} + # Given a file, make sure the cache is consistent with the -# file contents +# file contents and the internal data structures are consistent with +# each other. Returns true if everything checks out, false if not +# +# The $file argument is no longer used. It is retained for compatibility +# with the existing test suite. sub _check_integrity { my ($self, $file, $warn) = @_; my $good = 1; if (not defined $self->{offsets}[0]) { - $warn && print STDERR "# offset 0 is missing!\n"; + _ci_warn("offset 0 is missing!"); $good = 0; } elsif ($self->{offsets}[0] != 0) { - $warn && print STDERR "# rec 0: offset <$self->{offsets}[0]> s/b 0!\n"; + _ci_warn("rec 0: offset <$self->{offsets}[0]> s/b 0!"); $good = 0; } local *F = $self->{fh}; seek F, 0, SEEK_SET; local $/ = $self->{recsep}; + my $rsl = $self->{recseplen}; $. = 0; while (<F>) { @@ -762,26 +861,29 @@ sub _check_integrity { my $offset = $self->{offsets}[$.]; my $ao = tell F; if (defined $offset && $offset != $ao) { - $warn && print STDERR "# rec $n: offset <$offset> actual <$ao>\n"; + _ci_warn("rec $n: offset <$offset> actual <$ao>"); $good = 0; } if (defined $cached && $_ ne $cached) { $good = 0; chomp $cached; chomp; - $warn && print STDERR "# rec $n: cached <$cached> actual <$_>\n"; + _ci_warn("rec $n: cached <$cached> actual <$_>"); + } + if (defined $cached && substr($cached, -$rsl) ne $/) { + _ci_warn("rec $n in the cache is missing the record separator"); } } - my $memory = 0; + my $cached = 0; while (my ($n, $r) = each %{$self->{cache}}) { - $memory += length($r); + $cached += length($r); next if $n+1 <= $.; # checked this already - $warn && print STDERR "# spurious caching of record $n\n"; + _ci_warn("spurious caching of record $n"); $good = 0; } - if ($memory != $self->{cached}) { - $warn && print STDERR "# cache size is $self->{cached}, should be $memory\n"; + if ($cached != $self->{cached}) { + _ci_warn("cache size is $self->{cached}, should be $cached"); $good = 0; } @@ -789,7 +891,7 @@ sub _check_integrity { for (@{$self->{lru}}) { $seen{$_}++; if (not exists $self->{cache}{$_}) { - $warn && print "# $_ is mentioned in the LRU queue, but not in the cache\n"; + _ci_warn("$_ is mentioned in the LRU queue, but not in the cache"); $good = 0; } } @@ -797,16 +899,56 @@ sub _check_integrity { if (@duplicate) { my $records = @duplicate == 1 ? 'Record' : 'Records'; my $appear = @duplicate == 1 ? 'appears' : 'appear'; - $warn && print "# $records @duplicate $appear multiple times in LRU queue: @{$self->{lru}}\n"; + _ci_warn("$records @duplicate $appear multiple times in LRU queue: @{$self->{lru}}"); $good = 0; } for (keys %{$self->{cache}}) { unless (exists $seen{$_}) { - print "# record $_ is in the cache but not the LRU queue\n"; + _ci_warn("record $_ is in the cache but not the LRU queue"); $good = 0; } } + # Now let's check the deferbuffer + # Unless deferred writing is enabled, it should be empty + if (! $self->{defer} && %{$self->{deferred}}) { + _ci_warn("deferred writing disabled, but deferbuffer nonempty"); + $good = 0; + } + + # Any record in the deferbuffer should *not* be present in the readcache + my $deferred_s = 0; + while (my ($n, $r) = each %{$self->{deferred}}) { + $deferred_s += length($r); + if (exists $self->{cache}{$n}) { + _ci_warn("record $n is in the deferbuffer *and* the readcache"); + $good = 0; + } + if (substr($r, -$rsl) ne $/) { + _ci_warn("rec $n in the deferbuffer is missing the record separator"); + $good = 0; + } + } + + # Total size of deferbuffer should match internal total + if ($deferred_s != $self->{deferred_s}) { + _ci_warn("buffer size is $self->{deferred_s}, should be $deferred_s"); + $good = 0; + } + + # Total size of deferbuffer should not exceed the specified limit + if ($deferred_s > $self->{dw_size}) { + _ci_warn("buffer size is $self->{deferred_s} which exceeds the limit of $self->{dw_size}"); + $good = 0; + } + + # Total size of cached data should not exceed the specified limit + if ($deferred_s + $cached > $self->{memory}) { + my $total = $deferred_s + $cached; + _ci_warn("total stored data size is $total which exceeds the limit of $self->{memory}"); + $good = 0; + } + $good; } @@ -818,7 +960,7 @@ Tie::File - Access the lines of a disk file via a Perl array =head1 SYNOPSIS - # This file documents Tie::File version 0.20 + # This file documents Tie::File version 0.21 tie @array, 'Tie::File', filename or die ...; @@ -826,9 +968,15 @@ Tie::File - Access the lines of a disk file via a Perl array print $array[42]; # display line 42 of the file $n_recs = @array; # how many records are in the file? - $#array = $n_recs - 2; # chop records off the end + $#array -= 2; # chop two records off the end + - # As you would expect: + for (@array) { + s/PERL/Perl/g; # Replace PERL with Perl everywhere in the file + } + + # These are just like regular push, pop, unshift, shift, and splice + # Except that they modify the file in the way you would expect push @array, new recs...; my $r1 = pop @array; @@ -838,6 +986,7 @@ Tie::File - Access the lines of a disk file via a Perl array untie @array; # all finished + =head1 DESCRIPTION C<Tie::File> represents a regular text file as a Perl array. Each @@ -850,7 +999,7 @@ gigantic files. Changes to the array are reflected in the file immediately. -Lazy people may now stop reading the manual. +Lazy people and beginners may now stop reading the manual. =head2 C<recsep> @@ -918,8 +1067,9 @@ the file contains the text Frankincense Myrrh -the tied array will appear to contain C<("Gold", "Frankincense", "Myrrh")>. -If you set C<autochomp> to a false value, the record separator will not be removed. If the file above was tied with +the tied array will appear to contain C<("Gold", "Frankincense", +"Myrrh")>. If you set C<autochomp> to a false value, the record +separator will not be removed. If the file above was tied with tie @gifts, "Tie::File", $gifts, autochomp => 0; @@ -952,9 +1102,10 @@ Opening the data file in write-only or append mode is not supported. =head2 C<memory> -This is an (inexact) upper limit on the amount of memory that -C<Tie::File> will consume at any time while managing the file. -At present, this is used as a bound on the size of the read cache. +This is an upper limit on the amount of memory that C<Tie::File> will +consume at any time while managing the file. This is used for two +things: managing the I<read cache> and managing the I<deferred write +buffer>. Records read in from the file are cached, to avoid having to re-read them repeatedly. If you read the same record twice, the first time it @@ -974,6 +1125,28 @@ desired cache size, in bytes. Setting the memory limit to 0 will inhibit caching; records will be fetched from disk every time you examine them. +=head2 C<dw_size> + +(This is an advanced feature. Skip this section on first reading.) + +If you use deferred writing (See L<"Deferred Writing">, below) then +data you write into the array will not be written directly to the +file; instead, it will be saved in the I<deferred write buffer> to be +written out later. Data in the deferred write buffer is also charged +against the memory limit you set with the C<memory> option. + +You may set the C<dw_size> option to limit the amount of data that can +be saved in the deferred write buffer. This limit may not exceed the +total memory limit. For example, if you set C<dw_size> to 1000 and +C<memory> to 2500, that means that no more than 1000 bytes of deferred +writes will be saved up. The space available for the read cache will +vary, but it will always be at least 1500 bytes (if the deferred write +buffer is full) and it could grow as large as 2500 bytes (if the +deferred write buffer is empty.) + +If you don't specify a C<dw_size>, it defaults to the entire memory +limit. + =head2 Option Format C<-mode> is a synonym for C<mode>. C<-recsep> is a synonym for @@ -1000,8 +1173,19 @@ argument to the Perl built-in C<flock> function; for example C<LOCK_SH> or C<LOCK_EX | LOCK_NB>. (These constants are provided by the C<use Fcntl ':flock'> declaration.) -C<MODE> is optional; C<$o-E<gt>flock> simply locks the file with -C<LOCK_EX>. +C<MODE> is optional; the default is C<LOCK_EX>. + +C<Tie::File> promises that the following sequence of operations will +be safe: + + my $o = tie @array, "Tie::File", $filename; + $o->flock; + +In particular, C<Tie::File> will I<not> read or write the file during +the C<tie> call. (Exception: Using C<mode =E<gt> O_TRUNC> will, of +course, erase the file during the C<tie> call. If you want to do this +safely, then open the file without C<O_TRUNC>, lock the file, and use +C<@array = ()>.) The best way to unlock a file is to discard the object and untie the array. It is probably unsafe to unlock the file without also untying @@ -1028,6 +1212,10 @@ the idiot does not also have a green light at the same time. See L<"autochomp">, above. +=head2 C<defer>, C<flush>, and C<discard> + +See L<"Deferred Writing">, below. + =head1 Tying to an already-opened filehandle If C<$fh> is a filehandle, such as is returned by C<IO::File> or one @@ -1041,10 +1229,81 @@ C<sysopen>, you may use: tie @array, 'Tie::File', \*FH, ...; Handles that were opened write-only won't work. Handles that were -opened read-only will work as long as you don't try to write to them. -Handles must be attached to seekable sources of data---that means no -pipes or sockets. If you supply a non-seekable handle, the C<tie> -call will try to abort your program. +opened read-only will work as long as you don't try to modify the +array. Handles must be attached to seekable sources of data---that +means no pipes or sockets. If you supply a non-seekable handle, the +C<tie> call will try to throw an exception. (On Unix systems, it +B<will> throw an exception.) + +=head1 Deferred Writing + +(This is an advanced feature. Skip this section on first reading.) + +Normally, modifying a C<Tie::File> array writes to the underlying file +immediately. Every assignment like C<$a[3] = ...> rewrites as much of +the file as is necessary; typically, everything from line 3 through +the end will need to be rewritten. This is the simplest and most +transparent behavior. Performance even for large files is reasonably +good. + +However, under some circumstances, this behavior may be excessively +slow. For example, suppose you have a million-record file, and you +want to do: + + for (@FILE) { + $_ = "> $_"; + } + +The first time through the loop, you will rewrite the entire file, +from line 0 through the end. The second time through the loop, you +will rewrite the entire file from line 1 through the end. The third +time through the loop, you will rewrite the entire file from line 2 to +the end. And so on. + +If the performance in such cases is unacceptable, you may defer the +actual writing, and then have it done all at once. The following loop +will perform much better for large files: + + (tied @a)->defer; + for (@a) { + $_ = "> $_"; + } + (tied @a)->flush; + +If C<Tie::File>'s memory limit is large enough, all the writing will +done in memory. Then, when you call C<-E<gt>flush>, the entire file +will be rewritten in a single pass. + +Calling C<-E<gt>flush> returns the array to immediate-write mode. If +you wish to discard the deferred writes, you may call C<-E<gt>discard> +instead of C<-E<gt>flush>. Note that in some cases, some of the data +will have been written already, and it will be too late for +C<-E<gt>discard> to discard all the changes. + +Deferred writes are cached in memory up to the limit specified by the +C<dw_size> option (see above). If the deferred-write buffer is full +and you try to write still more deferred data, the buffer will be +flushed. All buffered data will be written immediately, the buffer +will be emptied, and the now-empty space will be used for future +deferred writes. + +If the deferred-write buffer isn't yet full, but the total size of the +buffer and the read cache would exceed the C<memory> limit, the oldest +records will be flushed out of the read cache until total usage is +under the limit. + +C<push>, C<pop>, C<shift>, C<unshift>, and C<splice> cannot be +deferred. When you perform one of these operations, any deferred data +is written to the file and the operation is performed immediately. +This may change in a future version. + +A soon-to-be-released version of this module may enabled deferred +write mode automagically if it guesses that you are about to write +many consecutive records. To disable this feature, use + + (tied @o)->autodefer(0); + +(At present, this call does nothing.) =head1 CAVEATS @@ -1064,39 +1323,25 @@ changing the size of a record in the middle of a large file will always be fairly slow, because everything after the new record must be moved. -In particular, note that the following innocent-looking loop has very -bad behavior: - - # million-line file - for (@file_array) { - $_ .= 'x'; - } - -This is likely to be very slow, because the first iteration must -relocate lines 1 through 999,999; the second iteration must relocate -lines 2 through 999,999, and so on. The relocation is done using -block writes, however, so it's not as slow as it might be. - -A soon-to-be-released version of this module will provide a mechanism -for getting better performance in such cases, by deferring the writing -until it can be done all at once. This deferred writing feature might -be enabled automagically if C<Tie::File> guesses that you are about to write many consecutive records. To disable this feature, use - - (tied @o)->autodefer(0); - -(At present, this call does nothing.) - =item * The behavior of tied arrays is not precisely the same as for regular arrays. For example: - undef $a[10]; print "How unusual!\n" if $a[10]; + # This DOES print "How unusual!" + undef $a[10]; print "How unusual!\n" if defined $a[10]; C<undef>-ing a C<Tie::File> array element just blanks out the corresponding record in the file. When you read it back again, you'll -see the record separator (typically, $a[10] will appear to contain -"\n") so the supposedly-C<undef>'ed value will be true. +get the empty string, so the supposedly-C<undef>'ed value will be +defined. Similarly, if you have C<autochomp> disabled, then + + # This DOES print "How unusual!" if 'autochomp' is disabled + undef $a[10]; + print "How unusual!\n" if $a[10]; + +Because when C<autochomp> is disabled, C<$a[10]> will read back as +C<"\n"> (or whatever the record separator string is.) There are other minor differences, but in general, the correspondence is extremely close. @@ -1106,8 +1351,15 @@ is extremely close. Not quite every effort was made to make this module as efficient as possible. C<FETCHSIZE> should use binary search instead of linear search. The cache's LRU queue should be a heap instead of a list. + +The performance of the C<flush> method could be improved. At present, +it still rewrites the tail of the file once for each block of +contiguous lines to be changed. In the typical case, this will result +in only one rewrite, but in peculiar cases it might be bad. It should +be possible to perform I<all> deferred writing with a single rewrite. + These defects are probably minor; in any event, they will be fixed in -a later version of the module. +a future version of the module. =item * @@ -1119,8 +1371,19 @@ suggests, for example, that an LRU read-cache is a good tradeoff, even if it requires substantial adjustment following a C<splice> operation. +=item * +You might be tempted to think that deferred writing is like +transactions, with C<flush> as C<commit> and C<discard> as +C<rollback>, but it isn't, so don't. + =back +=head1 SUBCLASSING + +This version promises absolutely nothing about the internals, which +may change without notice. A future version of the module will have a +well-defined and stable subclassing API. + =head1 WHAT ABOUT C<DB_File>? C<DB_File>'s C<DB_RECNO> feature does something similar to @@ -1172,8 +1435,8 @@ can be inconvenient to arrange for concurrent access to the same file by two or more processes. Each process needs to call C<$db-E<gt>sync> after every write. When you change a C<Tie::File> array, the changes are reflected in the file immediately; no explicit C<-E<gt>sync> call -is required. (The forthcoming "deferred writing" mode will allow you -to request that writes be held in memory until explicitly C<sync>'ed.) +is required. (Or you can enable deferred writing mode to require that +changes be explicitly sync'ed.) =item * @@ -1215,15 +1478,21 @@ To receive an announcement whenever a new version of this module is released, send a blank email message to C<mjd-perl-tiefile-subscribe@plover.com>. +The most recent version of this module, including documentation and +any news of importance, will be available at + + http://perl.plover.com/TieFile/ + + =head1 LICENSE -C<Tie::File> version 0.20 is copyright (C) 2002 Mark Jason Dominus. +C<Tie::File> version 0.21 is copyright (C) 2002 Mark Jason Dominus. This library is free software; you may redistribute it and/or modify it under the same terms as Perl itself. -These terms include your choice of (1) the Perl Artistic Licence, or -(2) version 2 of the GNU General Public License as published by the +These terms are your choice of any of (1) the Perl Artistic Licence, +or (2) version 2 of the GNU General Public License as published by the Free Software Foundation, or (3) any later version of the GNU General Public License. @@ -1245,7 +1514,7 @@ For licensing inquiries, contact the author at: =head1 WARRANTY -C<Tie::File> version 0.20 comes with ABSOLUTELY NO WARRANTY. +C<Tie::File> version 0.21 comes with ABSOLUTELY NO WARRANTY. For details, see the license. =head1 THANKS @@ -1255,12 +1524,13 @@ core when I hadn't written it yet, and for generally being helpful, supportive, and competent. (Usually the rule is "choose any one.") Also big thanks to Abhijit Menon-Sen for all of the same things. -Special thanks to Craig Berry (for VMS portability help), Randy Kobes -(for Win32 portability help), Clinton Pierce and Autrijus Tang (for -heroic eleventh-hour Win32 testing above and beyond the call of duty), -and the rest of the CPAN testers (for testing generally). +Special thanks to Craig Berry and Peter Prymmer (for VMS portability +help), Randy Kobes (for Win32 portability help), Clinton Pierce and +Autrijus Tang (for heroic eleventh-hour Win32 testing above and beyond +the call of duty), and the rest of the CPAN testers (for testing +generally). -More thanks to: +Additional thanks to: Edward Avis / Gerrit Haase / Nikola Knezevic / @@ -1269,6 +1539,7 @@ Tassilo von Parseval / H. Dieter Pearcey / Slaven Rezic / Peter Somu / +Autrijus Tang (again) / Tels =head1 TODO @@ -1288,12 +1559,11 @@ Fixed-length mode. Maybe an autolocking mode? -Finish deferred writing. - Autodeferment. Record locking with fcntl()? Then you might support an undo log and -get real transactions. What a coup that would be. +get real transactions. What a coup that would be. All would bow +before my might. Leave-blanks mode diff --git a/lib/Tie/File/t/00_version.t b/lib/Tie/File/t/00_version.t index 8a154b1b72..5d950b9c20 100644 --- a/lib/Tie/File/t/00_version.t +++ b/lib/Tie/File/t/00_version.t @@ -2,14 +2,16 @@ print "1..1\n"; +my $testversion = "0.21"; use Tie::File; -if ($Tie::File::VERSION != 0.20) { +if ($Tie::File::VERSION != $testversion) { print STDERR " -WHOA THERE!! -You seem to be running version $Tie::File::VERSION of the module against -version 0.20 of the test suite! +*** WHOA THERE!!! *** + +You seem to be running version $Tie::File::VERSION of the module +against version $testversion of the test suite! None of the other test results will be reliable. "; diff --git a/lib/Tie/File/t/15_pushpop.t b/lib/Tie/File/t/15_pushpop.t index cc09b02d5d..4b6d1bc959 100644 --- a/lib/Tie/File/t/15_pushpop.t +++ b/lib/Tie/File/t/15_pushpop.t @@ -28,7 +28,6 @@ $N++; my ($n, @r); - # (3-11) PUSH tests $n = push @a, "rec0", "rec1", "rec2"; check_contents($data); diff --git a/lib/Tie/File/t/17_misc_meth.t b/lib/Tie/File/t/17_misc_meth.t index 87749616fc..b7543898d4 100644 --- a/lib/Tie/File/t/17_misc_meth.t +++ b/lib/Tie/File/t/17_misc_meth.t @@ -8,7 +8,7 @@ my $file = "tf$$.txt"; $: = Tie::File::_default_recsep(); 1 while unlink $file; -print "1..24\n"; +print "1..35\n"; my $N = 1; use Tie::File; @@ -30,7 +30,7 @@ check_contents("$:$:$:$:"); @a = (); check_contents(""); -# (11-16) EXISTS +# (11-20) EXISTS if ($] >= 5.006) { eval << 'TESTS'; print !exists $a[0] ? "ok $N\n" : "not ok $N\n"; @@ -48,28 +48,52 @@ print exists $a[1] ? "ok $N\n" : "ok $N\n"; $N++; print exists $a[2] ? "ok $N\n" : "not ok $N\n"; $N++; +print exists $a[-1] ? "ok $N\n" : "not ok $N\n"; +$N++; +print exists $a[-2] ? "ok $N\n" : "not ok $N\n"; +$N++; +print exists $a[-3] ? "ok $N\n" : "not ok $N\n"; +$N++; +print !exists $a[-4] ? "ok $N\n" : "not ok $N\n"; +$N++; TESTS } else { # perl 5.005 doesn't have exists $array[1] - for (11..16) { + for (11..20) { print "ok $_ \# skipped (no exists for arrays)\n"; $N++; } } -# (17-24) DELETE +my $del; + +# (21-35) DELETE if ($] >= 5.006) { eval << 'TESTS'; -delete $a[0]; +$del = delete $a[0]; check_contents("$:$:GIVE ME PIE$:"); -delete $a[2]; +# 20020317 Through 0.20, the 'delete' function returned the wrong values. +expect($del, "I like pie."); +$del = delete $a[2]; check_contents("$:$:"); -delete $a[0]; +expect($del, "GIVE ME PIE"); +$del = delete $a[0]; check_contents("$:$:"); -delete $a[1]; +expect($del, ""); +$del = delete $a[1]; check_contents("$:"); +expect($del, ""); + +# 20020317 Through 0.20, we had a bug where deleting an element past the +# end of the array would actually extend the array to that length. +$del = delete $a[4]; +check_contents("$:"); +expect($del, undef); + + + TESTS } else { # perl 5.005 doesn't have delete $array[1] - for (17..24) { + for (21..35) { print "ok $_ \# skipped (no delete for arrays)\n"; $N++; } @@ -87,13 +111,37 @@ sub check_contents { print "ok $N\n"; } else { ctrlfix(my $msg = "# expected <$x>, got <$a>"); - print "not ok $N\n$msg\n"; + print "not ok $N # $msg\n"; } $N++; print $o->_check_integrity($file, $ENV{INTEGRITY}) ? "ok $N\n" : "not ok $N\n"; $N++; } +sub expect { + if (@_ == 1) { + print $_[0] ? "ok $N\n" : "not ok $N\n"; + } elsif (@_ == 2) { + my ($a, $x) = @_; + if (! defined($a) && ! defined($x)) { print "ok $N\n" } + elsif ( defined($a) && ! defined($x)) { + ctrlfix(my $msg = "expected UNDEF, got <$a>"); + print "not ok $N \# $msg\n"; + } + elsif (! defined($a) && defined($x)) { + ctrlfix(my $msg = "expected <$x>, got UNDEF"); + print "not ok $N \# $msg\n"; + } elsif ($a eq $x) { print "ok $N\n" } + else { + ctrlfix(my $msg = "expected <$x>, got <$a>"); + print "not ok $N \# $msg\n"; + } + } else { + die "expect() got ", scalar(@_), " args, should have been 1 or 2"; + } + $N++; +} + sub ctrlfix { for (@_) { s/\n/\\n/g; diff --git a/lib/Tie/File/t/22_autochomp.t b/lib/Tie/File/t/22_autochomp.t index 70974d4b49..caa7150e0c 100644 --- a/lib/Tie/File/t/22_autochomp.t +++ b/lib/Tie/File/t/22_autochomp.t @@ -141,10 +141,18 @@ sub expect { print $_[0] ? "ok $N\n" : "not ok $N\n"; } elsif (@_ == 2) { my ($a, $x) = @_; - if ($a eq $x) { print "ok $N\n" } + if (! defined($a) && ! defined($x)) { print "ok $N\n" } + elsif ( defined($a) && ! defined($x)) { + ctrlfix(my $msg = "expected UNDEF, got <$a>"); + print "not ok $N \# $msg\n"; + } + elsif (! defined($a) && defined($x)) { + ctrlfix(my $msg = "expected <$x>, got UNDEF"); + print "not ok $N \# $msg\n"; + } elsif ($a eq $x) { print "ok $N\n" } else { ctrlfix(my $msg = "expected <$x>, got <$a>"); - print "not ok $N # $msg\n"; + print "not ok $N \# $msg\n"; } } else { die "expect() got ", scalar(@_), " args, should have been 1 or 2"; diff --git a/lib/Tie/File/t/30_defer.t b/lib/Tie/File/t/30_defer.t new file mode 100644 index 0000000000..4c32825fe2 --- /dev/null +++ b/lib/Tie/File/t/30_defer.t @@ -0,0 +1,319 @@ +#!/usr/bin/perl +# +# Check ->defer and ->flush methods +# + +use POSIX 'SEEK_SET'; +my $file = "tf$$.txt"; +$: = Tie::File::_default_recsep(); +my $data = "rec0$:rec1$:rec2$:"; +my ($o, $n); + +print "1..79\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +open F, "> $file" or die $!; +binmode F; +print F $data; +close F; +$o = tie @a, 'Tie::File', $file; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +# (3-6) Deferred storage +$o->defer; +$a[3] = "rec3"; +check_contents($data); # nothing written yet +$a[4] = "rec4"; +check_contents($data); # nothing written yet + +# (7-8) Flush +$o->flush; +check_contents($data . "rec3$:rec4$:"); # now it's written + +# (9-12) Deferred writing disabled? +$a[3] = "rec9"; +check_contents("${data}rec9$:rec4$:"); +$a[4] = "rec8"; +check_contents("${data}rec9$:rec8$:"); + +# (13-18) Now let's try two batches of records +$#a = 2; +$o->defer; +$a[0] = "record0"; +check_contents($data); # nothing written yet +$a[2] = "record2"; +check_contents($data); # nothing written yet +$o->flush; +check_contents("record0$:rec1$:record2$:"); + +# (19-22) Deferred writing past the end of the file +$o->defer; +$a[4] = "record4"; +check_contents("record0$:rec1$:record2$:"); +$o->flush; +check_contents("record0$:rec1$:record2$:$:record4$:"); + + +# (23-26) Now two long batches +$o->defer; +for (0..2, 4..6) { + $a[$_] = "r$_"; +} +check_contents("record0$:rec1$:record2$:$:record4$:"); +$o->flush; +check_contents(join $:, "r0".."r2", "", "r4".."r6", ""); + +# (27-30) Now let's make sure that discarded writes are really discarded +# We have a 2Mib buffer here, so we can be sure that we aren't accidentally +# filling it up +$o->defer; +for (0, 3, 7) { + $a[$_] = "discarded$_"; +} +check_contents(join $:, "r0".."r2", "", "r4".."r6", ""); +$o->discard; +check_contents(join $:, "r0".."r2", "", "r4".."r6", ""); + +################################################################ +# +# Now we're going to test the results of a small memory limit +# +# +undef $o; untie @a; +$data = join "$:", map("record$_", 0..7), ""; # records are 8 or 9 bytes long +open F, "> $file" or die $!; +binmode F; +print F $data; +close F; + +# Limit cache+buffer size to 47 bytes +my $MAX = 47; +# -- that's enough space for 5 records, but not 6, on both \n and \r\n systems +my $BUF = 20; +# -- that's enough space for 2 records, but not 3, on both \n and \r\n systems +$o = tie @a, 'Tie::File', $file, memory => $MAX, dw_size => $BUF; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +# (31-32) Fill up the read cache +my @z; +@z = @a; +# the cache now contains records 3,4,5,6,7. +check_caches({map(($_ => "record$_$:"), 3..7)}, + {}); + +# (33-44) See if overloading the defer starts by flushing the read cache +# and then flushes out the defer +$o->defer; +$a[0] = "recordA"; # That should flush record 3 from the cache +check_caches({map(($_ => "record$_$:"), 4..7)}, + {0 => "recordA$:"}); +check_contents($data); + +$a[1] = "recordB"; # That should flush record 4 from the cache +check_caches({map(($_ => "record$_$:"), 5..7)}, + {0 => "recordA$:", + 1 => "recordB$:"}); +check_contents($data); + +$a[2] = "recordC"; # That should flush the whole darn defer +# Flushing the defer requires looking up the true lengths of records +# 0..2, which flushes out the read cache, leaving only 1..2 there. +# Then the splicer updates the cached versions of 1..2 to contain the +# new data +check_caches({1 => "recordB$:", 2 => "recordC$:"}, + {}); # URRRP +check_contents(join("$:", qw(recordA recordB recordC + record3 record4 record5 record6 record7)) . "$:"); + +$a[3] = "recordD"; # even though we flushed, deferring is STILL ENABLED +check_caches({1 => "recordB$:", 2 => "recordC$:"}, + {3 => "recordD$:"}); +check_contents(join("$:", qw(recordA recordB recordC + record3 record4 record5 record6 record7)) . "$:"); + +# Check readcache-deferbuffer interactions + +# (45-47) This should remove outdated data from the read cache +$a[2] = "recordE"; +check_caches({1 => "recordB$:", }, + {3 => "recordD$:", 2 => "recordE$:"}); +check_contents(join("$:", qw(recordA recordB recordC + record3 record4 record5 record6 record7)) . "$:"); + +# (48-51) This should read back out of the defer buffer +# without adding anything to the read cache +my $z; +$z = $a[2]; +print $z eq "recordE" ? "ok $N\n" : "not ok $N\n"; $N++; +check_caches({1 => "recordB$:", }, + {3 => "recordD$:", 2 => "recordE$:"}); +check_contents(join("$:", qw(recordA recordB recordC + record3 record4 record5 record6 record7)) . "$:"); + +# (52-55) This should repopulate the read cache with a new record +$z = $a[0]; +print $z eq "recordA" ? "ok $N\n" : "not ok $N\n"; $N++; +check_caches({1 => "recordB$:", 0 => "recordA$:"}, + {3 => "recordD$:", 2 => "recordE$:"}); +check_contents(join("$:", qw(recordA recordB recordC + record3 record4 record5 record6 record7)) . "$:"); + +# (56-59) This should flush the LRU record from the read cache +$z = $a[4]; $z = $a[5]; +print $z eq "record5" ? "ok $N\n" : "not ok $N\n"; $N++; +check_caches({5 => "record5$:", 0 => "recordA$:", 4 => "record4$:"}, + {3 => "recordD$:", 2 => "recordE$:"}); +check_contents(join("$:", qw(recordA recordB recordC + record3 record4 record5 record6 record7)) . "$:"); + +# (60-63) This should FLUSH the deferred buffer +# In doing so, it will read in records 2 and 3, flushing 0 and 4 +# from the read cache, leaving 2, 3, and 5. +$z = splice @a, 3, 1, "recordZ"; +print $z eq "recordD" ? "ok $N\n" : "not ok $N\n"; $N++; +check_caches({5 => "record5$:", 3 => "recordZ$:", 2 => "recordE$:"}, + {}); +check_contents(join("$:", qw(recordA recordB recordE + recordZ record4 record5 record6 record7)) . "$:"); + +# (64-66) We should STILL be in deferred writing mode +$a[5] = "recordX"; +check_caches({3 => "recordZ$:", 2 => "recordE$:"}, + {5 => "recordX$:"}); +check_contents(join("$:", qw(recordA recordB recordE + recordZ record4 record5 record6 record7)) . "$:"); + +# Fill up the defer buffer again +$a[4] = "recordP"; +# (67-69) This should OVERWRITE the existing deferred record +# and NOT flush the buffer +$a[5] = "recordQ"; +check_caches({3 => "recordZ$:", 2 => "recordE$:"}, + {5 => "recordQ$:", 4 => "recordP$:"}); +check_contents(join("$:", qw(recordA recordB recordE + recordZ record4 record5 record6 record7)) . "$:"); + + +# (70-72) Discard should just dump the whole deferbuffer +$o->discard; +check_caches({3 => "recordZ$:", 2 => "recordE$:"}, + {}); +check_contents(join("$:", qw(recordA recordB recordE + recordZ record4 record5 record6 record7)) . "$:"); +# (73-75) NOW we are out of deferred writing mode +$a[0] = "recordF"; +check_caches({3 => "recordZ$:", 2 => "recordE$:", 0 => "recordF$:"}, + {}); +check_contents(join("$:", qw(recordF recordB recordE + recordZ record4 record5 record6 record7)) . "$:"); + +# (76-79) Last call--untying the array should flush the deferbuffer +$o->defer; +$a[0] = "flushed"; +check_caches({3 => "recordZ$:", 2 => "recordE$:"}, + {0 => "flushed$:" }); +check_contents(join("$:", qw(recordF recordB recordE + recordZ record4 record5 record6 record7)) . "$:"); +undef $o; +untie @a; +# (79) We can't use check_contents any more, because the object is dead +open F, "< $file" or die; +{ local $/ ; $z = <F> } +close F; +my $x = join("$:", qw(flushed recordB recordE + recordZ record4 record5 record6 record7)) . "$:"; +if ($z eq $x) { + print "ok $N\n"; +} else { + my $msg = ctrlfix("expected <$x>, got <$z>"); + print "not ok $N \# $msg\n"; +} +$N++; + +################################################################ + + +sub check_caches { + my ($xcache, $xdefer) = @_; + +# my $integrity = $o->_check_integrity($file, $ENV{INTEGRITY}); +# print $integrity ? "ok $N\n" : "not ok $N\n"; +# $N++; + + my $good = 1; + $good &&= hash_equal($o->{cache}, $xcache, "true cache", "expected cache"); + $good &&= hash_equal($o->{deferred}, $xdefer, "true defer", "expected defer"); + print $good ? "ok $N\n" : "not ok $N\n"; + $N++; +} + +sub hash_equal { + my ($a, $b, $ha, $hb) = @_; + $ha = 'first hash' unless defined $ha; + $hb = 'second hash' unless defined $hb; + + my $good = 1; + my %b_seen; + + for my $k (keys %$a) { + if (! exists $b->{$k}) { + print ctrlfix("# Key $k is in $ha but not $hb"), "\n"; + $good = 0; + } elsif ($b->{$k} ne $a->{$k}) { + print ctrlfix("# Key $k is <$a->{$k}> in $ha but <$b->{$k}> in $hb"), "\n"; + $b_seen{$k} = 1; + $good = 0; + } else { + $b_seen{$k} = 1; + } + } + + for my $k (keys %$b) { + unless ($b_seen{$k}) { + print ctrlfix("# Key $k is in $hb but not $ha"), "\n"; + $good = 0; + } + } + + $good; +} + + +sub check_contents { + my $x = shift; + + my $integrity = $o->_check_integrity($file, $ENV{INTEGRITY}); + print $integrity ? "ok $N\n" : "not ok $N\n"; + $N++; + + local *FH = $o->{fh}; + seek FH, 0, SEEK_SET; + + my $a; + { local $/; $a = <FH> } + $a = "" unless defined $a; + if ($a eq $x) { + print "ok $N\n"; + } else { + my $msg = ctrlfix("# expected <$x>, got <$a>"); + print "not ok $N\n$msg\n"; + } + $N++; +} + +sub ctrlfix { + local $_ = shift; + s/\n/\\n/g; + s/\r/\\r/g; + $_; +} + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/t/31_autodefer.t b/lib/Tie/File/t/31_autodefer.t new file mode 100644 index 0000000000..38d89dacd3 --- /dev/null +++ b/lib/Tie/File/t/31_autodefer.t @@ -0,0 +1,65 @@ +#!/usr/bin/perl +# +# Check behavior of 'autodefer' feature +# Mostly this isn't implemented yet +# This file is primarily here to make sure that the promised ->autodefer +# method doesn't croak. +# + +use POSIX 'SEEK_SET'; +my $file = "tf$$.txt"; +$: = Tie::File::_default_recsep(); +my $data = "rec0$:rec1$:rec2$:"; +my ($o, $n, @a); + +print "1..3\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +open F, "> $file" or die $!; +binmode F; +print F $data; +close F; +$o = tie @a, 'Tie::File', $file; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +# (3) You promised this interface, so it better not die + +eval {$o->autodefer(0)}; +print $@ ? "not ok $N # $@\n" : "ok $N\n"; + + + +sub check_contents { + my $x = shift; + my $integrity = $o->_check_integrity($file, $ENV{INTEGRITY}); + local *FH = $o->{fh}; + seek FH, 0, SEEK_SET; + print $integrity ? "ok $N\n" : "not ok $N\n"; + $N++; + my $a; + { local $/; $a = <FH> } + $a = "" unless defined $a; + if ($a eq $x) { + print "ok $N\n"; + } else { + ctrlfix(my $msg = "# expected <$x>, got <$a>"); + print "not ok $N\n$msg\n"; + } + $N++; +} + +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/t/32_defer_misc.t b/lib/Tie/File/t/32_defer_misc.t new file mode 100644 index 0000000000..8e6edf94a4 --- /dev/null +++ b/lib/Tie/File/t/32_defer_misc.t @@ -0,0 +1,230 @@ +#!/usr/bin/perl +# +# Check interactions of deferred writing +# with miscellaneous methods like DELETE, EXISTS, +# FETCHSIZE, STORESIZE, CLEAR, EXTEND +# + +use POSIX 'SEEK_SET'; +my $file = "tf$$.txt"; +$: = Tie::File::_default_recsep(); +my $data = "rec0$:rec1$:rec2$:"; +my ($o, $n); + +print "1..42\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +open F, "> $file" or die $!; +binmode F; +print F $data; +close F; +$o = tie @a, 'Tie::File', $file; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +# (3-6) EXISTS +if ($] >= 5.006) { + eval << 'TESTS'; +$o->defer; +expect(not exists $a[4]); +$a[4] = "rec4"; +expect(exists $a[4]); +check_contents($data); # nothing written yet +$o->discard; +TESTS +} else { + for (3..6) { + print "ok $_ \# skipped (no exists for arrays)\n"; + $N++; + } +} + +# (7-10) FETCHSIZE +$o->defer; +expect($#a, 2); +$a[4] = "rec4"; +expect($#a, 4); +check_contents($data); # nothing written yet +$o->discard; + +# (11-21) STORESIZE +$o->defer; +$#a = 4; +check_contents($data); # nothing written yet +expect($#a, 4); +$o->flush; +expect($#a, 4); +check_contents("$data$:$:"); # two extra empty records + +$o->defer; +$a[4] = "rec4"; +$#a = 2; +expect($a[4], undef); +check_contents($data); # written data was unwritten +$o->flush; +check_contents($data); # nothing left to write + +# (22-28) CLEAR +$o->defer; +$a[9] = "rec9"; +check_contents($data); # nothing written yet +@a = (); +check_contents(""); # this happens right away +expect($a[9], undef); +$o->flush; +check_contents(""); # nothing left to write + +# (29-34) EXTEND +# Actually it's not real clear what these tests are for +# since EXTEND has no defined semantics +$o->defer; +@a = (0..3); +check_contents(""); # nothing happened yet +expect($a[3], "3"); +expect($a[4], undef); +$o->flush; +check_contents("0$:1$:2$:3$:"); # file now 4 records long + +# (35-53) DELETE +if ($] >= 5.006) { + eval << 'TESTS'; +my $del; +$o->defer; +$del = delete $a[2]; +check_contents("0$:1$:2$:3$:"); # nothing happened yet +expect($a[2], ""); +expect($del, "2"); +$del = delete $a[3]; # shortens file! +check_contents("0$:1$:2$:"); # deferred writes NOT flushed +expect($a[3], undef); +expect($a[2], ""); +exoect($del, "3"); +$a[2] = "cookies"; +$del = delete $a[2]; # shortens file! +expect($a[2], undef); +exoect($del, 'cookies'); +check_contents("0$:1$:"); +$a[0] = "crackers"; +$del = delete $a[0]; # file unchanged +expect($a[0], ""); +exoect($del, 'crackers'); +check_contents("0$:1$:"); # no change yet +$o->flush; +check_contents("$:1$:"); # record 0 is NOT 'cookies'; +TESTS +} else { + for (35..53) { + print "ok $_ \# skipped (no delete for arrays)\n"; + $N++; + } +} + +################################################################ + + +sub check_caches { + my ($xcache, $xdefer) = @_; + +# my $integrity = $o->_check_integrity($file, $ENV{INTEGRITY}); +# print $integrity ? "ok $N\n" : "not ok $N\n"; +# $N++; + + my $good = 1; + $good &&= hash_equal($o->{cache}, $xcache, "true cache", "expected cache"); + $good &&= hash_equal($o->{deferred}, $xdefer, "true defer", "expected defer"); + print $good ? "ok $N\n" : "not ok $N\n"; + $N++; +} + +sub hash_equal { + my ($a, $b, $ha, $hb) = @_; + $ha = 'first hash' unless defined $ha; + $hb = 'second hash' unless defined $hb; + + my $good = 1; + my %b_seen; + + for my $k (keys %$a) { + if (! exists $b->{$k}) { + print ctrlfix("# Key $k is in $ha but not $hb"), "\n"; + $good = 0; + } elsif ($b->{$k} ne $a->{$k}) { + print ctrlfix("# Key $k is <$a->{$k}> in $ha but <$b->{$k}> in $hb"), "\n"; + $b_seen{$k} = 1; + $good = 0; + } else { + $b_seen{$k} = 1; + } + } + + for my $k (keys %$b) { + unless ($b_seen{$k}) { + print ctrlfix("# Key $k is in $hb but not $ha"), "\n"; + $good = 0; + } + } + + $good; +} + + +sub check_contents { + my $x = shift; + + my $integrity = $o->_check_integrity($file, $ENV{INTEGRITY}); + print $integrity ? "ok $N\n" : "not ok $N\n"; + $N++; + + local *FH = $o->{fh}; + seek FH, 0, SEEK_SET; + + my $a; + { local $/; $a = <FH> } + $a = "" unless defined $a; + if ($a eq $x) { + print "ok $N\n"; + } else { + my $msg = ctrlfix("# expected <$x>, got <$a>"); + print "not ok $N\n$msg\n"; + } + $N++; +} + +sub expect { + if (@_ == 1) { + print $_[0] ? "ok $N\n" : "not ok $N\n"; + } elsif (@_ == 2) { + my ($a, $x) = @_; + if (! defined($a) && ! defined($x)) { print "ok $N\n" } + elsif ( defined($a) && ! defined($x)) { + ctrlfix(my $msg = "expected UNDEF, got <$a>"); + print "not ok $N \# $msg\n"; + } + elsif (! defined($a) && defined($x)) { + ctrlfix(my $msg = "expected <$x>, got UNDEF"); + print "not ok $N \# $msg\n"; + } elsif ($a eq $x) { print "ok $N\n" } + else { + ctrlfix(my $msg = "expected <$x>, got <$a>"); + print "not ok $N \# $msg\n"; + } + } else { + die "expect() got ", scalar(@_), " args, should have been 1 or 2"; + } + $N++; +} + +sub ctrlfix { + local $_ = shift; + s/\n/\\n/g; + s/\r/\\r/g; + $_; +} + +END { + 1 while unlink $file; +} + |