summaryrefslogtreecommitdiff
path: root/lib/Tie
diff options
context:
space:
mode:
authorAbhijit Menon-Sen <ams@wiw.org>2002-03-17 23:41:35 +0000
committerAbhijit Menon-Sen <ams@wiw.org>2002-03-17 23:41:35 +0000
commit57c7bc0807db52907ce13229410345ba8f983f8a (patch)
tree140441f114c2fa095ee54e3de6cea0c985fd1d74 /lib/Tie
parent7d3e948e24bb3d2cd32830e4d6a913f9068a0d8c (diff)
downloadperl-57c7bc0807db52907ce13229410345ba8f983f8a.tar.gz
Upgrade to Tie::File 0.21.
p4raw-id: //depot/perl@15277
Diffstat (limited to 'lib/Tie')
-rw-r--r--lib/Tie/File.pm496
-rw-r--r--lib/Tie/File/t/00_version.t10
-rw-r--r--lib/Tie/File/t/15_pushpop.t1
-rw-r--r--lib/Tie/File/t/17_misc_meth.t68
-rw-r--r--lib/Tie/File/t/22_autochomp.t12
-rw-r--r--lib/Tie/File/t/30_defer.t319
-rw-r--r--lib/Tie/File/t/31_autodefer.t65
-rw-r--r--lib/Tie/File/t/32_defer_misc.t230
8 files changed, 1071 insertions, 130 deletions
diff --git a/lib/Tie/File.pm b/lib/Tie/File.pm
index 5b545aa3dc..ec9a8207c7 100644
--- a/lib/Tie/File.pm
+++ b/lib/Tie/File.pm
@@ -5,20 +5,7 @@ use POSIX 'SEEK_SET';
use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX';
require 5.005;
-$VERSION = "0.20";
-
-# Idea: The object will always contain an array of byte offsets
-# this will be filled in as is necessary and convenient.
-# fetch will do seek-read.
-# There will be a cache parameter that controls the amount of cached *data*
-# Also an LRU queue of cached records
-# store will read the relevant record into the cache
-# If it's the same length as what is being written, it will overwrite it in
-# place; if not, it will do a from-to copying write.
-# The record separator string is also a parameter
-
-# Record numbers start at ZERO.
-
+$VERSION = "0.21";
my $DEFAULT_MEMORY_SIZE = 1<<21; # 2 megabytes
my %good_opt = map {$_ => 1, "-$_" => 1}
@@ -47,12 +34,15 @@ sub TIEARRAY {
$opts{memory} = $DEFAULT_MEMORY_SIZE;
$opts{memory} = $opts{dw_size}
if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE;
+ # Dora Winifred Read
}
$opts{dw_size} = $opts{memory} unless defined $opts{dw_size};
if ($opts{dw_size} > $opts{memory}) {
croak("$pack: dw_size may not be larger than total memory allocation\n");
}
- $opts{deferred} = {}; # no records presently deferred
+ # are we in deferred-write mode?
+ $opts{defer} = 0 unless defined $opts{defer};
+ $opts{deferred} = {}; # no records are presently deferred
$opts{deferred_s} = 0; # count of total bytes in ->{deferred}
# the cache is a hash instead of an array because it is likely to be
@@ -77,10 +67,15 @@ sub TIEARRAY {
my $fh;
if (UNIVERSAL::isa($file, 'GLOB')) {
- unless (seek $file, 0, SEEK_SET) {
+ # We use 1 here on the theory that some systems
+ # may not indicate failure if we use 0.
+ # MSWin32 does not indicate failure with 0, but I don't know if
+ # it will indicate failure with 1 or not.
+ unless (seek $file, 1, SEEK_SET) {
croak "$pack: your filehandle does not appear to be seekable";
}
- $fh = $file;
+ seek $file, 0, SEEK_SET # put it back
+ $fh = $file; # setting binmode is the user's problem
} elsif (ref $file) {
croak "usage: tie \@array, $pack, filename, [option => value]...";
} else {
@@ -102,7 +97,9 @@ sub TIEARRAY {
sub FETCH {
my ($self, $n) = @_;
- $self->_chomp1($self->_fetch($n));
+ my $rec = exists $self->{deferred}{$n}
+ ? $self->{deferred}{$n} : $self->_fetch($n);
+ $self->_chomp1($rec);
}
# Chomp many records in-place; return nothing useful
@@ -172,14 +169,11 @@ sub STORE {
# Note we have to do this before we alter the cache
my $oldrec = $self->_fetch($n);
- # _check_cache promotes record $n to MRU. Is this correct behavior?
if (my $cached = $self->_check_cache($n)) {
my $len_diff = length($rec) - length($cached);
$self->{cache}{$n} = $rec;
$self->{cached} += $len_diff;
- $self->_cache_flush
- if $len_diff > 0
- && $self->{deferred_s} + $self->{cached} > $self->{memory};
+ $self->_cache_flush if $len_diff > 0 && $self->_cache_too_full;
}
if (not defined $oldrec) {
@@ -208,19 +202,31 @@ sub _store_deferred {
$self->{deferred_s} += length($rec);
$self->{deferred_s} -= length($old_deferred) if defined $old_deferred;
if ($self->{deferred_s} > $self->{dw_size}) {
- $self->flush;
- $self->defer; # flush clears the 'defer' flag
- } elsif ($self->{deferred_s} + $self->{cached} > $self->{memory}) {
+ $self->_flush;
+ } elsif ($self->_cache_too_full) {
$self->_cache_flush;
}
}
+# Remove a single record from the deferred-write buffer without writing it
+# The record need not be present
+sub _delete_deferred {
+ my ($self, $n) = @_;
+ my $rec = delete $self->{deferred}{$n};
+ return unless defined $rec;
+ $self->{deferred_s} -= length $rec;
+}
+
sub FETCHSIZE {
my $self = shift;
my $n = $#{$self->{offsets}};
+ # 20020317 Change this to binary search
while (defined ($self->_fill_offsets_to($n+1))) {
++$n;
}
+ for my $k (keys %{$self->{deferred}}) {
+ $n = $k+1 if $n < $k+1;
+ }
$n;
}
@@ -231,11 +237,23 @@ sub STORESIZE {
# file gets longer
if ($len > $olen) {
- $self->_extend_file_to($len);
+ if ($self->{defer}) {
+ for ($olen .. $len-1) {
+ $self->_store_deferred($_, $self->{recsep});
+ }
+ } else {
+ $self->_extend_file_to($len);
+ }
return;
}
# file gets shorter
+ if ($self->{defer}) {
+ for (grep $_ >= $len, keys %{$self->{deferred}}) {
+ $self->_delete_deferred($_);
+ }
+ }
+
$self->_seek($len);
$self->_chop_file;
$#{$self->{offsets}} = $len;
@@ -247,7 +265,7 @@ sub STORESIZE {
sub PUSH {
my $self = shift;
$self->SPLICE($self->FETCHSIZE, scalar(@_), @_);
- $self->FETCHSIZE;
+# $self->FETCHSIZE; # av.c takes care of this for me
}
sub POP {
@@ -266,12 +284,17 @@ sub SHIFT {
sub UNSHIFT {
my $self = shift;
$self->SPLICE(0, 0, @_);
- $self->FETCHSIZE;
+ # $self->FETCHSIZE; # av.c takes care of this for me
}
sub CLEAR {
# And enable auto-defer mode, since it's likely that they just
- # did @a = (...);
+ # did @a = (...);
+ #
+ # 20020316
+ # Maybe that's too much dwimmery. But stuffing a fake '-1' into the
+ # autodefer history might not be too much. If you did that, you
+ # could also special-case [ -1, 0 ], which might not be too much.
my $self = shift;
$self->_seekb(0);
$self->_chop_file;
@@ -279,32 +302,46 @@ sub CLEAR {
$self->{cached} = 0;
@{$self->{lru}} = ();
@{$self->{offsets}} = (0);
+ %{$self->{deferred}}= ();
+ $self->{deferred_s} = 0;
}
sub EXTEND {
my ($self, $n) = @_;
+
+ # No need to pre-extend anything in this case
+ return if $self->{defer};
+
$self->_fill_offsets_to($n);
$self->_extend_file_to($n);
}
sub DELETE {
my ($self, $n) = @_;
+ $self->_delete_deferred($n) if $self->{defer};
my $lastrec = $self->FETCHSIZE-1;
+ my $rec = $self->FETCH($n);
if ($n == $lastrec) {
$self->_seek($n);
$self->_chop_file;
$#{$self->{offsets}}--;
$self->_uncache($n);
# perhaps in this case I should also remove trailing null records?
- } else {
+ # 20020316
+ # Note that delete @a[-3..-1] deletes the records in the wrong order,
+ # so we only chop the very last one out of the file. We could repair this
+ # by tracking deleted records inside the object.
+ } elsif ($n < $lastrec) {
$self->STORE($n, "");
}
+ $rec;
}
sub EXISTS {
my ($self, $n) = @_;
- $self->_fill_offsets_to($n);
- 0 <= $n && $n < $self->FETCHSIZE;
+ return 1 if exists $self->{deferred}{$n};
+ $self->_fill_offsets_to($n); # I think this is unnecessary
+ $n < $self->FETCHSIZE;
}
sub SPLICE {
@@ -319,6 +356,7 @@ sub SPLICE {
}
sub DESTROY {
+ my $self = shift;
$self->flush if $self->{defer};
}
@@ -505,6 +543,12 @@ sub _fixrecs {
}
}
+
+################################################################
+#
+# Basic read, write, and seek
+#
+
# seek to the beginning of record #$n
# Assumes that the offsets table is already correctly populated
#
@@ -570,6 +614,12 @@ sub _read_record {
$rec;
}
+################################################################
+#
+# Read cache management
+
+# Insert a record into the cache at position $n
+# Only appropriate when no data is cached for $n already
sub _cache_insert {
my ($self, $n, $rec) = @_;
@@ -580,9 +630,11 @@ sub _cache_insert {
$self->{cached} += length $rec;
push @{$self->{lru}}, $n; # most-recently-used is at the END
- $self->_cache_flush if $self->{cached} > $self->{memory};
+ $self->_cache_flush if $self->_cache_too_full;
}
+# Remove cached data for record $n, if there is any
+# (It is OK if $n is not in the cache at all)
sub _uncache {
my $self = shift;
for my $n (@_) {
@@ -593,6 +645,7 @@ sub _uncache {
}
}
+# _check_cache promotes record $n to MRU. Is this correct behavior?
sub _check_cache {
my ($self, $n) = @_;
my $rec;
@@ -600,19 +653,31 @@ sub _check_cache {
# cache hit; update LRU queue and return $rec
# replace this with a heap in a later version
+ # 20020317 This should be a separate method
@{$self->{lru}} = ((grep $_ ne $n, @{$self->{lru}}), $n);
$rec;
}
+sub _cache_too_full {
+ my $self = shift;
+ $self->{cached} + $self->{deferred_s} > $self->{memory};
+}
+
sub _cache_flush {
my ($self) = @_;
- while ($self->{cached} + $self->{deferred_s} > $self->{memory}) {
+ while ($self->_cache_too_full) {
my $lru = shift @{$self->{lru}};
my $rec = delete $self->{cache}{$lru};
$self->{cached} -= length $rec;
}
}
+################################################################
+#
+# File custodial services
+#
+
+
# We have read to the end of the file and have the offsets table
# entirely populated. Now we need to write a new record beyond
# the end of the file. We prepare for this by writing
@@ -641,6 +706,7 @@ sub _chop_file {
truncate $self->{fh}, tell($self->{fh});
}
+
# compute the size of a buffer suitable for moving
# all the data in a file forward $n bytes
# ($n may be negative)
@@ -653,6 +719,11 @@ sub _bufsize {
$b;
}
+################################################################
+#
+# Miscellaneous public methods
+#
+
# Lock the file
sub flock {
my ($self, $op) = @_;
@@ -665,12 +736,6 @@ sub flock {
flock $fh, $op;
}
-# Defer writes
-sub defer {
- my $self = shift;
- $self->{defer} = 1;
-}
-
# Get/set autochomp option
sub autochomp {
my $self = shift;
@@ -683,6 +748,17 @@ sub autochomp {
}
}
+################################################################
+#
+# Matters related to deferred writing
+#
+
+# Defer writes
+sub defer {
+ my $self = shift;
+ $self->{defer} = 1;
+}
+
# Flush deferred writes
#
# This could be better optimized to write the file in one pass, instead
@@ -711,23 +787,33 @@ sub _flush {
@{$self->{deferred}}{$first_rec .. $last_rec});
}
- $self->discard; # clear out defered-write-cache
+ $self->_discard; # clear out defered-write-cache
}
-# Discard deferred writes
+# Discard deferred writes and disable future deferred writes
sub discard {
my $self = shift;
- undef $self->{deferred};
- $self->{deferred_s} = 0;
+ $self->_discard;
$self->{defer} = 0;
}
+# Discard deferred writes, but retain old deferred writing mode
+sub _discard {
+ my $self = shift;
+ $self->{deferred} = {};
+ $self->{deferred_s} = 0;
+}
+
# Not yet implemented
sub autodefer { }
+# This is NOT a method. It is here for two reasons:
+# 1. To factor a fairly complicated block out of the constructor
+# 2. To provide access for the test suite, which need to be sure
+# files are being written properly.
sub _default_recsep {
my $recsep = $/;
- if ($^O eq 'MSWin32') {
+ if ($^O eq 'MSWin32') { # Dos too?
# Windows users expect files to be terminated with \r\n
# But $/ is set to \n instead
# Note that this also transforms \n\n into \r\n\r\n.
@@ -737,23 +823,36 @@ sub _default_recsep {
$recsep;
}
+# Utility function for _check_integrity
+sub _ci_warn {
+ my $msg = shift;
+ $msg =~ s/\n/\\n/g;
+ $msg =~ s/\r/\\r/g;
+ print "# $msg\n";
+}
+
# Given a file, make sure the cache is consistent with the
-# file contents
+# file contents and the internal data structures are consistent with
+# each other. Returns true if everything checks out, false if not
+#
+# The $file argument is no longer used. It is retained for compatibility
+# with the existing test suite.
sub _check_integrity {
my ($self, $file, $warn) = @_;
my $good = 1;
if (not defined $self->{offsets}[0]) {
- $warn && print STDERR "# offset 0 is missing!\n";
+ _ci_warn("offset 0 is missing!");
$good = 0;
} elsif ($self->{offsets}[0] != 0) {
- $warn && print STDERR "# rec 0: offset <$self->{offsets}[0]> s/b 0!\n";
+ _ci_warn("rec 0: offset <$self->{offsets}[0]> s/b 0!");
$good = 0;
}
local *F = $self->{fh};
seek F, 0, SEEK_SET;
local $/ = $self->{recsep};
+ my $rsl = $self->{recseplen};
$. = 0;
while (<F>) {
@@ -762,26 +861,29 @@ sub _check_integrity {
my $offset = $self->{offsets}[$.];
my $ao = tell F;
if (defined $offset && $offset != $ao) {
- $warn && print STDERR "# rec $n: offset <$offset> actual <$ao>\n";
+ _ci_warn("rec $n: offset <$offset> actual <$ao>");
$good = 0;
}
if (defined $cached && $_ ne $cached) {
$good = 0;
chomp $cached;
chomp;
- $warn && print STDERR "# rec $n: cached <$cached> actual <$_>\n";
+ _ci_warn("rec $n: cached <$cached> actual <$_>");
+ }
+ if (defined $cached && substr($cached, -$rsl) ne $/) {
+ _ci_warn("rec $n in the cache is missing the record separator");
}
}
- my $memory = 0;
+ my $cached = 0;
while (my ($n, $r) = each %{$self->{cache}}) {
- $memory += length($r);
+ $cached += length($r);
next if $n+1 <= $.; # checked this already
- $warn && print STDERR "# spurious caching of record $n\n";
+ _ci_warn("spurious caching of record $n");
$good = 0;
}
- if ($memory != $self->{cached}) {
- $warn && print STDERR "# cache size is $self->{cached}, should be $memory\n";
+ if ($cached != $self->{cached}) {
+ _ci_warn("cache size is $self->{cached}, should be $cached");
$good = 0;
}
@@ -789,7 +891,7 @@ sub _check_integrity {
for (@{$self->{lru}}) {
$seen{$_}++;
if (not exists $self->{cache}{$_}) {
- $warn && print "# $_ is mentioned in the LRU queue, but not in the cache\n";
+ _ci_warn("$_ is mentioned in the LRU queue, but not in the cache");
$good = 0;
}
}
@@ -797,16 +899,56 @@ sub _check_integrity {
if (@duplicate) {
my $records = @duplicate == 1 ? 'Record' : 'Records';
my $appear = @duplicate == 1 ? 'appears' : 'appear';
- $warn && print "# $records @duplicate $appear multiple times in LRU queue: @{$self->{lru}}\n";
+ _ci_warn("$records @duplicate $appear multiple times in LRU queue: @{$self->{lru}}");
$good = 0;
}
for (keys %{$self->{cache}}) {
unless (exists $seen{$_}) {
- print "# record $_ is in the cache but not the LRU queue\n";
+ _ci_warn("record $_ is in the cache but not the LRU queue");
$good = 0;
}
}
+ # Now let's check the deferbuffer
+ # Unless deferred writing is enabled, it should be empty
+ if (! $self->{defer} && %{$self->{deferred}}) {
+ _ci_warn("deferred writing disabled, but deferbuffer nonempty");
+ $good = 0;
+ }
+
+ # Any record in the deferbuffer should *not* be present in the readcache
+ my $deferred_s = 0;
+ while (my ($n, $r) = each %{$self->{deferred}}) {
+ $deferred_s += length($r);
+ if (exists $self->{cache}{$n}) {
+ _ci_warn("record $n is in the deferbuffer *and* the readcache");
+ $good = 0;
+ }
+ if (substr($r, -$rsl) ne $/) {
+ _ci_warn("rec $n in the deferbuffer is missing the record separator");
+ $good = 0;
+ }
+ }
+
+ # Total size of deferbuffer should match internal total
+ if ($deferred_s != $self->{deferred_s}) {
+ _ci_warn("buffer size is $self->{deferred_s}, should be $deferred_s");
+ $good = 0;
+ }
+
+ # Total size of deferbuffer should not exceed the specified limit
+ if ($deferred_s > $self->{dw_size}) {
+ _ci_warn("buffer size is $self->{deferred_s} which exceeds the limit of $self->{dw_size}");
+ $good = 0;
+ }
+
+ # Total size of cached data should not exceed the specified limit
+ if ($deferred_s + $cached > $self->{memory}) {
+ my $total = $deferred_s + $cached;
+ _ci_warn("total stored data size is $total which exceeds the limit of $self->{memory}");
+ $good = 0;
+ }
+
$good;
}
@@ -818,7 +960,7 @@ Tie::File - Access the lines of a disk file via a Perl array
=head1 SYNOPSIS
- # This file documents Tie::File version 0.20
+ # This file documents Tie::File version 0.21
tie @array, 'Tie::File', filename or die ...;
@@ -826,9 +968,15 @@ Tie::File - Access the lines of a disk file via a Perl array
print $array[42]; # display line 42 of the file
$n_recs = @array; # how many records are in the file?
- $#array = $n_recs - 2; # chop records off the end
+ $#array -= 2; # chop two records off the end
+
- # As you would expect:
+ for (@array) {
+ s/PERL/Perl/g; # Replace PERL with Perl everywhere in the file
+ }
+
+ # These are just like regular push, pop, unshift, shift, and splice
+ # Except that they modify the file in the way you would expect
push @array, new recs...;
my $r1 = pop @array;
@@ -838,6 +986,7 @@ Tie::File - Access the lines of a disk file via a Perl array
untie @array; # all finished
+
=head1 DESCRIPTION
C<Tie::File> represents a regular text file as a Perl array. Each
@@ -850,7 +999,7 @@ gigantic files.
Changes to the array are reflected in the file immediately.
-Lazy people may now stop reading the manual.
+Lazy people and beginners may now stop reading the manual.
=head2 C<recsep>
@@ -918,8 +1067,9 @@ the file contains the text
Frankincense
Myrrh
-the tied array will appear to contain C<("Gold", "Frankincense", "Myrrh")>.
-If you set C<autochomp> to a false value, the record separator will not be removed. If the file above was tied with
+the tied array will appear to contain C<("Gold", "Frankincense",
+"Myrrh")>. If you set C<autochomp> to a false value, the record
+separator will not be removed. If the file above was tied with
tie @gifts, "Tie::File", $gifts, autochomp => 0;
@@ -952,9 +1102,10 @@ Opening the data file in write-only or append mode is not supported.
=head2 C<memory>
-This is an (inexact) upper limit on the amount of memory that
-C<Tie::File> will consume at any time while managing the file.
-At present, this is used as a bound on the size of the read cache.
+This is an upper limit on the amount of memory that C<Tie::File> will
+consume at any time while managing the file. This is used for two
+things: managing the I<read cache> and managing the I<deferred write
+buffer>.
Records read in from the file are cached, to avoid having to re-read
them repeatedly. If you read the same record twice, the first time it
@@ -974,6 +1125,28 @@ desired cache size, in bytes.
Setting the memory limit to 0 will inhibit caching; records will be
fetched from disk every time you examine them.
+=head2 C<dw_size>
+
+(This is an advanced feature. Skip this section on first reading.)
+
+If you use deferred writing (See L<"Deferred Writing">, below) then
+data you write into the array will not be written directly to the
+file; instead, it will be saved in the I<deferred write buffer> to be
+written out later. Data in the deferred write buffer is also charged
+against the memory limit you set with the C<memory> option.
+
+You may set the C<dw_size> option to limit the amount of data that can
+be saved in the deferred write buffer. This limit may not exceed the
+total memory limit. For example, if you set C<dw_size> to 1000 and
+C<memory> to 2500, that means that no more than 1000 bytes of deferred
+writes will be saved up. The space available for the read cache will
+vary, but it will always be at least 1500 bytes (if the deferred write
+buffer is full) and it could grow as large as 2500 bytes (if the
+deferred write buffer is empty.)
+
+If you don't specify a C<dw_size>, it defaults to the entire memory
+limit.
+
=head2 Option Format
C<-mode> is a synonym for C<mode>. C<-recsep> is a synonym for
@@ -1000,8 +1173,19 @@ argument to the Perl built-in C<flock> function; for example
C<LOCK_SH> or C<LOCK_EX | LOCK_NB>. (These constants are provided by
the C<use Fcntl ':flock'> declaration.)
-C<MODE> is optional; C<$o-E<gt>flock> simply locks the file with
-C<LOCK_EX>.
+C<MODE> is optional; the default is C<LOCK_EX>.
+
+C<Tie::File> promises that the following sequence of operations will
+be safe:
+
+ my $o = tie @array, "Tie::File", $filename;
+ $o->flock;
+
+In particular, C<Tie::File> will I<not> read or write the file during
+the C<tie> call. (Exception: Using C<mode =E<gt> O_TRUNC> will, of
+course, erase the file during the C<tie> call. If you want to do this
+safely, then open the file without C<O_TRUNC>, lock the file, and use
+C<@array = ()>.)
The best way to unlock a file is to discard the object and untie the
array. It is probably unsafe to unlock the file without also untying
@@ -1028,6 +1212,10 @@ the idiot does not also have a green light at the same time.
See L<"autochomp">, above.
+=head2 C<defer>, C<flush>, and C<discard>
+
+See L<"Deferred Writing">, below.
+
=head1 Tying to an already-opened filehandle
If C<$fh> is a filehandle, such as is returned by C<IO::File> or one
@@ -1041,10 +1229,81 @@ C<sysopen>, you may use:
tie @array, 'Tie::File', \*FH, ...;
Handles that were opened write-only won't work. Handles that were
-opened read-only will work as long as you don't try to write to them.
-Handles must be attached to seekable sources of data---that means no
-pipes or sockets. If you supply a non-seekable handle, the C<tie>
-call will try to abort your program.
+opened read-only will work as long as you don't try to modify the
+array. Handles must be attached to seekable sources of data---that
+means no pipes or sockets. If you supply a non-seekable handle, the
+C<tie> call will try to throw an exception. (On Unix systems, it
+B<will> throw an exception.)
+
+=head1 Deferred Writing
+
+(This is an advanced feature. Skip this section on first reading.)
+
+Normally, modifying a C<Tie::File> array writes to the underlying file
+immediately. Every assignment like C<$a[3] = ...> rewrites as much of
+the file as is necessary; typically, everything from line 3 through
+the end will need to be rewritten. This is the simplest and most
+transparent behavior. Performance even for large files is reasonably
+good.
+
+However, under some circumstances, this behavior may be excessively
+slow. For example, suppose you have a million-record file, and you
+want to do:
+
+ for (@FILE) {
+ $_ = "> $_";
+ }
+
+The first time through the loop, you will rewrite the entire file,
+from line 0 through the end. The second time through the loop, you
+will rewrite the entire file from line 1 through the end. The third
+time through the loop, you will rewrite the entire file from line 2 to
+the end. And so on.
+
+If the performance in such cases is unacceptable, you may defer the
+actual writing, and then have it done all at once. The following loop
+will perform much better for large files:
+
+ (tied @a)->defer;
+ for (@a) {
+ $_ = "> $_";
+ }
+ (tied @a)->flush;
+
+If C<Tie::File>'s memory limit is large enough, all the writing will
+done in memory. Then, when you call C<-E<gt>flush>, the entire file
+will be rewritten in a single pass.
+
+Calling C<-E<gt>flush> returns the array to immediate-write mode. If
+you wish to discard the deferred writes, you may call C<-E<gt>discard>
+instead of C<-E<gt>flush>. Note that in some cases, some of the data
+will have been written already, and it will be too late for
+C<-E<gt>discard> to discard all the changes.
+
+Deferred writes are cached in memory up to the limit specified by the
+C<dw_size> option (see above). If the deferred-write buffer is full
+and you try to write still more deferred data, the buffer will be
+flushed. All buffered data will be written immediately, the buffer
+will be emptied, and the now-empty space will be used for future
+deferred writes.
+
+If the deferred-write buffer isn't yet full, but the total size of the
+buffer and the read cache would exceed the C<memory> limit, the oldest
+records will be flushed out of the read cache until total usage is
+under the limit.
+
+C<push>, C<pop>, C<shift>, C<unshift>, and C<splice> cannot be
+deferred. When you perform one of these operations, any deferred data
+is written to the file and the operation is performed immediately.
+This may change in a future version.
+
+A soon-to-be-released version of this module may enabled deferred
+write mode automagically if it guesses that you are about to write
+many consecutive records. To disable this feature, use
+
+ (tied @o)->autodefer(0);
+
+(At present, this call does nothing.)
=head1 CAVEATS
@@ -1064,39 +1323,25 @@ changing the size of a record in the middle of a large file will
always be fairly slow, because everything after the new record must be
moved.
-In particular, note that the following innocent-looking loop has very
-bad behavior:
-
- # million-line file
- for (@file_array) {
- $_ .= 'x';
- }
-
-This is likely to be very slow, because the first iteration must
-relocate lines 1 through 999,999; the second iteration must relocate
-lines 2 through 999,999, and so on. The relocation is done using
-block writes, however, so it's not as slow as it might be.
-
-A soon-to-be-released version of this module will provide a mechanism
-for getting better performance in such cases, by deferring the writing
-until it can be done all at once. This deferred writing feature might
-be enabled automagically if C<Tie::File> guesses that you are about to write many consecutive records. To disable this feature, use
-
- (tied @o)->autodefer(0);
-
-(At present, this call does nothing.)
-
=item *
The behavior of tied arrays is not precisely the same as for regular
arrays. For example:
- undef $a[10]; print "How unusual!\n" if $a[10];
+ # This DOES print "How unusual!"
+ undef $a[10]; print "How unusual!\n" if defined $a[10];
C<undef>-ing a C<Tie::File> array element just blanks out the
corresponding record in the file. When you read it back again, you'll
-see the record separator (typically, $a[10] will appear to contain
-"\n") so the supposedly-C<undef>'ed value will be true.
+get the empty string, so the supposedly-C<undef>'ed value will be
+defined. Similarly, if you have C<autochomp> disabled, then
+
+ # This DOES print "How unusual!" if 'autochomp' is disabled
+ undef $a[10];
+ print "How unusual!\n" if $a[10];
+
+Because when C<autochomp> is disabled, C<$a[10]> will read back as
+C<"\n"> (or whatever the record separator string is.)
There are other minor differences, but in general, the correspondence
is extremely close.
@@ -1106,8 +1351,15 @@ is extremely close.
Not quite every effort was made to make this module as efficient as
possible. C<FETCHSIZE> should use binary search instead of linear
search. The cache's LRU queue should be a heap instead of a list.
+
+The performance of the C<flush> method could be improved. At present,
+it still rewrites the tail of the file once for each block of
+contiguous lines to be changed. In the typical case, this will result
+in only one rewrite, but in peculiar cases it might be bad. It should
+be possible to perform I<all> deferred writing with a single rewrite.
+
These defects are probably minor; in any event, they will be fixed in
-a later version of the module.
+a future version of the module.
=item *
@@ -1119,8 +1371,19 @@ suggests, for example, that an LRU read-cache is a good tradeoff,
even if it requires substantial adjustment following a C<splice>
operation.
+=item *
+You might be tempted to think that deferred writing is like
+transactions, with C<flush> as C<commit> and C<discard> as
+C<rollback>, but it isn't, so don't.
+
=back
+=head1 SUBCLASSING
+
+This version promises absolutely nothing about the internals, which
+may change without notice. A future version of the module will have a
+well-defined and stable subclassing API.
+
=head1 WHAT ABOUT C<DB_File>?
C<DB_File>'s C<DB_RECNO> feature does something similar to
@@ -1172,8 +1435,8 @@ can be inconvenient to arrange for concurrent access to the same file
by two or more processes. Each process needs to call C<$db-E<gt>sync>
after every write. When you change a C<Tie::File> array, the changes
are reflected in the file immediately; no explicit C<-E<gt>sync> call
-is required. (The forthcoming "deferred writing" mode will allow you
-to request that writes be held in memory until explicitly C<sync>'ed.)
+is required. (Or you can enable deferred writing mode to require that
+changes be explicitly sync'ed.)
=item *
@@ -1215,15 +1478,21 @@ To receive an announcement whenever a new version of this module is
released, send a blank email message to
C<mjd-perl-tiefile-subscribe@plover.com>.
+The most recent version of this module, including documentation and
+any news of importance, will be available at
+
+ http://perl.plover.com/TieFile/
+
+
=head1 LICENSE
-C<Tie::File> version 0.20 is copyright (C) 2002 Mark Jason Dominus.
+C<Tie::File> version 0.21 is copyright (C) 2002 Mark Jason Dominus.
This library is free software; you may redistribute it and/or modify
it under the same terms as Perl itself.
-These terms include your choice of (1) the Perl Artistic Licence, or
-(2) version 2 of the GNU General Public License as published by the
+These terms are your choice of any of (1) the Perl Artistic Licence,
+or (2) version 2 of the GNU General Public License as published by the
Free Software Foundation, or (3) any later version of the GNU General
Public License.
@@ -1245,7 +1514,7 @@ For licensing inquiries, contact the author at:
=head1 WARRANTY
-C<Tie::File> version 0.20 comes with ABSOLUTELY NO WARRANTY.
+C<Tie::File> version 0.21 comes with ABSOLUTELY NO WARRANTY.
For details, see the license.
=head1 THANKS
@@ -1255,12 +1524,13 @@ core when I hadn't written it yet, and for generally being helpful,
supportive, and competent. (Usually the rule is "choose any one.")
Also big thanks to Abhijit Menon-Sen for all of the same things.
-Special thanks to Craig Berry (for VMS portability help), Randy Kobes
-(for Win32 portability help), Clinton Pierce and Autrijus Tang (for
-heroic eleventh-hour Win32 testing above and beyond the call of duty),
-and the rest of the CPAN testers (for testing generally).
+Special thanks to Craig Berry and Peter Prymmer (for VMS portability
+help), Randy Kobes (for Win32 portability help), Clinton Pierce and
+Autrijus Tang (for heroic eleventh-hour Win32 testing above and beyond
+the call of duty), and the rest of the CPAN testers (for testing
+generally).
-More thanks to:
+Additional thanks to:
Edward Avis /
Gerrit Haase /
Nikola Knezevic /
@@ -1269,6 +1539,7 @@ Tassilo von Parseval /
H. Dieter Pearcey /
Slaven Rezic /
Peter Somu /
+Autrijus Tang (again) /
Tels
=head1 TODO
@@ -1288,12 +1559,11 @@ Fixed-length mode.
Maybe an autolocking mode?
-Finish deferred writing.
-
Autodeferment.
Record locking with fcntl()? Then you might support an undo log and
-get real transactions. What a coup that would be.
+get real transactions. What a coup that would be. All would bow
+before my might.
Leave-blanks mode
diff --git a/lib/Tie/File/t/00_version.t b/lib/Tie/File/t/00_version.t
index 8a154b1b72..5d950b9c20 100644
--- a/lib/Tie/File/t/00_version.t
+++ b/lib/Tie/File/t/00_version.t
@@ -2,14 +2,16 @@
print "1..1\n";
+my $testversion = "0.21";
use Tie::File;
-if ($Tie::File::VERSION != 0.20) {
+if ($Tie::File::VERSION != $testversion) {
print STDERR "
-WHOA THERE!!
-You seem to be running version $Tie::File::VERSION of the module against
-version 0.20 of the test suite!
+*** WHOA THERE!!! ***
+
+You seem to be running version $Tie::File::VERSION of the module
+against version $testversion of the test suite!
None of the other test results will be reliable.
";
diff --git a/lib/Tie/File/t/15_pushpop.t b/lib/Tie/File/t/15_pushpop.t
index cc09b02d5d..4b6d1bc959 100644
--- a/lib/Tie/File/t/15_pushpop.t
+++ b/lib/Tie/File/t/15_pushpop.t
@@ -28,7 +28,6 @@ $N++;
my ($n, @r);
-
# (3-11) PUSH tests
$n = push @a, "rec0", "rec1", "rec2";
check_contents($data);
diff --git a/lib/Tie/File/t/17_misc_meth.t b/lib/Tie/File/t/17_misc_meth.t
index 87749616fc..b7543898d4 100644
--- a/lib/Tie/File/t/17_misc_meth.t
+++ b/lib/Tie/File/t/17_misc_meth.t
@@ -8,7 +8,7 @@ my $file = "tf$$.txt";
$: = Tie::File::_default_recsep();
1 while unlink $file;
-print "1..24\n";
+print "1..35\n";
my $N = 1;
use Tie::File;
@@ -30,7 +30,7 @@ check_contents("$:$:$:$:");
@a = ();
check_contents("");
-# (11-16) EXISTS
+# (11-20) EXISTS
if ($] >= 5.006) {
eval << 'TESTS';
print !exists $a[0] ? "ok $N\n" : "not ok $N\n";
@@ -48,28 +48,52 @@ print exists $a[1] ? "ok $N\n" : "ok $N\n";
$N++;
print exists $a[2] ? "ok $N\n" : "not ok $N\n";
$N++;
+print exists $a[-1] ? "ok $N\n" : "not ok $N\n";
+$N++;
+print exists $a[-2] ? "ok $N\n" : "not ok $N\n";
+$N++;
+print exists $a[-3] ? "ok $N\n" : "not ok $N\n";
+$N++;
+print !exists $a[-4] ? "ok $N\n" : "not ok $N\n";
+$N++;
TESTS
} else { # perl 5.005 doesn't have exists $array[1]
- for (11..16) {
+ for (11..20) {
print "ok $_ \# skipped (no exists for arrays)\n";
$N++;
}
}
-# (17-24) DELETE
+my $del;
+
+# (21-35) DELETE
if ($] >= 5.006) {
eval << 'TESTS';
-delete $a[0];
+$del = delete $a[0];
check_contents("$:$:GIVE ME PIE$:");
-delete $a[2];
+# 20020317 Through 0.20, the 'delete' function returned the wrong values.
+expect($del, "I like pie.");
+$del = delete $a[2];
check_contents("$:$:");
-delete $a[0];
+expect($del, "GIVE ME PIE");
+$del = delete $a[0];
check_contents("$:$:");
-delete $a[1];
+expect($del, "");
+$del = delete $a[1];
check_contents("$:");
+expect($del, "");
+
+# 20020317 Through 0.20, we had a bug where deleting an element past the
+# end of the array would actually extend the array to that length.
+$del = delete $a[4];
+check_contents("$:");
+expect($del, undef);
+
+
+
TESTS
} else { # perl 5.005 doesn't have delete $array[1]
- for (17..24) {
+ for (21..35) {
print "ok $_ \# skipped (no delete for arrays)\n";
$N++;
}
@@ -87,13 +111,37 @@ sub check_contents {
print "ok $N\n";
} else {
ctrlfix(my $msg = "# expected <$x>, got <$a>");
- print "not ok $N\n$msg\n";
+ print "not ok $N # $msg\n";
}
$N++;
print $o->_check_integrity($file, $ENV{INTEGRITY}) ? "ok $N\n" : "not ok $N\n";
$N++;
}
+sub expect {
+ if (@_ == 1) {
+ print $_[0] ? "ok $N\n" : "not ok $N\n";
+ } elsif (@_ == 2) {
+ my ($a, $x) = @_;
+ if (! defined($a) && ! defined($x)) { print "ok $N\n" }
+ elsif ( defined($a) && ! defined($x)) {
+ ctrlfix(my $msg = "expected UNDEF, got <$a>");
+ print "not ok $N \# $msg\n";
+ }
+ elsif (! defined($a) && defined($x)) {
+ ctrlfix(my $msg = "expected <$x>, got UNDEF");
+ print "not ok $N \# $msg\n";
+ } elsif ($a eq $x) { print "ok $N\n" }
+ else {
+ ctrlfix(my $msg = "expected <$x>, got <$a>");
+ print "not ok $N \# $msg\n";
+ }
+ } else {
+ die "expect() got ", scalar(@_), " args, should have been 1 or 2";
+ }
+ $N++;
+}
+
sub ctrlfix {
for (@_) {
s/\n/\\n/g;
diff --git a/lib/Tie/File/t/22_autochomp.t b/lib/Tie/File/t/22_autochomp.t
index 70974d4b49..caa7150e0c 100644
--- a/lib/Tie/File/t/22_autochomp.t
+++ b/lib/Tie/File/t/22_autochomp.t
@@ -141,10 +141,18 @@ sub expect {
print $_[0] ? "ok $N\n" : "not ok $N\n";
} elsif (@_ == 2) {
my ($a, $x) = @_;
- if ($a eq $x) { print "ok $N\n" }
+ if (! defined($a) && ! defined($x)) { print "ok $N\n" }
+ elsif ( defined($a) && ! defined($x)) {
+ ctrlfix(my $msg = "expected UNDEF, got <$a>");
+ print "not ok $N \# $msg\n";
+ }
+ elsif (! defined($a) && defined($x)) {
+ ctrlfix(my $msg = "expected <$x>, got UNDEF");
+ print "not ok $N \# $msg\n";
+ } elsif ($a eq $x) { print "ok $N\n" }
else {
ctrlfix(my $msg = "expected <$x>, got <$a>");
- print "not ok $N # $msg\n";
+ print "not ok $N \# $msg\n";
}
} else {
die "expect() got ", scalar(@_), " args, should have been 1 or 2";
diff --git a/lib/Tie/File/t/30_defer.t b/lib/Tie/File/t/30_defer.t
new file mode 100644
index 0000000000..4c32825fe2
--- /dev/null
+++ b/lib/Tie/File/t/30_defer.t
@@ -0,0 +1,319 @@
+#!/usr/bin/perl
+#
+# Check ->defer and ->flush methods
+#
+
+use POSIX 'SEEK_SET';
+my $file = "tf$$.txt";
+$: = Tie::File::_default_recsep();
+my $data = "rec0$:rec1$:rec2$:";
+my ($o, $n);
+
+print "1..79\n";
+
+my $N = 1;
+use Tie::File;
+print "ok $N\n"; $N++;
+
+open F, "> $file" or die $!;
+binmode F;
+print F $data;
+close F;
+$o = tie @a, 'Tie::File', $file;
+print $o ? "ok $N\n" : "not ok $N\n";
+$N++;
+
+# (3-6) Deferred storage
+$o->defer;
+$a[3] = "rec3";
+check_contents($data); # nothing written yet
+$a[4] = "rec4";
+check_contents($data); # nothing written yet
+
+# (7-8) Flush
+$o->flush;
+check_contents($data . "rec3$:rec4$:"); # now it's written
+
+# (9-12) Deferred writing disabled?
+$a[3] = "rec9";
+check_contents("${data}rec9$:rec4$:");
+$a[4] = "rec8";
+check_contents("${data}rec9$:rec8$:");
+
+# (13-18) Now let's try two batches of records
+$#a = 2;
+$o->defer;
+$a[0] = "record0";
+check_contents($data); # nothing written yet
+$a[2] = "record2";
+check_contents($data); # nothing written yet
+$o->flush;
+check_contents("record0$:rec1$:record2$:");
+
+# (19-22) Deferred writing past the end of the file
+$o->defer;
+$a[4] = "record4";
+check_contents("record0$:rec1$:record2$:");
+$o->flush;
+check_contents("record0$:rec1$:record2$:$:record4$:");
+
+
+# (23-26) Now two long batches
+$o->defer;
+for (0..2, 4..6) {
+ $a[$_] = "r$_";
+}
+check_contents("record0$:rec1$:record2$:$:record4$:");
+$o->flush;
+check_contents(join $:, "r0".."r2", "", "r4".."r6", "");
+
+# (27-30) Now let's make sure that discarded writes are really discarded
+# We have a 2Mib buffer here, so we can be sure that we aren't accidentally
+# filling it up
+$o->defer;
+for (0, 3, 7) {
+ $a[$_] = "discarded$_";
+}
+check_contents(join $:, "r0".."r2", "", "r4".."r6", "");
+$o->discard;
+check_contents(join $:, "r0".."r2", "", "r4".."r6", "");
+
+################################################################
+#
+# Now we're going to test the results of a small memory limit
+#
+#
+undef $o; untie @a;
+$data = join "$:", map("record$_", 0..7), ""; # records are 8 or 9 bytes long
+open F, "> $file" or die $!;
+binmode F;
+print F $data;
+close F;
+
+# Limit cache+buffer size to 47 bytes
+my $MAX = 47;
+# -- that's enough space for 5 records, but not 6, on both \n and \r\n systems
+my $BUF = 20;
+# -- that's enough space for 2 records, but not 3, on both \n and \r\n systems
+$o = tie @a, 'Tie::File', $file, memory => $MAX, dw_size => $BUF;
+print $o ? "ok $N\n" : "not ok $N\n";
+$N++;
+
+# (31-32) Fill up the read cache
+my @z;
+@z = @a;
+# the cache now contains records 3,4,5,6,7.
+check_caches({map(($_ => "record$_$:"), 3..7)},
+ {});
+
+# (33-44) See if overloading the defer starts by flushing the read cache
+# and then flushes out the defer
+$o->defer;
+$a[0] = "recordA"; # That should flush record 3 from the cache
+check_caches({map(($_ => "record$_$:"), 4..7)},
+ {0 => "recordA$:"});
+check_contents($data);
+
+$a[1] = "recordB"; # That should flush record 4 from the cache
+check_caches({map(($_ => "record$_$:"), 5..7)},
+ {0 => "recordA$:",
+ 1 => "recordB$:"});
+check_contents($data);
+
+$a[2] = "recordC"; # That should flush the whole darn defer
+# Flushing the defer requires looking up the true lengths of records
+# 0..2, which flushes out the read cache, leaving only 1..2 there.
+# Then the splicer updates the cached versions of 1..2 to contain the
+# new data
+check_caches({1 => "recordB$:", 2 => "recordC$:"},
+ {}); # URRRP
+check_contents(join("$:", qw(recordA recordB recordC
+ record3 record4 record5 record6 record7)) . "$:");
+
+$a[3] = "recordD"; # even though we flushed, deferring is STILL ENABLED
+check_caches({1 => "recordB$:", 2 => "recordC$:"},
+ {3 => "recordD$:"});
+check_contents(join("$:", qw(recordA recordB recordC
+ record3 record4 record5 record6 record7)) . "$:");
+
+# Check readcache-deferbuffer interactions
+
+# (45-47) This should remove outdated data from the read cache
+$a[2] = "recordE";
+check_caches({1 => "recordB$:", },
+ {3 => "recordD$:", 2 => "recordE$:"});
+check_contents(join("$:", qw(recordA recordB recordC
+ record3 record4 record5 record6 record7)) . "$:");
+
+# (48-51) This should read back out of the defer buffer
+# without adding anything to the read cache
+my $z;
+$z = $a[2];
+print $z eq "recordE" ? "ok $N\n" : "not ok $N\n"; $N++;
+check_caches({1 => "recordB$:", },
+ {3 => "recordD$:", 2 => "recordE$:"});
+check_contents(join("$:", qw(recordA recordB recordC
+ record3 record4 record5 record6 record7)) . "$:");
+
+# (52-55) This should repopulate the read cache with a new record
+$z = $a[0];
+print $z eq "recordA" ? "ok $N\n" : "not ok $N\n"; $N++;
+check_caches({1 => "recordB$:", 0 => "recordA$:"},
+ {3 => "recordD$:", 2 => "recordE$:"});
+check_contents(join("$:", qw(recordA recordB recordC
+ record3 record4 record5 record6 record7)) . "$:");
+
+# (56-59) This should flush the LRU record from the read cache
+$z = $a[4]; $z = $a[5];
+print $z eq "record5" ? "ok $N\n" : "not ok $N\n"; $N++;
+check_caches({5 => "record5$:", 0 => "recordA$:", 4 => "record4$:"},
+ {3 => "recordD$:", 2 => "recordE$:"});
+check_contents(join("$:", qw(recordA recordB recordC
+ record3 record4 record5 record6 record7)) . "$:");
+
+# (60-63) This should FLUSH the deferred buffer
+# In doing so, it will read in records 2 and 3, flushing 0 and 4
+# from the read cache, leaving 2, 3, and 5.
+$z = splice @a, 3, 1, "recordZ";
+print $z eq "recordD" ? "ok $N\n" : "not ok $N\n"; $N++;
+check_caches({5 => "record5$:", 3 => "recordZ$:", 2 => "recordE$:"},
+ {});
+check_contents(join("$:", qw(recordA recordB recordE
+ recordZ record4 record5 record6 record7)) . "$:");
+
+# (64-66) We should STILL be in deferred writing mode
+$a[5] = "recordX";
+check_caches({3 => "recordZ$:", 2 => "recordE$:"},
+ {5 => "recordX$:"});
+check_contents(join("$:", qw(recordA recordB recordE
+ recordZ record4 record5 record6 record7)) . "$:");
+
+# Fill up the defer buffer again
+$a[4] = "recordP";
+# (67-69) This should OVERWRITE the existing deferred record
+# and NOT flush the buffer
+$a[5] = "recordQ";
+check_caches({3 => "recordZ$:", 2 => "recordE$:"},
+ {5 => "recordQ$:", 4 => "recordP$:"});
+check_contents(join("$:", qw(recordA recordB recordE
+ recordZ record4 record5 record6 record7)) . "$:");
+
+
+# (70-72) Discard should just dump the whole deferbuffer
+$o->discard;
+check_caches({3 => "recordZ$:", 2 => "recordE$:"},
+ {});
+check_contents(join("$:", qw(recordA recordB recordE
+ recordZ record4 record5 record6 record7)) . "$:");
+# (73-75) NOW we are out of deferred writing mode
+$a[0] = "recordF";
+check_caches({3 => "recordZ$:", 2 => "recordE$:", 0 => "recordF$:"},
+ {});
+check_contents(join("$:", qw(recordF recordB recordE
+ recordZ record4 record5 record6 record7)) . "$:");
+
+# (76-79) Last call--untying the array should flush the deferbuffer
+$o->defer;
+$a[0] = "flushed";
+check_caches({3 => "recordZ$:", 2 => "recordE$:"},
+ {0 => "flushed$:" });
+check_contents(join("$:", qw(recordF recordB recordE
+ recordZ record4 record5 record6 record7)) . "$:");
+undef $o;
+untie @a;
+# (79) We can't use check_contents any more, because the object is dead
+open F, "< $file" or die;
+{ local $/ ; $z = <F> }
+close F;
+my $x = join("$:", qw(flushed recordB recordE
+ recordZ record4 record5 record6 record7)) . "$:";
+if ($z eq $x) {
+ print "ok $N\n";
+} else {
+ my $msg = ctrlfix("expected <$x>, got <$z>");
+ print "not ok $N \# $msg\n";
+}
+$N++;
+
+################################################################
+
+
+sub check_caches {
+ my ($xcache, $xdefer) = @_;
+
+# my $integrity = $o->_check_integrity($file, $ENV{INTEGRITY});
+# print $integrity ? "ok $N\n" : "not ok $N\n";
+# $N++;
+
+ my $good = 1;
+ $good &&= hash_equal($o->{cache}, $xcache, "true cache", "expected cache");
+ $good &&= hash_equal($o->{deferred}, $xdefer, "true defer", "expected defer");
+ print $good ? "ok $N\n" : "not ok $N\n";
+ $N++;
+}
+
+sub hash_equal {
+ my ($a, $b, $ha, $hb) = @_;
+ $ha = 'first hash' unless defined $ha;
+ $hb = 'second hash' unless defined $hb;
+
+ my $good = 1;
+ my %b_seen;
+
+ for my $k (keys %$a) {
+ if (! exists $b->{$k}) {
+ print ctrlfix("# Key $k is in $ha but not $hb"), "\n";
+ $good = 0;
+ } elsif ($b->{$k} ne $a->{$k}) {
+ print ctrlfix("# Key $k is <$a->{$k}> in $ha but <$b->{$k}> in $hb"), "\n";
+ $b_seen{$k} = 1;
+ $good = 0;
+ } else {
+ $b_seen{$k} = 1;
+ }
+ }
+
+ for my $k (keys %$b) {
+ unless ($b_seen{$k}) {
+ print ctrlfix("# Key $k is in $hb but not $ha"), "\n";
+ $good = 0;
+ }
+ }
+
+ $good;
+}
+
+
+sub check_contents {
+ my $x = shift;
+
+ my $integrity = $o->_check_integrity($file, $ENV{INTEGRITY});
+ print $integrity ? "ok $N\n" : "not ok $N\n";
+ $N++;
+
+ local *FH = $o->{fh};
+ seek FH, 0, SEEK_SET;
+
+ my $a;
+ { local $/; $a = <FH> }
+ $a = "" unless defined $a;
+ if ($a eq $x) {
+ print "ok $N\n";
+ } else {
+ my $msg = ctrlfix("# expected <$x>, got <$a>");
+ print "not ok $N\n$msg\n";
+ }
+ $N++;
+}
+
+sub ctrlfix {
+ local $_ = shift;
+ s/\n/\\n/g;
+ s/\r/\\r/g;
+ $_;
+}
+
+END {
+ 1 while unlink $file;
+}
+
diff --git a/lib/Tie/File/t/31_autodefer.t b/lib/Tie/File/t/31_autodefer.t
new file mode 100644
index 0000000000..38d89dacd3
--- /dev/null
+++ b/lib/Tie/File/t/31_autodefer.t
@@ -0,0 +1,65 @@
+#!/usr/bin/perl
+#
+# Check behavior of 'autodefer' feature
+# Mostly this isn't implemented yet
+# This file is primarily here to make sure that the promised ->autodefer
+# method doesn't croak.
+#
+
+use POSIX 'SEEK_SET';
+my $file = "tf$$.txt";
+$: = Tie::File::_default_recsep();
+my $data = "rec0$:rec1$:rec2$:";
+my ($o, $n, @a);
+
+print "1..3\n";
+
+my $N = 1;
+use Tie::File;
+print "ok $N\n"; $N++;
+
+open F, "> $file" or die $!;
+binmode F;
+print F $data;
+close F;
+$o = tie @a, 'Tie::File', $file;
+print $o ? "ok $N\n" : "not ok $N\n";
+$N++;
+
+# (3) You promised this interface, so it better not die
+
+eval {$o->autodefer(0)};
+print $@ ? "not ok $N # $@\n" : "ok $N\n";
+
+
+
+sub check_contents {
+ my $x = shift;
+ my $integrity = $o->_check_integrity($file, $ENV{INTEGRITY});
+ local *FH = $o->{fh};
+ seek FH, 0, SEEK_SET;
+ print $integrity ? "ok $N\n" : "not ok $N\n";
+ $N++;
+ my $a;
+ { local $/; $a = <FH> }
+ $a = "" unless defined $a;
+ if ($a eq $x) {
+ print "ok $N\n";
+ } else {
+ ctrlfix(my $msg = "# expected <$x>, got <$a>");
+ print "not ok $N\n$msg\n";
+ }
+ $N++;
+}
+
+sub ctrlfix {
+ for (@_) {
+ s/\n/\\n/g;
+ s/\r/\\r/g;
+ }
+}
+
+END {
+ 1 while unlink $file;
+}
+
diff --git a/lib/Tie/File/t/32_defer_misc.t b/lib/Tie/File/t/32_defer_misc.t
new file mode 100644
index 0000000000..8e6edf94a4
--- /dev/null
+++ b/lib/Tie/File/t/32_defer_misc.t
@@ -0,0 +1,230 @@
+#!/usr/bin/perl
+#
+# Check interactions of deferred writing
+# with miscellaneous methods like DELETE, EXISTS,
+# FETCHSIZE, STORESIZE, CLEAR, EXTEND
+#
+
+use POSIX 'SEEK_SET';
+my $file = "tf$$.txt";
+$: = Tie::File::_default_recsep();
+my $data = "rec0$:rec1$:rec2$:";
+my ($o, $n);
+
+print "1..42\n";
+
+my $N = 1;
+use Tie::File;
+print "ok $N\n"; $N++;
+
+open F, "> $file" or die $!;
+binmode F;
+print F $data;
+close F;
+$o = tie @a, 'Tie::File', $file;
+print $o ? "ok $N\n" : "not ok $N\n";
+$N++;
+
+# (3-6) EXISTS
+if ($] >= 5.006) {
+ eval << 'TESTS';
+$o->defer;
+expect(not exists $a[4]);
+$a[4] = "rec4";
+expect(exists $a[4]);
+check_contents($data); # nothing written yet
+$o->discard;
+TESTS
+} else {
+ for (3..6) {
+ print "ok $_ \# skipped (no exists for arrays)\n";
+ $N++;
+ }
+}
+
+# (7-10) FETCHSIZE
+$o->defer;
+expect($#a, 2);
+$a[4] = "rec4";
+expect($#a, 4);
+check_contents($data); # nothing written yet
+$o->discard;
+
+# (11-21) STORESIZE
+$o->defer;
+$#a = 4;
+check_contents($data); # nothing written yet
+expect($#a, 4);
+$o->flush;
+expect($#a, 4);
+check_contents("$data$:$:"); # two extra empty records
+
+$o->defer;
+$a[4] = "rec4";
+$#a = 2;
+expect($a[4], undef);
+check_contents($data); # written data was unwritten
+$o->flush;
+check_contents($data); # nothing left to write
+
+# (22-28) CLEAR
+$o->defer;
+$a[9] = "rec9";
+check_contents($data); # nothing written yet
+@a = ();
+check_contents(""); # this happens right away
+expect($a[9], undef);
+$o->flush;
+check_contents(""); # nothing left to write
+
+# (29-34) EXTEND
+# Actually it's not real clear what these tests are for
+# since EXTEND has no defined semantics
+$o->defer;
+@a = (0..3);
+check_contents(""); # nothing happened yet
+expect($a[3], "3");
+expect($a[4], undef);
+$o->flush;
+check_contents("0$:1$:2$:3$:"); # file now 4 records long
+
+# (35-53) DELETE
+if ($] >= 5.006) {
+ eval << 'TESTS';
+my $del;
+$o->defer;
+$del = delete $a[2];
+check_contents("0$:1$:2$:3$:"); # nothing happened yet
+expect($a[2], "");
+expect($del, "2");
+$del = delete $a[3]; # shortens file!
+check_contents("0$:1$:2$:"); # deferred writes NOT flushed
+expect($a[3], undef);
+expect($a[2], "");
+exoect($del, "3");
+$a[2] = "cookies";
+$del = delete $a[2]; # shortens file!
+expect($a[2], undef);
+exoect($del, 'cookies');
+check_contents("0$:1$:");
+$a[0] = "crackers";
+$del = delete $a[0]; # file unchanged
+expect($a[0], "");
+exoect($del, 'crackers');
+check_contents("0$:1$:"); # no change yet
+$o->flush;
+check_contents("$:1$:"); # record 0 is NOT 'cookies';
+TESTS
+} else {
+ for (35..53) {
+ print "ok $_ \# skipped (no delete for arrays)\n";
+ $N++;
+ }
+}
+
+################################################################
+
+
+sub check_caches {
+ my ($xcache, $xdefer) = @_;
+
+# my $integrity = $o->_check_integrity($file, $ENV{INTEGRITY});
+# print $integrity ? "ok $N\n" : "not ok $N\n";
+# $N++;
+
+ my $good = 1;
+ $good &&= hash_equal($o->{cache}, $xcache, "true cache", "expected cache");
+ $good &&= hash_equal($o->{deferred}, $xdefer, "true defer", "expected defer");
+ print $good ? "ok $N\n" : "not ok $N\n";
+ $N++;
+}
+
+sub hash_equal {
+ my ($a, $b, $ha, $hb) = @_;
+ $ha = 'first hash' unless defined $ha;
+ $hb = 'second hash' unless defined $hb;
+
+ my $good = 1;
+ my %b_seen;
+
+ for my $k (keys %$a) {
+ if (! exists $b->{$k}) {
+ print ctrlfix("# Key $k is in $ha but not $hb"), "\n";
+ $good = 0;
+ } elsif ($b->{$k} ne $a->{$k}) {
+ print ctrlfix("# Key $k is <$a->{$k}> in $ha but <$b->{$k}> in $hb"), "\n";
+ $b_seen{$k} = 1;
+ $good = 0;
+ } else {
+ $b_seen{$k} = 1;
+ }
+ }
+
+ for my $k (keys %$b) {
+ unless ($b_seen{$k}) {
+ print ctrlfix("# Key $k is in $hb but not $ha"), "\n";
+ $good = 0;
+ }
+ }
+
+ $good;
+}
+
+
+sub check_contents {
+ my $x = shift;
+
+ my $integrity = $o->_check_integrity($file, $ENV{INTEGRITY});
+ print $integrity ? "ok $N\n" : "not ok $N\n";
+ $N++;
+
+ local *FH = $o->{fh};
+ seek FH, 0, SEEK_SET;
+
+ my $a;
+ { local $/; $a = <FH> }
+ $a = "" unless defined $a;
+ if ($a eq $x) {
+ print "ok $N\n";
+ } else {
+ my $msg = ctrlfix("# expected <$x>, got <$a>");
+ print "not ok $N\n$msg\n";
+ }
+ $N++;
+}
+
+sub expect {
+ if (@_ == 1) {
+ print $_[0] ? "ok $N\n" : "not ok $N\n";
+ } elsif (@_ == 2) {
+ my ($a, $x) = @_;
+ if (! defined($a) && ! defined($x)) { print "ok $N\n" }
+ elsif ( defined($a) && ! defined($x)) {
+ ctrlfix(my $msg = "expected UNDEF, got <$a>");
+ print "not ok $N \# $msg\n";
+ }
+ elsif (! defined($a) && defined($x)) {
+ ctrlfix(my $msg = "expected <$x>, got UNDEF");
+ print "not ok $N \# $msg\n";
+ } elsif ($a eq $x) { print "ok $N\n" }
+ else {
+ ctrlfix(my $msg = "expected <$x>, got <$a>");
+ print "not ok $N \# $msg\n";
+ }
+ } else {
+ die "expect() got ", scalar(@_), " args, should have been 1 or 2";
+ }
+ $N++;
+}
+
+sub ctrlfix {
+ local $_ = shift;
+ s/\n/\\n/g;
+ s/\r/\\r/g;
+ $_;
+}
+
+END {
+ 1 while unlink $file;
+}
+