diff options
-rw-r--r-- | MANIFEST | 14 | ||||
-rw-r--r-- | lib/Tie/File.pm | 744 | ||||
-rw-r--r-- | lib/Tie/File/01_gen.t | 89 | ||||
-rw-r--r-- | lib/Tie/File/02_fetchsize.t | 47 | ||||
-rw-r--r-- | lib/Tie/File/03_longfetch.t | 38 | ||||
-rw-r--r-- | lib/Tie/File/04_splice.t | 163 | ||||
-rw-r--r-- | lib/Tie/File/05_size.t | 78 | ||||
-rw-r--r-- | lib/Tie/File/06_fixrec.t | 36 | ||||
-rw-r--r-- | lib/Tie/File/07_rv_splice.t | 157 | ||||
-rw-r--r-- | lib/Tie/File/08_ro.t | 41 | ||||
-rw-r--r-- | lib/Tie/File/09_gen_rs.t | 90 | ||||
-rw-r--r-- | lib/Tie/File/10_splice_rs.t | 162 | ||||
-rw-r--r-- | lib/Tie/File/11_rv_splice_rs.t | 157 | ||||
-rw-r--r-- | lib/Tie/File/12_longfetch_rs.t | 38 | ||||
-rw-r--r-- | lib/Tie/File/13_size_rs.t | 78 |
15 files changed, 1932 insertions, 0 deletions
@@ -1418,6 +1418,20 @@ lib/Tie/Array/stdpush.t Test for Tie::StdArray lib/Tie/Handle.pm Base class for tied handles lib/Tie/Handle/stdhandle.t Test for Tie::StdHandle lib/Tie/Hash.pm Base class for tied hashes +lib/Tie/File.pm Files as tied arrays. +lib/Tie/File/01_gen.t Test for Tie::File. +lib/Tie/File/02_fetchsize.t Test for Tie::File. +lib/Tie/File/03_longfetch.t Test for Tie::File. +lib/Tie/File/04_splice.t Test for Tie::File. +lib/Tie/File/05_size.t Test for Tie::File. +lib/Tie/File/06_fixrec.t Test for Tie::File. +lib/Tie/File/07_rv_splice.t Test for Tie::File. +lib/Tie/File/08_ro.t Test for Tie::File. +lib/Tie/File/09_gen_rs.t Test for Tie::File. +lib/Tie/File/10_splice_rs.t Test for Tie::File. +lib/Tie/File/11_rv_splice_rs.t Test for Tie::File. +lib/Tie/File/12_longfetch_rs.t Test for Tie::File. +lib/Tie/File/13_size_rs.t Test for Tie::File. lib/Tie/Memoize.pm Base class for memoized tied hashes lib/Tie/Memoize.t Test for Memoize.t lib/Tie/RefHash.pm Base class for tied hashes with references as keys diff --git a/lib/Tie/File.pm b/lib/Tie/File.pm new file mode 100644 index 0000000000..5b9381b6c5 --- /dev/null +++ b/lib/Tie/File.pm @@ -0,0 +1,744 @@ + +package Tie::File; +use Carp; +use POSIX 'SEEK_SET'; +use Fcntl 'O_CREAT', 'O_RDWR'; +require 5.005; + +$VERSION = "0.12"; + +# Idea: The object will always contain an array of byte offsets +# this will be filled in as is necessary and convenient. +# fetch will do seek-read. +# There will be a cache parameter that controls the amount of cached *data* +# Also an LRU queue of cached records +# store will read the relevant record into the cache +# If it's the same length as what is being written, it will overwrite it in +# place; if not, it will do a from-to copying write. +# The record separator string is also a parameter + +# Record numbers start at ZERO. + +my $DEFAULT_CACHE_SIZE = 1<<21; # 2 megabytes + +sub TIEARRAY { + if (@_ % 2 != 0) { + croak "usage: tie \@array, $_[0], filename, [option => value]..."; + } + my ($pack, $file, %opts) = @_; + + # transform '-foo' keys into 'foo' keys + for my $key (keys %opts) { + my $okey = $key; + if ($key =~ s/^-+//) { + $opts{$key} = delete $opts{$okey}; + } + } + + $opts{cachesize} ||= $DEFAULT_CACHE_SIZE; + + # the cache is a hash instead of an array because it is likely to be + # sparsely populated + $opts{cache} = {}; + $opts{cached} = 0; # total size of cached data + $opts{lru} = []; # replace with heap in later version + + $opts{offsets} = [0]; + $opts{filename} = $file; + $opts{recsep} = $/ unless defined $opts{recsep}; + $opts{recseplen} = length($opts{recsep}); + if ($opts{recseplen} == 0) { + croak "Empty record separator not supported by $pack"; + } + + my $mode = defined($opts{mode}) ? $opts{mode} : O_CREAT|O_RDWR; + + my $fh = \do { local *FH }; # only works in 5.005 and later + sysopen $fh, $file, $mode, 0666 or return; + binmode $fh; + { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write + $opts{fh} = $fh; + + bless \%opts => $pack; +} + +sub FETCH { + my ($self, $n) = @_; + + # check the record cache + { my $cached = $self->_check_cache($n); + return $cached if defined $cached; + } + + unless ($#{$self->{offsets}} >= $n) { + my $o = $self->_fill_offsets_to($n); + # If it's still undefined, there is no such record, so return 'undef' + return unless defined $o; + } + + my $fh = $self->{FH}; + $self->_seek($n); # we can do this now that offsets is populated + my $rec = $self->_read_record; + $self->_cache_insert($n, $rec) if defined $rec; + $rec; +} + +sub STORE { + my ($self, $n, $rec) = @_; + + $self->_fixrecs($rec); + + # TODO: what should we do about the cache? Install the new record + # in the cache only if the old version of the same record was + # already there? + + # We need this to decide whether the new record will fit + # It incidentally populates the offsets table + # Note we have to do this before we alter the cache + my $oldrec = $self->FETCH($n); + + # _check_cache promotes record $n to MRU. Is this correct behavior? + $self->{cache}{$n} = $rec if $self->_check_cache($n); + + if (not defined $oldrec) { + # We're storing a record beyond the end of the file + $self->_extend_file_to($n); + $oldrec = $self->{recsep}; + } + my $len_diff = length($rec) - length($oldrec); + + $self->_twrite($rec, $self->{offsets}[$n], length($oldrec)); + + # now update the offsets + # array slice goes from element $n+1 (the first one to move) + # to the end + for (@{$self->{offsets}}[$n+1 .. $#{$self->{offsets}}]) { + $_ += $len_diff; + } +} + +sub FETCHSIZE { + my $self = shift; + my $n = $#{$self->{offsets}}; + while (defined ($self->_fill_offsets_to($n+1))) { + ++$n; + } + $n; +} + +sub STORESIZE { + my ($self, $len) = @_; + my $olen = $self->FETCHSIZE; + return if $len == $olen; # Woo-hoo! + + # file gets longer + if ($len > $olen) { + $self->_extend_file_to($len-1); # record numbers from 0 .. $len-1 + return; + } + + # file gets shorter + $self->_seek($len); + $self->_chop_file; + $#{$self->{offsets}} = $len-1; + my @cached = grep $_ > $len, keys %{$self->{cache}}; + delete @{$self->{cache}}{@cached} if @cached; +} + +sub SPLICE { + my ($self, $pos, $nrecs, @data) = @_; + my @result; + + $pos += $self->FETCHSIZE if $pos < 0; + + $self->_fixrecs(@data); + my $data = join '', @data; + my $datalen = length $data; + my $oldlen = 0; + + # compute length of data being removed + for ($pos .. $pos+$nrecs-1) { + my $rec = $self->FETCH($_); + last unless defined $rec; + push @result, $rec; + $oldlen += length($rec); + } + + $self->_fill_offsets_to($pos); + $self->_twrite($data, $self->{offsets}[$pos], $oldlen); + + # update the offsets table part 1 + # compute the offsets of the new records: + my @new_offsets; + if (@data) { + push @new_offsets, $self->{offsets}[$pos]; + for (0 .. $#data-1) { + push @new_offsets, $new_offsets[-1] + length($data[$_]); + } + } + splice(@{$self->{offsets}}, $pos, $nrecs, @new_offsets); + + # update the offsets table part 2 + # adjust the offsets of the following old records + for ($pos+@data .. $#{$self->{offsets}}) { + $self->{offsets}[$_] += $datalen - $oldlen; + } + # If we scrubbed out all known offsets, regenerate the trivial table + # that knows that the file does indeed start at 0. + $self->{offsets}[0] = 0 unless @{$self->{offsets}}; + + # update the read cache, part 1 + # modified records + # Consider this carefully for correctness + for ($pos .. $pos+$nrecs-1) { + my $cached = $self->{cache}{$_}; + next unless defined $cached; + my $new = $data[$_-$pos]; + if (defined $new) { + $self->{cached} += length($new) - length($cached); + $self->{cache}{$_} = $new; + } else { + delete $self->{cache}{$_}; + $self->{cached} -= length($cached); + } + } + # update the read cache, part 2 + # moved records - records past the site of the change + # need to be renumbered + # Maybe merge this with the previous block? + for (keys %{$self->{cache}}) { + next unless $_ >= $pos + $nrecs; + $self->{cache}{$_-$nrecs+@data} = delete $self->{cache}{$_}; + } + + # fix the LRU queue + my(@new, @changed); + for (@{$self->{lru}}) { + if ($_ >= $pos + $nrecs) { + push @new, $_ + @data - $nrecs; + } elsif ($_ >= $pos) { + push @changed, $_ if $_ < $pos + @data; + } else { + push @new, $_; + } + } + @{$self->{lru}} = (@new, @changed); + + @result; +} + +# write data into the file +# $data is the data to be written. +# it should be written at position $pos, and should overwrite +# exactly $len of the following bytes. +# Note that if length($data) > $len, the subsequent bytes will have to +# be moved up, and if length($data) < $len, they will have to +# be moved down +sub _twrite { + my ($self, $data, $pos, $len) = @_; + + unless (defined $pos) { + die "\$pos was undefined in _twrite"; + } + + my $len_diff = length($data) - $len; + + if ($len_diff == 0) { # Woo-hoo! + my $fh = $self->{fh}; + $self->_seekb($pos); + $self->_write_record($data); + return; # well, that was easy. + } + + # the two records are of different lengths + # our strategy here: rewrite the tail of the file, + # reading ahead one buffer at a time + # $bufsize is required to be at least as large as the data we're overwriting + my $bufsize = _bufsize($len_diff); + my ($writepos, $readpos) = ($pos, $pos+$len); + + # Seems like there ought to be a way to avoid the repeated code + # and the special case here. The read(1) is also a little weird. + # Think about this. + do { + $self->_seekb($readpos); + my $br = read $self->{fh}, my($next_block), $bufsize; + my $more_data = read $self->{fh}, my($dummy), 1; + $self->_seekb($writepos); + $self->_write_record($data); + $readpos += $br; + $writepos += length $data; + $data = $next_block; + unless ($more_data) { + $self->_seekb($writepos); + $self->_write_record($next_block); + } + } while $more_data; + + # There might be leftover data at the end of the file + $self->_chop_file if $len_diff < 0; +} + +# If a record does not already end with the appropriate terminator +# string, append one. +sub _fixrecs { + my $self = shift; + for (@_) { + $_ .= $self->{recsep} + unless substr($_, - $self->{recseplen}) eq $self->{recsep}; + } +} + +# seek to the beginning of record #$n +# Assumes that the offsets table is already correctly populated +# +# Note that $n=-1 has a special meaning here: It means the start of +# the last known record; this may or may not be the very last record +# in the file, depending on whether the offsets table is fully populated. +# +sub _seek { + my ($self, $n) = @_; + my $o = $self->{offsets}[$n]; + defined($o) + or confess("logic error: undefined offset for record $n"); + seek $self->{fh}, $o, SEEK_SET + or die "Couldn't seek filehandle: $!"; # "Should never happen." +} + +sub _seekb { + my ($self, $b) = @_; + seek $self->{fh}, $b, SEEK_SET + or die "Couldn't seek filehandle: $!"; # "Should never happen." +} + +# populate the offsets table up to the beginning of record $n +# return the offset of record $n +sub _fill_offsets_to { + my ($self, $n) = @_; + my $fh = $self->{fh}; + local *OFF = $self->{offsets}; + my $rec; + + until ($#OFF >= $n) { + my $o = $OFF[-1]; + $self->_seek(-1); # tricky -- see comment at _seek + $rec = $self->_read_record; + if (defined $rec) { + push @OFF, $o+length($rec); + } else { + return; # It turns out there is no such record + } + } + + # we have now read all the records up to record n-1, + # so we can return the offset of record n + return $OFF[$n]; +} + +# assumes that $rec is already suitably terminated +sub _write_record { + my ($self, $rec) = @_; + my $fh = $self->{fh}; + print $fh $rec + or die "Couldn't write record: $!"; # "Should never happen." + +} + +sub _read_record { + my $self = shift; + my $rec; + { local $/ = $self->{recsep}; + my $fh = $self->{fh}; + $rec = <$fh>; + } + $rec; +} + +sub _cache_insert { + my ($self, $n, $rec) = @_; + + # Do not cache records that are too big to fit in the cache. + return unless length $rec <= $self->{cachesize}; + + $self->{cache}{$n} = $rec; + $self->{cached} += length $rec; + push @{$self->{lru}}, $n; # most-recently-used is at the END + + $self->_cache_flush if $self->{cached} > $self->{cachesize}; +} + +sub _check_cache { + my ($self, $n) = @_; + my $rec; + return unless defined($rec = $self->{cache}{$n}); + + # cache hit; update LRU queue and return $rec + # replace this with a heap in a later version + @{$self->{lru}} = ((grep $_ ne $n, @{$self->{lru}}), $n); + $rec; +} + +sub _cache_flush { + my ($self) = @_; + while ($self->{cached} > $self->{cachesize}) { + my $lru = shift @{$self->{lru}}; + $self->{cached} -= length $lru; + delete $self->{cache}{$lru}; + } +} + +# We have read to the end of the file and have the offsets table +# entirely populated. Now we need to write a new record beyond +# the end of the file. We prepare for this by writing +# empty records into the file up to the position we want +# $n here is the record number of the last record we're going to write +sub _extend_file_to { + my ($self, $n) = @_; + $self->_seek(-1); # position after the end of the last record + my $pos = $self->{offsets}[-1]; + + # the offsets table has one entry more than the total number of records + $extras = $n - ($#{$self->{offsets}} - 1); + + # Todo : just use $self->{recsep} x $extras here? + while ($extras-- > 0) { + $self->_write_record($self->{recsep}); + $pos += $self->{recseplen}; + push @{$self->{offsets}}, $pos; + } +} + +# Truncate the file at the current position +sub _chop_file { + my $self = shift; + truncate $self->{fh}, tell($self->{fh}); +} + +# compute the size of a buffer suitable for moving +# all the data in a file forward $n bytes +# ($n may be negative) +# The result should be at least $n. +sub _bufsize { + my $n = shift; + return 8192 if $n < 0; + my $b = $n & ~8191; + $b += 8192 if $n & 8191; + $b; +} + + +# Given a file, make sure the cache is consistent with the +# file contents +sub _check_integrity { + my ($self, $file, $warn) = @_; + my $good = 1; + local *F; + open F, $file or die "Couldn't open file $file: $!"; + local $/ = $self->{recsep}; + unless ($self->{offsets}[0] == 0) { + $warn && print STDERR "# rec 0: offset <$self->{offsets}[0]> s/b 0!\n"; + $good = 0; + } + while (<F>) { + my $n = $. - 1; + my $cached = $self->{cache}{$n}; + my $offset = $self->{offsets}[$.]; + my $ao = tell F; + if (defined $offset && $offset != $ao) { + $warn && print STDERR "# rec $n: offset <$offset> actual <$ao>\n"; + } + if (defined $cached && $_ ne $cached) { + $good = 0; + chomp $cached; + chomp; + $warn && print STDERR "# rec $n: cached <$cached> actual <$_>\n"; + } + } + + my $cachesize = 0; + while (my ($n, $r) = each %{$self->{cache}}) { + $cachesize += length($r); + next if $n+1 <= $.; # checked this already + $warn && print STDERR "# spurious caching of record $n\n"; + $good = 0; + } + if ($cachesize != $self->{cached}) { + $warn && print STDERR "# cache size is $self->{cached}, should be $cachesize\n"; + $good = 0; + } + + my (%seen, @duplicate); + for (@{$self->{lru}}) { + $seen{$_}++; + if (not exists $self->{cache}{$_}) { + print "# $_ is mentioned in the LRU queue, but not in the cache\n"; + $good = 0; + } + } + @duplicate = grep $seen{$_}>1, keys %seen; + if (@duplicate) { + my $records = @duplicate == 1 ? 'Record' : 'Records'; + my $appear = @duplicate == 1 ? 'appears' : 'appear'; + print "# $records @duplicate $appear multiple times in LRU queue: @{$self->{lru}}\n"; + $good = 0; + } + for (keys %{$self->{cache}}) { + unless (exists $seen{$_}) { + print "# $record $_ is in the cache but not the LRU queue\n"; + $good = 0; + } + } + + $good; +} + +=head1 NAME + +Tie::File - Access the lines of a disk file via a Perl array + +=head1 SYNOPSIS + + # This file documents Tie::File version 0.12 + + tie @array, 'Tie::File', filename or die ...; + + $array[13] = 'blah'; # line 13 of the file is now 'blah' + print $array[42]; # display line 42 of the file + + $n_recs = @array; # how many records are in the file? + $#array = $n_recs - 2; # chop records off the end + + # As you would expect + @old_recs = splice @array, 3, 7, new recs...; + + untie @array; # all finished + +=head1 DESCRIPTION + +C<Tie::File> represents a regular text file as a Perl array. Each +element in the array corresponds to a record in the file. The first +line of the file is element 0 of the array; the second line is element +1, and so on. + +The file is I<not> loaded into memory, so this will work even for +gigantic files. + +Changes to the array are reflected in the file immediately. + +=head2 C<recsep> + +What is a 'record'? By default, the meaning is the same as for the +C<E<lt>...E<gt>> operator: It's a string terminated by C<$/>, which is +probably C<"\n"> or C<"\r\n">. You may change the definition of +"record" by supplying the C<recsep> option in the C<tie> call: + + tie @array, 'Tie::File', $file, recsep => 'es'; + +This says that records are delimited by the string C<es>. If the file contained the following data: + + Curse these pesky flies!\n + +then the C<@array> would appear to have four elements: + + "Curse thes" + "e pes" + "ky flies" + "!\n" + +An undefined value is not permitted as a record separator. Perl's +special "paragraph mode" semantics (E<agrave> la C<$/ = "">) are not +emulated. + +Records read from the tied array will have the record separator string +on the end, just as if they were read from the C<E<lt>...E<gt>> +operator. Records stored into the array will have the record +separator string appended before they are written to the file, if they +don't have one already. For example, if the record separator string +is C<"\n">, then the following two lines do exactly the same thing: + + $array[17] = "Cherry pie"; + $array[17] = "Cherry pie\n"; + +The result is that the contents of line 17 of the file will be +replaced with "Cherry pie"; a newline character will separate line 17 +from line 18. This means that inparticular, this will do nothing: + + chomp $array[17]; + +Because the C<chomp>ed value will have the separator reattached when +it is written back to the file. There is no way to create a file +whose trailing record separator string is missing. + +Inserting records that I<contain> the record separator string will +produce a reasonable result, but if you can't foresee what this result +will be, you'd better avoid doing this. + +=head2 C<mode> + +Normally, the specified file will be opened for read and write access, +and will be created if it does not exist. (That is, the flags +C<O_RDWR | O_CREAT> are supplied in the C<open> call.) If you want to +change this, you may supply alternative flags in the C<mode> option. +See L<Fcntl> for a listing of available flags. +For example: + + # open the file if it exists, but fail if it does not exist + use Fcntl 'O_RDWR'; + tie @array, 'Tie::File', $file, mode => O_RDWR; + + # create the file if it does not exist + use Fcntl 'O_RDWR', 'O_CREAT'; + tie @array, 'Tie::File', $file, mode => O_RDWR | O_CREAT; + + # open an existing file in read-only mode + use Fcntl 'O_RDONLY'; + tie @array, 'Tie::File', $file, mode => O_RDONLY; + +Opening the data file in write-only or append mode is not supported. + +=head2 C<cachesize> + +Records read in from the file are cached, to avoid having to re-read +them repeatedly. If you read the same record twice, the first time it +will be stored in memory, and the second time it will be fetched from +memory. + +The cache has a bounded size; when it exceeds this size, the +least-recently visited records will be purged from the cache. The +default size is 2Mib. You can adjust the amount of space used for the +cache by supplying the C<cachesize> option. The argument is the desired cache size, in bytes. + + # I have a lot of memory, so use a large cache to speed up access + tie @array, 'Tie::File', $file, cachesize => 20_000_000; + +Setting the cache size to 0 will inhibit caching; records will be +fetched from disk every time you examine them. + +=head2 Option Format + +C<-mode> is a synonym for C<mode>. C<-recsep> is a synonym for +C<recsep>. C<-cachesize> is a synonym for C<cachesize>. You get the +idea. + +=head1 Public Methods + +The C<tie> call returns an object, say C<$o>. You may call + + $rec = $o->FETCH($n); + $o->STORE($n, $rec); + +to fetch or store the record at line C<$n>, respectively. There are +no other public methods in this package. + +=head1 CAVEATS + +(That's Latin for 'warnings'.) + +=head2 Efficiency Note + +Every effort was made to make this module efficient. Nevertheless, +changing the size of a record in the middle of a large file will +always be slow, because everything after the new record must be move. + +In particular, note that: + + # million-line file + for (@file_array) { + $_ .= 'x'; + } + +is likely to be very slow, because the first iteration must relocate +lines 1 through 999,999; the second iteration must relocate lines 2 +through 999,999, and so on. The relocation is done using block +writes, however, so it's not as slow as it might be. + +A future version of this module will provide some mechanism for +getting better performance in such cases, by deferring the writing +until it can be done all at once. + +=head2 Efficiency Note 2 + +Not every effort was made to make this module as efficient as +possible. C<FETCHSIZE> should use binary search instead of linear +search. The cache's LRU queue should be a heap instead of a list. +These defects are probably minor; in any event, they will be fixed in +a later version of the module. + +=head2 Efficiency Note 3 + +The author has supposed that since this module is concerned with file +I/O, almost all normal use of it will be heavily I/O bound, and that +the time to maintain complicated data structures inside the module +will be dominated by the time to actually perform the I/O. This +suggests, for example, that and LRU read-cache is a good tradeoff, +even if it requires substantial adjustment following a C<splice> +operation. + +=head2 Missing Methods + +The tied array does not yet support C<push>, C<pop>, C<shift>, +C<unshift>, C<splice>, or size-setting via C<$#array = $n>. I will +put these in soon. + +=head1 AUTHOR + +Mark Jason Dominus + +To contact the author, send email to: C<mjd-perl-tiefile+@plover.com> + +To receive an announcement whenever a new version of this module is +released, send a blank email message to +C<mjd-perl-tiefile-subscribe@plover.com>. + +=head1 LICENSE + +C<Tie::File> version 0.12 is copyright (C) 2002 Mark Jason Dominus. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; it should be in the file C<COPYING>. If not, +write to the Free Software Foundation, Inc., 59 Temple Place, Suite +330, Boston, MA 02111 USA + +For licensing inquiries, contact the author at: + + Mark Jason Dominus + 255 S. Warnock St. + Philadelphia, PA 19107 + +=head1 WARRANTY + +C<Tie::File> version 0.12 comes with ABSOLUTELY NO WARRANTY. +For details, see the license. + +=head1 TODO + +C<push>, C<pop>, C<shift>, C<unshift>. + +More tests. (Configuration options, cache flushery. _twrite shoule +be tested separately, because there are a lot of weird special cases +lurking in there.) + +More tests. (Stuff I didn't think of yet.) + +File locking. + +Deferred writing. (!!!) + +Paragraph mode? + +More tests. + +Fixed-length mode. + +=cut + diff --git a/lib/Tie/File/01_gen.t b/lib/Tie/File/01_gen.t new file mode 100644 index 0000000000..58c7a9732c --- /dev/null +++ b/lib/Tie/File/01_gen.t @@ -0,0 +1,89 @@ +#!/usr/bin/perl + +my $file = "tf$$.txt"; + +print "1..38\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +my $o = tie @a, 'Tie::File', $file; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +# 3-4 create +$a[0] = 'rec0'; +check_contents("rec0"); + +# 5-8 append +$a[1] = 'rec1'; +check_contents("rec0", "rec1"); +$a[2] = 'rec2'; +check_contents("rec0", "rec1", "rec2"); + +# 9-14 same-length alterations +$a[0] = 'new0'; +check_contents("new0", "rec1", "rec2"); +$a[1] = 'new1'; +check_contents("new0", "new1", "rec2"); +$a[2] = 'new2'; +check_contents("new0", "new1", "new2"); + +# 15-24 lengthening alterations +$a[0] = 'long0'; +check_contents("long0", "new1", "new2"); +$a[1] = 'long1'; +check_contents("long0", "long1", "new2"); +$a[2] = 'long2'; +check_contents("long0", "long1", "long2"); +$a[1] = 'longer1'; +check_contents("long0", "longer1", "long2"); +$a[0] = 'longer0'; +check_contents("longer0", "longer1", "long2"); + +# 25-34 shortening alterations, including truncation +$a[0] = 'short0'; +check_contents("short0", "longer1", "long2"); +$a[1] = 'short1'; +check_contents("short0", "short1", "long2"); +$a[2] = 'short2'; +check_contents("short0", "short1", "short2"); +$a[1] = 'sh1'; +check_contents("short0", "sh1", "short2"); +$a[0] = 'sh0'; +check_contents("sh0", "sh1", "short2"); + +# file with holes +$a[4] = 'rec4'; +check_contents("sh0", "sh1", "short2", "", "rec4"); +$a[3] = 'rec3'; +check_contents("sh0", "sh1", "short2", "rec3", "rec4"); + + +# try inserting a record into the middle of an empty file + + +sub check_contents { + my @c = @_; + my $x = join $/, @c, ''; + local *FH; + my $open = open FH, "< $file"; + my $a; + { local $/; $a = <FH> } + print (($open && $a eq $x) ? "ok $N\n" : "not ok $N # file @c\n"); + $N++; + + # now check FETCH: + my $good = 1; + for (0.. $#c) { + $good = 0 unless $a[$_] eq "$c[$_]\n"; + } + print (($open && $good) ? "ok $N\n" : "not ok $N # fetch @c\n"); + $N++; +} + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/02_fetchsize.t b/lib/Tie/File/02_fetchsize.t new file mode 100644 index 0000000000..aaf44f09c7 --- /dev/null +++ b/lib/Tie/File/02_fetchsize.t @@ -0,0 +1,47 @@ +#!/usr/bin/perl + +my $file = "tf$$.txt"; +my $data = "rec1$/rec2$/rec3$/"; + +print "1..6\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +open F, "> $file" or die $!; +print F $data; +close F; + + +my $o = tie @a, 'Tie::File', $file; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +my $n; + +# 3 test array element count +$n = @a; +print $n == 3 ? "ok $N\n" : "not ok $N # n=$n\n"; +$N++; + +# 4 same thing again +$n = @a; +print $n == 3 ? "ok $N\n" : "not ok $N # n=$n\n"; +$N++; + +# 5 test $#a notation +$n = $#a; +print $n == 2 ? "ok $N\n" : "not ok $N # n=$n\n"; +$N++; + +# 6 test looping over array elements +my $q; +for (@a) { $q .= $_ } +print $q eq $data ? "ok $N\n" : "not ok $N # n=$n\n"; +$N++; + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/03_longfetch.t b/lib/Tie/File/03_longfetch.t new file mode 100644 index 0000000000..7e36962e9b --- /dev/null +++ b/lib/Tie/File/03_longfetch.t @@ -0,0 +1,38 @@ +#!/usr/bin/perl +# +# Make sure we can fetch a record in the middle of the file +# before we've ever looked at any records before it +# +# (tests _fill_offsets_to() ) +# + +my $file = "tf$$.txt"; +my $data = "rec0$/rec1$/rec2$/"; + +print "1..5\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +open F, "> $file" or die $!; +print F $data; +close F; + + +my $o = tie @a, 'Tie::File', $file; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +my $n; + +# 3-5 +for (2, 1, 0) { + print $a[$_] eq "rec$_$/" ? "ok $N\n" : "not ok $N # rec=$a[$_] ?\n"; + $N++; +} + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/04_splice.t b/lib/Tie/File/04_splice.t new file mode 100644 index 0000000000..c8daf0e9e3 --- /dev/null +++ b/lib/Tie/File/04_splice.t @@ -0,0 +1,163 @@ +#!/usr/bin/perl +# +# Check SPLICE function's effect on the file +# (07_rv_splice.t checks its return value) +# +# Each call to 'check_contents' actually performs two tests. +# First, it calls the tied object's own 'check_integrity' method, +# which makes sure that the contents of the read cache and offset tables +# accurately reflect the contents of the file. +# Then, it checks the actual contents of the file against the expected +# contents. + +use lib '/home/mjd/src/perl/Tie-File2/lib'; +my $file = "tf$$.txt"; +my $data = "rec0$/rec1$/rec2$/"; + +print "1..88\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; # partial credit just for showing up + +my $o = tie @a, 'Tie::File', $file; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +my $n; + +# (3-22) splicing at the beginning +init_file($data); + +splice(@a, 0, 0, "rec4"); +check_contents("rec4$/$data"); +splice(@a, 0, 1, "rec5"); # same length +check_contents("rec5$/$data"); +splice(@a, 0, 1, "record5"); # longer +check_contents("record5$/$data"); + +splice(@a, 0, 1, "r5"); # shorter +check_contents("r5$/$data"); +splice(@a, 0, 1); # removal +check_contents("$data"); +splice(@a, 0, 0); # no-op +check_contents("$data"); +splice(@a, 0, 0, 'r7', 'rec8'); # insert more than one +check_contents("r7$/rec8$/$data"); +splice(@a, 0, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_contents("rec7$/record8$/rec9$/$data"); + +splice(@a, 0, 3, 'record9', 'rec10'); # delete more than insert +check_contents("record9$/rec10$/$data"); +splice(@a, 0, 2); # delete more than one +check_contents("$data"); + + +# (23-42) splicing in the middle +splice(@a, 1, 0, "rec4"); +check_contents("rec0$/rec4$/rec1$/rec2$/"); +splice(@a, 1, 1, "rec5"); # same length +check_contents("rec0$/rec5$/rec1$/rec2$/"); +splice(@a, 1, 1, "record5"); # longer +check_contents("rec0$/record5$/rec1$/rec2$/"); + +splice(@a, 1, 1, "r5"); # shorter +check_contents("rec0$/r5$/rec1$/rec2$/"); +splice(@a, 1, 1); # removal +check_contents("$data"); +splice(@a, 1, 0); # no-op +check_contents("$data"); +splice(@a, 1, 0, 'r7', 'rec8'); # insert more than one +check_contents("rec0$/r7$/rec8$/rec1$/rec2$/"); +splice(@a, 1, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_contents("rec0$/rec7$/record8$/rec9$/rec1$/rec2$/"); + +splice(@a, 1, 3, 'record9', 'rec10'); # delete more than insert +check_contents("rec0$/record9$/rec10$/rec1$/rec2$/"); +splice(@a, 1, 2); # delete more than one +check_contents("$data"); + +# (43-62) splicing at the end +splice(@a, 3, 0, "rec4"); +check_contents("$ {data}rec4$/"); +splice(@a, 3, 1, "rec5"); # same length +check_contents("$ {data}rec5$/"); +splice(@a, 3, 1, "record5"); # longer +check_contents("$ {data}record5$/"); + +splice(@a, 3, 1, "r5"); # shorter +check_contents("$ {data}r5$/"); +splice(@a, 3, 1); # removal +check_contents("$data"); +splice(@a, 3, 0); # no-op +check_contents("$data"); +splice(@a, 3, 0, 'r7', 'rec8'); # insert more than one +check_contents("$ {data}r7$/rec8$/"); +splice(@a, 3, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_contents("$ {data}rec7$/record8$/rec9$/"); + +splice(@a, 3, 3, 'record9', 'rec10'); # delete more than insert +check_contents("$ {data}record9$/rec10$/"); +splice(@a, 3, 2); # delete more than one +check_contents("$data"); + +# (63-82) splicing with negative subscript +splice(@a, -1, 0, "rec4"); +check_contents("rec0$/rec1$/rec4$/rec2$/"); +splice(@a, -1, 1, "rec5"); # same length +check_contents("rec0$/rec1$/rec4$/rec5$/"); +splice(@a, -1, 1, "record5"); # longer +check_contents("rec0$/rec1$/rec4$/record5$/"); + +splice(@a, -1, 1, "r5"); # shorter +check_contents("rec0$/rec1$/rec4$/r5$/"); +splice(@a, -1, 1); # removal +check_contents("rec0$/rec1$/rec4$/"); +splice(@a, -1, 0); # no-op +check_contents("rec0$/rec1$/rec4$/"); +splice(@a, -1, 0, 'r7', 'rec8'); # insert more than one +check_contents("rec0$/rec1$/r7$/rec8$/rec4$/"); +splice(@a, -1, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_contents("rec0$/rec1$/r7$/rec8$/rec7$/record8$/rec9$/"); + +splice(@a, -3, 3, 'record9', 'rec10'); # delete more than insert +check_contents("rec0$/rec1$/r7$/rec8$/record9$/rec10$/"); +splice(@a, -4, 3); # delete more than one +check_contents("rec0$/rec1$/rec10$/"); + +# (83-84) scrub it all out +splice(@a, 0, 3); +check_contents(""); + +# (85-86) put some back in +splice(@a, 0, 0, "rec0", "rec1"); +check_contents("rec0$/rec1$/"); + +# (87-88) what if we remove too many records? +splice(@a, 0, 17); +check_contents(""); + +sub init_file { + my $data = shift; + open F, "> $file" or die $!; + print F $data; + close F; +} + +sub check_contents { + my $x = shift; + local *FH; + my $integrity = $o->_check_integrity($file, $ENV{INTEGRITY}); + print $integrity ? "ok $N\n" : "not ok $N\n"; + $N++; + my $open = open FH, "< $file"; + my $a; + { local $/; $a = <FH> } + print (($open && $a eq $x) ? "ok $N\n" : "not ok $N\n"); + $N++; +} + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/05_size.t b/lib/Tie/File/05_size.t new file mode 100644 index 0000000000..4b1085876f --- /dev/null +++ b/lib/Tie/File/05_size.t @@ -0,0 +1,78 @@ +#!/usr/bin/perl +# +# Check FETCHSIZE and SETSIZE functions +# PUSH POP SHIFT UNSHIFT +# + +my $file = "tf$$.txt"; +my $data = "rec0$/rec1$/rec2$/"; +my ($o, $n); + +print "1..10\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +# 2-3 FETCHSIZE 0-length file +open F, "> $file" or die $!; +close F; +$o = tie @a, 'Tie::File', $file; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; +$n = @a; +print $n == 0 ? "ok $N\n" : "not ok $N # $n, s/b 0\n"; +$N++; + +# Reset everything +undef $o; +untie @a; + +# 4-5 FETCHSIZE positive-length file +open F, "> $file" or die $!; +print F $data; +close F; +$o = tie @a, 'Tie::File', $file; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; +$n = @a; +print $n == 3 ? "ok $N\n" : "not ok $N # $n, s/b 0\n"; +$N++; + +# STORESIZE +# 6 Make it longer: +$#a = 4; +check_contents("$data$/$/"); + +# 7 Make it longer again: +$#a = 6; +check_contents("$data$/$/$/$/"); + +# 8 Make it shorter: +$#a = 4; +check_contents("$data$/$/"); + +# 9 Make it shorter again: +$#a = 2; +check_contents($data); + +# 10 Get rid of it completely: +$#a = -1; +check_contents(''); + + +sub check_contents { + my $x = shift; + local *FH; + my $open = open FH, "< $file"; + my $a; + { local $/; $a = <FH> } + print (($open && $a eq $x) ? "ok $N\n" : "not ok $N\n"); + $N++; +} + + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/06_fixrec.t b/lib/Tie/File/06_fixrec.t new file mode 100644 index 0000000000..4a8ceb8d55 --- /dev/null +++ b/lib/Tie/File/06_fixrec.t @@ -0,0 +1,36 @@ +#!/usr/bin/perl + +my $file = "tf$$.txt"; + +print "1..5\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +my $o = tie @a, 'Tie::File', $file; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +$a[0] = 'rec0'; +check_contents("rec0$/"); +$a[1] = "rec1$/"; +check_contents("rec0$/rec1$/"); +$a[2] = "rec2$/$/"; # should we detect this? +check_contents("rec0$/rec1$/rec2$/$/"); + +sub check_contents { + my $x = shift; + local *FH; + my $open = open FH, "< $file"; + my $a; + { local $/; $a = <FH> } + print (($open && $a eq $x) ? "ok $N\n" : "not ok $N\n"); + $N++; +} + + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/07_rv_splice.t b/lib/Tie/File/07_rv_splice.t new file mode 100644 index 0000000000..feaf0096bf --- /dev/null +++ b/lib/Tie/File/07_rv_splice.t @@ -0,0 +1,157 @@ +#!/usr/bin/perl +# +# Check SPLICE function's return value +# (04_splice.t checks its effect on the file) +# + +my $file = "tf$$.txt"; +my $data = "rec0$/rec1$/rec2$/"; + +print "1..45\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; # partial credit just for showing up + +my $o = tie @a, 'Tie::File', $file; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +my $n; + +# (3-12) splicing at the beginning +init_file($data); + +@r = splice(@a, 0, 0, "rec4"); +check_result(); +@r = splice(@a, 0, 1, "rec5"); # same length +check_result("rec4"); +@r = splice(@a, 0, 1, "record5"); # longer +check_result("rec5"); + +@r = splice(@a, 0, 1, "r5"); # shorter +check_result("record5"); +@r = splice(@a, 0, 1); # removal +check_result("r5"); +@r = splice(@a, 0, 0); # no-op +check_result(); +@r = splice(@a, 0, 0, 'r7', 'rec8'); # insert more than one +check_result(); +@r = splice(@a, 0, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_result('r7', 'rec8'); + +@r = splice(@a, 0, 3, 'record9', 'rec10'); # delete more than insert +check_result('rec7', 'record8', 'rec9'); +@r = splice(@a, 0, 2); # delete more than one +check_result('record9', 'rec10'); + + +# (13-22) splicing in the middle +@r = splice(@a, 1, 0, "rec4"); +check_result(); +@r = splice(@a, 1, 1, "rec5"); # same length +check_result('rec4'); +@r = splice(@a, 1, 1, "record5"); # longer +check_result('rec5'); + +@r = splice(@a, 1, 1, "r5"); # shorter +check_result("record5"); +@r = splice(@a, 1, 1); # removal +check_result("r5"); +@r = splice(@a, 1, 0); # no-op +check_result(); +@r = splice(@a, 1, 0, 'r7', 'rec8'); # insert more than one +check_result(); +@r = splice(@a, 1, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_result('r7', 'rec8'); + +@r = splice(@a, 1, 3, 'record9', 'rec10'); # delete more than insert +check_result('rec7', 'record8', 'rec9'); +@r = splice(@a, 1, 2); # delete more than one +check_result('record9','rec10'); + +# (23-32) splicing at the end +@r = splice(@a, 3, 0, "rec4"); +check_result(); +@r = splice(@a, 3, 1, "rec5"); # same length +check_result('rec4'); +@r = splice(@a, 3, 1, "record5"); # longer +check_result('rec5'); + +@r = splice(@a, 3, 1, "r5"); # shorter +check_result('record5'); +@r = splice(@a, 3, 1); # removal +check_result('r5'); +@r = splice(@a, 3, 0); # no-op +check_result(); +@r = splice(@a, 3, 0, 'r7', 'rec8'); # insert more than one +check_result(); +@r = splice(@a, 3, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_result('r7', 'rec8'); + +@r = splice(@a, 3, 3, 'record9', 'rec10'); # delete more than insert +check_result('rec7', 'record8', 'rec9'); +@r = splice(@a, 3, 2); # delete more than one +check_result('record9', 'rec10'); + +# (33-42) splicing with negative subscript +@r = splice(@a, -1, 0, "rec4"); +check_result(); +@r = splice(@a, -1, 1, "rec5"); # same length +check_result('rec2'); +@r = splice(@a, -1, 1, "record5"); # longer +check_result("rec5"); + +@r = splice(@a, -1, 1, "r5"); # shorter +check_result("record5"); +@r = splice(@a, -1, 1); # removal +check_result("r5"); +@r = splice(@a, -1, 0); # no-op +check_result(); +@r = splice(@a, -1, 0, 'r7', 'rec8'); # insert more than one +check_result(); +@r = splice(@a, -1, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_result('rec4'); + +@r = splice(@a, -3, 3, 'record9', 'rec10'); # delete more than insert +check_result('rec7', 'record8', 'rec9'); +@r = splice(@a, -4, 3); # delete more than one +check_result('r7', 'rec8', 'record9'); + +# (43) scrub it all out +@r = splice(@a, 0, 3); +check_result('rec0', 'rec1', 'rec10'); + +# (44) put some back in +@r = splice(@a, 0, 0, "rec0", "rec1"); +check_result(); + +# (45) what if we remove too many records? +@r = splice(@a, 0, 17); +check_result('rec0', 'rec1'); + +sub init_file { + my $data = shift; + open F, "> $file" or die $!; + print F $data; + close F; +} + +# actual results are in @r. +# expected results are in @_ +sub check_result { + my @x = @_; + chomp @r; + my $good = 1; + $good = 0 unless @r == @x; + for my $i (0 .. $#r) { + $good = 0 unless $r[$i] eq $x[$i]; + } + print $good ? "ok $N\n" : "not ok $N \# was (@r); should be (@x)\n"; + $N++; +} + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/08_ro.t b/lib/Tie/File/08_ro.t new file mode 100644 index 0000000000..dde7f20f14 --- /dev/null +++ b/lib/Tie/File/08_ro.t @@ -0,0 +1,41 @@ +#!/usr/bin/perl +# +# Make sure it works to open the file in read-only mode +# + +my $file = "tf$$.txt"; + +print "1..9\n"; + +my $N = 1; +use Tie::File; +use Fcntl 'O_RDONLY'; +print "ok $N\n"; $N++; + +my @items = qw(Gold Frankincense Myrrh Ivory Apes Peacocks); +init_file(join $/, @items, ''); + +my $o = tie @a, 'Tie::File', $file, mode => O_RDONLY; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +$#a == $#items ? print "ok $N\n" : print "not ok $N\n"; +$N++; + +for my $i (0..$#items) { + ("$items[$i]$/" eq $a[$i]) ? print "ok $N\n" : print "not ok $N\n"; + $N++; +} + +sub init_file { + my $data = shift; + open F, "> $file" or die $!; + print F $data; + close F; +} + + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/09_gen_rs.t b/lib/Tie/File/09_gen_rs.t new file mode 100644 index 0000000000..cf797360e5 --- /dev/null +++ b/lib/Tie/File/09_gen_rs.t @@ -0,0 +1,90 @@ +#!/usr/bin/perl + +my $file = "tf$$.txt"; + +print "1..38\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +my $o = tie @a, 'Tie::File', $file, recsep => 'blah'; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + + +# 3-4 create +$a[0] = 'rec0'; +check_contents("rec0"); + +# 5-8 append +$a[1] = 'rec1'; +check_contents("rec0", "rec1"); +$a[2] = 'rec2'; +check_contents("rec0", "rec1", "rec2"); + +# 9-14 same-length alterations +$a[0] = 'new0'; +check_contents("new0", "rec1", "rec2"); +$a[1] = 'new1'; +check_contents("new0", "new1", "rec2"); +$a[2] = 'new2'; +check_contents("new0", "new1", "new2"); + +# 15-24 lengthening alterations +$a[0] = 'long0'; +check_contents("long0", "new1", "new2"); +$a[1] = 'long1'; +check_contents("long0", "long1", "new2"); +$a[2] = 'long2'; +check_contents("long0", "long1", "long2"); +$a[1] = 'longer1'; +check_contents("long0", "longer1", "long2"); +$a[0] = 'longer0'; +check_contents("longer0", "longer1", "long2"); + +# 25-34 shortening alterations, including truncation +$a[0] = 'short0'; +check_contents("short0", "longer1", "long2"); +$a[1] = 'short1'; +check_contents("short0", "short1", "long2"); +$a[2] = 'short2'; +check_contents("short0", "short1", "short2"); +$a[1] = 'sh1'; +check_contents("short0", "sh1", "short2"); +$a[0] = 'sh0'; +check_contents("sh0", "sh1", "short2"); + +# file with holes +$a[4] = 'rec4'; +check_contents("sh0", "sh1", "short2", "", "rec4"); +$a[3] = 'rec3'; +check_contents("sh0", "sh1", "short2", "rec3", "rec4"); + + +# try inserting a record into the middle of an empty file + + +sub check_contents { + my @c = @_; + my $x = join 'blah', @c, ''; + local *FH; + my $open = open FH, "< $file"; + my $a; + { local $/; $a = <FH> } + print (($open && $a eq $x) ? "ok $N\n" : "not ok $N # file @c\n"); + $N++; + + # now check FETCH: + my $good = 1; + for (0.. $#c) { + $good = 0 unless $a[$_] eq "$c[$_]blah"; + } + print (($open && $good) ? "ok $N\n" : "not ok $N # fetch @c\n"); + $N++; +} + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/10_splice_rs.t b/lib/Tie/File/10_splice_rs.t new file mode 100644 index 0000000000..1e1b545ccd --- /dev/null +++ b/lib/Tie/File/10_splice_rs.t @@ -0,0 +1,162 @@ +#!/usr/bin/perl +# +# Check SPLICE function's effect on the file +# (07_rv_splice.t checks its return value) +# +# Each call to 'check_contents' actually performs two tests. +# First, it calls the tied object's own 'check_integrity' method, +# which makes sure that the contents of the read cache and offset tables +# accurately reflect the contents of the file. +# Then, it checks the actual contents of the file against the expected +# contents. + +my $file = "tf$$.txt"; +my $data = "rec0blahrec1blahrec2blah"; + +print "1..88\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; # partial credit just for showing up + +my $o = tie @a, 'Tie::File', $file, recsep => 'blah'; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +my $n; + +# (3-22) splicing at the beginning +init_file($data); + +splice(@a, 0, 0, "rec4"); +check_contents("rec4blah$data"); +splice(@a, 0, 1, "rec5"); # same length +check_contents("rec5blah$data"); +splice(@a, 0, 1, "record5"); # longer +check_contents("record5blah$data"); + +splice(@a, 0, 1, "r5"); # shorter +check_contents("r5blah$data"); +splice(@a, 0, 1); # removal +check_contents("$data"); +splice(@a, 0, 0); # no-op +check_contents("$data"); +splice(@a, 0, 0, 'r7', 'rec8'); # insert more than one +check_contents("r7blahrec8blah$data"); +splice(@a, 0, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_contents("rec7blahrecord8blahrec9blah$data"); + +splice(@a, 0, 3, 'record9', 'rec10'); # delete more than insert +check_contents("record9blahrec10blah$data"); +splice(@a, 0, 2); # delete more than one +check_contents("$data"); + + +# (23-42) splicing in the middle +splice(@a, 1, 0, "rec4"); +check_contents("rec0blahrec4blahrec1blahrec2blah"); +splice(@a, 1, 1, "rec5"); # same length +check_contents("rec0blahrec5blahrec1blahrec2blah"); +splice(@a, 1, 1, "record5"); # longer +check_contents("rec0blahrecord5blahrec1blahrec2blah"); + +splice(@a, 1, 1, "r5"); # shorter +check_contents("rec0blahr5blahrec1blahrec2blah"); +splice(@a, 1, 1); # removal +check_contents("$data"); +splice(@a, 1, 0); # no-op +check_contents("$data"); +splice(@a, 1, 0, 'r7', 'rec8'); # insert more than one +check_contents("rec0blahr7blahrec8blahrec1blahrec2blah"); +splice(@a, 1, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_contents("rec0blahrec7blahrecord8blahrec9blahrec1blahrec2blah"); + +splice(@a, 1, 3, 'record9', 'rec10'); # delete more than insert +check_contents("rec0blahrecord9blahrec10blahrec1blahrec2blah"); +splice(@a, 1, 2); # delete more than one +check_contents("$data"); + +# (43-62) splicing at the end +splice(@a, 3, 0, "rec4"); +check_contents("$ {data}rec4blah"); +splice(@a, 3, 1, "rec5"); # same length +check_contents("$ {data}rec5blah"); +splice(@a, 3, 1, "record5"); # longer +check_contents("$ {data}record5blah"); + +splice(@a, 3, 1, "r5"); # shorter +check_contents("$ {data}r5blah"); +splice(@a, 3, 1); # removal +check_contents("$data"); +splice(@a, 3, 0); # no-op +check_contents("$data"); +splice(@a, 3, 0, 'r7', 'rec8'); # insert more than one +check_contents("$ {data}r7blahrec8blah"); +splice(@a, 3, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_contents("$ {data}rec7blahrecord8blahrec9blah"); + +splice(@a, 3, 3, 'record9', 'rec10'); # delete more than insert +check_contents("$ {data}record9blahrec10blah"); +splice(@a, 3, 2); # delete more than one +check_contents("$data"); + +# (63-82) splicing with negative subscript +splice(@a, -1, 0, "rec4"); +check_contents("rec0blahrec1blahrec4blahrec2blah"); +splice(@a, -1, 1, "rec5"); # same length +check_contents("rec0blahrec1blahrec4blahrec5blah"); +splice(@a, -1, 1, "record5"); # longer +check_contents("rec0blahrec1blahrec4blahrecord5blah"); + +splice(@a, -1, 1, "r5"); # shorter +check_contents("rec0blahrec1blahrec4blahr5blah"); +splice(@a, -1, 1); # removal +check_contents("rec0blahrec1blahrec4blah"); +splice(@a, -1, 0); # no-op +check_contents("rec0blahrec1blahrec4blah"); +splice(@a, -1, 0, 'r7', 'rec8'); # insert more than one +check_contents("rec0blahrec1blahr7blahrec8blahrec4blah"); +splice(@a, -1, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_contents("rec0blahrec1blahr7blahrec8blahrec7blahrecord8blahrec9blah"); + +splice(@a, -3, 3, 'record9', 'rec10'); # delete more than insert +check_contents("rec0blahrec1blahr7blahrec8blahrecord9blahrec10blah"); +splice(@a, -4, 3); # delete more than one +check_contents("rec0blahrec1blahrec10blah"); + +# (83-84) scrub it all out +splice(@a, 0, 3); +check_contents(""); + +# (85-86) put some back in +splice(@a, 0, 0, "rec0", "rec1"); +check_contents("rec0blahrec1blah"); + +# (87-88) what if we remove too many records? +splice(@a, 0, 17); +check_contents(""); + +sub init_file { + my $data = shift; + open F, "> $file" or die $!; + print F $data; + close F; +} + +sub check_contents { + my $x = shift; + local *FH; + my $integrity = $o->_check_integrity($file, $ENV{INTEGRITY}); + print $integrity ? "ok $N\n" : "not ok $N\n"; + $N++; + my $open = open FH, "< $file"; + my $a; + { local $/; $a = <FH> } + print (($open && $a eq $x) ? "ok $N\n" : "not ok $N\n"); + $N++; +} + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/11_rv_splice_rs.t b/lib/Tie/File/11_rv_splice_rs.t new file mode 100644 index 0000000000..f78c25c8d8 --- /dev/null +++ b/lib/Tie/File/11_rv_splice_rs.t @@ -0,0 +1,157 @@ +#!/usr/bin/perl +# +# Check SPLICE function's return value +# (04_splice.t checks its effect on the file) +# + +my $file = "tf$$.txt"; +my $data = "rec0blahrec1blahrec2blah"; + +print "1..45\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; # partial credit just for showing up + +my $o = tie @a, 'Tie::File', $file, recsep => 'blah'; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +my $n; + +# (3-12) splicing at the beginning +init_file($data); + +@r = splice(@a, 0, 0, "rec4"); +check_result(); +@r = splice(@a, 0, 1, "rec5"); # same length +check_result("rec4"); +@r = splice(@a, 0, 1, "record5"); # longer +check_result("rec5"); + +@r = splice(@a, 0, 1, "r5"); # shorter +check_result("record5"); +@r = splice(@a, 0, 1); # removal +check_result("r5"); +@r = splice(@a, 0, 0); # no-op +check_result(); +@r = splice(@a, 0, 0, 'r7', 'rec8'); # insert more than one +check_result(); +@r = splice(@a, 0, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_result('r7', 'rec8'); + +@r = splice(@a, 0, 3, 'record9', 'rec10'); # delete more than insert +check_result('rec7', 'record8', 'rec9'); +@r = splice(@a, 0, 2); # delete more than one +check_result('record9', 'rec10'); + + +# (13-22) splicing in the middle +@r = splice(@a, 1, 0, "rec4"); +check_result(); +@r = splice(@a, 1, 1, "rec5"); # same length +check_result('rec4'); +@r = splice(@a, 1, 1, "record5"); # longer +check_result('rec5'); + +@r = splice(@a, 1, 1, "r5"); # shorter +check_result("record5"); +@r = splice(@a, 1, 1); # removal +check_result("r5"); +@r = splice(@a, 1, 0); # no-op +check_result(); +@r = splice(@a, 1, 0, 'r7', 'rec8'); # insert more than one +check_result(); +@r = splice(@a, 1, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_result('r7', 'rec8'); + +@r = splice(@a, 1, 3, 'record9', 'rec10'); # delete more than insert +check_result('rec7', 'record8', 'rec9'); +@r = splice(@a, 1, 2); # delete more than one +check_result('record9','rec10'); + +# (23-32) splicing at the end +@r = splice(@a, 3, 0, "rec4"); +check_result(); +@r = splice(@a, 3, 1, "rec5"); # same length +check_result('rec4'); +@r = splice(@a, 3, 1, "record5"); # longer +check_result('rec5'); + +@r = splice(@a, 3, 1, "r5"); # shorter +check_result('record5'); +@r = splice(@a, 3, 1); # removal +check_result('r5'); +@r = splice(@a, 3, 0); # no-op +check_result(); +@r = splice(@a, 3, 0, 'r7', 'rec8'); # insert more than one +check_result(); +@r = splice(@a, 3, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_result('r7', 'rec8'); + +@r = splice(@a, 3, 3, 'record9', 'rec10'); # delete more than insert +check_result('rec7', 'record8', 'rec9'); +@r = splice(@a, 3, 2); # delete more than one +check_result('record9', 'rec10'); + +# (33-42) splicing with negative subscript +@r = splice(@a, -1, 0, "rec4"); +check_result(); +@r = splice(@a, -1, 1, "rec5"); # same length +check_result('rec2'); +@r = splice(@a, -1, 1, "record5"); # longer +check_result("rec5"); + +@r = splice(@a, -1, 1, "r5"); # shorter +check_result("record5"); +@r = splice(@a, -1, 1); # removal +check_result("r5"); +@r = splice(@a, -1, 0); # no-op +check_result(); +@r = splice(@a, -1, 0, 'r7', 'rec8'); # insert more than one +check_result(); +@r = splice(@a, -1, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check_result('rec4'); + +@r = splice(@a, -3, 3, 'record9', 'rec10'); # delete more than insert +check_result('rec7', 'record8', 'rec9'); +@r = splice(@a, -4, 3); # delete more than one +check_result('r7', 'rec8', 'record9'); + +# (43) scrub it all out +@r = splice(@a, 0, 3); +check_result('rec0', 'rec1', 'rec10'); + +# (44) put some back in +@r = splice(@a, 0, 0, "rec0", "rec1"); +check_result(); + +# (45) what if we remove too many records? +@r = splice(@a, 0, 17); +check_result('rec0', 'rec1'); + +sub init_file { + my $data = shift; + open F, "> $file" or die $!; + print F $data; + close F; +} + +# actual results are in @r. +# expected results are in @_ +sub check_result { + my @x = @_; + s/blah$// for @r; + my $good = 1; + $good = 0 unless @r == @x; + for my $i (0 .. $#r) { + $good = 0 unless $r[$i] eq $x[$i]; + } + print $good ? "ok $N\n" : "not ok $N \# was (@r); should be (@x)\n"; + $N++; +} + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/12_longfetch_rs.t b/lib/Tie/File/12_longfetch_rs.t new file mode 100644 index 0000000000..60f1fd1a50 --- /dev/null +++ b/lib/Tie/File/12_longfetch_rs.t @@ -0,0 +1,38 @@ +#!/usr/bin/perl +# +# Make sure we can fetch a record in the middle of the file +# before we've ever looked at any records before it +# +# (tests _fill_offsets_to() ) +# + +my $file = "tf$$.txt"; +my $data = "rec0blahrec1blahrec2blah"; + +print "1..5\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +open F, "> $file" or die $!; +print F $data; +close F; + + +my $o = tie @a, 'Tie::File', $file, recsep => 'blah'; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +my $n; + +# 3-5 +for (2, 1, 0) { + print $a[$_] eq "rec${_}blah" ? "ok $N\n" : "not ok $N # rec=$a[$_] ?\n"; + $N++; +} + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/13_size_rs.t b/lib/Tie/File/13_size_rs.t new file mode 100644 index 0000000000..254f3ab0b0 --- /dev/null +++ b/lib/Tie/File/13_size_rs.t @@ -0,0 +1,78 @@ +#!/usr/bin/perl +# +# Check FETCHSIZE and SETSIZE functions +# PUSH POP SHIFT UNSHIFT +# + +my $file = "tf$$.txt"; +my $data = "rec0blahrec1blahrec2blah"; +my ($o, $n); + +print "1..10\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +# 2-3 FETCHSIZE 0-length file +open F, "> $file" or die $!; +close F; +$o = tie @a, 'Tie::File', $file, recsep => 'blah'; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; +$n = @a; +print $n == 0 ? "ok $N\n" : "not ok $N # $n, s/b 0\n"; +$N++; + +# Reset everything +undef $o; +untie @a; + +# 4-5 FETCHSIZE positive-length file +open F, "> $file" or die $!; +print F $data; +close F; +$o = tie @a, 'Tie::File', $file, recsep => 'blah'; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; +$n = @a; +print $n == 3 ? "ok $N\n" : "not ok $N # $n, s/b 0\n"; +$N++; + +# STORESIZE +# 6 Make it longer: +$#a = 4; +check_contents("${data}blahblah"); + +# 7 Make it longer again: +$#a = 6; +check_contents("${data}blahblahblahblah"); + +# 8 Make it shorter: +$#a = 4; +check_contents("${data}blahblah"); + +# 9 Make it shorter again: +$#a = 2; +check_contents($data); + +# 10 Get rid of it completely: +$#a = -1; +check_contents(''); + + +sub check_contents { + my $x = shift; + local *FH; + my $open = open FH, "< $file"; + my $a; + { local $/; $a = <FH> } + print (($open && $a eq $x) ? "ok $N\n" : "not ok $N\n"); + $N++; +} + + +END { + 1 while unlink $file; +} + |