summaryrefslogtreecommitdiff
path: root/ext/DB_File/DB_File.pm
diff options
context:
space:
mode:
Diffstat (limited to 'ext/DB_File/DB_File.pm')
-rw-r--r--ext/DB_File/DB_File.pm361
1 files changed, 330 insertions, 31 deletions
diff --git a/ext/DB_File/DB_File.pm b/ext/DB_File/DB_File.pm
index fcd0746a5e..e20a5621e7 100644
--- a/ext/DB_File/DB_File.pm
+++ b/ext/DB_File/DB_File.pm
@@ -1,10 +1,10 @@
# DB_File.pm -- Perl 5 interface to Berkeley DB
#
-# written by Paul Marquess (pmarquess@bfsec.bt.co.uk)
-# last modified 16th May 1998
-# version 1.60
+# written by Paul Marquess (Paul.Marquess@btinternet.com)
+# last modified 4th August 1999
+# version 1.70
#
-# Copyright (c) 1995-8 Paul Marquess. All rights reserved.
+# Copyright (c) 1995-1999 Paul Marquess. All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
@@ -145,7 +145,7 @@ use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO $db_ver
use Carp;
-$VERSION = "1.60" ;
+$VERSION = "1.70" ;
#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
$DB_BTREE = new DB_File::BTREEINFO ;
@@ -196,7 +196,7 @@ sub AUTOLOAD {
($constname = $AUTOLOAD) =~ s/.*:://;
my $val = constant($constname, @_ ? $_[0] : 0);
if ($! != 0) {
- if ($! =~ /Invalid/) {
+ if ($! =~ /Invalid/ || $!{EINVAL}) {
$AutoLoader::AUTOLOAD = $AUTOLOAD;
goto &AutoLoader::AUTOLOAD;
}
@@ -300,6 +300,40 @@ sub STORESIZE
}
}
+sub find_dup
+{
+ croak "Usage: \$db->find_dup(key,value)\n"
+ unless @_ == 3 ;
+
+ my $db = shift ;
+ my ($origkey, $value_wanted) = @_ ;
+ my ($key, $value) = ($origkey, 0);
+ my ($status) = 0 ;
+
+ for ($status = $db->seq($key, $value, R_CURSOR() ) ;
+ $status == 0 ;
+ $status = $db->seq($key, $value, R_NEXT() ) ) {
+
+ return 0 if $key eq $origkey and $value eq $value_wanted ;
+ }
+
+ return $status ;
+}
+
+sub del_dup
+{
+ croak "Usage: \$db->del_dup(key,value)\n"
+ unless @_ == 3 ;
+
+ my $db = shift ;
+ my ($key, $value) = @_ ;
+ my ($status) = $db->find_dup($key, $value) ;
+ return $status if $status != 0 ;
+
+ $status = $db->del($key, R_CURSOR() ) ;
+ return $status ;
+}
+
sub get_dup
{
croak "Usage: \$db->get_dup(key [,flag])\n"
@@ -364,6 +398,8 @@ DB_File - Perl5 access to Berkeley DB version 1.x
$count = $X->get_dup($key) ;
@list = $X->get_dup($key) ;
%list = $X->get_dup($key, 1) ;
+ $status = $X->find_dup($key, $value) ;
+ $status = $X->del_dup($key, $value) ;
# RECNO only
$a = $X->length;
@@ -372,6 +408,12 @@ DB_File - Perl5 access to Berkeley DB version 1.x
$a = $X->shift;
$X->unshift(list);
+ # DBM Filters
+ $old_filter = $db->filter_store_key ( sub { ... } ) ;
+ $old_filter = $db->filter_store_value( sub { ... } ) ;
+ $old_filter = $db->filter_fetch_key ( sub { ... } ) ;
+ $old_filter = $db->filter_fetch_value( sub { ... } ) ;
+
untie %hash ;
untie @array ;
@@ -443,11 +485,11 @@ is considered stable enough for real work.
B<Note:> The database file format has changed in Berkeley DB version 2.
If you cannot recreate your databases, you must dump any existing
databases with the C<db_dump185> utility that comes with Berkeley DB.
-Once you have upgraded DB_File to use Berkeley DB version 2, your
+Once you have rebuilt DB_File to use Berkeley DB version 2, your
databases can be recreated using C<db_load>. Refer to the Berkeley DB
documentation for further details.
-Please read L<COPYRIGHT> before using version 2.x of Berkeley DB with
+Please read L<"COPYRIGHT"> before using version 2.x of Berkeley DB with
DB_File.
=head2 Interface to Berkeley DB
@@ -628,6 +670,7 @@ contents of the database.
use DB_File ;
use vars qw( %h $k $v ) ;
+ unlink "fruit" ;
tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH
or die "Cannot open file 'fruit': $!\n";
@@ -687,6 +730,7 @@ insensitive compare function will be used.
# specify the Perl sub that will do the comparison
$DB_BTREE->{'compare'} = \&Compare ;
+ unlink "tree" ;
tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE
or die "Cannot open file 'tree': $!\n" ;
@@ -763,7 +807,7 @@ code:
# iterate through the associative array
# and print each key/value pair.
- foreach (keys %h)
+ foreach (sort keys %h)
{ print "$_ -> $h{$_}\n" }
untie %h ;
@@ -837,9 +881,12 @@ that prints:
This time we have got all the key/value pairs, including the multiple
values associated with the key C<Wall>.
+To make life easier when dealing with duplicate keys, B<DB_File> comes with
+a few utility methods.
+
=head2 The get_dup() Method
-B<DB_File> comes with a utility method, called C<get_dup>, to assist in
+The C<get_dup> method assists in
reading duplicate values from BTREE databases. The method can take the
following forms:
@@ -862,6 +909,19 @@ particular value occurred in the BTREE.
So assuming the database created above, we can use C<get_dup> like
this:
+ use strict ;
+ use DB_File ;
+
+ use vars qw($filename $x %h ) ;
+
+ $filename = "tree" ;
+
+ # Enable duplicate records
+ $DB_BTREE->{'flags'} = R_DUP ;
+
+ $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
+ or die "Cannot open $filename: $!\n";
+
my $cnt = $x->get_dup("Wall") ;
print "Wall occurred $cnt times\n" ;
@@ -869,7 +929,7 @@ this:
print "Larry is there\n" if $hash{'Larry'} ;
print "There are $hash{'Brick'} Brick Walls\n" ;
- my @list = $x->get_dup("Wall") ;
+ my @list = sort $x->get_dup("Wall") ;
print "Wall => [@list]\n" ;
@list = $x->get_dup("Smith") ;
@@ -888,6 +948,79 @@ and it will print:
Smith => [John]
Dog => []
+=head2 The find_dup() Method
+
+ $status = $X->find_dup($key, $value) ;
+
+This method checks for the existance of a specific key/value pair. If the
+pair exists, the cursor is left pointing to the pair and the method
+returns 0. Otherwise the method returns a non-zero value.
+
+Assuming the database from the previous example:
+
+ use strict ;
+ use DB_File ;
+
+ use vars qw($filename $x %h $found) ;
+
+ my $filename = "tree" ;
+
+ # Enable duplicate records
+ $DB_BTREE->{'flags'} = R_DUP ;
+
+ $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
+ or die "Cannot open $filename: $!\n";
+
+ $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
+ print "Larry Wall is $found there\n" ;
+
+ $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
+ print "Harry Wall is $found there\n" ;
+
+ undef $x ;
+ untie %h ;
+
+prints this
+
+ Larry Wall is there
+ Harry Wall is not there
+
+
+=head2 The del_dup() Method
+
+ $status = $X->del_dup($key, $value) ;
+
+This method deletes a specific key/value pair. It returns
+0 if they exist and have been deleted successfully.
+Otherwise the method returns a non-zero value.
+
+Again assuming the existance of the C<tree> database
+
+ use strict ;
+ use DB_File ;
+
+ use vars qw($filename $x %h $found) ;
+
+ my $filename = "tree" ;
+
+ # Enable duplicate records
+ $DB_BTREE->{'flags'} = R_DUP ;
+
+ $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
+ or die "Cannot open $filename: $!\n";
+
+ $x->del_dup("Wall", "Larry") ;
+
+ $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
+ print "Larry Wall is $found there\n" ;
+
+ undef $x ;
+ untie %h ;
+
+prints this
+
+ Larry Wall is not there
+
=head2 Matching Partial Keys
The BTREE interface has a feature which allows partial keys to be
@@ -941,7 +1074,7 @@ and print the first matching key/value pair given a partial key.
$st == 0 ;
$st = $x->seq($key, $value, R_NEXT) )
- { print "$key -> $value\n" }
+ { print "$key -> $value\n" }
print "\nPARTIAL MATCH\n" ;
@@ -970,7 +1103,7 @@ Here is the output:
DB_RECNO provides an interface to flat text files. Both variable and
fixed length records are supported.
-In order to make RECNO more compatible with Perl the array offset for
+In order to make RECNO more compatible with Perl, the array offset for
all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
As with normal Perl arrays, a RECNO array can be accessed using
@@ -999,7 +1132,7 @@ error will be fixed in the next release of Berkeley DB.
That clarifies the situation with regards Berkeley DB itself. What
about B<DB_File>? Well, the behavior defined in the quote above is
-quite useful, so B<DB_File> conforms it.
+quite useful, so B<DB_File> conforms to it.
That means that you can specify other options (e.g. cachesize) and
still have bval default to C<"\n"> for variable length records, and
@@ -1007,13 +1140,18 @@ space for fixed length records.
=head2 A Simple Example
-Here is a simple example that uses RECNO.
+Here is a simple example that uses RECNO (if you are using a version
+of Perl earlier than 5.004_57 this example won't work -- see
+L<Extra RECNO Methods> for a workaround).
use strict ;
use DB_File ;
+ my $filename = "text" ;
+ unlink $filename ;
+
my @h ;
- tie @h, "DB_File", "text", O_RDWR|O_CREAT, 0640, $DB_RECNO
+ tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_RECNO
or die "Cannot open file 'text': $!\n" ;
# Add a few key/value pairs to the file
@@ -1021,6 +1159,18 @@ Here is a simple example that uses RECNO.
$h[1] = "blue" ;
$h[2] = "yellow" ;
+ push @h, "green", "black" ;
+
+ my $elements = scalar @h ;
+ print "The array contains $elements entries\n" ;
+
+ my $last = pop @h ;
+ print "popped $last\n" ;
+
+ unshift @h, "white" ;
+ my $first = shift @h ;
+ print "shifted $first\n" ;
+
# Check for existence of a key
print "Element 1 Exists with value $h[1]\n" if $h[1] ;
@@ -1032,17 +1182,19 @@ Here is a simple example that uses RECNO.
Here is the output from the script:
-
+ The array contains 5 entries
+ popped black
+ shifted white
Element 1 Exists with value blue
- The last element is yellow
- The 2nd last element is blue
+ The last element is green
+ The 2nd last element is yellow
-=head2 Extra Methods
+=head2 Extra RECNO Methods
If you are using a version of Perl earlier than 5.004_57, the tied
-array interface is quite limited. The example script above will work,
-but you won't be able to use C<push>, C<pop>, C<shift>, C<unshift>
-etc. with the tied array.
+array interface is quite limited. In the example script above
+C<push>, C<pop>, C<shift>, C<unshift>
+or determining the array length will not work with a tied array.
To make the interface more useful for older versions of Perl, a number
of methods are supplied with B<DB_File> to simulate the missing array
@@ -1360,6 +1512,141 @@ R_RECNOSYNC is the only valid flag at present.
=back
+=head1 DBM FILTERS
+
+A DBM Filter is a piece of code that is be used when you I<always>
+want to make the same transformation to all keys and/or values in a
+DBM database.
+
+There are four methods associated with DBM Filters. All work identically,
+and each is used to install (or uninstall) a single DBM Filter. Each
+expects a single parameter, namely a reference to a sub. The only
+difference between them is the place that the filter is installed.
+
+To summarise:
+
+=over 5
+
+=item B<filter_store_key>
+
+If a filter has been installed with this method, it will be invoked
+every time you write a key to a DBM database.
+
+=item B<filter_store_value>
+
+If a filter has been installed with this method, it will be invoked
+every time you write a value to a DBM database.
+
+
+=item B<filter_fetch_key>
+
+If a filter has been installed with this method, it will be invoked
+every time you read a key from a DBM database.
+
+=item B<filter_fetch_value>
+
+If a filter has been installed with this method, it will be invoked
+every time you read a value from a DBM database.
+
+=back
+
+You can use any combination of the methods, from none, to all four.
+
+All filter methods return the existing filter, if present, or C<undef>
+in not.
+
+To delete a filter pass C<undef> to it.
+
+=head2 The Filter
+
+When each filter is called by Perl, a local copy of C<$_> will contain
+the key or value to be filtered. Filtering is achieved by modifying
+the contents of C<$_>. The return code from the filter is ignored.
+
+=head2 An Example -- the NULL termination problem.
+
+Consider the following scenario. You have a DBM database
+that you need to share with a third-party C application. The C application
+assumes that I<all> keys and values are NULL terminated. Unfortunately
+when Perl writes to DBM databases it doesn't use NULL termination, so
+your Perl application will have to manage NULL termination itself. When
+you write to the database you will have to use something like this:
+
+ $hash{"$key\0"} = "$value\0" ;
+
+Similarly the NULL needs to be taken into account when you are considering
+the length of existing keys/values.
+
+It would be much better if you could ignore the NULL terminations issue
+in the main application code and have a mechanism that automatically
+added the terminating NULL to all keys and values whenever you write to
+the database and have them removed when you read from the database. As I'm
+sure you have already guessed, this is a problem that DBM Filters can
+fix very easily.
+
+ use strict ;
+ use DB_File ;
+
+ my %hash ;
+ my $filename = "/tmp/filt" ;
+ unlink $filename ;
+
+ my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
+ or die "Cannot open $filename: $!\n" ;
+
+ # Install DBM Filters
+ $db->filter_fetch_key ( sub { s/\0$// } ) ;
+ $db->filter_store_key ( sub { $_ .= "\0" } ) ;
+ $db->filter_fetch_value( sub { s/\0$// } ) ;
+ $db->filter_store_value( sub { $_ .= "\0" } ) ;
+
+ $hash{"abc"} = "def" ;
+ my $a = $hash{"ABC"} ;
+ # ...
+ undef $db ;
+ untie %hash ;
+
+Hopefully the contents of each of the filters should be
+self-explanatory. Both "fetch" filters remove the terminating NULL,
+and both "store" filters add a terminating NULL.
+
+
+=head2 Another Example -- Key is a C int.
+
+Here is another real-life example. By default, whenever Perl writes to
+a DBM database it always writes the key and value as strings. So when
+you use this:
+
+ $hash{12345} = "soemthing" ;
+
+the key 12345 will get stored in the DBM database as the 5 byte string
+"12345". If you actually want the key to be stored in the DBM database
+as a C int, you will have to use C<pack> when writing, and C<unpack>
+when reading.
+
+Here is a DBM Filter that does it:
+
+ use strict ;
+ use DB_File ;
+ my %hash ;
+ my $filename = "/tmp/filt" ;
+ unlink $filename ;
+
+
+ my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
+ or die "Cannot open $filename: $!\n" ;
+
+ $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
+ $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
+ $hash{123} = "def" ;
+ # ...
+ undef $db ;
+ untie %hash ;
+
+This time only two filters have been used -- we only need to manipulate
+the contents of the key, so it wasn't necessary to install any value
+filters.
+
=head1 HINTS AND TIPS
@@ -1429,7 +1716,7 @@ shared by both a Perl and a C application.
The vast majority of problems that are reported in this area boil down
to the fact that C strings are NULL terminated, whilst Perl strings are
-not.
+not. See L<DBM FILTERS> for a generic way to work around this problem.
Here is a real example. Netscape 2.0 keeps a record of the locations you
visit along with the time you last visited them in a DB_HASH database.
@@ -1618,6 +1905,19 @@ double quotes, like this:
Although it might seem like a real pain, it is really worth the effort
of having a C<use strict> in all your scripts.
+=head1 REFERENCES
+
+Articles that are either about B<DB_File> or make use of it.
+
+=over 5
+
+=item 1.
+
+I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
+Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
+
+=back
+
=head1 HISTORY
Moved to the Changes file.
@@ -1643,10 +1943,8 @@ F<modules/by-module/DB_File>.
This version of B<DB_File> will work with either version 1.x or 2.x of
Berkeley DB, but is limited to the functionality provided by version 1.
-The official web site for Berkeley DB is
-F<http://www.sleepycat.com/db>. The ftp equivalent is
-F<ftp.sleepycat.com:/pub>. Both versions 1 and 2 of Berkeley DB are
-available there.
+The official web site for Berkeley DB is F<http://www.sleepycat.com>.
+Both versions 1 and 2 of Berkeley DB are available there.
Alternatively, Berkeley DB version 1 is available at your nearest CPAN
archive in F<src/misc/db.1.85.tar.gz>.
@@ -1657,7 +1955,7 @@ compile properly on IRIX 5.3.
=head1 COPYRIGHT
-Copyright (c) 1995-8 Paul Marquess. All rights reserved. This program
+Copyright (c) 1995-1999 Paul Marquess. All rights reserved. This program
is free software; you can redistribute it and/or modify it under the
same terms as Perl itself.
@@ -1683,12 +1981,13 @@ Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
=head1 SEE ALSO
-L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>
+L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
+L<dbmfilter>
=head1 AUTHOR
The DB_File interface was written by Paul Marquess
-E<lt>pmarquess@bfsec.bt.co.ukE<gt>.
+E<lt>Paul.Marquess@btinternet.comE<gt>.
Questions about the DB system itself may be addressed to
E<lt>db@sleepycat.com<gt>.