summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org>2021-11-07 20:18:31 +0200
committerSergey Poznyakoff <gray@gnu.org>2021-11-14 13:29:42 +0200
commit322f6e9c3c13d3079306628b96864f728f87be62 (patch)
tree997592aae23da27552e9cbc3549dd58b2e044e5b
parent45d2ce49d88bc59bbc84dc738a55142205a6e2ce (diff)
downloadgdbm-322f6e9c3c13d3079306628b96864f728f87be62.tar.gz
Switch to hash table cache implementation
* src/cachetree.c: Remove. * src/Makefile.am: Remove cachetree.c * doc/gdbm.texi: Document the changes. * src/bucket.c (cache_tab_lookup_slot) (cache_tab_resize): New function. (cache_elem_new): Initialize ca_coll. (cache_elem_free, cache_lookup) (_gdbm_cache_init,_gdbm_cache_free): Rewrite with hash-based cache lookup. (_gdbm_fetch_data): Remove unused function. * src/gdbm.h.in (GDBM_GETDBFORMAT, GDBM_GETDIRDEPTH) (GDBM_GETBUCKETSIZE, GDBM_GETCACHEAUTO, GDBM_SETCACHEAUTO): New option codes. * src/gdbmdefs.h (cache_node): Remove. (cache_elem): Remove ca_node. Add ca_coll (collision resolution pointer). (gdbm_file_info): New members: cache_auto, cache_bits, cache. * src/gdbmopen.c (gdbm_fd_open): Change cache initialization. * src/gdbmsetopt.c (GDBM_GETDBFORMAT,GDBM_GETDIRDEPTH) (GDBM_GETBUCKETSIZE,GDBM_GETCACHEAUTO) (GDBM_SETCACHEAUTO): Implement new options. (setopt_gdbm_getflags): Reflect the state of GDBM_CLOEXEC and GDBM_NUMSYNC. * src/proto.h (_gdbm_fetch_data,_gdbm_cache_tree_alloc) (_gdbm_cache_tree_destroy,_gdbm_cache_tree_delete) (_gdbm_cache_tree_lookup): Remove protos. * src/recover.c (_gdbm_finish_transfer): Restore original cache settings. * tests/Makefile.am: Add new test. * tests/testsuite.at: Likewise. * tests/gtcacheopt.c: New file. * tests/setopt02.at: New test case.
-rw-r--r--doc/gdbm.texi141
-rw-r--r--src/Makefile.am1
-rw-r--r--src/bucket.c343
-rw-r--r--src/cachetree.c484
-rw-r--r--src/gdbm.h.in11
-rw-r--r--src/gdbmdefs.h27
-rw-r--r--src/gdbmopen.c15
-rw-r--r--src/gdbmsetopt.c88
-rw-r--r--src/proto.h15
-rw-r--r--src/recover.c9
-rw-r--r--tests/.gitignore2
-rw-r--r--tests/Makefile.am3
-rw-r--r--tests/gtcacheopt.c273
-rw-r--r--tests/setopt02.at20
-rw-r--r--tests/testsuite.at1
15 files changed, 716 insertions, 717 deletions
diff --git a/doc/gdbm.texi b/doc/gdbm.texi
index a738c85..50d06ba 100644
--- a/doc/gdbm.texi
+++ b/doc/gdbm.texi
@@ -2083,20 +2083,19 @@ success. The global variable @code{gdbm_errno} will be set upon failure.
The valid options are:
-@table @asis
-@kwindex GDBM_CACHESIZE
-@kwindex GDBM_SETCACHESIZE
-@item GDBM_SETCACHESIZE
-@itemx GDBM_CACHESIZE
-@kwindex GDBM_CACHE_AUTO
+@defvr {Option} GDBM_SETCACHESIZE
+@defvrx {Option} GDBM_CACHESIZE
Set the size of the internal bucket cache. The @var{value} should
point to a @code{size_t} holding the desired cache size, or the
-constant @code{GDBM_CACHE_AUTO}, to set the best cache size
+constant @code{GDBM_CACHE_AUTO}, to adjust the cache size
automatically.
-By default, a newly open database is configured to adapt the cache
-size to the number of index buckets in the database file. This
-provides for the best performance.
+By default, a newly open database is configured to dynamically
+accommodate the cache size to the number of index buckets in the
+database file. This provides for the best performance.
+
+If another @var{value} is set, it is adjusted to the nearest larger
+power of two.
Use this option if you wish to limit the memory usage at the expense
of performance. If you chose to do so, please bear in mind that cache
@@ -2110,41 +2109,84 @@ gdbm_bucket_count (dbf, &bn);
ret = gdbm_setopt (dbf, GDBM_SETCACHESIZE, &bn, sizeof (bn));
@end example
-To set the best cache size, use the constant @code{GDBM_CACHE_AUTO}:
+To request the automatically adjustable cache size, use the constant
+@code{GDBM_CACHE_AUTO}:
@example
size_t bn = GDBM_CACHE_AUTO;
ret = gdbm_setopt (dbf, GDBM_SETCACHESIZE, &bn, sizeof (bn));
@end example
+@end defvr
+
+@defvr {Option} GDBM_GETCACHESIZE
+Return the actual size of the internal bucket cache. The @var{value}
+should point to a @code{size_t} variable, where the size will be
+stored.
+@end defvr
+
+@defvr {Option} GDBM_SETCACHEAUTO
+Controls whether the cache size will be adjusted automatically as
+needed. The @var{value} should point to an integer: @code{TRUE} to
+enable automatical cache adjustment and @code{FALSE} to disable it.
+
+The following two calls are equivalent:
+
+@example
+int t = TRUE;
+gdbm_setopt (dbf, GDBM_SETCACHEAUTO, &t, sizeof (t));
+
+size_t n = GDBM_CACHE_AUTO;
+gdbm_setopt (dbf, GDBM_SETCACHESIZE, &n, sizeof (n));
+@end example
+@end defvr
-@kwindex GDBM_GETCACHESIZE
-@item GDBM_GETCACHESIZE
-Return the size of the internal bucket cache. The @var{value} should
-point to a @code{size_t} variable, where the size will be stored.
+@defvr {Option} GDBM_GETCACHEAUTO
+Return the state of the automatic cache size adjustment. The
+@var{value} should point to an integer which, upon successful return,
+will have the value @code{TRUE} if the automatic cache size adjustment
+is enabled and @code{FALSE} otherwise.
+@end defvr
-@kwindex GDBM_GETFLAGS
-@item GDBM_GETFLAGS
+@defvr {Option} GDBM_GETFLAGS
Return the flags describing the state of the database. The @var{value} should
point to an @code{int} variable where to store the flags. On success,
its value will be similar to the flags used when opening the database
(@pxref{Open, gdbm_open}), except that it will reflect the current state
(which may have been altered by another calls to @code{gdbm_setopt}).
+@end defvr
-@kwindex GDBM_FASTMODE
-@item GDBM_FASTMODE
+@defvr {Option} GDBM_GETDBFORMAT
+Return the database format. The @var{value} should point to an
+@code{int} variable. Upon successful return, it will be set to
+@samp{0} if the database is in standard format and @code{GDBM_NUMSYNC}
+if it is in extended format. @xref{Database format}.
+@end defvr
+
+@defvr {Option} GDBM_GETDIRDEPTH
+Returns the @dfn{directory depth}: the number of initial (most significant)
+bits in hash value that are interpreted as index to the directory. The
+actual directory size can be computed as @code{1 << @var{value}}.
+
+The @var{value} argument should point to an @code{int}.
+@end defvr
+
+@defvr {Option} GDBM_GETBUCKETSIZE
+Returns the @dfn{bucket capacity}: maximum number of keys per bucket
+(@code{int}).
+@end defvr
+
+@defvr {Option} GDBM_FASTMODE
Enable or disable the @dfn{fast writes mode}, i.e.@: writes without
subsequent synchronization. The @var{value} should point
to an integer: @code{TRUE} to enable fast mode, and @code{FALSE} to
disable it.
This option is retained for compatibility with previous versions of
-@command{GDBM}. Its effect is the reverse of @code{GDBM_SETSYNCMODE}
-(see below).
+@command{GDBM}. Its effect is the reverse of @code{GDBM_SETSYNCMODE}.
+@end defvr
-@kwindex GDBM_SETSYNCMODE
-@kwindex GDBM_SYNCMODE
-@item GDBM_SETSYNCMODE
-@itemx GDBM_SYNCMODE
+@defvr {Option} GDBM_SETSYNCMODE
+@defvrx {Option} GDBM_SYNCMODE
Turn on or off file system synchronization operations. This
setting defaults to off. The @var{value} should point
to an integer: @code{TRUE} to turn synchronization on, and @code{FALSE} to
@@ -2156,16 +2198,15 @@ as calling @code{GDBM_FASTMODE} with @code{FALSE}.
The @code{GDBM_SYNCMODE} option is provided for compatibility with
earlier versions.
+@end defvr
-@kwindex GDBM_GETSYNCMODE
-@item GDBM_GETSYNCMODE
+@defvr {Option} GDBM_GETSYNCMODE
Return the current synchronization status. The @var{value} should
point to an @code{int} where the status will be stored.
+@end defvr
-@kwindex GDBM_SETCENTFREE
-@kwindex GDBM_CENTFREE
-@item GDBM_SETCENTFREE
-@itemx GDBM_CENTFREE
+@defvr {Option} GDBM_SETCENTFREE
+@defvrx {Option} GDBM_CENTFREE
@emph{NOTICE: This feature is still under study.}
Set central free block pool to either on or off. The default is off,
@@ -2177,11 +2218,10 @@ turn central block pool on, and @code{FALSE} to turn it off.
The @code{GDBM_CENTFREE} option is provided for compatibility with
earlier versions.
+@end defvr
-@kwindex GDBM_SETCOALESCEBLKS
-@kwindex GDBM_COALESCEBLKS
-@item GDBM_SETCOALESCEBLKS
-@itemx GDBM_COALESCEBLKS
+@defvr {Option} GDBM_SETCOALESCEBLKS
+@defvrx {Option} GDBM_COALESCEBLKS
@emph{NOTICE: This feature is still under study.}
Set free block merging to either on or off. The default is off, which
@@ -2191,38 +2231,38 @@ a @acronym{CPU} expensive process with time, though, especially if
used in conjunction with GDBM_CENTFREE. The @var{value} should point
to an integer: @code{TRUE} to turn free block merging on, and @code{FALSE} to
turn it off.
+@end defvr
-@kwindex GDBM_GETCOALESCEBLKS
-@item GDBM_GETCOALESCEBLKS
+@defvr {Option} GDBM_GETCOALESCEBLKS
Return the current status of free block merging. The @var{value} should
point to an @code{int} where the status will be stored.
+@end defvr
-@kwindex GDBM_SETMAXMAPSIZE
-@item GDBM_SETMAXMAPSIZE
+@defvr {Option} GDBM_SETMAXMAPSIZE
Sets maximum size of a memory mapped region. The @var{value} should
point to a value of type @code{size_t}, @code{unsigned long} or
@code{unsigned}. The actual value is rounded to the nearest page
boundary (the page size is obtained from
@code{sysconf(_SC_PAGESIZE)}).
+@end defvr
-@kwindex GDBM_GETMAXMAPSIZE
-@item GDBM_GETMAXMAPSIZE
+@defvr {Option} GDBM_GETMAXMAPSIZE
Return the maximum size of a memory mapped region. The @var{value} should
point to a value of type @code{size_t} where to return the data.
+@end defvr
-@kwindex GDBM_SETMMAP
-@item GDBM_SETMMAP
+@defvr {Option} GDBM_SETMMAP
Enable or disable memory mapping mode. The @var{value} should point
to an integer: @code{TRUE} to enable memory mapping or @code{FALSE} to
disable it.
+@end defvr
-@kwindex GDBM_GETMMAP
-@item GDBM_GETMMAP
+@defvr {Option} GDBM_GETMMAP
Check whether memory mapping is enabled. The @var{value} should point
to an integer where to return the status.
+@end defvr
-@kwindex GDBM_GETDBNAME
-@item GDBM_GETDBNAME
+@defvr {Option} GDBM_GETDBNAME
Return the name of the database disk file. The @var{value} should
point to a variable of type @code{char**}. A pointer to the newly
allocated copy of the file name will be placed there. The caller is
@@ -2243,12 +2283,11 @@ else
free (name);
@}
@end example
+@end defvr
-@kwindex GDBM_GETBLOCKSIZE
-@item GDBM_GETBLOCKSIZE
+@defvr {Option} GDBM_GETBLOCKSIZE
Return the block size in bytes. The @var{value} should point to @code{int}.
-
-@end table
+@end defvr
@node Locking
@chapter File Locking
diff --git a/src/Makefile.am b/src/Makefile.am
index 74103ee..afd9abd 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -42,7 +42,6 @@ lib_LTLIBRARIES = libgdbm.la
libgdbm_la_LIBADD = @LTLIBINTL@
libgdbm_la_SOURCES = \
- cachetree.c\
gdbmclose.c\
gdbmcount.c\
gdbmdelete.c\
diff --git a/src/bucket.c b/src/bucket.c
index 4a5a04c..2165ce1 100644
--- a/src/bucket.c
+++ b/src/bucket.c
@@ -18,9 +18,10 @@
#include "autoconf.h"
#include "gdbmdefs.h"
+#include <stdint.h>
#include <limits.h>
-#define GDBM_MAX_DIR_SIZE INT_MAX
+#define GDBM_MAX_DIR_SIZE INT32_MAX
#define GDBM_MAX_DIR_HALF (GDBM_MAX_DIR_SIZE / 2)
/* Initializing a new hash buckets sets all bucket entries to -1 hash value. */
@@ -49,6 +50,44 @@ set_cache_entry (GDBM_FILE dbf, cache_elem *elem)
}
+/* Bucket cache table functions */
+
+/* Hash an off_t word into an index of width NBITS. */
+static size_t
+adrhash (off_t adr, size_t nbits)
+{
+ adr ^= adr >> (GDBM_HASH_BITS + 1 - nbits);
+ return ((265443576910ul * adr) & 0xffffffff) >> (GDBM_HASH_BITS + 1 - nbits);
+}
+
+/*
+ * Return a pointer to the cache table slot for bucket address ADR.
+ * Never returns NULL.
+ */
+static cache_elem **
+cache_tab_lookup_slot (GDBM_FILE dbf, off_t adr)
+{
+ cache_elem **cache = dbf->cache;
+ size_t h = adrhash (adr, dbf->cache_bits);
+
+ if (cache[h])
+ {
+ if (cache[h]->ca_adr != adr)
+ {
+ cache_elem *prev = cache[h], *p = prev->ca_coll;
+ while (p)
+ {
+ if (p->ca_adr == adr)
+ break;
+ prev = p;
+ p = prev->ca_coll;
+ }
+ return &prev->ca_coll;
+ }
+ }
+ return &cache[h];
+}
+
/* LRU list management */
/* Link ELEM after REF in DBF cache. If REF is NULL, link at head */
@@ -126,11 +165,8 @@ cache_elem_new (GDBM_FILE dbf, off_t adr)
elem->ca_data.hash_val = -1;
elem->ca_data.elem_loc = -1;
- elem->ca_prev = elem->ca_next = NULL;
+ elem->ca_prev = elem->ca_next = elem->ca_coll = NULL;
elem->ca_hits = 0;
- elem->ca_node = NULL;
-
- dbf->cache_num++;
return elem;
}
@@ -139,11 +175,25 @@ cache_elem_new (GDBM_FILE dbf, off_t adr)
static void
cache_elem_free (GDBM_FILE dbf, cache_elem *elem)
{
- _gdbm_cache_tree_delete (dbf->cache_tree, elem->ca_node);
+ size_t h = adrhash (elem->ca_adr, dbf->cache_bits);
+ cache_elem **pp;
+
lru_unlink_elem (dbf, elem);
+
elem->ca_next = dbf->cache_avail;
dbf->cache_avail = elem;
dbf->cache_num--;
+
+ pp = &dbf->cache[h];
+ while (*pp)
+ {
+ if (*pp == elem)
+ {
+ *pp = (*pp)->ca_coll;
+ break;
+ }
+ pp = &(*pp)->ca_coll;
+ }
}
/* Free the least recently used cache entry. */
@@ -159,66 +209,134 @@ cache_lru_free (GDBM_FILE dbf)
cache_elem_free (dbf, last);
return 0;
}
-
+
+/*
+ * Round up V to the next highest power of 2 and compute log2 of
+ * it using De Brujin sequences.
+ * See http://supertech.csail.mit.edu/papers/debruijn.pdf
+ */
+static unsigned
+log2i (unsigned v)
+{
+ static const int dbp[32] =
+ {
+ 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+ 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+ };
+
+ v--;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ v++;
+ return dbp[(uint32_t)(v * 0x077CB531U) >> 27];
+}
+
+static int
+cache_tab_resize (GDBM_FILE dbf, int bits)
+{
+ size_t size = 1 << bits;
+
+ if (!dbf->cache || size != dbf->cache_size)
+ {
+ size_t n = size * sizeof (dbf->cache[0]);
+ cache_elem **p, *elem;
+
+ /* Flush existing cache */
+ if (_gdbm_cache_flush (dbf))
+ return -1;
+
+ /* Reallocate it */
+ p = realloc (dbf->cache, n);
+ if (!p)
+ {
+ GDBM_SET_ERRNO (dbf, GDBM_MALLOC_ERROR, FALSE);
+ return -1;
+ }
+ dbf->cache = p;
+ dbf->cache_size = size;
+ dbf->cache_bits = bits;
+
+ memset (dbf->cache, 0, n);
+
+ /* Rehash and free surplus elements */
+ for (elem = dbf->cache_lru; elem; )
+ {
+ cache_elem *prev = elem->ca_prev;
+ elem->ca_coll = NULL;
+ if (size < dbf->cache_num)
+ {
+ cache_elem_free (dbf, elem);
+ }
+ else
+ {
+ p = cache_tab_lookup_slot (dbf, elem->ca_adr);
+ if (*p)
+ abort ();// shouldn't happen
+ *p = elem;
+ }
+ elem = prev;
+ }
+ }
+ return 0;
+}
+
+enum
+ {
+ cache_found,
+ cache_new,
+ cache_failure
+ };
+
static int
cache_lookup (GDBM_FILE dbf, off_t adr, cache_elem *ref, cache_elem **ret_elem)
{
int rc;
- cache_node *node;
- cache_elem *elem;
- int retrying = 0;
+ cache_elem **elp, *elem;
dbf->cache_access_count++;
- retry:
- rc = _gdbm_cache_tree_lookup (dbf->cache_tree, adr, &node);
- switch (rc)
+
+ elp = cache_tab_lookup_slot (dbf, adr);
+
+ if (*elp != NULL)
{
- case node_found:
- elem = node->elem;
+ elem = *elp;
elem->ca_hits++;
dbf->cache_hits++;
lru_unlink_elem (dbf, elem);
- break;
-
- case node_new:
- elem = cache_elem_new (dbf, adr);
- if (!elem)
- {
- _gdbm_cache_tree_delete (dbf->cache_tree, node);
- return node_failure;
- }
- elem->ca_node = node;
- node->elem = elem;
-
- if (dbf->cache_size != GDBM_CACHE_AUTO
- && dbf->cache_num > dbf->cache_size
- && cache_lru_free (dbf))
+ rc = cache_found;
+ }
+ else if ((elem = cache_elem_new (dbf, adr)) == NULL)
+ return cache_failure;
+ else
+ {
+ rc = cache_new;
+
+ if (dbf->cache_num == dbf->cache_size)
{
- cache_elem_free (dbf, elem);
- return node_failure;
+ if (dbf->cache_auto && dbf->cache_bits < dbf->header->dir_bits &&
+ cache_tab_resize (dbf, dbf->cache_bits + 1) == 0)
+ {
+ /* Table has been reallocated, recompute the slot. */
+ elp = cache_tab_lookup_slot (dbf, adr);
+ }
+ else if (cache_lru_free (dbf))
+ {
+ rc = cache_failure;
+ }
}
- break;
- case node_failure:
- if (!retrying)
+ if (rc == cache_new)
{
- if (errno == ENOMEM)
- {
- /* Release the last recently used element and retry. */
- if (cache_lru_free (dbf))
- return node_failure;
- retrying = 1;
- goto retry;
- }
+ *elp = elem;
+ dbf->cache_num++;
}
- return node_failure;
-
- default:
- abort ();
}
-
lru_link_elem (dbf, elem, ref);
- *ret_elem = elem;
+ if (rc != cache_failure)
+ *ret_elem = elem;
return rc;
}
@@ -254,10 +372,10 @@ _gdbm_get_bucket (GDBM_FILE dbf, int dir_index)
switch (cache_lookup (dbf, bucket_adr, NULL, &elem))
{
- case node_found:
+ case cache_found:
break;
- case node_new:
+ case cache_new:
/* Position the file pointer */
file_pos = gdbm_file_seek (dbf, bucket_adr, SEEK_SET);
if (file_pos != bucket_adr)
@@ -306,7 +424,7 @@ _gdbm_get_bucket (GDBM_FILE dbf, int dir_index)
break;
- case node_failure:
+ case cache_failure:
return -1;
}
set_cache_entry (dbf, elem);
@@ -343,14 +461,15 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
off_t dir_end;
new_bits = dbf->bucket->bucket_bits + 1;
+
/* Allocate two new buckets */
adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
switch (cache_lookup (dbf, adr_0, dbf->cache_mru, &newcache[0]))
{
- case node_new:
+ case cache_new:
break;
- case node_found:
+ case cache_found:
/* should not happen */
GDBM_DEBUG (GDBM_DEBUG_ERR,
"%s: bucket found where it should not",
@@ -358,7 +477,7 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
GDBM_SET_ERRNO (dbf, GDBM_BUCKET_CACHE_CORRUPTED, TRUE);
return -1;
- case node_failure:
+ case cache_failure:
return -1;
}
_gdbm_new_bucket (dbf, newcache[0]->ca_bucket, new_bits);
@@ -366,10 +485,10 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
switch (cache_lookup (dbf, adr_1, newcache[0], &newcache[1]))
{
- case node_new:
+ case cache_new:
break;
- case node_found:
+ case cache_found:
/* should not happen */
GDBM_DEBUG (GDBM_DEBUG_ERR,
"%s: bucket found where it should not",
@@ -377,7 +496,7 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
GDBM_SET_ERRNO (dbf, GDBM_BUCKET_CACHE_CORRUPTED, TRUE);
return -1;
- case node_failure:
+ case cache_failure:
return -1;
}
_gdbm_new_bucket (dbf, newcache[1]->ca_bucket, new_bits);
@@ -508,6 +627,7 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
newcache[1]->ca_bucket->bucket_avail,
&newcache[1]->ca_bucket->av_count,
dbf->coalesce_blocks);
+
lru_unlink_elem (dbf, newcache[0]);
lru_link_elem (dbf, newcache[0], NULL);
set_cache_entry (dbf, newcache[0]);
@@ -554,33 +674,37 @@ _gdbm_write_bucket (GDBM_FILE dbf, cache_elem *ca_entry)
return 0;
}
-/* Cache manipulation functions. */
+/* Cache manipulation interface functions. */
+
+#define INIT_CACHE_BITS 9
/* Initialize the bucket cache. */
int
_gdbm_cache_init (GDBM_FILE dbf, size_t size)
{
- if (size == dbf->cache_size)
- return 0;
-
- if (size != GDBM_CACHE_AUTO)
+ int bits;
+ int cache_auto;
+
+ if (size == GDBM_CACHE_AUTO)
{
- while (size < dbf->cache_num)
- {
- /* Flush the least recently used element */
- cache_elem *elem = dbf->cache_lru;
- if (elem->ca_changed)
- {
- if (_gdbm_write_bucket (dbf, elem))
- return -1;
- }
- cache_elem_free (dbf, elem);
- }
+ cache_auto = TRUE;
+ bits = dbf->cache ? dbf->cache_bits : INIT_CACHE_BITS;
+ }
+ else if (size > GDBM_DIR_COUNT (dbf) ||
+ size > SIZE_T_MAX / sizeof (dbf->cache[0]))
+ {
+ GDBM_SET_ERRNO (dbf, GDBM_OPT_BADVAL, FALSE);
+ return -1;
+ }
+ else
+ {
+ cache_auto = FALSE;
+ bits = log2i (size < 4 ? 4 : size);
}
-
- dbf->cache_size = size;
- return 0;
+ dbf->cache_auto = cache_auto;
+
+ return cache_tab_resize (dbf, bits);
}
/* Free the bucket cache */
@@ -591,7 +715,8 @@ _gdbm_cache_free (GDBM_FILE dbf)
while (dbf->cache_lru)
cache_elem_free (dbf, dbf->cache_lru);
- _gdbm_cache_tree_destroy (dbf->cache_tree);
+ free (dbf->cache);
+ dbf->cache = NULL;
while ((elem = dbf->cache_avail) != NULL)
{
dbf->cache_avail = elem->ca_next;
@@ -615,69 +740,7 @@ _gdbm_cache_flush (GDBM_FILE dbf)
}
return 0;
}
-
-int
-_gdbm_fetch_data (GDBM_FILE dbf, off_t off, size_t size, void *buf)
-{
- off_t bucket_adr;
- off_t file_pos;
- int rc;
- cache_elem *elem;
- char *dst = buf;
-
- bucket_adr = (off / dbf->header->bucket_size)
- * dbf->header->bucket_size;
- off -= bucket_adr;
- while (size)
- {
- size_t nbytes;
-
- switch (cache_lookup (dbf, bucket_adr, dbf->cache_mru, &elem))
- {
- case node_found:
- break;
-
- case node_new:
- /* Position the file pointer */
- file_pos = gdbm_file_seek (dbf, bucket_adr, SEEK_SET);
- if (file_pos != bucket_adr)
- {
- GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE);
- cache_elem_free (dbf, elem);
- _gdbm_fatal (dbf, _("lseek error"));
- return -1;
- }
- /* Read the bucket. */
- rc = _gdbm_full_read (dbf, elem->ca_bucket,
- dbf->header->bucket_size);
- if (rc)
- {
- GDBM_DEBUG (GDBM_DEBUG_ERR,
- "%s: error reading data bucket: %s",
- dbf->name, gdbm_db_strerror (dbf));
- dbf->need_recovery = TRUE;
- cache_elem_free (dbf, elem);
- _gdbm_fatal (dbf, gdbm_db_strerror (dbf));
- return -1;
- }
- break;
-
- case node_failure:
- return -1;
- }
-
- nbytes = dbf->header->bucket_size - off;
- if (nbytes > size)
- nbytes = size;
- memcpy (dst, (char*) elem->ca_bucket + off, nbytes);
- dst += nbytes;
- size -= nbytes;
- bucket_adr++;
- off = 0;
- }
- return 0;
-}
void
gdbm_get_cache_stats (GDBM_FILE dbf,
diff --git a/src/cachetree.c b/src/cachetree.c
deleted file mode 100644
index de5a5c1..0000000
--- a/src/cachetree.c
+++ /dev/null
@@ -1,484 +0,0 @@
-/* cachetree.c - Implementation of the red-black tree for cache lookups. */
-
-/* This file is part of GDBM, the GNU data base manager.
- Copyright (C) 2019-2021 Free Software Foundation, Inc.
-
- GDBM is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GDBM is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with GDBM. If not, see <http://www.gnu.org/licenses/>. */
-
-#include "autoconf.h"
-#include "gdbmdefs.h"
-
-enum cache_node_color { RED, BLACK };
-
-struct cache_tree
-{
- cache_node *root; /* Root of the tree */
- cache_node *avail; /* List of available nodes, linked by parent field */
-};
-
-/* Allocate and return a new node. Pick the head item from the avail
- list and update the avail pointer. If the list is empty, malloc
- a new node.
- All members in the returned node are filled with 0.
-*/
-static cache_node *
-rbt_node_alloc (cache_tree *tree)
-{
- cache_node *n;
-
- n = tree->avail;
- if (n)
- tree->avail = n->parent;
- else
- {
- n = malloc (sizeof (*n));
- if (!n)
- return NULL;
- }
- memset (n, 0, sizeof (*n));
- return n;
-}
-
-/* Return the node N to the avail list in TREE. */
-static void
-rbt_node_dealloc (cache_tree *tree, cache_node *n)
-{
- n->parent = tree->avail;
- tree->avail = n;
-}
-
-/* Red-black tree properties:
- 1. Each node is either red or black.
- 2. The root node is black.
- 3. All leaves are black and contain no data.
- 4. Every red node has two children, and both are black.
- IOW, the parent of every red node is black.
- 5. All paths from any given node to its leaf nodes contain the same
- number of black nodes.
- */
-
-/* Auxiliary functions for accessing nodes. */
-
-/* Return the grandparent node of N.
- Prerequisite: N may not be root.
-*/
-static inline cache_node *
-grandparent (cache_node *n)
-{
- return n->parent->parent;
-}
-
-/* Return the sibling node of N.
- Prerequisite: N may not be root.
-*/
-static inline cache_node *
-sibling (cache_node *n)
-{
- return (n == n->parent->left) ? n->parent->right : n->parent->left;
-}
-
-/* Return the uncle node of N.
- Prerequisite: N must be at least 2 nodes away from root.
-*/
-static inline cache_node *
-uncle (cache_node *n)
-{
- return sibling (n->parent);
-}
-
-/* Returns the color of the node N.
- Empty leaves are represented by NULL, therefore NULL is assumed to
- be black (see property 3).
-*/
-static inline enum cache_node_color
-node_color (cache_node *n)
-{
- return n == NULL ? BLACK : n->color;
-}
-
-/* Replace the OLDN with NEWN.
- Does not modify OLDN. */
-static inline void
-replace_node (cache_tree *tree, cache_node *oldn, cache_node *newn)
-{
- if (oldn->parent == NULL)
- tree->root = newn;
- else if (oldn == oldn->parent->left)
- oldn->parent->left = newn;
- else
- oldn->parent->right = newn;
-
- if (newn != NULL)
- newn->parent = oldn->parent;
-}
-
-/* Rotate the TREE left over the node N. */
-static inline void
-rotate_left (cache_tree *tree, cache_node *n)
-{
- cache_node *right = n->right;
- replace_node (tree, n, right);
- n->right = right->left;
- if (right->left != NULL)
- right->left->parent = n;
- right->left = n;
- n->parent = right;
-}
-
-/* Rotate the TREE right over the node N. */
-static inline void
-rotate_right (cache_tree *tree, cache_node *n)
-{
- cache_node *left = n->left;
- replace_node (tree, n, left);
- n->left = left->right;
- if (left->right != NULL)
- left->right->parent = n;
- left->right = n;
- n->parent = left;
-}
-
-/* Node deletion */
-static inline void
-rbt_delete_fixup (cache_tree *tree, cache_node *n)
-{
- while (1)
- {
- if (n->parent == NULL)
- {
- /* If N has become the root node, deletion resulted in removing
- one black node (prior root) from every path, so all properties
- still hold.
- */
- return;
- }
- else
- {
- /* If N has a red sibling, change the colors of the parent and
- sibling and rotate about the parent. Thus, the sibling becomes
- grandparent and we can proceed to the next case.
- */
- if (node_color (sibling (n)) == RED)
- {
- n->parent->color = RED;
- sibling (n)->color = BLACK;
- if (n == n->parent->left)
- rotate_left (tree, n->parent);
- else
- rotate_right (tree, n->parent);
- }
-
- /* If the parent, sibling and nephews are all black, paint the
- sibling red. This means one black node was removed from all
- paths passing through the parent, so we recurse to the beginning
- of the loop with parent as the argument to restore the properties.
- This is the only branch that loops.
- */
- if (node_color (n->parent) == BLACK
- && node_color (sibling (n)) == BLACK
- && node_color (sibling (n)->left) == BLACK
- && node_color (sibling (n)->right) == BLACK)
- {
- sibling (n)->color = RED;
- n = n->parent;
- continue;
- }
- else
- {
- /* If the sibling and nephews are black but the parent is red,
- swap the colors of the sibling and parent. The properties
- are then restored.
- */
- if (node_color (n->parent) == RED
- && node_color (sibling (n)) == BLACK
- && node_color (sibling (n)->left) == BLACK
- && node_color (sibling (n)->right) == BLACK)
- {
- sibling (n)->color = RED;
- n->parent->color = BLACK;
- }
- else
- {
- /* N is the left child of its parent, its sibling is black,
- and the sibling's right child is black. Swap the colors
- of the sibling and its left sibling and rotate right
- over the sibling.
- */
- if (n == n->parent->left
- && node_color (sibling (n)) == BLACK
- && node_color (sibling (n)->left) == RED
- && node_color (sibling (n)->right) == BLACK)
- {
- sibling (n)->color = RED;
- sibling (n)->left->color = BLACK;
- rotate_right (tree, sibling (n));
- }
- else if (n == n->parent->right
- && node_color (sibling (n)) == BLACK
- && node_color (sibling (n)->right) == RED
- && node_color (sibling (n)->left) == BLACK)
- {
- /* The mirror case is handled similarly. */
- sibling (n)->color = RED;
- sibling (n)->right->color = BLACK;
- rotate_left (tree, sibling (n));
- }
- /* N is the left child of its parent, its sibling is black
- and the sibling's right child is red. Swap the colors
- of the parent and sibling, paint the sibling's right
- child black and rotate left at the parent. Similarly
- for the mirror case. This achieves the following:
-
- . A black node is added to all paths passing through N;
- . A black node is removed from all paths through the
- sibling's red child.
- . The latter is painted black which restores missing
- black node in all paths through the sibling's red child.
-
- Another sibling's child becomes a child of N's parent
- during the rotation and is therefore not affected.
- */
- sibling (n)->color = node_color (n->parent);
- n->parent->color = BLACK;
- if (n == n->parent->left)
- {
- sibling (n)->right->color = BLACK;
- rotate_left (tree, n->parent);
- }
- else
- {
- sibling (n)->left->color = BLACK;
- rotate_right (tree, n->parent);
- }
- }
- }
- }
- break;
- }
-}
-
-/* Remove N from the TREE. */
-void
-_gdbm_cache_tree_delete (cache_tree *tree, cache_node *n)
-{
- cache_node *child;
-
- /* If N has both left and right children, reduce the problem to
- the node with only one child. To do so, find the in-order
- predecessor of N, copy its value (elem) to N and then delete
- the predecessor. */
- if (n->left != NULL && n->right != NULL)
- {
- cache_node *p;
- for (p = n->left; p->right; p = p->right)
- ;
- n->adr = p->adr;
- n->elem = p->elem;
- n->elem->ca_node = n;
- n = p;
- }
-
- /* N has only one child. Select it. */
- child = n->left ? n->left : n->right;
- if (node_color (n) == BLACK)
- {
- n->color = node_color (child);
- rbt_delete_fixup (tree, n);
- }
- replace_node (tree, n, child);
- if (n->parent == NULL && child != NULL) /* root should be black */
- child->color = BLACK;
-
- /* Return N to the avail pool */
- rbt_node_dealloc (tree, n);
-}
-
-/* Insertion */
-static inline void
-rbt_insert_fixup (cache_tree *tree, cache_node *n)
-{
- while (1)
- {
- if (n->parent == NULL)
- {
- /* Node was inserted at the root of the tree.
- The root node must be black (property 2). Changing its color
- to black would add one black node to every path, which means
- the property 5 would remain satisfied. So we simply paint the
- node black.
- */
- n->color = BLACK;
- }
- else if (node_color (n->parent) == BLACK)
- {
- /* The node has black parent.
- All properties are satisfied. There's no need to change anything.
- */
- return;
- }
- else if (node_color (uncle (n)) == RED)
- {
- /* The uncle node is red.
- Repaint the parent and uncle black and the grandparent red. This
- would satisfy 4. However, if the grandparent is root, this would
- violate the property 2. So we repaint the grandparent by
- re-entering the fixup loop with grandparent as the node.
- This is the only branch that loops.
- */
- n->parent->color = BLACK;
- uncle (n)->color = BLACK;
- n = grandparent (n);
- n->color = RED;
- continue;
- }
- else
- {
- /* The new node is the right child of its parent and the parent is
- the left child of the grandparent. Rotate left about the parent.
- Mirror case: The new node is the left child of its parent and the
- parent is the right child of the grandparent. Rotate right about
- the parent. This fixes the properties for the rbt_insert_5.
- */
- if (n == n->parent->right && n->parent == grandparent (n)->left)
- {
- rotate_left (tree, n->parent);
- n = n->left;
- }
- else if (n == n->parent->left && n->parent == grandparent (n)->right)
- {
- rotate_right (tree, n->parent);
- n = n->right;
- }
-
- /* The new node is the left child of its parent and the parent is the
- left child of the grandparent. Rotate right about the grandparent.
- Mirror case: The new node is the right child of its parent and the
- parent
- is the right child of the grandparent. Rotate left.
- */
- n->parent->color = BLACK;
- grandparent (n)->color = RED;
- if (n == n->parent->left && n->parent == grandparent (n)->left)
- {
- rotate_right (tree, grandparent (n));
- }
- else
- {
- rotate_left (tree, grandparent (n));
- }
- }
- break;
- }
-}
-
-/* Look up the node with the given ADR.
- If found, put it in *RETVAL and return node_found.
-
- Otherwise, create a new node and insert it at the appropriate place in
- the tree. Store the address of the newly created node in *RETVAL and
- return node_new.
-
- If a new node cannot be created (memory exhausted), return node_failure.
-*/
-int
-_gdbm_cache_tree_lookup (cache_tree *tree, off_t adr, cache_node **retval)
-{
- int res;
- cache_node *node, *parent = NULL;
-
- node = tree->root;
- while (node)
- {
- if (adr == node->adr)
- break;
- parent = node;
- if (adr < node->adr)
- node = node->left;
- else
- node = node->right;
- }
-
- if (node)
- {
- res = node_found;
- }
- else
- {
- node = rbt_node_alloc (tree);
- if (!node)
- return node_failure;
- if (!parent)
- tree->root = node;
- else if (adr < parent->adr)
- parent->left = node;
- else
- parent->right = node;
- node->adr = adr;
- node->parent = parent;
- rbt_insert_fixup (tree, node);
- res = node_new;
- }
- *retval = node;
- return res;
-}
-
-/* Interface functions */
-
-/* Create a cache tree structure for the database file DBF. */
-cache_tree *
-_gdbm_cache_tree_alloc (void)
-{
- cache_tree *t = malloc (sizeof (*t));
- if (t)
- {
- t->root = NULL;
- t->avail = NULL;
- }
- return t;
-}
-
-/* Free the memory used by the TREE. */
-void
-_gdbm_cache_tree_destroy (cache_tree *tree)
-{
- cache_node *n;
-
- /* Free the allocated tree nodes. Traverse the tree as if it were
- a simple binary tree: there's no use preserving RBT properties now.
- */
- while ((n = tree->root) != NULL)
- {
- if (!n->left)
- tree->root = n->right;
- else if (!n->right)
- tree->root = n->left;
- else
- {
- cache_node *p;
- for (p = n->left; p->right; p = p->right)
- ;
- p->right = n->right;
- tree->root = n->left;
- }
- free (n);
- }
-
- /* Free the avail list */
- while ((n = tree->avail) != NULL)
- {
- tree->avail = n->parent;
- free (n);
- }
- free (tree);
-}
diff --git a/src/gdbm.h.in b/src/gdbm.h.in
index 041837d..e371e9b 100644
--- a/src/gdbm.h.in
+++ b/src/gdbm.h.in
@@ -88,9 +88,16 @@ extern "C" {
# define GDBM_GETMAXMAPSIZE 14 /* Get maximum mapped memory size */
# define GDBM_GETDBNAME 15 /* Return database file name */
# define GDBM_GETBLOCKSIZE 16 /* Return block size */
-
+# define GDBM_GETDBFORMAT 17 /* Return the database format */
+# define GDBM_GETDIRDEPTH 18 /* Directory depth: number of initial (most
+ significant) bits in hash interpreted as
+ index to the directory. */
+# define GDBM_GETBUCKETSIZE 19 /* Get number of elements per bucket */
+# define GDBM_GETCACHEAUTO 20 /* Get the value of cache auto-adjustment */
+# define GDBM_SETCACHEAUTO 21 /* Set the value of cache auto-adjustment */
+
# define GDBM_CACHE_AUTO 0
-
+
typedef @GDBM_COUNT_T@ gdbm_count_t;
/* The data and key structure. */
diff --git a/src/gdbmdefs.h b/src/gdbmdefs.h
index 98f55c3..fba9d2d 100644
--- a/src/gdbmdefs.h
+++ b/src/gdbmdefs.h
@@ -179,15 +179,6 @@ typedef struct
} data_cache_elem;
typedef struct cache_elem cache_elem;
-typedef struct cache_node cache_node;
-
-struct cache_node
-{
- cache_node *left, *right, *parent;
- int color;
- off_t adr;
- cache_elem *elem;
-};
struct cache_elem
{
@@ -195,19 +186,16 @@ struct cache_elem
char ca_changed; /* Data in the bucket changed. */
data_cache_elem ca_data; /* Cached datum */
cache_elem *ca_prev, /* Previous element in LRU list. */
- *ca_next; /* Next elements in LRU list.
+ *ca_next, /* Next elements in LRU list.
If the item is in cache_avail list, only
ca_next is used. It points to the next
available element. */
+ *ca_coll; /* Next element in a collision sequence */
size_t ca_hits; /* Number of times this element was requested */
- cache_node *ca_node; /* Points back to the RBT node for this
- element. */
hash_bucket ca_bucket[1];/* Associated bucket (dbf->header->bucket_size
bytes). */
};
-typedef struct cache_tree cache_tree;
-
/* This final structure contains all main memory based information for
a gdbm file. This allows multiple gdbm files to be opened at the same
time by one program. */
@@ -243,6 +231,9 @@ struct gdbm_file_info
/* Last error was fatal, the database needs recovery */
unsigned need_recovery :1;
+ /* Automatic bucket cache size */
+ unsigned cache_auto :1;
+
/* Last GDBM error number */
gdbm_error last_error;
/* Last system error number */
@@ -275,10 +266,12 @@ struct gdbm_file_info
off_t *dir;
/* The bucket cache. */
- size_t cache_size; /* Cache capacity */
+ int cache_bits; /* Address bits used for compting bucket hash */
+ size_t cache_size; /* Cache capacity: 2^cache_bits */
size_t cache_num; /* Actual number of elements in cache */
- /* Cache elements form a binary search tree. */
- cache_tree *cache_tree;
+ /* Cache hash table. */
+ cache_elem **cache;
+
/* Cache elements are linked in a list sorted by relative access time */
cache_elem *cache_mru; /* Most recently used element - head of the list */
cache_elem *cache_lru; /* Last recently used element - tail of the list */
diff --git a/src/gdbmopen.c b/src/gdbmopen.c
index 4b154be..3994dd4 100644
--- a/src/gdbmopen.c
+++ b/src/gdbmopen.c
@@ -271,10 +271,6 @@ gdbm_fd_open (int fd, const char *file_name, int block_size,
dbf->bucket = NULL;
dbf->header = NULL;
- /* Initialize cache */
- dbf->cache_tree = _gdbm_cache_tree_alloc ();
- _gdbm_cache_init (dbf, DEFAULT_CACHESIZE);
-
dbf->file_size = -1;
dbf->memory_mapping = FALSE;
@@ -640,6 +636,17 @@ gdbm_fd_open (int fd, const char *file_name, int block_size,
}
+ if (_gdbm_cache_init (dbf, DEFAULT_CACHESIZE))
+ {
+ GDBM_DEBUG (GDBM_DEBUG_ERR|GDBM_DEBUG_OPEN,
+ "%s: error initializing cache: %s",
+ dbf->name, gdbm_db_strerror (dbf));
+ if (!(flags & GDBM_CLOERROR))
+ dbf->desc = -1;
+ SAVE_ERRNO (gdbm_close (dbf));
+ return NULL;
+ }
+
#if HAVE_MMAP
if (!(flags & GDBM_NOMMAP))
{
diff --git a/src/gdbmsetopt.c b/src/gdbmsetopt.c
index ae66a17..15c7ada 100644
--- a/src/gdbmsetopt.c
+++ b/src/gdbmsetopt.c
@@ -74,6 +74,32 @@ setopt_gdbm_getcachesize (GDBM_FILE dbf, void *optval, int optlen)
return 0;
}
+static int
+setopt_gdbm_setcacheauto (GDBM_FILE dbf, void *optval, int optlen)
+{
+ int n;
+
+ if ((n = getbool (optval, optlen)) == -1)
+ {
+ GDBM_SET_ERRNO (dbf, GDBM_OPT_BADVAL, FALSE);
+ return -1;
+ }
+ dbf->cache_auto = n;
+ return 0;
+}
+
+static int
+setopt_gdbm_getcacheauto (GDBM_FILE dbf, void *optval, int optlen)
+{
+ if (!optval || optlen != sizeof (int))
+ {
+ GDBM_SET_ERRNO (dbf, GDBM_OPT_BADVAL, FALSE);
+ return -1;
+ }
+ *(int*) optval = !!dbf->cache_auto;
+ return 0;
+}
+
/* Obsolete form of GDBM_SETSYNCMODE. */
static int
setopt_gdbm_fastmode (GDBM_FILE dbf, void *optval, int optlen)
@@ -254,14 +280,24 @@ setopt_gdbm_getflags (GDBM_FILE dbf, void *optval, int optlen)
else
{
int flags = dbf->read_write;
+
if (!dbf->fast_write)
flags |= GDBM_SYNC;
+
if (!dbf->file_locking)
flags |= GDBM_NOLOCK;
+
if (!dbf->memory_mapping)
flags |= GDBM_NOMMAP;
else if (dbf->mmap_preread)
flags |= GDBM_PREREAD;
+
+ if (dbf->cloexec)
+ flags |= GDBM_CLOEXEC;
+
+ if (dbf->header->header_magic == GDBM_NUMSYNC_MAGIC)
+ flags |= GDBM_NUMSYNC;
+
*(int*) optval = flags;
}
return 0;
@@ -300,6 +336,53 @@ setopt_gdbm_getblocksize (GDBM_FILE dbf, void *optval, int optlen)
GDBM_SET_ERRNO (dbf, GDBM_OPT_BADVAL, FALSE);
return -1;
}
+
+static int
+setopt_gdbm_getdbformat (GDBM_FILE dbf, void *optval, int optlen)
+{
+ if (optval && optlen == sizeof (int))
+ {
+ switch (dbf->header->header_magic)
+ {
+ case GDBM_OMAGIC:
+ case GDBM_MAGIC:
+ *(int*)optval = 0;
+ break;
+
+ case GDBM_NUMSYNC_MAGIC:
+ *(int*)optval = GDBM_NUMSYNC;
+ }
+ }
+
+ GDBM_SET_ERRNO (dbf, GDBM_OPT_BADVAL, FALSE);
+ return -1;
+}
+
+static int
+setopt_gdbm_getdirdepth (GDBM_FILE dbf, void *optval, int optlen)
+{
+ if (optval && optlen == sizeof (int))
+ {
+ *(int*) optval = dbf->header->dir_bits;
+ return 0;
+ }
+
+ GDBM_SET_ERRNO (dbf, GDBM_OPT_BADVAL, FALSE);
+ return -1;
+}
+
+static int
+setopt_gdbm_getbucketsize (GDBM_FILE dbf, void *optval, int optlen)
+{
+ if (optval && optlen == sizeof (int))
+ {
+ *(int*) optval = dbf->header->bucket_elems;
+ return 0;
+ }
+
+ GDBM_SET_ERRNO (dbf, GDBM_OPT_BADVAL, FALSE);
+ return -1;
+}
typedef int (*setopt_handler) (GDBM_FILE, void *, int);
@@ -322,6 +405,11 @@ static setopt_handler setopt_handler_tab[] = {
[GDBM_GETFLAGS] = setopt_gdbm_getflags,
[GDBM_GETDBNAME] = setopt_gdbm_getdbname,
[GDBM_GETBLOCKSIZE] = setopt_gdbm_getblocksize,
+ [GDBM_GETDBFORMAT] = setopt_gdbm_getdbformat,
+ [GDBM_GETDIRDEPTH] = setopt_gdbm_getdirdepth,
+ [GDBM_GETBUCKETSIZE] = setopt_gdbm_getbucketsize,
+ [GDBM_GETCACHEAUTO] = setopt_gdbm_getcacheauto,
+ [GDBM_SETCACHEAUTO] = setopt_gdbm_setcacheauto,
};
int
diff --git a/src/proto.h b/src/proto.h
index a5d6d10..68b2bc3 100644
--- a/src/proto.h
+++ b/src/proto.h
@@ -20,7 +20,6 @@
/* From bucket.c */
void _gdbm_new_bucket (GDBM_FILE, hash_bucket *, int);
int _gdbm_get_bucket (GDBM_FILE, int);
-int _gdbm_fetch_data (GDBM_FILE dbf, off_t off, size_t size, void *buf);
int _gdbm_split_bucket (GDBM_FILE, int);
int _gdbm_write_bucket (GDBM_FILE, cache_elem *);
@@ -103,10 +102,6 @@ int _gdbm_dump (GDBM_FILE dbf, FILE *fp);
/* From recover.c */
int _gdbm_next_bucket_dir (GDBM_FILE dbf, int bucket_dir);
-/* cachetree.c */
-cache_tree *_gdbm_cache_tree_alloc (void);
-void _gdbm_cache_tree_destroy (cache_tree *tree);
-void _gdbm_cache_tree_delete (cache_tree *tree, struct cache_node *n);
/* avail.c */
int gdbm_avail_block_validate (GDBM_FILE dbf, avail_block *avblk, size_t size);
@@ -116,16 +111,6 @@ int gdbm_avail_traverse (GDBM_FILE dbf,
void *data);
-/* Return codes for _gdbm_cache_tree_lookup. */
-enum
- {
- node_found, /* Returned element was found in cache. */
- node_new, /* Returned element has been created and inserted to cache */
- node_failure /* An error occurred. */
- };
-
-int _gdbm_cache_tree_lookup (cache_tree *tree, off_t adr, cache_node **retval);
-
/* I/O functions */
static inline ssize_t
gdbm_file_read (GDBM_FILE dbf, void *buf, size_t size)
diff --git a/src/recover.c b/src/recover.c
index af775ad..fac6b17 100644
--- a/src/recover.c
+++ b/src/recover.c
@@ -126,6 +126,10 @@ _gdbm_finish_transfer (GDBM_FILE dbf, GDBM_FILE new_dbf,
}
rcvr->backup_name = bkname;
}
+
+ /* Restore cache settings */
+ if (!dbf->cache_auto)
+ _gdbm_cache_init (new_dbf, dbf->cache_size);
/* Move the new file to old name. */
@@ -157,9 +161,10 @@ _gdbm_finish_transfer (GDBM_FILE dbf, GDBM_FILE new_dbf,
dbf->avail_size = new_dbf->avail_size;
dbf->xheader = new_dbf->xheader;
+ dbf->cache_bits = new_dbf->cache_bits;
dbf->cache_size = new_dbf->cache_size;
dbf->cache_num = new_dbf->cache_num;
- dbf->cache_tree = new_dbf->cache_tree;
+ dbf->cache = new_dbf->cache;
dbf->cache_mru = new_dbf->cache_mru;
dbf->cache_lru = new_dbf->cache_lru;
dbf->cache_avail = new_dbf->cache_avail;
@@ -184,7 +189,7 @@ _gdbm_finish_transfer (GDBM_FILE dbf, GDBM_FILE new_dbf,
/* Make sure the new database is all on disk. */
gdbm_file_sync (dbf);
-
+
/* Force the right stuff for a correct bucket cache. */
return _gdbm_get_bucket (dbf, 0);
}
diff --git a/tests/.gitignore b/tests/.gitignore
index 558b2ee..0addc86 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -5,6 +5,7 @@ Makefile
Makefile.in
atconfig
atlocal
+closerr
d_creat_ce
dtdel
dtdump
@@ -13,6 +14,7 @@ dtload
fdop
g_open_ce
g_reorg_ce
+gtcacheopt
gtconv
gtdel
gtdump
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 337cbed..1323cab 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -77,6 +77,7 @@ TESTSUITE_AT = \
fetch01.at\
setopt00.at\
setopt01.at\
+ setopt02.at\
version.at
TESTSUITE = $(srcdir)/testsuite
@@ -114,6 +115,7 @@ check_PROGRAMS = \
fdop\
g_open_ce\
g_reorg_ce\
+ gtcacheopt\
gtconv\
gtdel\
gtdump\
@@ -138,4 +140,3 @@ dtdel_LDADD = ../src/libgdbm.la ../compat/libgdbm_compat.la
d_creat_ce_LDADD = ../src/libgdbm.la ../compat/libgdbm_compat.la
SUBDIRS = gdbmtool
-
diff --git a/tests/gtcacheopt.c b/tests/gtcacheopt.c
new file mode 100644
index 0000000..413329e
--- /dev/null
+++ b/tests/gtcacheopt.c
@@ -0,0 +1,273 @@
+/*
+ NAME
+ gtcacheopt - test GDBM_GETCACHESIZE and GDBM_SETCACHESIZE options.
+
+ SYNOPSIS
+ gtcacheopt [-v]
+
+ DESCRIPTION
+ Reducing the cache size should retain most recently used elements
+ and ensure correct rehashing.
+
+ Operation:
+
+ 1) Create new database.
+ 2) Generate at least 10 full buckets,
+ 3) Check GDBM_GETCACHESIZE.
+ 4) Get cache statistics for the first 8 most recently
+ used cache elements.
+ 5) Set cache size to 8.
+ 6) Retrieve each of the bucket pointed to by stats obtained in 4.
+ 7) Verify that (6) retrieved buckets from cache.
+
+ OPTIONS
+ -v Verbosely print what's being done.
+
+ EXIT CODE
+ 0 success
+ 1 failure
+ 2 usage error
+
+ LICENSE
+ This file is part of GDBM test suite.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+
+ GDBM is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GDBM is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GDBM. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include "autoconf.h"
+#include "gdbmdefs.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <unistd.h>
+
+char dbname[] = "a.db";
+int verbose = 0;
+
+#define NBUCKETS 10
+#define CACHE_SIZE 8
+#define DATASIZE (4*IGNORE_SIZE)
+
+static void
+test_getcachesize (GDBM_FILE dbf, size_t expected_size,
+ struct gdbm_cache_stat *pstat, size_t *pnstat)
+{
+ size_t size;
+ int cache_auto;
+
+ if (gdbm_setopt (dbf, GDBM_GETCACHESIZE, &size, sizeof (size)))
+ {
+ fprintf (stderr, "GDBM_GETCACHESIZE: %s\n", gdbm_strerror (gdbm_errno));
+ exit (1);
+ }
+
+ if (verbose)
+ printf ("size = %zu\n", size);
+
+ if (expected_size && expected_size != size)
+ {
+ fprintf (stderr, "expected_size != size (%zu != %zu)\n",
+ expected_size, size);
+ exit (1);
+ }
+
+ if (gdbm_setopt (dbf, GDBM_GETCACHEAUTO, &cache_auto, sizeof (cache_auto)))
+ {
+ fprintf (stderr, "GDBM_GETCACHESIZE: %s\n", gdbm_strerror (gdbm_errno));
+ exit (1);
+ }
+
+ if (verbose)
+ printf ("cache_auto = %d\n", cache_auto);
+
+ if (expected_size && cache_auto != 0)
+ {
+ fprintf (stderr, "cache_auto != 0\n");
+ exit (1);
+ }
+
+ if (pstat)
+ {
+ gdbm_get_cache_stats (dbf, NULL, NULL, pnstat, pstat, CACHE_SIZE);
+ }
+}
+
+static int
+dir_index (GDBM_FILE dbf, off_t adr)
+{
+ int i;
+
+ for (i = 0; i < dbf->header->dir_size; i++)
+ if (dbf->dir[i] == adr)
+ return i;
+ fprintf (stderr, "%lu: can't find bucket in directory\n", adr);
+ exit (1);
+}
+
+int
+main (int argc, char **argv)
+{
+ GDBM_FILE dbf;
+ datum key, content;
+
+ int nkeys;
+
+ char data[DATASIZE];
+
+ int i;
+
+ struct gdbm_cache_stat stat[2][CACHE_SIZE];
+ size_t nstat[2];
+
+ while ((i = getopt (argc, argv, "v")) != EOF)
+ {
+ switch (i)
+ {
+ case 'v':
+ verbose++;
+ break;
+
+ default:
+ return 2;
+ }
+ }
+
+ /*
+ * 1) Create new database.
+ */
+ if (verbose)
+ printf ("creating database\n");
+
+ dbf = gdbm_open (dbname, GDBM_MIN_BLOCK_SIZE, GDBM_NEWDB, 0644, NULL);
+ if (!dbf)
+ {
+ fprintf (stderr, "gdbm_open: %s\n", gdbm_strerror (gdbm_errno));
+ return 1;
+ }
+
+ /*
+ * 2) Generate 10 full buckets or key/value pairs are created.
+ */
+
+ /* Initialize keys. */
+ nkeys = NBUCKETS * dbf->header->bucket_elems;
+
+ /* Initialize content */
+ for (i = 0; i < DATASIZE; i++)
+ data[i] = i+1;
+ content.dsize = DATASIZE;
+ content.dptr = data;
+
+ /* Populate the database. */
+ if (verbose)
+ printf ("populating database (%d keys)\n", nkeys);
+ key.dsize = sizeof (i);
+ key.dptr = (char*) &i;
+ for (i = 0; i < nkeys; i++)
+ {
+ if (gdbm_store (dbf, key, content, 0) != 0)
+ {
+ fprintf (stderr, "%d: item not inserted: %s\n",
+ i, gdbm_db_strerror (dbf));
+ gdbm_close (dbf);
+ return 1;
+ }
+ }
+
+ /*
+ * 3) Check if the value retrieved by GDBM_GETCACHESIZE matches the
+ * expected one and
+ * 4) save cache statistics for the first CACHE_SIZE most recently used
+ * cache elements.
+ */
+ test_getcachesize (dbf, 0, stat[0], &nstat[0]);
+
+ if (verbose)
+ printf ("setting new cache size\n");
+
+ /*
+ * 5) Set new cache size.
+ */
+ i = CACHE_SIZE;
+ if (gdbm_setopt (dbf, GDBM_SETCACHESIZE, &i, sizeof (i)))
+ {
+ fprintf (stderr, "GDBM_GETCACHESIZE: %s\n", gdbm_strerror (gdbm_errno));
+ return 1;
+ }
+
+ if (verbose)
+ printf ("verifying cache (pass 1)\n");
+
+ /*
+ * 6) Retrieve each of the bucket pointed to by stats obtained in 4.
+ *
+ * To retrieve a bucket, the corresponding directory index must be known.
+ * That index is obtained using linear search in the database file directory.
+ *
+ * Buckets must be retrieved in reverse order, so that the LRU cache
+ * remains in the same order after the operation (each retrieval cyclically
+ * shifts elements in the queue).
+ */
+ test_getcachesize (dbf, CACHE_SIZE, stat[1], &nstat[1]);
+
+ for (i = CACHE_SIZE - 1; i >= 0; i--)
+ {
+ if (stat[0][i].adr != stat[1][i].adr)
+ {
+ fprintf (stderr, "%d: address mismatch\n", i);
+ return 1;
+ }
+ if (_gdbm_get_bucket (dbf, dir_index (dbf, stat[0][i].adr)))
+ {
+ fprintf (stderr, "%d: _gdbm_get_bucket: %s\n", i,
+ gdbm_db_strerror (dbf));
+ return 1;
+ }
+ }
+
+ if (verbose)
+ printf ("getting cache statistics\n");
+ test_getcachesize (dbf, CACHE_SIZE, stat[0], &nstat[0]);
+
+ gdbm_close (dbf);
+
+ /*
+ * 7) Verify that the buckets were retrieved from cache.
+ *
+ * To do so, compare addresses and hit counts in statistic buffers
+ * stat[0] and stat[1]. Each pair of elements must have the same
+ * bucket address. Hit counts must differ by 1.
+ */
+ if (verbose)
+ printf ("verifying cache (pass 2)\n");
+ for (i = 0; i < CACHE_SIZE; i++)
+ {
+ if (stat[0][i].adr != stat[1][i].adr)
+ {
+ fprintf (stderr, "%d: address mismatch\n", i);
+ return 1;
+ }
+ if (stat[0][i].hits != stat[1][i].hits + 1)
+ {
+ fprintf (stderr, "%d: hit count mismatch: %zu != %zu\n", i,
+ stat[0][i].hits, stat[1][i].hits);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+
+
diff --git a/tests/setopt02.at b/tests/setopt02.at
new file mode 100644
index 0000000..500fbee
--- /dev/null
+++ b/tests/setopt02.at
@@ -0,0 +1,20 @@
+# This file is part of GDBM. -*- autoconf -*-
+# Copyright (C) 2011-2021 Free Software Foundation, Inc.
+#
+# GDBM is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# GDBM is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GDBM. If not, see <http://www.gnu.org/licenses/>. */
+
+AT_SETUP([GDBM_GETCACHESIZE/GDBM_SETCACHESIZE])
+AT_KEYWORDS([setopt setopt02 cachesize])
+AT_CHECK([gtcacheopt])
+AT_CLEANUP
diff --git a/tests/testsuite.at b/tests/testsuite.at
index 8437851..bfc9747 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -69,6 +69,7 @@ AT_BANNER([DB options])
m4_include([setopt00.at])
m4_include([setopt01.at])
+m4_include([setopt02.at])
AT_BANNER([Cloexec])