summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: Sergey Poznyakoff <gray@gnu.org> 2022-01-06 14:49:30 +0200
committer: Sergey Poznyakoff <gray@gnu.org> 2022-01-06 15:36:10 +0200
commit: b8c3d13fd821e90a190cc5cfad3a9e17f18aa416 (patch)
tree: 1beb84078d59d5800ade4de7972671c2b823b8b7 /tests
parent: 42276af5bd0a48512a23f83db021b6e832c3fb92 (diff)
download: gdbm-b8c3d13fd821e90a190cc5cfad3a9e17f18aa416.tar.gz
Speed up flushing the bucket cache on disk
The implementation of _gdbm_cache_flush becomes prohibitively inefficient during extensive updates of large databases. The bug was reported at https://github.com/Perl/perl5/issues/19306.

To fix it, make sure that all changed cache entries are placed at the head of the cache_mru list, forming a contiguous sequence. This way a potentially long iteration over all cache entries can be cut off at the first entry with ca_changed == FALSE.

This commit also gets rid of several superfluous fields in struct gdbm_file_info:

- cache_entry
  Not needed, because the most recently used cache entry (cache_mru) is always the current one.
- bucket_changed
  dbf->cache_mru->ca_changed reflects the status of the current bucket.
- second_changed
  Not needed because _gdbm_cache_flush, which flushes all changed buckets, is now invoked unconditionally by _gdbm_end_update (and also whenever dbf->cache_mru changes).

* src/gdbmdefs.h (struct gdbm_file_info): Remove cache_entry. The current cache entry is cache_mru. Remove bucket_changed, and second_changed. All uses changed.
* src/proto.h (_gdbm_current_bucket_changed): New inline function.
* src/bucket.c (_gdbm_cache_flush): Assume all changed elements form a contiguous sequence beginning with dbf->cache_mru.
(set_cache_entry): Remove. All callers changed.
(lru_link_elem,lru_unlink_elem): Update dbf->bucket as necessary.
(cache_lookup): If the obtained bucket is not changed and is going to become current, flush all changed cache elements.
* src/update.c (_gdbm_end_update): Call _gdbm_cache_flush unconditionally.
* src/findkey.c: Use dbf->cache_mru instead of the removed dbf->cache_entry.
* src/gdbmseq.c: Likewise.
* tools/gdbmshell.c (_gdbm_print_bucket_cache): Likewise.
* src/falloc.c: Use _gdbm_current_bucket_changed to mark the current bucket as changed.
* src/gdbmstore.c: Likewise.
* src/gdbmdelete.c: Likewise. Use _gdbm_current_bucket_changed.
* tests/gtcacheopt.c: Fix typo.
* tests/gtload.c: New option: -cachesize
Diffstat (limited to 'tests')
-rw-r--r-- tests/gtcacheopt.c 2
-rw-r--r-- tests/gtload.c 15
2 files changed, 15 insertions, 2 deletions
diff --git a/tests/gtcacheopt.c b/tests/gtcacheopt.c
index 3f23714..088e6d8 100644
--- a/tests/gtcacheopt.c
+++ b/tests/gtcacheopt.c
@@ -202,7 +202,7 @@ main (int argc, char **argv)
i = CACHE_SIZE;
if (gdbm_setopt (dbf, GDBM_SETCACHESIZE, &i, sizeof (i)))
{
- fprintf (stderr, "GDBM_GETCACHESIZE: %s\n", gdbm_strerror (gdbm_errno));
+ fprintf (stderr, "GDBM_SETCACHESIZE: %s\n", gdbm_strerror (gdbm_errno));
return 1;
}
diff --git a/tests/gtload.c b/tests/gtload.c
index d843111..1fcafb2 100644
--- a/tests/gtload.c
+++ b/tests/gtload.c
@@ -96,6 +96,7 @@ main (int argc, char **argv)
int recover = 0;
gdbm_recovery rcvr;
int rcvr_flags = 0;
+ size_t cache_size = 0;
progname = canonical_progname (argv[0]);
#ifdef GDBM_DEBUG_ENABLE
@@ -135,6 +136,8 @@ main (int argc, char **argv)
delim = arg[7];
else if (strcmp (arg, "-recover") == 0)
recover = 1;
+ else if (strncmp (arg, "-cachesize=", 11) == 0)
+ cache_size = read_size (arg + 11);
else if (strcmp (arg, "-verbose") == 0)
{
verbose = 1;
@@ -213,7 +216,17 @@ main (int argc, char **argv)
gdbm_strerror (gdbm_errno));
exit (1);
}
- }
+ }
+ if (cache_size)
+ {
+ if (gdbm_setopt (dbf, GDBM_SETCACHESIZE, &cache_size,
+ sizeof (cache_size)))
+ {
+ fprintf (stderr, "GDBM_SETCACHESIZE failed: %s\n",
+ gdbm_strerror (gdbm_errno));
+ exit (1);
+ }
+ }
if (verbose)
{