summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org>2019-11-12 07:29:45 +0200
committerSergey Poznyakoff <gray@gnu.org>2019-11-12 08:11:07 +0200
commitdc176a5cdc841e05876d5e7b52cfb1b7bac2d333 (patch)
tree8a12ecb5057e25a8fdd0ebdc436718b19b6d78c6
parent4fb2326a4ac0e6f45c21f7651b1c87317567fd82 (diff)
downloadgdbm-dc176a5cdc841e05876d5e7b52cfb1b7bac2d333.tar.gz
Rewrite bucket cache
The new bucket cache uses the least recently used replacement policy (instead of the least recently read, implemented previously). It also allows for quick bucket lookups by the corresponding disk address. To this effect the cache entries form a red-black tree sorted by bucket address. Additionally, data buckets are also cached. * README: Describe the new branch. * src/bucket.c: Rewrite cache support. * src/cachetree.c: New file. * src/Makefile.am: Add new file. * src/findkey.c (_gdbm_read_entry): Use _gdbm_fetch_data. This ensures data pages are cached as well as buckets. * src/gdbm.h.in (GDBM_BUCKET_CACHE_CORRUPTED): New error code. (gdbm_cache_stat): New struct. (gdbm_get_cache_stats): New proto. * src/gdbmclose.c (gdbm_close): Call _gdbm_cache_free to dispose of the cache. * src/gdbmdefs.h (cache_elem_color): New data type. (cache_elem): New members: ca_left, ca_right, ca_node, and ca_hits. (cache_tree): New typedef. (gdbm_file_info): Remove bucket_cache and last_read. New fields: cache_num, cache_tree, cache_mru, cache_lru, cache_avail, cache_access_count. * src/gdbmerrno.c: Handle GDBM_BUCKET_CACHE_CORRUPTED. * src/gdbmopen.c (gdbm_fd_open): Change cache initialization. (_gdbm_init_cache, _gdbm_cache_entry_invalidate: Remove. * src/gdbmsetopt.c (setopt_gdbm_setcachesize): Cache can be re-initialized on the fly. * src/gdbmtool.c: Change bucket printing routines. * src/proto.h (_gdbm_read_bucket_at): Remove. (_gdbm_fetch_data,_gdbm_cache_init,_gdbm_cache_free) (_gdbm_cache_flush,_gdbm_cache_elem_new) (_gdbm_cache_tree_alloc,_gdbm_cache_tree_destroy) (_gdbm_cache_tree_delete,_gdbm_rbt_remove_node) (_gdbm_cache_tree_lookup): New protos. (_gdbm_init_cache,_gdbm_cache_entry_invalidate): Remove. * src/recover.c (_gdbm_finish_transfer): Adapt to the new cache structure. * src/update.c: Likewise. * tests/setopt00.at: Fix second GDBM_SETCACHESIZE test.
-rw-r--r--README6
-rw-r--r--src/Makefile.am1
-rw-r--r--src/bucket.c609
-rw-r--r--src/cachetree.c528
-rw-r--r--src/findkey.c25
-rw-r--r--src/gdbm.h.in19
-rw-r--r--src/gdbmclose.c12
-rw-r--r--src/gdbmdefs.h40
-rw-r--r--src/gdbmerrno.c3
-rw-r--r--src/gdbmopen.c51
-rw-r--r--src/gdbmsetopt.c8
-rw-r--r--src/gdbmtool.c27
-rw-r--r--src/proto.h27
-rw-r--r--src/recover.c24
-rw-r--r--src/update.c14
-rw-r--r--tests/setopt00.at2
16 files changed, 1057 insertions, 339 deletions
diff --git a/README b/README
index 747d1a2..c938120 100644
--- a/README
+++ b/README
@@ -1,6 +1,12 @@
GNU dbm README
See the end of file for copying conditions.
+* Note
+
+This is an experimental branch of GNU DBM. Its goal is to evaluate
+possibilities of improving the caching scheme. Please don't use it
+in production.
+
* Introduction
This file contains brief information about configuring, testing
diff --git a/src/Makefile.am b/src/Makefile.am
index e34bb7e..18c80d9 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -41,6 +41,7 @@ lib_LTLIBRARIES = libgdbm.la
libgdbm_la_LIBADD = @LTLIBINTL@
libgdbm_la_SOURCES = \
+ cachetree.c\
gdbmclose.c\
gdbmcount.c\
gdbmdelete.c\
diff --git a/src/bucket.c b/src/bucket.c
index b88972e..4bce594 100644
--- a/src/bucket.c
+++ b/src/bucket.c
@@ -56,7 +56,155 @@ gdbm_dir_entry_valid_p (GDBM_FILE dbf, int dir_index)
&& dir_index < GDBM_DIR_COUNT (dbf)
&& dbf->dir[dir_index] >= dbf->header->block_size;
}
-
+
+static void
+set_cache_entry (GDBM_FILE dbf, cache_elem *elem)
+{
+ dbf->cache_entry = elem;
+ dbf->bucket = dbf->cache_entry->ca_bucket;
+}
+
+
+/* LRU list management */
+
+/* Link ELEM after REF in DBF cache. If REF is NULL, link at head */
+static void
+lru_link_elem (GDBM_FILE dbf, cache_elem *elem, cache_elem *ref)
+{
+ if (!ref)
+ {
+ elem->ca_prev = NULL;
+ elem->ca_next = dbf->cache_mru;
+ if (dbf->cache_mru)
+ dbf->cache_mru->ca_prev = elem;
+ else
+ dbf->cache_lru = elem;
+ dbf->cache_mru = elem;
+ }
+ else
+ {
+ cache_elem *x;
+
+ elem->ca_prev = ref;
+ elem->ca_next = ref->ca_next;
+ if ((x = ref->ca_next))
+ x->ca_prev = elem;
+ else
+ dbf->cache_lru = elem;
+ ref->ca_next = elem;
+ }
+}
+
+/* Unlink ELEM from the list of cache elements in DBF. */
+static void
+lru_unlink_elem (GDBM_FILE dbf, cache_elem *elem)
+{
+ cache_elem *x;
+
+ if ((x = elem->ca_prev))
+ x->ca_next = elem->ca_next;
+ else
+ dbf->cache_mru = elem->ca_next;
+ if ((x = elem->ca_next))
+ x->ca_prev = elem->ca_prev;
+ else
+ dbf->cache_lru = elem->ca_prev;
+ elem->ca_prev = elem->ca_next = NULL;
+}
+
+/* Creates and returns new cache element for DBF. The element is initialized,
+ but not linked to the LRU list.
+ Return NULL on error.
+*/
+cache_elem *
+_gdbm_cache_elem_new (GDBM_FILE dbf, off_t adr)
+{
+ cache_elem *elem;
+
+ elem = dbf->cache_avail;
+ if (elem)
+ {
+ dbf->cache_avail = elem->ca_next;
+ }
+ else
+ {
+ elem = calloc (1, sizeof (*elem));
+
+ if (!elem)
+ return NULL;
+
+ elem->ca_bucket = malloc (dbf->header->bucket_size);
+ if (!elem->ca_bucket)
+ {
+ free (elem);
+ return NULL;
+ }
+ }
+
+ elem->ca_adr = adr;
+ elem->ca_changed = FALSE;
+ elem->ca_data.hash_val = -1;
+ elem->ca_data.elem_loc = -1;
+
+ elem->ca_prev = elem->ca_next = NULL;
+ elem->ca_hits = 0;
+ elem->ca_node = NULL;
+
+ dbf->cache_num++;
+
+ return elem;
+}
+
+/* Frees element ELEM. Unlinks it from the cache tree and LRU list. */
+static void
+cache_elem_free (GDBM_FILE dbf, cache_elem *elem)
+{
+ _gdbm_rbt_remove_node (dbf->cache_tree, elem->ca_node);
+ lru_unlink_elem (dbf, elem);
+ elem->ca_next = dbf->cache_avail;
+ dbf->cache_avail = elem;
+ dbf->cache_num--;
+}
+
+static int
+cache_lookup (GDBM_FILE dbf, off_t adr, cache_elem *ref, cache_elem **ret_elem)
+{
+ int rc;
+ cache_elem *elem;
+
+ dbf->cache_access_count++;
+ rc = _gdbm_cache_tree_lookup (dbf->cache_tree, adr, &elem);
+ switch (rc)
+ {
+ case node_found:
+ elem->ca_hits++;
+ lru_unlink_elem (dbf, elem);
+ break;
+
+ case node_new:
+ if (dbf->cache_num > dbf->cache_size)
+ {
+ cache_elem *last = dbf->cache_lru;
+ if (last->ca_changed)
+ {
+ if (_gdbm_write_bucket (dbf, last))
+ {
+ cache_elem_free (dbf, elem);
+ return node_failure;
+ }
+ }
+ cache_elem_free (dbf, last);
+ }
+ break;
+
+ default:
+ return rc;
+ }
+ lru_link_elem (dbf, elem, ref);
+ *ret_elem = elem;
+ return rc;
+}
+
/* Find a bucket for DBF that is pointed to by the bucket directory from
location DIR_INDEX. The bucket cache is first checked to see if it
is already in memory. If not, a bucket may be tossed to read the new
@@ -70,8 +218,9 @@ _gdbm_get_bucket (GDBM_FILE dbf, int dir_index)
int rc;
off_t bucket_adr; /* The address of the correct hash bucket. */
off_t file_pos; /* The return address for lseek. */
- int index; /* Loop index. */
-
+ hash_bucket *bucket;
+ cache_elem *elem;
+
if (!gdbm_dir_entry_valid_p (dbf, dir_index))
{
/* FIXME: negative caching? */
@@ -82,127 +231,66 @@ _gdbm_get_bucket (GDBM_FILE dbf, int dir_index)
/* Initial set up. */
dbf->bucket_dir = dir_index;
bucket_adr = dbf->dir[dir_index];
-
- if (dbf->bucket_cache == NULL)
- {
- if (_gdbm_init_cache (dbf, DEFAULT_CACHESIZE) == -1)
- {
- _gdbm_fatal (dbf, _("couldn't init cache"));
- return -1;
- }
- }
- /* If that one is not already current, we must find it. */
- if (dbf->cache_entry->ca_adr != bucket_adr)
+ switch (cache_lookup (dbf, bucket_adr, NULL, &elem))
{
- size_t lru;
- hash_bucket *bucket;
+ case node_found:
+ break;
- /* Look in the cache. */
- for (index = 0; index < dbf->cache_size; index++)
- {
- if (dbf->bucket_cache[index].ca_adr == bucket_adr)
- {
- dbf->bucket = dbf->bucket_cache[index].ca_bucket;
- dbf->cache_entry = &dbf->bucket_cache[index];
- return 0;
- }
- }
-
- /* It is not in the cache, read it from the disk. */
-
+ case node_new:
/* Position the file pointer */
file_pos = gdbm_file_seek (dbf, bucket_adr, SEEK_SET);
if (file_pos != bucket_adr)
{
GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE);
+ cache_elem_free (dbf, elem);
_gdbm_fatal (dbf, _("lseek error"));
return -1;
}
-
- /* Flush and drop the last recently used cache entry */
- lru = (dbf->last_read + 1) % dbf->cache_size;
- if (dbf->bucket_cache[lru].ca_changed)
- {
- if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[lru]))
- return -1;
- }
- _gdbm_cache_entry_invalidate (dbf, lru);
-
+
/* Read the bucket. */
- rc = _gdbm_full_read (dbf, dbf->bucket_cache[lru].ca_bucket,
- dbf->header->bucket_size);
+ rc = _gdbm_full_read (dbf, elem->ca_bucket, dbf->header->bucket_size);
if (rc)
{
GDBM_DEBUG (GDBM_DEBUG_ERR,
"%s: error reading bucket: %s",
dbf->name, gdbm_db_strerror (dbf));
dbf->need_recovery = TRUE;
+ cache_elem_free (dbf, elem);
_gdbm_fatal (dbf, gdbm_db_strerror (dbf));
return -1;
}
+
/* Validate the bucket */
- bucket = dbf->bucket_cache[lru].ca_bucket;
+ bucket = elem->ca_bucket;
if (!(bucket->count >= 0
&& bucket->count <= dbf->header->bucket_elems
&& bucket->bucket_bits >= 0
&& bucket->bucket_bits <= dbf->header->dir_bits))
{
GDBM_SET_ERRNO (dbf, GDBM_BAD_BUCKET, TRUE);
+ cache_elem_free (dbf, elem);
return -1;
}
/* Validate bucket_avail table */
if (gdbm_bucket_avail_table_validate (dbf, bucket))
- return -1;
-
- /* Finally, store it in cache */
- dbf->last_read = lru;
- dbf->bucket_cache[lru].ca_adr = bucket_adr;
- dbf->bucket = dbf->bucket_cache[lru].ca_bucket;
- dbf->cache_entry = &dbf->bucket_cache[lru];
- dbf->cache_entry->ca_data.elem_loc = -1;
- dbf->cache_entry->ca_changed = FALSE;
- }
- return 0;
-}
-
-int
-_gdbm_read_bucket_at (GDBM_FILE dbf, off_t off, hash_bucket *bucket,
- size_t size)
-{
- off_t file_pos;
- int i;
-
- if (dbf->cache_entry && dbf->cache_entry->ca_adr == off)
- {
- memcpy (bucket, dbf->bucket, size);
- return 0;
- }
-
- /* Look in the cache. */
- for (i = 0; i < dbf->cache_size; i++)
- {
- if (dbf->bucket_cache[i].ca_adr == off)
{
- memcpy (bucket, dbf->bucket_cache[i].ca_bucket, size);
- return 0;
+ cache_elem_free (dbf, elem);
+ return -1;
}
- }
-
- /* Read the bucket. */
- file_pos = gdbm_file_seek (dbf, off, SEEK_SET);
- if (file_pos != off)
- {
- GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE);
- return -1;
- }
- if (_gdbm_full_read (dbf, bucket, size))
- {
- GDBM_DEBUG (GDBM_DEBUG_ERR,
- "%s: error reading bucket: %s",
- dbf->name, gdbm_db_strerror (dbf));
+
+ /* Update the cache */
+ elem->ca_adr = bucket_adr;
+ elem->ca_data.elem_loc = -1;
+ elem->ca_changed = FALSE;
+
+ break;
+
+ case node_failure:
return -1;
}
+ set_cache_entry (dbf, elem);
+
return 0;
}
@@ -213,86 +301,74 @@ _gdbm_read_bucket_at (GDBM_FILE dbf, off_t off, hash_bucket *bucket,
int
_gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
{
- hash_bucket *bucket[2]; /* Pointers to the new buckets. */
-
- int new_bits; /* The number of bits for the new buckets. */
- int cache_0; /* Location in the cache for the buckets. */
- int cache_1;
- off_t adr_0; /* File address of the new bucket 0. */
- off_t adr_1; /* File address of the new bucket 1. */
- avail_elem old_bucket; /* Avail Struct for the old bucket. */
-
- off_t dir_start0; /* Used in updating the directory. */
- off_t dir_start1;
- off_t dir_end;
-
- off_t *new_dir; /* Pointer to the new directory. */
- off_t dir_adr; /* Address of the new directory. */
- int dir_size; /* Size of the new directory. */
off_t old_adr[GDBM_HASH_BITS]; /* Address of the old directories. */
int old_size[GDBM_HASH_BITS]; /* Size of the old directories. */
int old_count; /* Number of old directories. */
int index; /* Used in array indexing. */
int index1; /* Used in array indexing. */
- int elem_loc; /* Location in new bucket to put element. */
- bucket_element *old_el; /* Pointer into the old bucket. */
- int select; /* Used to index bucket during movement. */
/* No directories are yet old. */
old_count = 0;
-
- if (dbf->bucket_cache == NULL)
+ while (dbf->bucket->count == dbf->header->bucket_elems)
{
- if (_gdbm_init_cache (dbf, DEFAULT_CACHESIZE) == -1)
+ int new_bits; /* The number of bits for the new buckets. */
+ cache_elem *newcache[2]; /* Location in the cache for the buckets. */
+ off_t adr_0; /* File address of the new bucket 0. */
+ off_t adr_1; /* File address of the new bucket 1. */
+ avail_elem old_bucket; /* Avail Struct for the old bucket. */
+
+ off_t dir_start0; /* Used in updating the directory. */
+ off_t dir_start1;
+ off_t dir_end;
+
+ new_bits = dbf->bucket->bucket_bits + 1;
+ /* Allocate two new buckets */
+ adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
+ switch (cache_lookup (dbf, adr_0, dbf->cache_mru, &newcache[0]))
{
- _gdbm_fatal (dbf, _("couldn't init cache"));
+ case node_new:
+ break;
+
+ case node_found:
+ /* should not happen */
+ GDBM_DEBUG (GDBM_DEBUG_ERR,
+ "%s: bucket found where it should not",
+ dbf->name);
+ GDBM_SET_ERRNO (dbf, GDBM_BUCKET_CACHE_CORRUPTED, TRUE);
return -1;
- }
- }
- while (dbf->bucket->count == dbf->header->bucket_elems)
- {
- /* Initialize the "new" buckets in the cache. */
- do
- {
- dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
- cache_0 = dbf->last_read;
- }
- while (dbf->bucket_cache[cache_0].ca_bucket == dbf->bucket);
- bucket[0] = dbf->bucket_cache[cache_0].ca_bucket;
- if (dbf->bucket_cache[cache_0].ca_changed)
- {
- if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_0]))
- return -1;
+ case node_failure:
+ return -1;
}
- do
- {
- dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
- cache_1 = dbf->last_read;
- }
- while (dbf->bucket_cache[cache_1].ca_bucket == dbf->bucket);
- bucket[1] = dbf->bucket_cache[cache_1].ca_bucket;
- if (dbf->bucket_cache[cache_1].ca_changed)
+ _gdbm_new_bucket (dbf, newcache[0]->ca_bucket, new_bits);
+
+ adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
+ switch (cache_lookup (dbf, adr_1, newcache[0], &newcache[1]))
{
- if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_1]))
- return -1;
+ case node_new:
+ break;
+
+ case node_found:
+ /* should not happen */
+ GDBM_DEBUG (GDBM_DEBUG_ERR,
+ "%s: bucket found where it should not",
+ dbf->name);
+ GDBM_SET_ERRNO (dbf, GDBM_BUCKET_CACHE_CORRUPTED, TRUE);
+ return -1;
+
+ case node_failure:
+ return -1;
}
- new_bits = dbf->bucket->bucket_bits + 1;
- _gdbm_new_bucket (dbf, bucket[0], new_bits);
- _gdbm_new_bucket (dbf, bucket[1], new_bits);
- adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
- if (adr_0 == 0)
- return -1;
- dbf->bucket_cache[cache_0].ca_adr = adr_0;
- adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
- if (adr_1 == 0)
- return -1;
- dbf->bucket_cache[cache_1].ca_adr = adr_1;
+ _gdbm_new_bucket (dbf, newcache[1]->ca_bucket, new_bits);
/* Double the directory size if necessary. */
if (dbf->header->dir_bits == dbf->bucket->bucket_bits)
{
+ off_t *new_dir; /* Pointer to the new directory. */
+ int dir_size; /* Size of the new directory. */
+ off_t dir_adr; /* Address of the new directory. */
+
if (dbf->header->dir_size >= GDBM_MAX_DIR_HALF)
{
GDBM_SET_ERRNO (dbf, GDBM_DIR_OVERFLOW, TRUE);
@@ -335,40 +411,44 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
/* Copy all elements in dbf->bucket into the new buckets. */
for (index = 0; index < dbf->header->bucket_elems; index++)
{
- old_el = &dbf->bucket->h_table[index];
- select = (old_el->hash_value >> (GDBM_HASH_BITS - new_bits)) & 1;
- elem_loc = old_el->hash_value % dbf->header->bucket_elems;
- while (bucket[select]->h_table[elem_loc].hash_value != -1)
+ bucket_element *old_el = &dbf->bucket->h_table[index];
+ hash_bucket *bucket =
+ newcache[(old_el->hash_value >> (GDBM_HASH_BITS - new_bits)) & 1]->ca_bucket;
+ int elem_loc = old_el->hash_value % dbf->header->bucket_elems;
+ while (bucket->h_table[elem_loc].hash_value != -1)
elem_loc = (elem_loc + 1) % dbf->header->bucket_elems;
- bucket[select]->h_table[elem_loc] = *old_el;
- bucket[select]->count++;
+ bucket->h_table[elem_loc] = *old_el;
+ bucket->count++;
}
- /* Allocate avail space for the bucket[1]. */
- bucket[1]->bucket_avail[0].av_adr
+ /* Allocate avail space for the newcache[1]->ca_bucket. */
+ newcache[1]->ca_bucket->bucket_avail[0].av_adr
= _gdbm_alloc (dbf, dbf->header->block_size);
- if (bucket[1]->bucket_avail[0].av_adr == 0)
+ if (newcache[1]->ca_bucket->bucket_avail[0].av_adr == 0)
return -1;
- bucket[1]->bucket_avail[0].av_size = dbf->header->block_size;
- bucket[1]->av_count = 1;
+ newcache[1]->ca_bucket->bucket_avail[0].av_size
+ = dbf->header->block_size;
+ newcache[1]->ca_bucket->av_count = 1;
- /* Copy the avail elements in dbf->bucket to bucket[0]. */
- bucket[0]->av_count = dbf->bucket->av_count;
+ /* Copy the avail elements in dbf->bucket to newcache[0]->ca_bucket. */
+ newcache[0]->ca_bucket->av_count = dbf->bucket->av_count;
index = 0;
- index1 = 0;
- if (bucket[0]->av_count == BUCKET_AVAIL)
+ if (newcache[0]->ca_bucket->av_count == BUCKET_AVAIL)
{
- /* The avail is full, move the first one to bucket[1]. */
+ /* The avail is full, move the first one to newcache[1]->ca_bucket.*/
_gdbm_put_av_elem (dbf->bucket->bucket_avail[0],
- bucket[1]->bucket_avail,
- &bucket[1]->av_count,
+ newcache[1]->ca_bucket->bucket_avail,
+ &newcache[1]->ca_bucket->av_count,
dbf->coalesce_blocks);
index = 1;
- bucket[0]->av_count--;
+ newcache[0]->ca_bucket->av_count--;
}
+
+ index1 = 0;
for (; index < dbf->bucket->av_count; index++)
{
- bucket[0]->bucket_avail[index1++] = dbf->bucket->bucket_avail[index];
+ newcache[0]->ca_bucket->bucket_avail[index1++]
+ = dbf->bucket->bucket_avail[index];
}
/* Update the directory. We have new file addresses for both buckets. */
@@ -381,10 +461,9 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
for (index = dir_start1; index < dir_end; index++)
dbf->dir[index] = adr_1;
-
/* Set changed flags. */
- dbf->bucket_cache[cache_0].ca_changed = TRUE;
- dbf->bucket_cache[cache_1].ca_changed = TRUE;
+ newcache[0]->ca_changed = TRUE;
+ newcache[1]->ca_changed = TRUE;
dbf->bucket_changed = TRUE;
dbf->directory_changed = TRUE;
dbf->second_changed = TRUE;
@@ -395,29 +474,23 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
/* Invalidate old cache entry. */
old_bucket.av_adr = dbf->cache_entry->ca_adr;
old_bucket.av_size = dbf->header->bucket_size;
- dbf->cache_entry->ca_adr = 0;
- dbf->cache_entry->ca_changed = FALSE;
+ cache_elem_free (dbf, dbf->cache_entry);
/* Set dbf->bucket to the proper bucket. */
- if (dbf->dir[dbf->bucket_dir] == adr_0)
+ if (dbf->dir[dbf->bucket_dir] != adr_0)
{
- dbf->bucket = bucket[0];
- dbf->cache_entry = &dbf->bucket_cache[cache_0];
- _gdbm_put_av_elem (old_bucket,
- bucket[1]->bucket_avail,
- &bucket[1]->av_count,
- dbf->coalesce_blocks);
- }
- else
- {
- dbf->bucket = bucket[1];
- dbf->cache_entry = &dbf->bucket_cache[cache_1];
- _gdbm_put_av_elem (old_bucket,
- bucket[0]->bucket_avail,
- &bucket[0]->av_count,
- dbf->coalesce_blocks);
+ cache_elem *t = newcache[0];
+ newcache[0] = newcache[1];
+ newcache[1] = t;
}
-
+
+ _gdbm_put_av_elem (old_bucket,
+ newcache[1]->ca_bucket->bucket_avail,
+ &newcache[1]->ca_bucket->av_count,
+ dbf->coalesce_blocks);
+ lru_unlink_elem (dbf, newcache[0]);
+ lru_link_elem (dbf, newcache[0], NULL);
+ set_cache_entry (dbf, newcache[0]);
}
/* Get rid of old directories. */
@@ -460,3 +533,159 @@ _gdbm_write_bucket (GDBM_FILE dbf, cache_elem *ca_entry)
ca_entry->ca_data.elem_loc = -1;
return 0;
}
+
+/* Cache manipulation functions. */
+
+/* Initialize the bucket cache. */
+int
+_gdbm_cache_init (GDBM_FILE dbf, size_t size)
+{
+ if (size < 3)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (size == dbf->cache_size)
+ return 0;
+
+ while (size < dbf->cache_num)
+ {
+ /* Flush the least recently used element */
+ cache_elem *elem = dbf->cache_lru;
+ if (elem->ca_changed)
+ {
+ if (_gdbm_write_bucket (dbf, elem))
+ return -1;
+ }
+ cache_elem_free (dbf, elem);
+ }
+
+ dbf->cache_size = size;
+
+ return 0;
+}
+
+/* Free the bucket cache */
+void
+_gdbm_cache_free (GDBM_FILE dbf)
+{
+ cache_elem *elem;
+
+ while (dbf->cache_lru)
+ cache_elem_free (dbf, dbf->cache_lru);
+ _gdbm_cache_tree_destroy (dbf->cache_tree);
+ while ((elem = dbf->cache_avail) != NULL)
+ {
+ dbf->cache_avail = elem->ca_next;
+ free (elem->ca_data.dptr);
+ free (elem->ca_bucket);
+ free (elem);
+ }
+}
+
+/* Flush cache content to disk. */
+int
+_gdbm_cache_flush (GDBM_FILE dbf)
+{
+ cache_elem *elem;
+ for (elem = dbf->cache_lru; elem; elem = elem->ca_prev)
+ {
+ if (elem->ca_changed)
+ {
+ if (_gdbm_write_bucket (dbf, elem))
+ return -1;
+ }
+ }
+ return 0;
+}
+
+int
+_gdbm_fetch_data (GDBM_FILE dbf, off_t off, size_t size, void *buf)
+{
+ off_t bucket_adr;
+ off_t file_pos;
+ int rc;
+ cache_elem *elem;
+ char *dst = buf;
+
+ bucket_adr = (off / dbf->header->bucket_size)
+ * dbf->header->bucket_size;
+ off -= bucket_adr;
+ while (size)
+ {
+ size_t nbytes;
+
+ switch (cache_lookup (dbf, bucket_adr, dbf->cache_mru, &elem))
+ {
+ case node_found:
+ break;
+
+ case node_new:
+ /* Position the file pointer */
+ file_pos = gdbm_file_seek (dbf, bucket_adr, SEEK_SET);
+ if (file_pos != bucket_adr)
+ {
+ GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE);
+ cache_elem_free (dbf, elem);
+ _gdbm_fatal (dbf, _("lseek error"));
+ return -1;
+ }
+
+ /* Read the bucket. */
+ rc = _gdbm_full_read (dbf, elem->ca_bucket,
+ dbf->header->bucket_size);
+ if (rc)
+ {
+ GDBM_DEBUG (GDBM_DEBUG_ERR,
+ "%s: error reading data bucket: %s",
+ dbf->name, gdbm_db_strerror (dbf));
+ dbf->need_recovery = TRUE;
+ cache_elem_free (dbf, elem);
+ _gdbm_fatal (dbf, gdbm_db_strerror (dbf));
+ return -1;
+ }
+ break;
+
+ case node_failure:
+ return -1;
+ }
+
+ nbytes = dbf->header->bucket_size - off;
+ if (nbytes > size)
+ nbytes = size;
+ memcpy (dst, (char*) elem->ca_bucket + off, nbytes);
+ dst += nbytes;
+ size -= nbytes;
+ bucket_adr++;
+ off = 0;
+ }
+ return 0;
+}
+
+void
+gdbm_get_cache_stats (GDBM_FILE dbf,
+ size_t *access_count,
+ size_t *cache_count,
+ struct gdbm_cache_stat *bstat,
+ size_t nstat)
+{
+ if (access_count)
+ *access_count = dbf->cache_access_count;
+ if (cache_count)
+ *cache_count = dbf->cache_num;
+ if (bstat)
+ {
+ size_t i;
+ cache_elem *elem;
+
+ if (nstat > dbf->cache_num)
+ nstat = dbf->cache_num;
+
+ for (i = 0, elem = dbf->cache_mru; i < nstat; i++, elem = elem->ca_next)
+ {
+ bstat[i].adr = elem->ca_adr;
+ bstat[i].hits = elem->ca_hits;
+ }
+ }
+}
diff --git a/src/cachetree.c b/src/cachetree.c
new file mode 100644
index 0000000..b27d8a7
--- /dev/null
+++ b/src/cachetree.c
@@ -0,0 +1,528 @@
+/* cachetree.c - Implementation of the red-black tree for cache lookups. */
+
+/* This file is part of GDBM, the GNU data base manager.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+
+ GDBM is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GDBM is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GDBM. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "autoconf.h"
+#include "gdbmdefs.h"
+#define NDEBUG
+#include "assert.h"
+
+enum cache_node_color { RED, BLACK };
+
+typedef struct cache_node cache_node;
+struct cache_node
+{
+ cache_node *left, *right, *parent;
+ enum cache_node_color color;
+ cache_elem *elem;
+};
+
+struct cache_tree
+{
+ cache_node *root; /* Root of the tree */
+ cache_node *avail; /* List of available nodes, linked by parent field */
+ GDBM_FILE dbf; /* Database this tree belongs to */
+};
+
+/* Allocate and return a new node. Pick the head item from the avail
+ list and update the avail pointer. If the list is empty, malloc
+ a new node.
+ All members in the returned node are filled with 0.
+*/
+static cache_node *
+rbt_node_alloc (cache_tree *tree)
+{
+ cache_node *n;
+
+ n = tree->avail;
+ if (n)
+ tree->avail = n->parent;
+ else
+ {
+ n = malloc (sizeof (*n));
+ if (!n)
+ return NULL;
+ }
+ memset (n, 0, sizeof (*n));
+ return n;
+}
+
+/* Return the node N to the avail list in TREE. */
+static void
+rbt_node_dealloc (cache_tree *tree, cache_node *n)
+{
+ n->parent = tree->avail;
+ tree->avail = n;
+}
+
+/* Red-black tree properties:
+ 1. Each node is either red or black.
+ 2. The root node is black.
+ 3. All leaves are black and contain no data.
+ 4. Every red node has two children, and both are black.
+ IOW, the parent of every red node is black.
+ 5. All paths from any given node to its leaf nodes contain the same
+ number of black nodes.
+ */
+
+/* Auxiliary functions for accessing nodes. */
+
+/* Return the grandparent node of N.
+ Prerequisite: N may not be root.
+*/
+static inline cache_node *
+grandparent (cache_node *n)
+{
+ return n->parent->parent;
+}
+
+/* Return the sibling node of N.
+ Prerequisite: N may not be root.
+*/
+static inline cache_node *
+sibling (cache_node *n)
+{
+ return (n == n->parent->left) ? n->parent->right : n->parent->left;
+}
+
+/* Return the uncle node of N.
+ Prerequisite: N must be at least 2 nodes away from root.
+*/
+static inline cache_node *
+uncle (cache_node *n)
+{
+ return sibling (n->parent);
+}
+
+/* Returns the color of the node N.
+ Empty leaves are represented by NULL, therefore NULL is assumed to
+ be black (see property 3).
+*/
+static inline enum cache_node_color
+node_color (cache_node *n)
+{
+ return n == NULL ? BLACK : n->color;
+}
+
+/* Replace the OLDN with NEWN.
+ Does not modify OLDN. */
+static void
+replace_node (cache_tree *tree, cache_node *oldn, cache_node *newn)
+{
+ if (oldn->parent == NULL)
+ tree->root = newn;
+ else if (oldn == oldn->parent->left)
+ oldn->parent->left = newn;
+ else
+ oldn->parent->right = newn;
+
+ if (newn != NULL)
+ newn->parent = oldn->parent;
+}
+
+/* Rotate the TREE left over the node N. */
+static void
+rotate_left (cache_tree *tree, cache_node *n)
+{
+ cache_node *right = n->right;
+ replace_node (tree, n, right);
+ n->right = right->left;
+ if (right->left != NULL)
+ right->left->parent = n;
+ right->left = n;
+ n->parent = right;
+}
+
+/* Rotate the TREE right over the node N. */
+static void
+rotate_right (cache_tree *tree, cache_node *n)
+{
+ cache_node *left = n->left;
+ replace_node (tree, n, left);
+ n->left = left->right;
+ if (left->right != NULL)
+ left->right->parent = n;
+ left->right = n;
+ n->parent = left;
+}
+
+/* Node deletion */
+static void rbt_delete_fixup (cache_tree *tree, cache_node *n);
+
+/* Remove N from the TREE. */
+void
+_gdbm_rbt_remove_node (cache_tree *tree, cache_node *n)
+{
+ cache_node *child;
+
+ /* If N has both left and right children, reduce the problem to
+ the node with only one child. To do so, find the in-order
+ predecessor of N, copy its value (elem) to N and then delete
+ the predecessor. */
+ if (n->left != NULL && n->right != NULL)
+ {
+ cache_node *p;
+ for (p = n->left; p->right; p = p->right)
+ ;
+ n->elem = p->elem;
+ n->elem->ca_node = n;
+ n = p;
+ }
+
+ /* N has only one child. Select it. */
+ child = n->left ? n->left : n->right;
+ if (node_color (n) == BLACK)
+ {
+ n->color = node_color (child);
+ rbt_delete_fixup (tree, n);
+ }
+ replace_node (tree, n, child);
+ if (n->parent == NULL && child != NULL) /* root should be black */
+ child->color = BLACK;
+
+ /* Return N to the avail pool */
+ rbt_node_dealloc (tree, n);
+}
+
+static void
+rbt_delete_fixup (cache_tree *tree, cache_node *n)
+{
+ while (1)
+ {
+ if (n->parent == NULL)
+ {
+ /* If N has become the root node, deletion resulted in removing
+ one black node (prior root) from every path, so all properties
+ still hold.
+ */
+ return;
+ }
+ else
+ {
+ /* If N has a red sibling, change the colors of the parent and
+ sibling and rotate about the parent. Thus, the sibling becomes
+ grandparent and we can proceed to the next case.
+ */
+ if (node_color (sibling (n)) == RED)
+ {
+ n->parent->color = RED;
+ sibling (n)->color = BLACK;
+ if (n == n->parent->left)
+ rotate_left (tree, n->parent);
+ else
+ rotate_right (tree, n->parent);
+ }
+
+ /* If the parent, sibling and nephews are all black, paint the
+ sibling red. This means one black node was removed from all
+ paths passing through the parent, so we recurse to the beginning
+ of the loop with parent as the argument to restore the properties.
+ This is the only branch that loops.
+ */
+ if (node_color (n->parent) == BLACK
+ && node_color (sibling (n)) == BLACK
+ && node_color (sibling (n)->left) == BLACK
+ && node_color (sibling (n)->right) == BLACK)
+ {
+ sibling (n)->color = RED;
+ n = n->parent;
+ continue;
+ }
+ else
+ {
+ /* If the sibling and nephews are black but the parent is red,
+ swap the colors of the sibling and parent. The properties
+ are then restored.
+ */
+ if (node_color (n->parent) == RED
+ && node_color (sibling (n)) == BLACK
+ && node_color (sibling (n)->left) == BLACK
+ && node_color (sibling (n)->right) == BLACK)
+ {
+ sibling (n)->color = RED;
+ n->parent->color = BLACK;
+ }
+ else
+ {
+ /* N is the left child of its parent, its sibling is black,
+ and the sibling's right child is black. Swap the colors
+ of the sibling and its left sibling and rotate right
+ over the sibling.
+ */
+ if (n == n->parent->left
+ && node_color (sibling (n)) == BLACK
+ && node_color (sibling (n)->left) == RED
+ && node_color (sibling (n)->right) == BLACK)
+ {
+ sibling (n)->color = RED;
+ sibling (n)->left->color = BLACK;
+ rotate_right (tree, sibling (n));
+ }
+ else if (n == n->parent->right
+ && node_color (sibling (n)) == BLACK
+ && node_color (sibling (n)->right) == RED
+ && node_color (sibling (n)->left) == BLACK)
+ {
+ /* The mirror case is handled similarly. */
+ sibling (n)->color = RED;
+ sibling (n)->right->color = BLACK;
+ rotate_left (tree, sibling (n));
+ }
+ /* N is the left child of its parent, its sibling is black
+ and the sibling's right child is red. Swap the colors
+ of the parent and sibling, paint the sibling's right
+ child black and rotate left at the parent. Similarly
+ for the mirror case. This achieves the following:
+
+ . A black node is added to all paths passing through N;
+ . A black node is removed from all paths through the
+ sibling's red child.
+ . The latter is painted black which restores missing
+ black node in all paths through the sibling's red child.
+
+ Another sibling's child becomes a child of N's parent
+ during the rotation and is therefore not affected.
+ */
+ sibling (n)->color = node_color (n->parent);
+ n->parent->color = BLACK;
+ if (n == n->parent->left)
+ {
+ sibling (n)->right->color = BLACK;
+ rotate_left (tree, n->parent);
+ }
+ else
+ {
+ sibling (n)->left->color = BLACK;
+ rotate_right (tree, n->parent);
+ }
+ }
+ }
+ }
+ break;
+ }
+}
+
+/* Insertion */
+static void rbt_insert_fixup (cache_tree *tree, cache_node *n);
+
+/* Look up the node with elem->ca_adr equal to ADR.
+ If found, put it in *RETVAL and return node_found.
+
+ Otherwise, if INSERT is TRUE, create a new node and insert it in the
+ appropriate place in the tree. Store the address of the newly created
+ node in *RETVAL and return node_new. If a new node cannot be created
+ (memory exhausted), return node_failure.
+
+ Otherwise, if INSERT is FALSE, store NULL in *RETVAL and return node_new.
+*/
+static int
+cache_tree_lookup (cache_tree *tree, off_t adr, int insert,
+ cache_node **retval)
+{
+ int res;
+ cache_node *node, *parent = NULL;
+ cache_node **nodeptr;
+
+ nodeptr = &tree->root;
+ while ((node = *nodeptr) != NULL)
+ {
+ if (adr == node->elem->ca_adr)
+ break;
+ parent = node;
+ if (adr < node->elem->ca_adr)
+ nodeptr = &node->left;
+ else
+ nodeptr = &node->right;
+ }
+
+ if (node)
+ {
+ res = node_found;
+ }
+ else
+ {
+ if (insert)
+ {
+ node = rbt_node_alloc (tree);
+ if (!node)
+ return node_failure;
+ node->elem = _gdbm_cache_elem_new (tree->dbf, adr);
+ if (!node->elem)
+ {
+ rbt_node_dealloc (tree, node);
+ return node_failure;
+ }
+ node->elem->ca_node = node;
+ *nodeptr = node;
+ node->parent = parent;
+ rbt_insert_fixup (tree, node);
+ }
+ res = node_new;
+ }
+ *retval = node;
+ return res;
+}
+
+static void
+rbt_insert_fixup (cache_tree *tree, cache_node *n)
+{
+ while (1)
+ {
+ if (n->parent == NULL)
+ {
+ /* Node was inserted at the root of the tree.
+ The root node must be black (property 2). Changing its color
+ to black would add one black node to every path, which means
+ the property 5 would remain satisfied. So we simply paint the
+ node black.
+ */
+ n->color = BLACK;
+ }
+ else if (node_color (n->parent) == BLACK)
+ {
+ /* The node has black parent.
+ All properties are satisfied. There's no need to change anything.
+ */
+ return;
+ }
+ else if (node_color (uncle (n)) == RED)
+ {
+ /* The uncle node is red.
+ Repaint the parent and uncle black and the grandparent red. This
+ would satisfy 4. However, if the grandparent is root, this would
+ violate the property 2. So we repaint the grandparent by
+ re-entering the fixup loop with grandparent as the node.
+ This is the only branch that loops.
+ */
+ n->parent->color = BLACK;
+ uncle (n)->color = BLACK;
+ n = grandparent (n);
+ n->color = RED;
+ continue;
+ }
+ else
+ {
+ /* The new node is the right child of its parent and the parent is
+ the left child of the grandparent. Rotate left about the parent.
+ Mirror case: The new node is the left child of its parent and the
+ parent is the right child of the grandparent. Rotate right about
+ the parent. This fixes the properties for the rbt_insert_5.
+ */
+ if (n == n->parent->right && n->parent == grandparent (n)->left)
+ {
+ rotate_left (tree, n->parent);
+ n = n->left;
+ }
+ else if (n == n->parent->left && n->parent == grandparent (n)->right)
+ {
+ rotate_right (tree, n->parent);
+ n = n->right;
+ }
+
+ /* The new node is the left child of its parent and the parent is the
+ left child of the grandparent. Rotate right about the grandparent.
+ Mirror case: The new node is the right child of its parent and the
+ parent
+ is the right child of the grandparent. Rotate left.
+ */
+ n->parent->color = BLACK;
+ grandparent (n)->color = RED;
+ if (n == n->parent->left && n->parent == grandparent (n)->left)
+ {
+ rotate_right (tree, grandparent (n));
+ }
+ else
+ {
+ rotate_left (tree, grandparent (n));
+ }
+ }
+ break;
+ }
+}
+
+/* Interface functions */
+
+/* Create a cache tree structure for the database file DBF. */
+cache_tree *
+_gdbm_cache_tree_alloc (GDBM_FILE dbf)
+{
+ cache_tree *t = malloc (sizeof (*t));
+ if (t)
+ {
+ t->root = NULL;
+ t->avail = NULL;
+ t->dbf = dbf;
+ }
+ return t;
+}
+
+/* Free the memory used by the TREE. */
+void
+_gdbm_cache_tree_destroy (cache_tree *tree)
+{
+ cache_node *n;
+ while ((n = tree->root) != NULL)
+ _gdbm_rbt_remove_node (tree, n);
+ while ((n = tree->avail) != NULL)
+ {
+ tree->avail = n->parent;
+ free (n);
+ }
+ free (tree);
+}
+
+/* Delete the node with ELEM from the TREE.
+ Note, that it actually removes the node whith elem->ca_adr equal to
+ that of ELEM, which means that it's elem pointer could theoretically
+ have differ from ELEM. However, the actual algorithm ensures it is
+ the same.
+*/
+int
+_gdbm_cache_tree_delete (cache_tree *tree, cache_elem *elem)
+{
+ cache_node *n;
+
+ switch (cache_tree_lookup (tree, elem->ca_adr, 0, &n))
+ {
+ case node_found:
+ assert (n->elem == elem);
+ _gdbm_rbt_remove_node (tree, n);
+ break;
+
+ case node_new:
+ break;
+
+ case node_failure:
+ return -1;
+ }
+ return 0;
+}
+
+/* Look up the node with elem->ca_adr equal to ADR.
+ If found, store its pointer in *RETVAL and return node_found.
+ Otherwise, return node_new and don't touch RETVAL.
+*/
+int
+_gdbm_cache_tree_lookup (cache_tree *tree, off_t adr, cache_elem **retval)
+{
+ cache_node *n;
+ int rc = cache_tree_lookup (tree, adr, TRUE, &n);
+ if (rc != node_failure)
+ *retval = n->elem;
+ return rc;
+}
+
diff --git a/src/findkey.c b/src/findkey.c
index 6e4bccd..f82b422 100644
--- a/src/findkey.c
+++ b/src/findkey.c
@@ -43,11 +43,9 @@ gdbm_bucket_element_valid_p (GDBM_FILE dbf, int elem_loc)
char *
_gdbm_read_entry (GDBM_FILE dbf, int elem_loc)
{
- int rc;
int key_size;
int data_size;
size_t dsize;
- off_t file_pos;
data_cache_elem *data_ca;
/* Is it already in the cache? */
@@ -103,26 +101,9 @@ _gdbm_read_entry (GDBM_FILE dbf, int elem_loc)
}
}
- /* Read into the cache. */
- file_pos = gdbm_file_seek (dbf, dbf->bucket->h_table[elem_loc].data_pointer,
- SEEK_SET);
- if (file_pos != dbf->bucket->h_table[elem_loc].data_pointer)
- {
- GDBM_SET_ERRNO2 (dbf, GDBM_FILE_SEEK_ERROR, TRUE, GDBM_DEBUG_LOOKUP);
- _gdbm_fatal (dbf, _("lseek error"));
- return NULL;
- }
-
- rc = _gdbm_full_read (dbf, data_ca->dptr, key_size+data_size);
- if (rc)
- {
- GDBM_DEBUG (GDBM_DEBUG_ERR|GDBM_DEBUG_LOOKUP|GDBM_DEBUG_READ,
- "%s: error reading entry: %s",
- dbf->name, gdbm_db_strerror (dbf));
- dbf->need_recovery = TRUE;
- _gdbm_fatal (dbf, gdbm_db_strerror (dbf));
- return NULL;
- }
+ if (_gdbm_fetch_data (dbf, dbf->bucket->h_table[elem_loc].data_pointer,
+ key_size + data_size, data_ca->dptr))
+ return NULL;
return data_ca->dptr;
}
diff --git a/src/gdbm.h.in b/src/gdbm.h.in
index e537d54..6a23ea6 100644
--- a/src/gdbm.h.in
+++ b/src/gdbm.h.in
@@ -228,9 +228,10 @@ extern int gdbm_copy_meta (GDBM_FILE dst, GDBM_FILE src);
# define GDBM_FILE_CLOSE_ERROR 37
# define GDBM_FILE_SYNC_ERROR 38
# define GDBM_FILE_TRUNCATE_ERROR 39
-
+# define GDBM_BUCKET_CACHE_CORRUPTED 40
+
# define _GDBM_MIN_ERRNO 0
-# define _GDBM_MAX_ERRNO GDBM_FILE_TRUNCATE_ERROR
+# define _GDBM_MAX_ERRNO GDBM_BUCKET_CACHE_CORRUPTED
/* This one was never used and will be removed in the future */
# define GDBM_UNKNOWN_UPDATE GDBM_UNKNOWN_ERROR
@@ -275,8 +276,20 @@ extern void gdbm_debug_parse_state (int (*f) (void *, int, char const *),
void *d);
extern void gdbm_debug_datum (datum dat, char const *pfx);
-
#endif
+
+/* Cache statistics */
+struct gdbm_cache_stat
+{
+ off_t adr;
+ size_t hits;
+};
+
+void gdbm_get_cache_stats (GDBM_FILE dbf,
+ size_t *access_count,
+ size_t *cache_count,
+ struct gdbm_cache_stat *bstat,
+ size_t nstat);
# if defined(__cplusplus) || defined(c_plusplus)
}
diff --git a/src/gdbmclose.c b/src/gdbmclose.c
index 6e21320..71fa128 100644
--- a/src/gdbmclose.c
+++ b/src/gdbmclose.c
@@ -29,7 +29,6 @@
int
gdbm_close (GDBM_FILE dbf)
{
- int index; /* For freeing the bucket cache. */
int syserrno;
gdbm_set_errno (dbf, GDBM_NO_ERROR, FALSE);
@@ -58,15 +57,8 @@ gdbm_close (GDBM_FILE dbf)
free (dbf->name);
free (dbf->dir);
- if (dbf->bucket_cache != NULL)
- {
- for (index = 0; index < dbf->cache_size; index++)
- {
- free (dbf->bucket_cache[index].ca_bucket);
- free (dbf->bucket_cache[index].ca_data.dptr);
- }
- free (dbf->bucket_cache);
- }
+ _gdbm_cache_free (dbf);
+
free (dbf->header);
free (dbf);
if (gdbm_errno)
diff --git a/src/gdbmdefs.h b/src/gdbmdefs.h
index 7dc15ee..3297cd1 100644
--- a/src/gdbmdefs.h
+++ b/src/gdbmdefs.h
@@ -157,13 +157,23 @@ typedef struct
int elem_loc;
} data_cache_elem;
-typedef struct
+typedef struct cache_elem_s cache_elem;
+
+struct cache_elem_s
{
hash_bucket * ca_bucket;
off_t ca_adr;
- char ca_changed; /* Data in the bucket changed. */
+ char ca_changed; /* Data in the bucket changed. */
data_cache_elem ca_data;
-} cache_elem;
+ cache_elem *ca_prev, *ca_next; /* Previous and next elements in LRU list.
+ If the item is in cache_avail list, only
+ ca_next is used. It points to the next
+ available element. */
+ size_t ca_hits; /* Number of times this element was requested */
+ struct cache_node *ca_node;
+};
+
+typedef struct cache_tree cache_tree;
/* This final structure contains all main memory based information for
a gdbm file. This allows multiple gdbm files to be opened at the same
@@ -225,19 +235,27 @@ struct gdbm_file_info
off_t *dir;
/* The bucket cache. */
- cache_elem *bucket_cache;
- size_t cache_size;
- size_t last_read;
-
+ size_t cache_size; /* Cache capacity */
+ size_t cache_num; /* Actual number of elements in cache */
+ /* Cache elements form a binary search tree. */
+ cache_tree *cache_tree;
+ /* Cache elements are linked in a list sorted by relative access time */
+ cache_elem *cache_mru; /* Most recently used element - head of the list */
+ cache_elem *cache_lru; /* Last recently used element - tail of the list */
+ cache_elem *cache_avail; /* Pool of available elements (linked by prev, next)
+ */
+ /* Pointer to the current bucket's cache entry. */
+ cache_elem *cache_entry;
+
/* Points to the current hash bucket in the cache. */
hash_bucket *bucket;
-
+
/* The directory entry used to get the current hash bucket. */
int bucket_dir;
- /* Pointer to the current bucket's cache entry. */
- cache_elem *cache_entry;
-
+ /* Number of cache accesses */
+ size_t cache_access_count;
+
/* Bookkeeping of things that need to be written back at the
end of an update. */
unsigned header_changed :1;
diff --git a/src/gdbmerrno.c b/src/gdbmerrno.c
index 8aa58a4..392f23b 100644
--- a/src/gdbmerrno.c
+++ b/src/gdbmerrno.c
@@ -139,7 +139,8 @@ const char * const gdbm_errlist[_GDBM_MAX_ERRNO+1] = {
[GDBM_BAD_DIR_ENTRY] = N_("Invalid directory entry"),
[GDBM_FILE_CLOSE_ERROR] = N_("Error closing file"),
[GDBM_FILE_SYNC_ERROR] = N_("Error synchronizing file"),
- [GDBM_FILE_TRUNCATE_ERROR] = N_("Error truncating file")
+ [GDBM_FILE_TRUNCATE_ERROR] = N_("Error truncating file"),
+ [GDBM_BUCKET_CACHE_CORRUPTED] = N_("Bucket cache corrupted")
};
const char *
diff --git a/src/gdbmopen.c b/src/gdbmopen.c
index c68834c..621d6c7 100644
--- a/src/gdbmopen.c
+++ b/src/gdbmopen.c
@@ -269,9 +269,11 @@ gdbm_fd_open (int fd, const char *file_name, int block_size,
dbf->dir = NULL;
dbf->bucket = NULL;
dbf->header = NULL;
- dbf->bucket_cache = NULL;
- dbf->cache_size = 0;
+ /* Initialize cache */
+ dbf->cache_tree = _gdbm_cache_tree_alloc (dbf);
+ _gdbm_cache_init (dbf, DEFAULT_CACHESIZE);
+
dbf->memory_mapping = FALSE;
dbf->mapped_size_max = SIZE_T_MAX;
dbf->mapped_region = NULL;
@@ -639,10 +641,8 @@ gdbm_fd_open (int fd, const char *file_name, int block_size,
#endif
/* Finish initializing dbf. */
- dbf->last_read = -1;
dbf->bucket = NULL;
dbf->bucket_dir = 0;
- dbf->cache_entry = NULL;
dbf->header_changed = FALSE;
dbf->directory_changed = FALSE;
dbf->bucket_changed = FALSE;
@@ -717,46 +717,3 @@ gdbm_open (const char *file, int block_size, int flags, int mode,
fatal_func);
}
-/* Initialize the bucket cache. */
-int
-_gdbm_init_cache (GDBM_FILE dbf, size_t size)
-{
- int index;
-
- if (dbf->bucket_cache == NULL)
- {
- dbf->bucket_cache = calloc (size, sizeof(cache_elem));
- if (dbf->bucket_cache == NULL)
- {
- GDBM_SET_ERRNO (dbf, GDBM_MALLOC_ERROR, TRUE);
- return -1;
- }
- dbf->cache_size = size;
-
- for (index = 0; index < size; index++)
- {
- (dbf->bucket_cache[index]).ca_bucket =
- malloc (dbf->header->bucket_size);
- if ((dbf->bucket_cache[index]).ca_bucket == NULL)
- {
- GDBM_SET_ERRNO (dbf, GDBM_MALLOC_ERROR, TRUE);
- return -1;
- }
- dbf->bucket_cache[index].ca_data.dptr = NULL;
- dbf->bucket_cache[index].ca_data.dsize = 0;
- _gdbm_cache_entry_invalidate (dbf, index);
- }
- dbf->bucket = dbf->bucket_cache[0].ca_bucket;
- dbf->cache_entry = &dbf->bucket_cache[0];
- }
- return 0;
-}
-
-void
-_gdbm_cache_entry_invalidate (GDBM_FILE dbf, int index)
-{
- dbf->bucket_cache[index].ca_adr = 0;
- dbf->bucket_cache[index].ca_changed = FALSE;
- dbf->bucket_cache[index].ca_data.hash_val = -1;
- dbf->bucket_cache[index].ca_data.elem_loc = -1;
-}
diff --git a/src/gdbmsetopt.c b/src/gdbmsetopt.c
index c132335..2e409f9 100644
--- a/src/gdbmsetopt.c
+++ b/src/gdbmsetopt.c
@@ -54,19 +54,13 @@ setopt_gdbm_setcachesize (GDBM_FILE dbf, void *optval, int optlen)
{
size_t sz;
- if (dbf->bucket_cache != NULL)
- {
- GDBM_SET_ERRNO (dbf, GDBM_OPT_ALREADY_SET, FALSE);
- return -1;
- }
-
/* Optval will point to the new size of the cache. */
if (get_size (optval, optlen, &sz))
{
GDBM_SET_ERRNO (dbf, GDBM_OPT_ILLEGAL, FALSE);
return -1;
}
- return _gdbm_init_cache (dbf, (sz > 9) ? sz : 10);
+ return _gdbm_cache_init (dbf, (sz > 9) ? sz : 10);
}
static int
diff --git a/src/gdbmtool.c b/src/gdbmtool.c
index ceffaef..b2ee956 100644
--- a/src/gdbmtool.c
+++ b/src/gdbmtool.c
@@ -321,26 +321,25 @@ _gdbm_print_avail_list (FILE *fp, GDBM_FILE dbf)
void
_gdbm_print_bucket_cache (FILE *fp, GDBM_FILE dbf)
{
- int index;
- char changed;
-
- if (dbf->bucket_cache != NULL)
+ if (dbf->cache_num)
{
+ int i;
+ cache_elem *elem;
+
fprintf (fp,
- _("Bucket Cache (size %zu):\n Index: Address Changed Data_Hash \n"),
- dbf->cache_size);
- for (index = 0; index < dbf->cache_size; index++)
+ _("Bucket Cache (size %zu/%zu):\n Index: Address Changed Data_Hash \n"),
+ dbf->cache_num, dbf->cache_size);
+ for (elem = dbf->cache_entry, i = 0; elem; elem = elem->ca_next, i++)
{
- changed = dbf->bucket_cache[index].ca_changed;
fprintf (fp, " %5d: %15lu %7s %x\n",
- index,
- (unsigned long) dbf->bucket_cache[index].ca_adr,
- (changed ? _("True") : _("False")),
- dbf->bucket_cache[index].ca_data.hash_val);
+ i,
+ (unsigned long) elem->ca_adr,
+ (elem->ca_changed ? _("True") : _("False")),
+ elem->ca_data.hash_val);
}
}
else
- fprintf (fp, _("Bucket cache has not been initialized.\n"));
+ fprintf (fp, _("Bucket cache is empty.\n"));
}
int
@@ -879,7 +878,7 @@ print_cache_begin (struct handler_param *param GDBM_ARG_UNUSED, size_t *exp_coun
if (checkdb ())
return 1;
if (exp_count)
- *exp_count = gdbm_file->bucket_cache ? gdbm_file->cache_size + 1 : 1;
+ *exp_count = gdbm_file->cache_num + 1;
return 0;
}
diff --git a/src/proto.h b/src/proto.h
index 57f9e3c..2bd7956 100644
--- a/src/proto.h
+++ b/src/proto.h
@@ -21,11 +21,15 @@
/* From bucket.c */
void _gdbm_new_bucket (GDBM_FILE, hash_bucket *, int);
int _gdbm_get_bucket (GDBM_FILE, int);
-int _gdbm_read_bucket_at (GDBM_FILE dbf, off_t off, hash_bucket *bucket,
- size_t size);
+int _gdbm_fetch_data (GDBM_FILE dbf, off_t off, size_t size, void *buf);
int _gdbm_split_bucket (GDBM_FILE, int);
int _gdbm_write_bucket (GDBM_FILE, cache_elem *);
+int _gdbm_cache_init (GDBM_FILE, size_t);
+void _gdbm_cache_free (GDBM_FILE dbf);
+int _gdbm_cache_flush (GDBM_FILE dbf);
+
+cache_elem *_gdbm_cache_elem_new (GDBM_FILE dbf, off_t adr);
/* From falloc.c */
off_t _gdbm_alloc (GDBM_FILE, int);
@@ -47,9 +51,6 @@ int _gdbm_end_update (GDBM_FILE);
void _gdbm_fatal (GDBM_FILE, const char *);
/* From gdbmopen.c */
-int _gdbm_init_cache (GDBM_FILE, size_t);
-void _gdbm_cache_entry_invalidate (GDBM_FILE, int);
-
int gdbm_avail_block_validate (GDBM_FILE dbf, avail_block *avblk);
int gdbm_bucket_avail_table_validate (GDBM_FILE dbf, hash_bucket *bucket);
@@ -86,6 +87,22 @@ int _gdbm_dump (GDBM_FILE dbf, FILE *fp);
/* From recover.c */
int _gdbm_next_bucket_dir (GDBM_FILE dbf, int bucket_dir);
+/* cachetree.c */
+cache_tree *_gdbm_cache_tree_alloc (GDBM_FILE dbf);
+void _gdbm_cache_tree_destroy (cache_tree *tree);
+int _gdbm_cache_tree_delete (cache_tree *tree, cache_elem *elem);
+void _gdbm_rbt_remove_node (cache_tree *tree, struct cache_node *n);
+
+/* Return codes for _gdbm_cache_tree_lookup. */
+enum
+ {
+ node_found, /* Returned element was found in cache. */
+ node_new, /* Returned element has been created and inserted to cache */
+ node_failure /* An error occurred. */
+ };
+
+int _gdbm_cache_tree_lookup (cache_tree *tree, off_t adr, cache_elem **retval);
+
/* I/O functions */
static inline ssize_t
gdbm_file_read (GDBM_FILE dbf, void *buf, size_t size)
diff --git a/src/recover.c b/src/recover.c
index 8d34bec..d4da31d 100644
--- a/src/recover.c
+++ b/src/recover.c
@@ -91,8 +91,6 @@ static int
_gdbm_finish_transfer (GDBM_FILE dbf, GDBM_FILE new_dbf,
gdbm_recovery *rcvr, int flags)
{
- int i;
-
/* Write everything. */
if (_gdbm_end_update (new_dbf))
{
@@ -145,15 +143,7 @@ _gdbm_finish_transfer (GDBM_FILE dbf, GDBM_FILE new_dbf,
free (dbf->header);
free (dbf->dir);
- if (dbf->bucket_cache != NULL)
- {
- for (i = 0; i < dbf->cache_size; i++)
- {
- free (dbf->bucket_cache[i].ca_bucket);
- free (dbf->bucket_cache[i].ca_data.dptr);
- }
- free (dbf->bucket_cache);
- }
+ _gdbm_cache_flush (dbf);
dbf->lock_type = new_dbf->lock_type;
dbf->desc = new_dbf->desc;
@@ -161,9 +151,14 @@ _gdbm_finish_transfer (GDBM_FILE dbf, GDBM_FILE new_dbf,
dbf->dir = new_dbf->dir;
dbf->bucket = new_dbf->bucket;
dbf->bucket_dir = new_dbf->bucket_dir;
- dbf->last_read = new_dbf->last_read;
- dbf->bucket_cache = new_dbf->bucket_cache;
- dbf->cache_size = new_dbf->cache_size;
+
+ dbf->cache_size = new_dbf->cache_size;
+ dbf->cache_num = new_dbf->cache_num;
+ dbf->cache_tree = new_dbf->cache_tree;
+ dbf->cache_entry = new_dbf->cache_entry;
+ dbf->cache_lru = new_dbf->cache_lru;
+ dbf->cache_avail = new_dbf->cache_avail;
+
dbf->header_changed = new_dbf->header_changed;
dbf->directory_changed = new_dbf->directory_changed;
dbf->bucket_changed = new_dbf->bucket_changed;
@@ -182,7 +177,6 @@ _gdbm_finish_transfer (GDBM_FILE dbf, GDBM_FILE new_dbf,
gdbm_file_sync (dbf);
/* Force the right stuff for a correct bucket cache. */
- dbf->cache_entry = &dbf->bucket_cache[0];
return _gdbm_get_bucket (dbf, 0);
}
diff --git a/src/update.c b/src/update.c
index 44be33c..4c271d2 100644
--- a/src/update.c
+++ b/src/update.c
@@ -75,19 +75,7 @@ _gdbm_end_update (GDBM_FILE dbf)
/* Write the other changed buckets if there are any. */
if (dbf->second_changed)
{
- if (dbf->bucket_cache != NULL)
- {
- int index;
-
- for (index = 0; index < dbf->cache_size; index++)
- {
- if (dbf->bucket_cache[index].ca_changed)
- {
- if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[index]))
- return -1;
- }
- }
- }
+ _gdbm_cache_flush (dbf);
dbf->second_changed = FALSE;
}
diff --git a/tests/setopt00.at b/tests/setopt00.at
index 4ef915b..a1be255 100644
--- a/tests/setopt00.at
+++ b/tests/setopt00.at
@@ -26,7 +26,7 @@ gtopt test.db '!MMAP'
* CACHESIZE:
initial GDBM_SETCACHESIZE: PASS
GDBM_GETCACHESIZE: PASS
-second GDBM_SETCACHESIZE: XFAIL
+second GDBM_SETCACHESIZE: PASS
* SYNCMODE:
initial GDBM_GETSYNCMODE: PASS
GDBM_SETSYNCMODE: PASS