summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README6
-rw-r--r--src/Makefile.am1
-rw-r--r--src/bucket.c609
-rw-r--r--src/cachetree.c528
-rw-r--r--src/findkey.c25
-rw-r--r--src/gdbm.h.in19
-rw-r--r--src/gdbmclose.c12
-rw-r--r--src/gdbmdefs.h40
-rw-r--r--src/gdbmerrno.c3
-rw-r--r--src/gdbmopen.c51
-rw-r--r--src/gdbmsetopt.c8
-rw-r--r--src/gdbmtool.c27
-rw-r--r--src/proto.h27
-rw-r--r--src/recover.c24
-rw-r--r--src/update.c14
-rw-r--r--tests/setopt00.at2
16 files changed, 1057 insertions, 339 deletions
diff --git a/README b/README
index 747d1a2..c938120 100644
--- a/README
+++ b/README
@@ -1,6 +1,12 @@
GNU dbm README
See the end of file for copying conditions.
+* Note
+
+This is an experimental branch of GNU DBM. Its goal is to evaluate
+possibilities of improving the caching scheme. Please don't use it
+in production.
+
* Introduction
This file contains brief information about configuring, testing
diff --git a/src/Makefile.am b/src/Makefile.am
index e34bb7e..18c80d9 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -41,6 +41,7 @@ lib_LTLIBRARIES = libgdbm.la
libgdbm_la_LIBADD = @LTLIBINTL@
libgdbm_la_SOURCES = \
+ cachetree.c\
gdbmclose.c\
gdbmcount.c\
gdbmdelete.c\
diff --git a/src/bucket.c b/src/bucket.c
index b88972e..4bce594 100644
--- a/src/bucket.c
+++ b/src/bucket.c
@@ -56,7 +56,155 @@ gdbm_dir_entry_valid_p (GDBM_FILE dbf, int dir_index)
&& dir_index < GDBM_DIR_COUNT (dbf)
&& dbf->dir[dir_index] >= dbf->header->block_size;
}
-
+
+static void
+set_cache_entry (GDBM_FILE dbf, cache_elem *elem)
+{
+ dbf->cache_entry = elem;
+ dbf->bucket = dbf->cache_entry->ca_bucket;
+}
+
+
+/* LRU list management */
+
+/* Link ELEM after REF in DBF cache. If REF is NULL, link at head */
+static void
+lru_link_elem (GDBM_FILE dbf, cache_elem *elem, cache_elem *ref)
+{
+ if (!ref)
+ {
+ elem->ca_prev = NULL;
+ elem->ca_next = dbf->cache_mru;
+ if (dbf->cache_mru)
+ dbf->cache_mru->ca_prev = elem;
+ else
+ dbf->cache_lru = elem;
+ dbf->cache_mru = elem;
+ }
+ else
+ {
+ cache_elem *x;
+
+ elem->ca_prev = ref;
+ elem->ca_next = ref->ca_next;
+ if ((x = ref->ca_next))
+ x->ca_prev = elem;
+ else
+ dbf->cache_lru = elem;
+ ref->ca_next = elem;
+ }
+}
+
+/* Unlink ELEM from the list of cache elements in DBF. */
+static void
+lru_unlink_elem (GDBM_FILE dbf, cache_elem *elem)
+{
+ cache_elem *x;
+
+ if ((x = elem->ca_prev))
+ x->ca_next = elem->ca_next;
+ else
+ dbf->cache_mru = elem->ca_next;
+ if ((x = elem->ca_next))
+ x->ca_prev = elem->ca_prev;
+ else
+ dbf->cache_lru = elem->ca_prev;
+ elem->ca_prev = elem->ca_next = NULL;
+}
+
+/* Creates and returns new cache element for DBF. The element is initialized,
+ but not linked to the LRU list.
+ Return NULL on error.
+*/
+cache_elem *
+_gdbm_cache_elem_new (GDBM_FILE dbf, off_t adr)
+{
+ cache_elem *elem;
+
+ elem = dbf->cache_avail;
+ if (elem)
+ {
+ dbf->cache_avail = elem->ca_next;
+ }
+ else
+ {
+ elem = calloc (1, sizeof (*elem));
+
+ if (!elem)
+ return NULL;
+
+ elem->ca_bucket = malloc (dbf->header->bucket_size);
+ if (!elem->ca_bucket)
+ {
+ free (elem);
+ return NULL;
+ }
+ }
+
+ elem->ca_adr = adr;
+ elem->ca_changed = FALSE;
+ elem->ca_data.hash_val = -1;
+ elem->ca_data.elem_loc = -1;
+
+ elem->ca_prev = elem->ca_next = NULL;
+ elem->ca_hits = 0;
+ elem->ca_node = NULL;
+
+ dbf->cache_num++;
+
+ return elem;
+}
+
+/* Frees element ELEM. Unlinks it from the cache tree and LRU list. */
+static void
+cache_elem_free (GDBM_FILE dbf, cache_elem *elem)
+{
+ _gdbm_rbt_remove_node (dbf->cache_tree, elem->ca_node);
+ lru_unlink_elem (dbf, elem);
+ elem->ca_next = dbf->cache_avail;
+ dbf->cache_avail = elem;
+ dbf->cache_num--;
+}
+
+static int
+cache_lookup (GDBM_FILE dbf, off_t adr, cache_elem *ref, cache_elem **ret_elem)
+{
+ int rc;
+ cache_elem *elem;
+
+ dbf->cache_access_count++;
+ rc = _gdbm_cache_tree_lookup (dbf->cache_tree, adr, &elem);
+ switch (rc)
+ {
+ case node_found:
+ elem->ca_hits++;
+ lru_unlink_elem (dbf, elem);
+ break;
+
+ case node_new:
+ if (dbf->cache_num > dbf->cache_size)
+ {
+ cache_elem *last = dbf->cache_lru;
+ if (last->ca_changed)
+ {
+ if (_gdbm_write_bucket (dbf, last))
+ {
+ cache_elem_free (dbf, elem);
+ return node_failure;
+ }
+ }
+ cache_elem_free (dbf, last);
+ }
+ break;
+
+ default:
+ return rc;
+ }
+ lru_link_elem (dbf, elem, ref);
+ *ret_elem = elem;
+ return rc;
+}
+
/* Find a bucket for DBF that is pointed to by the bucket directory from
location DIR_INDEX. The bucket cache is first checked to see if it
is already in memory. If not, a bucket may be tossed to read the new
@@ -70,8 +218,9 @@ _gdbm_get_bucket (GDBM_FILE dbf, int dir_index)
int rc;
off_t bucket_adr; /* The address of the correct hash bucket. */
off_t file_pos; /* The return address for lseek. */
- int index; /* Loop index. */
-
+ hash_bucket *bucket;
+ cache_elem *elem;
+
if (!gdbm_dir_entry_valid_p (dbf, dir_index))
{
/* FIXME: negative caching? */
@@ -82,127 +231,66 @@ _gdbm_get_bucket (GDBM_FILE dbf, int dir_index)
/* Initial set up. */
dbf->bucket_dir = dir_index;
bucket_adr = dbf->dir[dir_index];
-
- if (dbf->bucket_cache == NULL)
- {
- if (_gdbm_init_cache (dbf, DEFAULT_CACHESIZE) == -1)
- {
- _gdbm_fatal (dbf, _("couldn't init cache"));
- return -1;
- }
- }
- /* If that one is not already current, we must find it. */
- if (dbf->cache_entry->ca_adr != bucket_adr)
+ switch (cache_lookup (dbf, bucket_adr, NULL, &elem))
{
- size_t lru;
- hash_bucket *bucket;
+ case node_found:
+ break;
- /* Look in the cache. */
- for (index = 0; index < dbf->cache_size; index++)
- {
- if (dbf->bucket_cache[index].ca_adr == bucket_adr)
- {
- dbf->bucket = dbf->bucket_cache[index].ca_bucket;
- dbf->cache_entry = &dbf->bucket_cache[index];
- return 0;
- }
- }
-
- /* It is not in the cache, read it from the disk. */
-
+ case node_new:
/* Position the file pointer */
file_pos = gdbm_file_seek (dbf, bucket_adr, SEEK_SET);
if (file_pos != bucket_adr)
{
GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE);
+ cache_elem_free (dbf, elem);
_gdbm_fatal (dbf, _("lseek error"));
return -1;
}
-
- /* Flush and drop the last recently used cache entry */
- lru = (dbf->last_read + 1) % dbf->cache_size;
- if (dbf->bucket_cache[lru].ca_changed)
- {
- if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[lru]))
- return -1;
- }
- _gdbm_cache_entry_invalidate (dbf, lru);
-
+
/* Read the bucket. */
- rc = _gdbm_full_read (dbf, dbf->bucket_cache[lru].ca_bucket,
- dbf->header->bucket_size);
+ rc = _gdbm_full_read (dbf, elem->ca_bucket, dbf->header->bucket_size);
if (rc)
{
GDBM_DEBUG (GDBM_DEBUG_ERR,
"%s: error reading bucket: %s",
dbf->name, gdbm_db_strerror (dbf));
dbf->need_recovery = TRUE;
+ cache_elem_free (dbf, elem);
_gdbm_fatal (dbf, gdbm_db_strerror (dbf));
return -1;
}
+
/* Validate the bucket */
- bucket = dbf->bucket_cache[lru].ca_bucket;
+ bucket = elem->ca_bucket;
if (!(bucket->count >= 0
&& bucket->count <= dbf->header->bucket_elems
&& bucket->bucket_bits >= 0
&& bucket->bucket_bits <= dbf->header->dir_bits))
{
GDBM_SET_ERRNO (dbf, GDBM_BAD_BUCKET, TRUE);
+ cache_elem_free (dbf, elem);
return -1;
}
/* Validate bucket_avail table */
if (gdbm_bucket_avail_table_validate (dbf, bucket))
- return -1;
-
- /* Finally, store it in cache */
- dbf->last_read = lru;
- dbf->bucket_cache[lru].ca_adr = bucket_adr;
- dbf->bucket = dbf->bucket_cache[lru].ca_bucket;
- dbf->cache_entry = &dbf->bucket_cache[lru];
- dbf->cache_entry->ca_data.elem_loc = -1;
- dbf->cache_entry->ca_changed = FALSE;
- }
- return 0;
-}
-
-int
-_gdbm_read_bucket_at (GDBM_FILE dbf, off_t off, hash_bucket *bucket,
- size_t size)
-{
- off_t file_pos;
- int i;
-
- if (dbf->cache_entry && dbf->cache_entry->ca_adr == off)
- {
- memcpy (bucket, dbf->bucket, size);
- return 0;
- }
-
- /* Look in the cache. */
- for (i = 0; i < dbf->cache_size; i++)
- {
- if (dbf->bucket_cache[i].ca_adr == off)
{
- memcpy (bucket, dbf->bucket_cache[i].ca_bucket, size);
- return 0;
+ cache_elem_free (dbf, elem);
+ return -1;
}
- }
-
- /* Read the bucket. */
- file_pos = gdbm_file_seek (dbf, off, SEEK_SET);
- if (file_pos != off)
- {
- GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE);
- return -1;
- }
- if (_gdbm_full_read (dbf, bucket, size))
- {
- GDBM_DEBUG (GDBM_DEBUG_ERR,
- "%s: error reading bucket: %s",
- dbf->name, gdbm_db_strerror (dbf));
+
+ /* Update the cache */
+ elem->ca_adr = bucket_adr;
+ elem->ca_data.elem_loc = -1;
+ elem->ca_changed = FALSE;
+
+ break;
+
+ case node_failure:
return -1;
}
+ set_cache_entry (dbf, elem);
+
return 0;
}
@@ -213,86 +301,74 @@ _gdbm_read_bucket_at (GDBM_FILE dbf, off_t off, hash_bucket *bucket,
int
_gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
{
- hash_bucket *bucket[2]; /* Pointers to the new buckets. */
-
- int new_bits; /* The number of bits for the new buckets. */
- int cache_0; /* Location in the cache for the buckets. */
- int cache_1;
- off_t adr_0; /* File address of the new bucket 0. */
- off_t adr_1; /* File address of the new bucket 1. */
- avail_elem old_bucket; /* Avail Struct for the old bucket. */
-
- off_t dir_start0; /* Used in updating the directory. */
- off_t dir_start1;
- off_t dir_end;
-
- off_t *new_dir; /* Pointer to the new directory. */
- off_t dir_adr; /* Address of the new directory. */
- int dir_size; /* Size of the new directory. */
off_t old_adr[GDBM_HASH_BITS]; /* Address of the old directories. */
int old_size[GDBM_HASH_BITS]; /* Size of the old directories. */
int old_count; /* Number of old directories. */
int index; /* Used in array indexing. */
int index1; /* Used in array indexing. */
- int elem_loc; /* Location in new bucket to put element. */
- bucket_element *old_el; /* Pointer into the old bucket. */
- int select; /* Used to index bucket during movement. */
/* No directories are yet old. */
old_count = 0;
-
- if (dbf->bucket_cache == NULL)
+ while (dbf->bucket->count == dbf->header->bucket_elems)
{
- if (_gdbm_init_cache (dbf, DEFAULT_CACHESIZE) == -1)
+ int new_bits; /* The number of bits for the new buckets. */
+ cache_elem *newcache[2]; /* Location in the cache for the buckets. */
+ off_t adr_0; /* File address of the new bucket 0. */
+ off_t adr_1; /* File address of the new bucket 1. */
+ avail_elem old_bucket; /* Avail Struct for the old bucket. */
+
+ off_t dir_start0; /* Used in updating the directory. */
+ off_t dir_start1;
+ off_t dir_end;
+
+ new_bits = dbf->bucket->bucket_bits + 1;
+ /* Allocate two new buckets */
+ adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
+ switch (cache_lookup (dbf, adr_0, dbf->cache_mru, &newcache[0]))
{
- _gdbm_fatal (dbf, _("couldn't init cache"));
+ case node_new:
+ break;
+
+ case node_found:
+ /* should not happen */
+ GDBM_DEBUG (GDBM_DEBUG_ERR,
+ "%s: bucket found where it should not",
+ dbf->name);
+ GDBM_SET_ERRNO (dbf, GDBM_BUCKET_CACHE_CORRUPTED, TRUE);
return -1;
- }
- }
- while (dbf->bucket->count == dbf->header->bucket_elems)
- {
- /* Initialize the "new" buckets in the cache. */
- do
- {
- dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
- cache_0 = dbf->last_read;
- }
- while (dbf->bucket_cache[cache_0].ca_bucket == dbf->bucket);
- bucket[0] = dbf->bucket_cache[cache_0].ca_bucket;
- if (dbf->bucket_cache[cache_0].ca_changed)
- {
- if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_0]))
- return -1;
+ case node_failure:
+ return -1;
}
- do
- {
- dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
- cache_1 = dbf->last_read;
- }
- while (dbf->bucket_cache[cache_1].ca_bucket == dbf->bucket);
- bucket[1] = dbf->bucket_cache[cache_1].ca_bucket;
- if (dbf->bucket_cache[cache_1].ca_changed)
+ _gdbm_new_bucket (dbf, newcache[0]->ca_bucket, new_bits);
+
+ adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
+ switch (cache_lookup (dbf, adr_1, newcache[0], &newcache[1]))
{
- if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_1]))
- return -1;
+ case node_new:
+ break;
+
+ case node_found:
+ /* should not happen */
+ GDBM_DEBUG (GDBM_DEBUG_ERR,
+ "%s: bucket found where it should not",
+ dbf->name);
+ GDBM_SET_ERRNO (dbf, GDBM_BUCKET_CACHE_CORRUPTED, TRUE);
+ return -1;
+
+ case node_failure:
+ return -1;
}
- new_bits = dbf->bucket->bucket_bits + 1;
- _gdbm_new_bucket (dbf, bucket[0], new_bits);
- _gdbm_new_bucket (dbf, bucket[1], new_bits);
- adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
- if (adr_0 == 0)
- return -1;
- dbf->bucket_cache[cache_0].ca_adr = adr_0;
- adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
- if (adr_1 == 0)
- return -1;
- dbf->bucket_cache[cache_1].ca_adr = adr_1;
+ _gdbm_new_bucket (dbf, newcache[1]->ca_bucket, new_bits);
/* Double the directory size if necessary. */
if (dbf->header->dir_bits == dbf->bucket->bucket_bits)
{
+ off_t *new_dir; /* Pointer to the new directory. */
+ int dir_size; /* Size of the new directory. */
+ off_t dir_adr; /* Address of the new directory. */
+
if (dbf->header->dir_size >= GDBM_MAX_DIR_HALF)
{
GDBM_SET_ERRNO (dbf, GDBM_DIR_OVERFLOW, TRUE);
@@ -335,40 +411,44 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
/* Copy all elements in dbf->bucket into the new buckets. */
for (index = 0; index < dbf->header->bucket_elems; index++)
{
- old_el = &dbf->bucket->h_table[index];
- select = (old_el->hash_value >> (GDBM_HASH_BITS - new_bits)) & 1;
- elem_loc = old_el->hash_value % dbf->header->bucket_elems;
- while (bucket[select]->h_table[elem_loc].hash_value != -1)
+ bucket_element *old_el = &dbf->bucket->h_table[index];
+ hash_bucket *bucket =
+ newcache[(old_el->hash_value >> (GDBM_HASH_BITS - new_bits)) & 1]->ca_bucket;
+ int elem_loc = old_el->hash_value % dbf->header->bucket_elems;
+ while (bucket->h_table[elem_loc].hash_value != -1)
elem_loc = (elem_loc + 1) % dbf->header->bucket_elems;
- bucket[select]->h_table[elem_loc] = *old_el;
- bucket[select]->count++;
+ bucket->h_table[elem_loc] = *old_el;
+ bucket->count++;
}
- /* Allocate avail space for the bucket[1]. */
- bucket[1]->bucket_avail[0].av_adr
+ /* Allocate avail space for the newcache[1]->ca_bucket. */
+ newcache[1]->ca_bucket->bucket_avail[0].av_adr
= _gdbm_alloc (dbf, dbf->header->block_size);
- if (bucket[1]->bucket_avail[0].av_adr == 0)
+ if (newcache[1]->ca_bucket->bucket_avail[0].av_adr == 0)
return -1;
- bucket[1]->bucket_avail[0].av_size = dbf->header->block_size;
- bucket[1]->av_count = 1;
+ newcache[1]->ca_bucket->bucket_avail[0].av_size
+ = dbf->header->block_size;
+ newcache[1]->ca_bucket->av_count = 1;
- /* Copy the avail elements in dbf->bucket to bucket[0]. */
- bucket[0]->av_count = dbf->bucket->av_count;
+ /* Copy the avail elements in dbf->bucket to newcache[0]->ca_bucket. */
+ newcache[0]->ca_bucket->av_count = dbf->bucket->av_count;
index = 0;
- index1 = 0;
- if (bucket[0]->av_count == BUCKET_AVAIL)
+ if (newcache[0]->ca_bucket->av_count == BUCKET_AVAIL)
{
- /* The avail is full, move the first one to bucket[1]. */
+ /* The avail is full, move the first one to newcache[1]->ca_bucket.*/
_gdbm_put_av_elem (dbf->bucket->bucket_avail[0],
- bucket[1]->bucket_avail,
- &bucket[1]->av_count,
+ newcache[1]->ca_bucket->bucket_avail,
+ &newcache[1]->ca_bucket->av_count,
dbf->coalesce_blocks);
index = 1;
- bucket[0]->av_count--;
+ newcache[0]->ca_bucket->av_count--;
}
+
+ index1 = 0;
for (; index < dbf->bucket->av_count; index++)
{
- bucket[0]->bucket_avail[index1++] = dbf->bucket->bucket_avail[index];
+ newcache[0]->ca_bucket->bucket_avail[index1++]
+ = dbf->bucket->bucket_avail[index];
}
/* Update the directory. We have new file addresses for both buckets. */
@@ -381,10 +461,9 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
for (index = dir_start1; index < dir_end; index++)
dbf->dir[index] = adr_1;
-
/* Set changed flags. */
- dbf->bucket_cache[cache_0].ca_changed = TRUE;
- dbf->bucket_cache[cache_1].ca_changed = TRUE;
+ newcache[0]->ca_changed = TRUE;
+ newcache[1]->ca_changed = TRUE;
dbf->bucket_changed = TRUE;
dbf->directory_changed = TRUE;
dbf->second_changed = TRUE;
@@ -395,29 +474,23 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
/* Invalidate old cache entry. */
old_bucket.av_adr = dbf->cache_entry->ca_adr;
old_bucket.av_size = dbf->header->bucket_size;
- dbf->cache_entry->ca_adr = 0;
- dbf->cache_entry->ca_changed = FALSE;
+ cache_elem_free (dbf, dbf->cache_entry);
/* Set dbf->bucket to the proper bucket. */
- if (dbf->dir[dbf->bucket_dir] == adr_0)
+ if (dbf->dir[dbf->bucket_dir] != adr_0)
{
- dbf->bucket = bucket[0];
- dbf->cache_entry = &dbf->bucket_cache[cache_0];
- _gdbm_put_av_elem (old_bucket,
- bucket[1]->bucket_avail,
- &bucket[1]->av_count,
- dbf->coalesce_blocks);
- }
- else
- {
- dbf->bucket = bucket[1];
- dbf->cache_entry = &dbf->bucket_cache[cache_1];
- _gdbm_put_av_elem (old_bucket,
- bucket[0]->bucket_avail,
- &bucket[0]->av_count,
- dbf->coalesce_blocks);
+ cache_elem *t = newcache[0];
+ newcache[0] = newcache[1];
+ newcache[1] = t;
}
-
+
+ _gdbm_put_av_elem (old_bucket,
+ newcache[1]->ca_bucket->bucket_avail,
+ &newcache[1]->ca_bucket->av_count,
+ dbf->coalesce_blocks);
+ lru_unlink_elem (dbf, newcache[0]);
+ lru_link_elem (dbf, newcache[0], NULL);
+ set_cache_entry (dbf, newcache[0]);
}
/* Get rid of old directories. */
@@ -460,3 +533,159 @@ _gdbm_write_bucket (GDBM_FILE dbf, cache_elem *ca_entry)
ca_entry->ca_data.elem_loc = -1;
return 0;
}
+
+/* Cache manipulation functions. */
+
+/* Initialize the bucket cache. */
+int
+_gdbm_cache_init (GDBM_FILE dbf, size_t size)
+{
+ if (size < 3)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (size == dbf->cache_size)
+ return 0;
+
+ while (size < dbf->cache_num)
+ {
+ /* Flush the least recently used element */
+ cache_elem *elem = dbf->cache_lru;
+ if (elem->ca_changed)
+ {
+ if (_gdbm_write_bucket (dbf, elem))
+ return -1;
+ }
+ cache_elem_free (dbf, elem);
+ }
+
+ dbf->cache_size = size;
+
+ return 0;
+}
+
+/* Free the bucket cache */
+void
+_gdbm_cache_free (GDBM_FILE dbf)
+{
+ cache_elem *elem;
+
+ while (dbf->cache_lru)
+ cache_elem_free (dbf, dbf->cache_lru);
+ _gdbm_cache_tree_destroy (dbf->cache_tree);
+ while ((elem = dbf->cache_avail) != NULL)
+ {
+ dbf->cache_avail = elem->ca_next;
+ free (elem->ca_data.dptr);
+ free (elem->ca_bucket);
+ free (elem);
+ }
+}
+
+/* Flush cache content to disk. */
+int
+_gdbm_cache_flush (GDBM_FILE dbf)
+{
+ cache_elem *elem;
+ for (elem = dbf->cache_lru; elem; elem = elem->ca_prev)
+ {
+ if (elem->ca_changed)
+ {
+ if (_gdbm_write_bucket (dbf, elem))
+ return -1;
+ }
+ }
+ return 0;
+}
+
+int
+_gdbm_fetch_data (GDBM_FILE dbf, off_t off, size_t size, void *buf)
+{
+ off_t bucket_adr;
+ off_t file_pos;
+ int rc;
+ cache_elem *elem;
+ char *dst = buf;
+
+ bucket_adr = (off / dbf->header->bucket_size)
+ * dbf->header->bucket_size;
+ off -= bucket_adr;
+ while (size)
+ {
+ size_t nbytes;
+
+ switch (cache_lookup (dbf, bucket_adr, dbf->cache_mru, &elem))
+ {
+ case node_found:
+ break;
+
+ case node_new:
+ /* Position the file pointer */
+ file_pos = gdbm_file_seek (dbf, bucket_adr, SEEK_SET);
+ if (file_pos != bucket_adr)
+ {
+ GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE);
+ cache_elem_free (dbf, elem);
+ _gdbm_fatal (dbf, _("lseek error"));
+ return -1;
+ }
+
+ /* Read the bucket. */
+ rc = _gdbm_full_read (dbf, elem->ca_bucket,
+ dbf->header->bucket_size);
+ if (rc)
+ {
+ GDBM_DEBUG (GDBM_DEBUG_ERR,
+ "%s: error reading data bucket: %s",
+ dbf->name, gdbm_db_strerror (dbf));
+ dbf->need_recovery = TRUE;
+ cache_elem_free (dbf, elem);
+ _gdbm_fatal (dbf, gdbm_db_strerror (dbf));
+ return -1;
+ }
+ break;
+
+ case node_failure:
+ return -1;
+ }
+
+ nbytes = dbf->header->bucket_size - off;
+ if (nbytes > size)
+ nbytes = size;
+ memcpy (dst, (char*) elem->ca_bucket + off, nbytes);
+ dst += nbytes;
+ size -= nbytes;
+ bucket_adr++;
+ off = 0;
+ }
+ return 0;
+}
+
+void
+gdbm_get_cache_stats (GDBM_FILE dbf,
+ size_t *access_count,
+ size_t *cache_count,
+ struct gdbm_cache_stat *bstat,
+ size_t nstat)
+{
+ if (access_count)
+ *access_count = dbf->cache_access_count;
+ if (cache_count)
+ *cache_count = dbf->cache_num;
+ if (bstat)
+ {
+ size_t i;
+ cache_elem *elem;
+
+ if (nstat > dbf->cache_num)
+ nstat = dbf->cache_num;
+
+ for (i = 0, elem = dbf->cache_mru; i < nstat; i++, elem = elem->ca_next)
+ {
+ bstat[i].adr = elem->ca_adr;
+ bstat[i].hits = elem->ca_hits;
+ }
+ }
+}
diff --git a/src/cachetree.c b/src/cachetree.c
new file mode 100644
index 0000000..b27d8a7
--- /dev/null
+++ b/src/cachetree.c
@@ -0,0 +1,528 @@
+/* cachetree.c - Implementation of the red-black tree for cache lookups. */
+
+/* This file is part of GDBM, the GNU data base manager.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+
+ GDBM is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GDBM is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GDBM. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "autoconf.h"
+#include "gdbmdefs.h"
+#define NDEBUG
+#include "assert.h"
+
+enum cache_node_color { RED, BLACK };
+
+typedef struct cache_node cache_node;
+struct cache_node
+{
+ cache_node *left, *right, *parent;
+ enum cache_node_color color;
+ cache_elem *elem;
+};
+
+struct cache_tree
+{
+ cache_node *root; /* Root of the tree */
+ cache_node *avail; /* List of available nodes, linked by parent field */
+ GDBM_FILE dbf; /* Database this tree belongs to */
+};
+
+/* Allocate and return a new node. Pick the head item from the avail
+ list and update the avail pointer. If the list is empty, malloc
+ a new node.
+ All members in the returned node are filled with 0.
+*/
+static cache_node *
+rbt_node_alloc (cache_tree *tree)
+{
+ cache_node *n;
+
+ n = tree->avail;
+ if (n)
+ tree->avail = n->parent;
+ else
+ {
+ n = malloc (sizeof (*n));
+ if (!n)
+ return NULL;
+ }
+ memset (n, 0, sizeof (*n));
+ return n;
+}
+
+/* Return the node N to the avail list in TREE. */
+static void
+rbt_node_dealloc (cache_tree *tree, cache_node *n)
+{
+ n->parent = tree->avail;
+ tree->avail = n;
+}
+
+/* Red-black tree properties:
+ 1. Each node is either red or black.
+ 2. The root node is black.
+ 3. All leaves are black and contain no data.
+ 4. Every red node has two children, and both are black.
+ IOW, the parent of every red node is black.
+ 5. All paths from any given node to its leaf nodes contain the same
+ number of black nodes.
+ */
+
+/* Auxiliary functions for accessing nodes. */
+
+/* Return the grandparent node of N.
+ Prerequisite: N may not be root.
+*/
+static inline cache_node *
+grandparent (cache_node *n)
+{
+ return n->parent->parent;
+}
+
+/* Return the sibling node of N.
+ Prerequisite: N may not be root.
+*/
+static inline cache_node *
+sibling (cache_node *n)
+{
+ return (n == n->parent->left) ? n->parent->right : n->parent->left;
+}
+
+/* Return the uncle node of N.
+ Prerequisite: N must be at least 2 nodes away from root.
+*/
+static inline cache_node *
+uncle (cache_node *n)
+{
+ return sibling (n->parent);
+}
+
+/* Returns the color of the node N.
+ Empty leaves are represented by NULL, therefore NULL is assumed to
+ be black (see property 3).
+*/
+static inline enum cache_node_color
+node_color (cache_node *n)
+{
+ return n == NULL ? BLACK : n->color;
+}
+
+/* Replace the OLDN with NEWN.
+ Does not modify OLDN. */
+static void
+replace_node (cache_tree *tree, cache_node *oldn, cache_node *newn)
+{
+ if (oldn->parent == NULL)
+ tree->root = newn;
+ else if (oldn == oldn->parent->left)
+ oldn->parent->left = newn;
+ else
+ oldn->parent->right = newn;
+
+ if (newn != NULL)
+ newn->parent = oldn->parent;
+}
+
+/* Rotate the TREE left over the node N. */
+static void
+rotate_left (cache_tree *tree, cache_node *n)
+{
+ cache_node *right = n->right;
+ replace_node (tree, n, right);
+ n->right = right->left;
+ if (right->left != NULL)
+ right->left->parent = n;
+ right->left = n;
+ n->parent = right;
+}
+
+/* Rotate the TREE right over the node N. */
+static void
+rotate_right (cache_tree *tree, cache_node *n)
+{
+ cache_node *left = n->left;
+ replace_node (tree, n, left);
+ n->left = left->right;
+ if (left->right != NULL)
+ left->right->parent = n;
+ left->right = n;
+ n->parent = left;
+}
+
+/* Node deletion */
+static void rbt_delete_fixup (cache_tree *tree, cache_node *n);
+
+/* Remove N from the TREE. */
+void
+_gdbm_rbt_remove_node (cache_tree *tree, cache_node *n)
+{
+ cache_node *child;
+
+ /* If N has both left and right children, reduce the problem to
+ the node with only one child. To do so, find the in-order
+ predecessor of N, copy its value (elem) to N and then delete
+ the predecessor. */
+ if (n->left != NULL && n->right != NULL)
+ {
+ cache_node *p;
+ for (p = n->left; p->right; p = p->right)
+ ;
+ n->elem = p->elem;
+ n->elem->ca_node = n;
+ n = p;
+ }
+
+ /* N has only one child. Select it. */
+ child = n->left ? n->left : n->right;
+ if (node_color (n) == BLACK)
+ {
+ n->color = node_color (child);
+ rbt_delete_fixup (tree, n);
+ }
+ replace_node (tree, n, child);
+ if (n->parent == NULL && child != NULL) /* root should be black */
+ child->color = BLACK;
+
+ /* Return N to the avail pool */
+ rbt_node_dealloc (tree, n);
+}
+
+static void
+rbt_delete_fixup (cache_tree *tree, cache_node *n)
+{
+ while (1)
+ {
+ if (n->parent == NULL)
+ {
+ /* If N has become the root node, deletion resulted in removing
+ one black node (prior root) from every path, so all properties
+ still hold.
+ */
+ return;
+ }
+ else
+ {
+ /* If N has a red sibling, change the colors of the parent and
+ sibling and rotate about the parent. Thus, the sibling becomes
+ grandparent and we can proceed to the next case.
+ */
+ if (node_color (sibling (n)) == RED)
+ {
+ n->parent->color = RED;
+ sibling (n)->color = BLACK;
+ if (n == n->parent->left)
+ rotate_left (tree, n->parent);
+ else
+ rotate_right (tree, n->parent);
+ }
+
+ /* If the parent, sibling and nephews are all black, paint the
+ sibling red. This means one black node was removed from all
+ paths passing through the parent, so we recurse to the beginning
+ of the loop with parent as the argument to restore the properties.
+ This is the only branch that loops.
+ */
+ if (node_color (n->parent) == BLACK
+ && node_color (sibling (n)) == BLACK
+ && node_color (sibling (n)->left) == BLACK
+ && node_color (sibling (n)->right) == BLACK)
+ {
+ sibling (n)->color = RED;
+ n = n->parent;
+ continue;
+ }
+ else
+ {
+ /* If the sibling and nephews are black but the parent is red,
+ swap the colors of the sibling and parent. The properties
+ are then restored.
+ */
+ if (node_color (n->parent) == RED
+ && node_color (sibling (n)) == BLACK
+ && node_color (sibling (n)->left) == BLACK
+ && node_color (sibling (n)->right) == BLACK)
+ {
+ sibling (n)->color = RED;
+ n->parent->color = BLACK;
+ }
+ else
+ {
+ /* N is the left child of its parent, its sibling is black,
+ and the sibling's right child is black. Swap the colors
+ of the sibling and its left sibling and rotate right
+ over the sibling.
+ */
+ if (n == n->parent->left
+ && node_color (sibling (n)) == BLACK
+ && node_color (sibling (n)->left) == RED
+ && node_color (sibling (n)->right) == BLACK)
+ {
+ sibling (n)->color = RED;
+ sibling (n)->left->color = BLACK;
+ rotate_right (tree, sibling (n));
+ }
+ else if (n == n->parent->right
+ && node_color (sibling (n)) == BLACK
+ && node_color (sibling (n)->right) == RED
+ && node_color (sibling (n)->left) == BLACK)
+ {
+ /* The mirror case is handled similarly. */
+ sibling (n)->color = RED;
+ sibling (n)->right->color = BLACK;
+ rotate_left (tree, sibling (n));
+ }
+ /* N is the left child of its parent, its sibling is black
+ and the sibling's right child is red. Swap the colors
+ of the parent and sibling, paint the sibling's right
+ child black and rotate left at the parent. Similarly
+ for the mirror case. This achieves the following:
+
+ . A black node is added to all paths passing through N;
+ . A black node is removed from all paths through the
+ sibling's red child.
+ . The latter is painted black which restores missing
+ black node in all paths through the sibling's red child.
+
+ Another sibling's child becomes a child of N's parent
+ during the rotation and is therefore not affected.
+ */
+ sibling (n)->color = node_color (n->parent);
+ n->parent->color = BLACK;
+ if (n == n->parent->left)
+ {
+ sibling (n)->right->color = BLACK;
+ rotate_left (tree, n->parent);
+ }
+ else
+ {
+ sibling (n)->left->color = BLACK;
+ rotate_right (tree, n->parent);
+ }
+ }
+ }
+ }
+ break;
+ }
+}
+
+/* Insertion */
+static void rbt_insert_fixup (cache_tree *tree, cache_node *n);
+
+/* Look up the node with elem->ca_adr equal to ADR.
+ If found, put it in *RETVAL and return node_found.
+
+ Otherwise, if INSERT is TRUE, create a new node and insert it in the
+ appropriate place in the tree. Store the address of the newly created
+ node in *RETVAL and return node_new. If a new node cannot be created
+ (memory exhausted), return node_failure.
+
+ Otherwise, if INSERT is FALSE, store NULL in *RETVAL and return node_new.
+*/
+static int
+cache_tree_lookup (cache_tree *tree, off_t adr, int insert,
+ cache_node **retval)
+{
+ int res;
+ cache_node *node, *parent = NULL;
+ cache_node **nodeptr;
+
+ nodeptr = &tree->root;
+ while ((node = *nodeptr) != NULL)
+ {
+ if (adr == node->elem->ca_adr)
+ break;
+ parent = node;
+ if (adr < node->elem->ca_adr)
+ nodeptr = &node->left;
+ else
+ nodeptr = &node->right;
+ }
+
+ if (node)
+ {
+ res = node_found;
+ }
+ else
+ {
+ if (insert)
+ {
+ node = rbt_node_alloc (tree);
+ if (!node)
+ return node_failure;
+ node->elem = _gdbm_cache_elem_new (tree->dbf, adr);
+ if (!node->elem)
+ {
+ rbt_node_dealloc (tree, node);
+ return node_failure;
+ }
+ node->elem->ca_node = node;
+ *nodeptr = node;
+ node->parent = parent;
+ rbt_insert_fixup (tree, node);
+ }
+ res = node_new;
+ }
+ *retval = node;
+ return res;
+}
+
+static void
+rbt_insert_fixup (cache_tree *tree, cache_node *n)
+{
+ while (1)
+ {
+ if (n->parent == NULL)
+ {
+ /* Node was inserted at the root of the tree.
+ The root node must be black (property 2). Changing its color
+ to black would add one black node to every path, which means
+ the property 5 would remain satisfied. So we simply paint the
+ node black.
+ */
+ n->color = BLACK;
+ }
+ else if (node_color (n->parent) == BLACK)
+ {
+ /* The node has black parent.
+ All properties are satisfied. There's no need to change anything.
+ */
+ return;
+ }
+ else if (node_color (uncle (n)) == RED)
+ {
+ /* The uncle node is red.
+ Repaint the parent and uncle black and the grandparent red. This
+ would satisfy 4. However, if the grandparent is root, this would
+ violate the property 2. So we repaint the grandparent by
+ re-entering the fixup loop with grandparent as the node.
+ This is the only branch that loops.
+ */
+ n->parent->color = BLACK;
+ uncle (n)->color = BLACK;
+ n = grandparent (n);
+ n->color = RED;
+ continue;
+ }
+ else
+ {
+ /* The new node is the right child of its parent and the parent is
+ the left child of the grandparent. Rotate left about the parent.
+ Mirror case: The new node is the left child of its parent and the
+ parent is the right child of the grandparent. Rotate right about
+ the parent. This fixes the properties for the rbt_insert_5.
+ */
+ if (n == n->parent->right && n->parent == grandparent (n)->left)
+ {
+ rotate_left (tree, n->parent);
+ n = n->left;
+ }
+ else if (n == n->parent->left && n->parent == grandparent (n)->right)
+ {
+ rotate_right (tree, n->parent);
+ n = n->right;
+ }
+
+ /* The new node is the left child of its parent and the parent is the
+ left child of the grandparent. Rotate right about the grandparent.
+ Mirror case: The new node is the right child of its parent and the
+ parent
+ is the right child of the grandparent. Rotate left.
+ */
+ n->parent->color = BLACK;
+ grandparent (n)->color = RED;
+ if (n == n->parent->left && n->parent == grandparent (n)->left)
+ {
+ rotate_right (tree, grandparent (n));
+ }
+ else
+ {
+ rotate_left (tree, grandparent (n));
+ }
+ }
+ break;
+ }
+}
+
+/* Interface functions */
+
+/* Create a cache tree structure for the database file DBF. */
+cache_tree *
+_gdbm_cache_tree_alloc (GDBM_FILE dbf)
+{
+ cache_tree *t = malloc (sizeof (*t));
+ if (t)
+ {
+ t->root = NULL;
+ t->avail = NULL;
+ t->dbf = dbf;
+ }
+ return t;
+}
+
+/* Free the memory used by the TREE. */
+void
+_gdbm_cache_tree_destroy (cache_tree *tree)
+{
+ cache_node *n;
+ while ((n = tree->root) != NULL)
+ _gdbm_rbt_remove_node (tree, n);
+ while ((n = tree->avail) != NULL)
+ {
+ tree->avail = n->parent;
+ free (n);
+ }
+ free (tree);
+}
+
+/* Delete the node with ELEM from the TREE.
+ Note, that it actually removes the node whith elem->ca_adr equal to
+ that of ELEM, which means that it's elem pointer could theoretically
+ have differ from ELEM. However, the actual algorithm ensures it is
+ the same.
+*/
+int
+_gdbm_cache_tree_delete (cache_tree *tree, cache_elem *elem)
+{
+ cache_node *n;
+
+ switch (cache_tree_lookup (tree, elem->ca_adr, 0, &n))
+ {
+ case node_found:
+ assert (n->elem == elem);
+ _gdbm_rbt_remove_node (tree, n);
+ break;
+
+ case node_new:
+ break;
+
+ case node_failure:
+ return -1;
+ }
+ return 0;
+}
+
+/* Look up the node with elem->ca_adr equal to ADR.
+ If found, store its pointer in *RETVAL and return node_found.
+ Otherwise, return node_new and don't touch RETVAL.
+*/
+int
+_gdbm_cache_tree_lookup (cache_tree *tree, off_t adr, cache_elem **retval)
+{
+ cache_node *n;
+ int rc = cache_tree_lookup (tree, adr, TRUE, &n);
+ if (rc != node_failure)
+ *retval = n->elem;
+ return rc;
+}
+
diff --git a/src/findkey.c b/src/findkey.c
index 6e4bccd..f82b422 100644
--- a/src/findkey.c
+++ b/src/findkey.c
@@ -43,11 +43,9 @@ gdbm_bucket_element_valid_p (GDBM_FILE dbf, int elem_loc)
char *
_gdbm_read_entry (GDBM_FILE dbf, int elem_loc)
{
- int rc;
int key_size;
int data_size;
size_t dsize;
- off_t file_pos;
data_cache_elem *data_ca;
/* Is it already in the cache? */
@@ -103,26 +101,9 @@ _gdbm_read_entry (GDBM_FILE dbf, int elem_loc)
}
}
- /* Read into the cache. */
- file_pos = gdbm_file_seek (dbf, dbf->bucket->h_table[elem_loc].data_pointer,
- SEEK_SET);
- if (file_pos != dbf->bucket->h_table[elem_loc].data_pointer)
- {
- GDBM_SET_ERRNO2 (dbf, GDBM_FILE_SEEK_ERROR, TRUE, GDBM_DEBUG_LOOKUP);
- _gdbm_fatal (dbf, _("lseek error"));
- return NULL;
- }
-
- rc = _gdbm_full_read (dbf, data_ca->dptr, key_size+data_size);
- if (rc)
- {
- GDBM_DEBUG (GDBM_DEBUG_ERR|GDBM_DEBUG_LOOKUP|GDBM_DEBUG_READ,
- "%s: error reading entry: %s",
- dbf->name, gdbm_db_strerror (dbf));
- dbf->need_recovery = TRUE;
- _gdbm_fatal (dbf, gdbm_db_strerror (dbf));
- return NULL;
- }
+ if (_gdbm_fetch_data (dbf, dbf->bucket->h_table[elem_loc].data_pointer,
+ key_size + data_size, data_ca->dptr))
+ return NULL;
return data_ca->dptr;
}
diff --git a/src/gdbm.h.in b/src/gdbm.h.in
index e537d54..6a23ea6 100644
--- a/src/gdbm.h.in
+++ b/src/gdbm.h.in
@@ -228,9 +228,10 @@ extern int gdbm_copy_meta (GDBM_FILE dst, GDBM_FILE src);
# define GDBM_FILE_CLOSE_ERROR 37
# define GDBM_FILE_SYNC_ERROR 38
# define GDBM_FILE_TRUNCATE_ERROR 39
-
+# define GDBM_BUCKET_CACHE_CORRUPTED 40
+
# define _GDBM_MIN_ERRNO 0
-# define _GDBM_MAX_ERRNO GDBM_FILE_TRUNCATE_ERROR
+# define _GDBM_MAX_ERRNO GDBM_BUCKET_CACHE_CORRUPTED
/* This one was never used and will be removed in the future */
# define GDBM_UNKNOWN_UPDATE GDBM_UNKNOWN_ERROR
@@ -275,8 +276,20 @@ extern void gdbm_debug_parse_state (int (*f) (void *, int, char const *),
void *d);
extern void gdbm_debug_datum (datum dat, char const *pfx);
-
#endif
+
+/* Cache statistics */
+struct gdbm_cache_stat
+{
+ off_t adr;
+ size_t hits;
+};
+
+void gdbm_get_cache_stats (GDBM_FILE dbf,
+ size_t *access_count,
+ size_t *cache_count,
+ struct gdbm_cache_stat *bstat,
+ size_t nstat);
# if defined(__cplusplus) || defined(c_plusplus)
}
diff --git a/src/gdbmclose.c b/src/gdbmclose.c
index 6e21320..71fa128 100644
--- a/src/gdbmclose.c
+++ b/src/gdbmclose.c
@@ -29,7 +29,6 @@
int
gdbm_close (GDBM_FILE dbf)
{
- int index; /* For freeing the bucket cache. */
int syserrno;
gdbm_set_errno (dbf, GDBM_NO_ERROR, FALSE);
@@ -58,15 +57,8 @@ gdbm_close (GDBM_FILE dbf)
free (dbf->name);
free (dbf->dir);
- if (dbf->bucket_cache != NULL)
- {
- for (index = 0; index < dbf->cache_size; index++)
- {
- free (dbf->bucket_cache[index].ca_bucket);
- free (dbf->bucket_cache[index].ca_data.dptr);
- }
- free (dbf->bucket_cache);
- }
+ _gdbm_cache_free (dbf);
+
free (dbf->header);
free (dbf);
if (gdbm_errno)
diff --git a/src/gdbmdefs.h b/src/gdbmdefs.h
index 7dc15ee..3297cd1 100644
--- a/src/gdbmdefs.h
+++ b/src/gdbmdefs.h
@@ -157,13 +157,23 @@ typedef struct
int elem_loc;
} data_cache_elem;
-typedef struct
+typedef struct cache_elem_s cache_elem;
+
+struct cache_elem_s
{
hash_bucket * ca_bucket;
off_t ca_adr;
- char ca_changed; /* Data in the bucket changed. */
+ char ca_changed; /* Data in the bucket changed. */
data_cache_elem ca_data;
-} cache_elem;
+ cache_elem *ca_prev, *ca_next; /* Previous and next elements in LRU list.
+ If the item is in cache_avail list, only
+ ca_next is used. It points to the next
+ available element. */
+ size_t ca_hits; /* Number of times this element was requested */
+ struct cache_node *ca_node;
+};
+
+typedef struct cache_tree cache_tree;
/* This final structure contains all main memory based information for
a gdbm file. This allows multiple gdbm files to be opened at the same
@@ -225,19 +235,27 @@ struct gdbm_file_info
off_t *dir;
/* The bucket cache. */
- cache_elem *bucket_cache;
- size_t cache_size;
- size_t last_read;
-
+ size_t cache_size; /* Cache capacity */
+ size_t cache_num; /* Actual number of elements in cache */
+ /* Cache elements form a binary search tree. */
+ cache_tree *cache_tree;
+ /* Cache elements are linked in a list sorted by relative access time */
+ cache_elem *cache_mru; /* Most recently used element - head of the list */
+ cache_elem *cache_lru; /* Last recently used element - tail of the list */
+ cache_elem *cache_avail; /* Pool of available elements (linked by prev, next)
+ */
+ /* Pointer to the current bucket's cache entry. */
+ cache_elem *cache_entry;
+
/* Points to the current hash bucket in the cache. */
hash_bucket *bucket;
-
+
/* The directory entry used to get the current hash bucket. */
int bucket_dir;
- /* Pointer to the current bucket's cache entry. */
- cache_elem *cache_entry;
-
+ /* Number of cache accesses */
+ size_t cache_access_count;
+
/* Bookkeeping of things that need to be written back at the
end of an update. */
unsigned header_changed :1;
diff --git a/src/gdbmerrno.c b/src/gdbmerrno.c
index 8aa58a4..392f23b 100644
--- a/src/gdbmerrno.c
+++ b/src/gdbmerrno.c
@@ -139,7 +139,8 @@ const char * const gdbm_errlist[_GDBM_MAX_ERRNO+1] = {
[GDBM_BAD_DIR_ENTRY] = N_("Invalid directory entry"),
[GDBM_FILE_CLOSE_ERROR] = N_("Error closing file"),
[GDBM_FILE_SYNC_ERROR] = N_("Error synchronizing file"),
- [GDBM_FILE_TRUNCATE_ERROR] = N_("Error truncating file")
+ [GDBM_FILE_TRUNCATE_ERROR] = N_("Error truncating file"),
+ [GDBM_BUCKET_CACHE_CORRUPTED] = N_("Bucket cache corrupted")
};
const char *
diff --git a/src/gdbmopen.c b/src/gdbmopen.c
index c68834c..621d6c7 100644
--- a/src/gdbmopen.c
+++ b/src/gdbmopen.c
@@ -269,9 +269,11 @@ gdbm_fd_open (int fd, const char *file_name, int block_size,
dbf->dir = NULL;
dbf->bucket = NULL;
dbf->header = NULL;
- dbf->bucket_cache = NULL;
- dbf->cache_size = 0;
+ /* Initialize cache */
+ dbf->cache_tree = _gdbm_cache_tree_alloc (dbf);
+ _gdbm_cache_init (dbf, DEFAULT_CACHESIZE);
+
dbf->memory_mapping = FALSE;
dbf->mapped_size_max = SIZE_T_MAX;
dbf->mapped_region = NULL;
@@ -639,10 +641,8 @@ gdbm_fd_open (int fd, const char *file_name, int block_size,
#endif
/* Finish initializing dbf. */
- dbf->last_read = -1;
dbf->bucket = NULL;
dbf->bucket_dir = 0;
- dbf->cache_entry = NULL;
dbf->header_changed = FALSE;
dbf->directory_changed = FALSE;
dbf->bucket_changed = FALSE;
@@ -717,46 +717,3 @@ gdbm_open (const char *file, int block_size, int flags, int mode,
fatal_func);
}
-/* Initialize the bucket cache. */
-int
-_gdbm_init_cache (GDBM_FILE dbf, size_t size)
-{
- int index;
-
- if (dbf->bucket_cache == NULL)
- {
- dbf->bucket_cache = calloc (size, sizeof(cache_elem));
- if (dbf->bucket_cache == NULL)
- {
- GDBM_SET_ERRNO (dbf, GDBM_MALLOC_ERROR, TRUE);
- return -1;
- }
- dbf->cache_size = size;
-
- for (index = 0; index < size; index++)
- {
- (dbf->bucket_cache[index]).ca_bucket =
- malloc (dbf->header->bucket_size);
- if ((dbf->bucket_cache[index]).ca_bucket == NULL)
- {
- GDBM_SET_ERRNO (dbf, GDBM_MALLOC_ERROR, TRUE);
- return -1;
- }
- dbf->bucket_cache[index].ca_data.dptr = NULL;
- dbf->bucket_cache[index].ca_data.dsize = 0;
- _gdbm_cache_entry_invalidate (dbf, index);
- }
- dbf->bucket = dbf->bucket_cache[0].ca_bucket;
- dbf->cache_entry = &dbf->bucket_cache[0];
- }
- return 0;
-}
-
-void
-_gdbm_cache_entry_invalidate (GDBM_FILE dbf, int index)
-{
- dbf->bucket_cache[index].ca_adr = 0;
- dbf->bucket_cache[index].ca_changed = FALSE;
- dbf->bucket_cache[index].ca_data.hash_val = -1;
- dbf->bucket_cache[index].ca_data.elem_loc = -1;
-}
diff --git a/src/gdbmsetopt.c b/src/gdbmsetopt.c
index c132335..2e409f9 100644
--- a/src/gdbmsetopt.c
+++ b/src/gdbmsetopt.c
@@ -54,19 +54,13 @@ setopt_gdbm_setcachesize (GDBM_FILE dbf, void *optval, int optlen)
{
size_t sz;
- if (dbf->bucket_cache != NULL)
- {
- GDBM_SET_ERRNO (dbf, GDBM_OPT_ALREADY_SET, FALSE);
- return -1;
- }
-
/* Optval will point to the new size of the cache. */
if (get_size (optval, optlen, &sz))
{
GDBM_SET_ERRNO (dbf, GDBM_OPT_ILLEGAL, FALSE);
return -1;
}
- return _gdbm_init_cache (dbf, (sz > 9) ? sz : 10);
+ return _gdbm_cache_init (dbf, (sz > 9) ? sz : 10);
}
static int
diff --git a/src/gdbmtool.c b/src/gdbmtool.c
index ceffaef..b2ee956 100644
--- a/src/gdbmtool.c
+++ b/src/gdbmtool.c
@@ -321,26 +321,25 @@ _gdbm_print_avail_list (FILE *fp, GDBM_FILE dbf)
void
_gdbm_print_bucket_cache (FILE *fp, GDBM_FILE dbf)
{
- int index;
- char changed;
-
- if (dbf->bucket_cache != NULL)
+ if (dbf->cache_num)
{
+ int i;
+ cache_elem *elem;
+
fprintf (fp,
- _("Bucket Cache (size %zu):\n Index: Address Changed Data_Hash \n"),
- dbf->cache_size);
- for (index = 0; index < dbf->cache_size; index++)
+ _("Bucket Cache (size %zu/%zu):\n Index: Address Changed Data_Hash \n"),
+ dbf->cache_num, dbf->cache_size);
+ for (elem = dbf->cache_entry, i = 0; elem; elem = elem->ca_next, i++)
{
- changed = dbf->bucket_cache[index].ca_changed;
fprintf (fp, " %5d: %15lu %7s %x\n",
- index,
- (unsigned long) dbf->bucket_cache[index].ca_adr,
- (changed ? _("True") : _("False")),
- dbf->bucket_cache[index].ca_data.hash_val);
+ i,
+ (unsigned long) elem->ca_adr,
+ (elem->ca_changed ? _("True") : _("False")),
+ elem->ca_data.hash_val);
}
}
else
- fprintf (fp, _("Bucket cache has not been initialized.\n"));
+ fprintf (fp, _("Bucket cache is empty.\n"));
}
int
@@ -879,7 +878,7 @@ print_cache_begin (struct handler_param *param GDBM_ARG_UNUSED, size_t *exp_coun
if (checkdb ())
return 1;
if (exp_count)
- *exp_count = gdbm_file->bucket_cache ? gdbm_file->cache_size + 1 : 1;
+ *exp_count = gdbm_file->cache_num + 1;
return 0;
}
diff --git a/src/proto.h b/src/proto.h
index 57f9e3c..2bd7956 100644
--- a/src/proto.h
+++ b/src/proto.h
@@ -21,11 +21,15 @@
/* From bucket.c */
void _gdbm_new_bucket (GDBM_FILE, hash_bucket *, int);
int _gdbm_get_bucket (GDBM_FILE, int);
-int _gdbm_read_bucket_at (GDBM_FILE dbf, off_t off, hash_bucket *bucket,
- size_t size);
+int _gdbm_fetch_data (GDBM_FILE dbf, off_t off, size_t size, void *buf);
int _gdbm_split_bucket (GDBM_FILE, int);
int _gdbm_write_bucket (GDBM_FILE, cache_elem *);
+int _gdbm_cache_init (GDBM_FILE, size_t);
+void _gdbm_cache_free (GDBM_FILE dbf);
+int _gdbm_cache_flush (GDBM_FILE dbf);
+
+cache_elem *_gdbm_cache_elem_new (GDBM_FILE dbf, off_t adr);
/* From falloc.c */
off_t _gdbm_alloc (GDBM_FILE, int);
@@ -47,9 +51,6 @@ int _gdbm_end_update (GDBM_FILE);
void _gdbm_fatal (GDBM_FILE, const char *);
/* From gdbmopen.c */
-int _gdbm_init_cache (GDBM_FILE, size_t);
-void _gdbm_cache_entry_invalidate (GDBM_FILE, int);
-
int gdbm_avail_block_validate (GDBM_FILE dbf, avail_block *avblk);
int gdbm_bucket_avail_table_validate (GDBM_FILE dbf, hash_bucket *bucket);
@@ -86,6 +87,22 @@ int _gdbm_dump (GDBM_FILE dbf, FILE *fp);
/* From recover.c */
int _gdbm_next_bucket_dir (GDBM_FILE dbf, int bucket_dir);
+/* cachetree.c */
+cache_tree *_gdbm_cache_tree_alloc (GDBM_FILE dbf);
+void _gdbm_cache_tree_destroy (cache_tree *tree);
+int _gdbm_cache_tree_delete (cache_tree *tree, cache_elem *elem);
+void _gdbm_rbt_remove_node (cache_tree *tree, struct cache_node *n);
+
+/* Return codes for _gdbm_cache_tree_lookup. */
+enum
+ {
+ node_found, /* Returned element was found in cache. */
+ node_new, /* Returned element has been created and inserted to cache */
+ node_failure /* An error occurred. */
+ };
+
+int _gdbm_cache_tree_lookup (cache_tree *tree, off_t adr, cache_elem **retval);
+
/* I/O functions */
static inline ssize_t
gdbm_file_read (GDBM_FILE dbf, void *buf, size_t size)
diff --git a/src/recover.c b/src/recover.c
index 8d34bec..d4da31d 100644
--- a/src/recover.c
+++ b/src/recover.c
@@ -91,8 +91,6 @@ static int
_gdbm_finish_transfer (GDBM_FILE dbf, GDBM_FILE new_dbf,
gdbm_recovery *rcvr, int flags)
{
- int i;
-
/* Write everything. */
if (_gdbm_end_update (new_dbf))
{
@@ -145,15 +143,7 @@ _gdbm_finish_transfer (GDBM_FILE dbf, GDBM_FILE new_dbf,
free (dbf->header);
free (dbf->dir);
- if (dbf->bucket_cache != NULL)
- {
- for (i = 0; i < dbf->cache_size; i++)
- {
- free (dbf->bucket_cache[i].ca_bucket);
- free (dbf->bucket_cache[i].ca_data.dptr);
- }
- free (dbf->bucket_cache);
- }
+ _gdbm_cache_flush (dbf);
dbf->lock_type = new_dbf->lock_type;
dbf->desc = new_dbf->desc;
@@ -161,9 +151,14 @@ _gdbm_finish_transfer (GDBM_FILE dbf, GDBM_FILE new_dbf,
dbf->dir = new_dbf->dir;
dbf->bucket = new_dbf->bucket;
dbf->bucket_dir = new_dbf->bucket_dir;
- dbf->last_read = new_dbf->last_read;
- dbf->bucket_cache = new_dbf->bucket_cache;
- dbf->cache_size = new_dbf->cache_size;
+
+ dbf->cache_size = new_dbf->cache_size;
+ dbf->cache_num = new_dbf->cache_num;
+ dbf->cache_tree = new_dbf->cache_tree;
+ dbf->cache_entry = new_dbf->cache_entry;
+ dbf->cache_lru = new_dbf->cache_lru;
+ dbf->cache_avail = new_dbf->cache_avail;
+
dbf->header_changed = new_dbf->header_changed;
dbf->directory_changed = new_dbf->directory_changed;
dbf->bucket_changed = new_dbf->bucket_changed;
@@ -182,7 +177,6 @@ _gdbm_finish_transfer (GDBM_FILE dbf, GDBM_FILE new_dbf,
gdbm_file_sync (dbf);
/* Force the right stuff for a correct bucket cache. */
- dbf->cache_entry = &dbf->bucket_cache[0];
return _gdbm_get_bucket (dbf, 0);
}
diff --git a/src/update.c b/src/update.c
index 44be33c..4c271d2 100644
--- a/src/update.c
+++ b/src/update.c
@@ -75,19 +75,7 @@ _gdbm_end_update (GDBM_FILE dbf)
/* Write the other changed buckets if there are any. */
if (dbf->second_changed)
{
- if (dbf->bucket_cache != NULL)
- {
- int index;
-
- for (index = 0; index < dbf->cache_size; index++)
- {
- if (dbf->bucket_cache[index].ca_changed)
- {
- if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[index]))
- return -1;
- }
- }
- }
+ _gdbm_cache_flush (dbf);
dbf->second_changed = FALSE;
}
diff --git a/tests/setopt00.at b/tests/setopt00.at
index 4ef915b..a1be255 100644
--- a/tests/setopt00.at
+++ b/tests/setopt00.at
@@ -26,7 +26,7 @@ gtopt test.db '!MMAP'
* CACHESIZE:
initial GDBM_SETCACHESIZE: PASS
GDBM_GETCACHESIZE: PASS
-second GDBM_SETCACHESIZE: XFAIL
+second GDBM_SETCACHESIZE: PASS
* SYNCMODE:
initial GDBM_GETSYNCMODE: PASS
GDBM_SETSYNCMODE: PASS