diff options
-rw-r--r-- | README | 6 | ||||
-rw-r--r-- | src/Makefile.am | 1 | ||||
-rw-r--r-- | src/bucket.c | 609 | ||||
-rw-r--r-- | src/cachetree.c | 528 | ||||
-rw-r--r-- | src/findkey.c | 25 | ||||
-rw-r--r-- | src/gdbm.h.in | 19 | ||||
-rw-r--r-- | src/gdbmclose.c | 12 | ||||
-rw-r--r-- | src/gdbmdefs.h | 40 | ||||
-rw-r--r-- | src/gdbmerrno.c | 3 | ||||
-rw-r--r-- | src/gdbmopen.c | 51 | ||||
-rw-r--r-- | src/gdbmsetopt.c | 8 | ||||
-rw-r--r-- | src/gdbmtool.c | 27 | ||||
-rw-r--r-- | src/proto.h | 27 | ||||
-rw-r--r-- | src/recover.c | 24 | ||||
-rw-r--r-- | src/update.c | 14 | ||||
-rw-r--r-- | tests/setopt00.at | 2 |
16 files changed, 1057 insertions, 339 deletions
@@ -1,6 +1,12 @@ GNU dbm README See the end of file for copying conditions. +* Note + +This is an experimental branch of GNU DBM. Its goal is to evaluate +possibilities of improving the caching scheme. Please don't use it +in production. + * Introduction This file contains brief information about configuring, testing diff --git a/src/Makefile.am b/src/Makefile.am index e34bb7e..18c80d9 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -41,6 +41,7 @@ lib_LTLIBRARIES = libgdbm.la libgdbm_la_LIBADD = @LTLIBINTL@ libgdbm_la_SOURCES = \ + cachetree.c\ gdbmclose.c\ gdbmcount.c\ gdbmdelete.c\ diff --git a/src/bucket.c b/src/bucket.c index b88972e..4bce594 100644 --- a/src/bucket.c +++ b/src/bucket.c @@ -56,7 +56,155 @@ gdbm_dir_entry_valid_p (GDBM_FILE dbf, int dir_index) && dir_index < GDBM_DIR_COUNT (dbf) && dbf->dir[dir_index] >= dbf->header->block_size; } - + +static void +set_cache_entry (GDBM_FILE dbf, cache_elem *elem) +{ + dbf->cache_entry = elem; + dbf->bucket = dbf->cache_entry->ca_bucket; +} + + +/* LRU list management */ + +/* Link ELEM after REF in DBF cache. If REF is NULL, link at head */ +static void +lru_link_elem (GDBM_FILE dbf, cache_elem *elem, cache_elem *ref) +{ + if (!ref) + { + elem->ca_prev = NULL; + elem->ca_next = dbf->cache_mru; + if (dbf->cache_mru) + dbf->cache_mru->ca_prev = elem; + else + dbf->cache_lru = elem; + dbf->cache_mru = elem; + } + else + { + cache_elem *x; + + elem->ca_prev = ref; + elem->ca_next = ref->ca_next; + if ((x = ref->ca_next)) + x->ca_prev = elem; + else + dbf->cache_lru = elem; + ref->ca_next = elem; + } +} + +/* Unlink ELEM from the list of cache elements in DBF. */ +static void +lru_unlink_elem (GDBM_FILE dbf, cache_elem *elem) +{ + cache_elem *x; + + if ((x = elem->ca_prev)) + x->ca_next = elem->ca_next; + else + dbf->cache_mru = elem->ca_next; + if ((x = elem->ca_next)) + x->ca_prev = elem->ca_prev; + else + dbf->cache_lru = elem->ca_prev; + elem->ca_prev = elem->ca_next = NULL; +} + +/* Creates and returns new cache element for DBF. The element is initialized, + but not linked to the LRU list. + Return NULL on error. +*/ +cache_elem * +_gdbm_cache_elem_new (GDBM_FILE dbf, off_t adr) +{ + cache_elem *elem; + + elem = dbf->cache_avail; + if (elem) + { + dbf->cache_avail = elem->ca_next; + } + else + { + elem = calloc (1, sizeof (*elem)); + + if (!elem) + return NULL; + + elem->ca_bucket = malloc (dbf->header->bucket_size); + if (!elem->ca_bucket) + { + free (elem); + return NULL; + } + } + + elem->ca_adr = adr; + elem->ca_changed = FALSE; + elem->ca_data.hash_val = -1; + elem->ca_data.elem_loc = -1; + + elem->ca_prev = elem->ca_next = NULL; + elem->ca_hits = 0; + elem->ca_node = NULL; + + dbf->cache_num++; + + return elem; +} + +/* Frees element ELEM. Unlinks it from the cache tree and LRU list. */ +static void +cache_elem_free (GDBM_FILE dbf, cache_elem *elem) +{ + _gdbm_rbt_remove_node (dbf->cache_tree, elem->ca_node); + lru_unlink_elem (dbf, elem); + elem->ca_next = dbf->cache_avail; + dbf->cache_avail = elem; + dbf->cache_num--; +} + +static int +cache_lookup (GDBM_FILE dbf, off_t adr, cache_elem *ref, cache_elem **ret_elem) +{ + int rc; + cache_elem *elem; + + dbf->cache_access_count++; + rc = _gdbm_cache_tree_lookup (dbf->cache_tree, adr, &elem); + switch (rc) + { + case node_found: + elem->ca_hits++; + lru_unlink_elem (dbf, elem); + break; + + case node_new: + if (dbf->cache_num > dbf->cache_size) + { + cache_elem *last = dbf->cache_lru; + if (last->ca_changed) + { + if (_gdbm_write_bucket (dbf, last)) + { + cache_elem_free (dbf, elem); + return node_failure; + } + } + cache_elem_free (dbf, last); + } + break; + + default: + return rc; + } + lru_link_elem (dbf, elem, ref); + *ret_elem = elem; + return rc; +} + /* Find a bucket for DBF that is pointed to by the bucket directory from location DIR_INDEX. The bucket cache is first checked to see if it is already in memory. If not, a bucket may be tossed to read the new @@ -70,8 +218,9 @@ _gdbm_get_bucket (GDBM_FILE dbf, int dir_index) int rc; off_t bucket_adr; /* The address of the correct hash bucket. */ off_t file_pos; /* The return address for lseek. */ - int index; /* Loop index. */ - + hash_bucket *bucket; + cache_elem *elem; + if (!gdbm_dir_entry_valid_p (dbf, dir_index)) { /* FIXME: negative caching? */ @@ -82,127 +231,66 @@ _gdbm_get_bucket (GDBM_FILE dbf, int dir_index) /* Initial set up. */ dbf->bucket_dir = dir_index; bucket_adr = dbf->dir[dir_index]; - - if (dbf->bucket_cache == NULL) - { - if (_gdbm_init_cache (dbf, DEFAULT_CACHESIZE) == -1) - { - _gdbm_fatal (dbf, _("couldn't init cache")); - return -1; - } - } - /* If that one is not already current, we must find it. */ - if (dbf->cache_entry->ca_adr != bucket_adr) + switch (cache_lookup (dbf, bucket_adr, NULL, &elem)) { - size_t lru; - hash_bucket *bucket; + case node_found: + break; - /* Look in the cache. */ - for (index = 0; index < dbf->cache_size; index++) - { - if (dbf->bucket_cache[index].ca_adr == bucket_adr) - { - dbf->bucket = dbf->bucket_cache[index].ca_bucket; - dbf->cache_entry = &dbf->bucket_cache[index]; - return 0; - } - } - - /* It is not in the cache, read it from the disk. */ - + case node_new: /* Position the file pointer */ file_pos = gdbm_file_seek (dbf, bucket_adr, SEEK_SET); if (file_pos != bucket_adr) { GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE); + cache_elem_free (dbf, elem); _gdbm_fatal (dbf, _("lseek error")); return -1; } - - /* Flush and drop the last recently used cache entry */ - lru = (dbf->last_read + 1) % dbf->cache_size; - if (dbf->bucket_cache[lru].ca_changed) - { - if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[lru])) - return -1; - } - _gdbm_cache_entry_invalidate (dbf, lru); - + /* Read the bucket. */ - rc = _gdbm_full_read (dbf, dbf->bucket_cache[lru].ca_bucket, - dbf->header->bucket_size); + rc = _gdbm_full_read (dbf, elem->ca_bucket, dbf->header->bucket_size); if (rc) { GDBM_DEBUG (GDBM_DEBUG_ERR, "%s: error reading bucket: %s", dbf->name, gdbm_db_strerror (dbf)); dbf->need_recovery = TRUE; + cache_elem_free (dbf, elem); _gdbm_fatal (dbf, gdbm_db_strerror (dbf)); return -1; } + /* Validate the bucket */ - bucket = dbf->bucket_cache[lru].ca_bucket; + bucket = elem->ca_bucket; if (!(bucket->count >= 0 && bucket->count <= dbf->header->bucket_elems && bucket->bucket_bits >= 0 && bucket->bucket_bits <= dbf->header->dir_bits)) { GDBM_SET_ERRNO (dbf, GDBM_BAD_BUCKET, TRUE); + cache_elem_free (dbf, elem); return -1; } /* Validate bucket_avail table */ if (gdbm_bucket_avail_table_validate (dbf, bucket)) - return -1; - - /* Finally, store it in cache */ - dbf->last_read = lru; - dbf->bucket_cache[lru].ca_adr = bucket_adr; - dbf->bucket = dbf->bucket_cache[lru].ca_bucket; - dbf->cache_entry = &dbf->bucket_cache[lru]; - dbf->cache_entry->ca_data.elem_loc = -1; - dbf->cache_entry->ca_changed = FALSE; - } - return 0; -} - -int -_gdbm_read_bucket_at (GDBM_FILE dbf, off_t off, hash_bucket *bucket, - size_t size) -{ - off_t file_pos; - int i; - - if (dbf->cache_entry && dbf->cache_entry->ca_adr == off) - { - memcpy (bucket, dbf->bucket, size); - return 0; - } - - /* Look in the cache. */ - for (i = 0; i < dbf->cache_size; i++) - { - if (dbf->bucket_cache[i].ca_adr == off) { - memcpy (bucket, dbf->bucket_cache[i].ca_bucket, size); - return 0; + cache_elem_free (dbf, elem); + return -1; } - } - - /* Read the bucket. */ - file_pos = gdbm_file_seek (dbf, off, SEEK_SET); - if (file_pos != off) - { - GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE); - return -1; - } - if (_gdbm_full_read (dbf, bucket, size)) - { - GDBM_DEBUG (GDBM_DEBUG_ERR, - "%s: error reading bucket: %s", - dbf->name, gdbm_db_strerror (dbf)); + + /* Update the cache */ + elem->ca_adr = bucket_adr; + elem->ca_data.elem_loc = -1; + elem->ca_changed = FALSE; + + break; + + case node_failure: return -1; } + set_cache_entry (dbf, elem); + return 0; } @@ -213,86 +301,74 @@ _gdbm_read_bucket_at (GDBM_FILE dbf, off_t off, hash_bucket *bucket, int _gdbm_split_bucket (GDBM_FILE dbf, int next_insert) { - hash_bucket *bucket[2]; /* Pointers to the new buckets. */ - - int new_bits; /* The number of bits for the new buckets. */ - int cache_0; /* Location in the cache for the buckets. */ - int cache_1; - off_t adr_0; /* File address of the new bucket 0. */ - off_t adr_1; /* File address of the new bucket 1. */ - avail_elem old_bucket; /* Avail Struct for the old bucket. */ - - off_t dir_start0; /* Used in updating the directory. */ - off_t dir_start1; - off_t dir_end; - - off_t *new_dir; /* Pointer to the new directory. */ - off_t dir_adr; /* Address of the new directory. */ - int dir_size; /* Size of the new directory. */ off_t old_adr[GDBM_HASH_BITS]; /* Address of the old directories. */ int old_size[GDBM_HASH_BITS]; /* Size of the old directories. */ int old_count; /* Number of old directories. */ int index; /* Used in array indexing. */ int index1; /* Used in array indexing. */ - int elem_loc; /* Location in new bucket to put element. */ - bucket_element *old_el; /* Pointer into the old bucket. */ - int select; /* Used to index bucket during movement. */ /* No directories are yet old. */ old_count = 0; - - if (dbf->bucket_cache == NULL) + while (dbf->bucket->count == dbf->header->bucket_elems) { - if (_gdbm_init_cache (dbf, DEFAULT_CACHESIZE) == -1) + int new_bits; /* The number of bits for the new buckets. */ + cache_elem *newcache[2]; /* Location in the cache for the buckets. */ + off_t adr_0; /* File address of the new bucket 0. */ + off_t adr_1; /* File address of the new bucket 1. */ + avail_elem old_bucket; /* Avail Struct for the old bucket. */ + + off_t dir_start0; /* Used in updating the directory. */ + off_t dir_start1; + off_t dir_end; + + new_bits = dbf->bucket->bucket_bits + 1; + /* Allocate two new buckets */ + adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size); + switch (cache_lookup (dbf, adr_0, dbf->cache_mru, &newcache[0])) { - _gdbm_fatal (dbf, _("couldn't init cache")); + case node_new: + break; + + case node_found: + /* should not happen */ + GDBM_DEBUG (GDBM_DEBUG_ERR, + "%s: bucket found where it should not", + dbf->name); + GDBM_SET_ERRNO (dbf, GDBM_BUCKET_CACHE_CORRUPTED, TRUE); return -1; - } - } - while (dbf->bucket->count == dbf->header->bucket_elems) - { - /* Initialize the "new" buckets in the cache. */ - do - { - dbf->last_read = (dbf->last_read + 1) % dbf->cache_size; - cache_0 = dbf->last_read; - } - while (dbf->bucket_cache[cache_0].ca_bucket == dbf->bucket); - bucket[0] = dbf->bucket_cache[cache_0].ca_bucket; - if (dbf->bucket_cache[cache_0].ca_changed) - { - if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_0])) - return -1; + case node_failure: + return -1; } - do - { - dbf->last_read = (dbf->last_read + 1) % dbf->cache_size; - cache_1 = dbf->last_read; - } - while (dbf->bucket_cache[cache_1].ca_bucket == dbf->bucket); - bucket[1] = dbf->bucket_cache[cache_1].ca_bucket; - if (dbf->bucket_cache[cache_1].ca_changed) + _gdbm_new_bucket (dbf, newcache[0]->ca_bucket, new_bits); + + adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size); + switch (cache_lookup (dbf, adr_1, newcache[0], &newcache[1])) { - if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_1])) - return -1; + case node_new: + break; + + case node_found: + /* should not happen */ + GDBM_DEBUG (GDBM_DEBUG_ERR, + "%s: bucket found where it should not", + dbf->name); + GDBM_SET_ERRNO (dbf, GDBM_BUCKET_CACHE_CORRUPTED, TRUE); + return -1; + + case node_failure: + return -1; } - new_bits = dbf->bucket->bucket_bits + 1; - _gdbm_new_bucket (dbf, bucket[0], new_bits); - _gdbm_new_bucket (dbf, bucket[1], new_bits); - adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size); - if (adr_0 == 0) - return -1; - dbf->bucket_cache[cache_0].ca_adr = adr_0; - adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size); - if (adr_1 == 0) - return -1; - dbf->bucket_cache[cache_1].ca_adr = adr_1; + _gdbm_new_bucket (dbf, newcache[1]->ca_bucket, new_bits); /* Double the directory size if necessary. */ if (dbf->header->dir_bits == dbf->bucket->bucket_bits) { + off_t *new_dir; /* Pointer to the new directory. */ + int dir_size; /* Size of the new directory. */ + off_t dir_adr; /* Address of the new directory. */ + if (dbf->header->dir_size >= GDBM_MAX_DIR_HALF) { GDBM_SET_ERRNO (dbf, GDBM_DIR_OVERFLOW, TRUE); @@ -335,40 +411,44 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert) /* Copy all elements in dbf->bucket into the new buckets. */ for (index = 0; index < dbf->header->bucket_elems; index++) { - old_el = &dbf->bucket->h_table[index]; - select = (old_el->hash_value >> (GDBM_HASH_BITS - new_bits)) & 1; - elem_loc = old_el->hash_value % dbf->header->bucket_elems; - while (bucket[select]->h_table[elem_loc].hash_value != -1) + bucket_element *old_el = &dbf->bucket->h_table[index]; + hash_bucket *bucket = + newcache[(old_el->hash_value >> (GDBM_HASH_BITS - new_bits)) & 1]->ca_bucket; + int elem_loc = old_el->hash_value % dbf->header->bucket_elems; + while (bucket->h_table[elem_loc].hash_value != -1) elem_loc = (elem_loc + 1) % dbf->header->bucket_elems; - bucket[select]->h_table[elem_loc] = *old_el; - bucket[select]->count++; + bucket->h_table[elem_loc] = *old_el; + bucket->count++; } - /* Allocate avail space for the bucket[1]. */ - bucket[1]->bucket_avail[0].av_adr + /* Allocate avail space for the newcache[1]->ca_bucket. */ + newcache[1]->ca_bucket->bucket_avail[0].av_adr = _gdbm_alloc (dbf, dbf->header->block_size); - if (bucket[1]->bucket_avail[0].av_adr == 0) + if (newcache[1]->ca_bucket->bucket_avail[0].av_adr == 0) return -1; - bucket[1]->bucket_avail[0].av_size = dbf->header->block_size; - bucket[1]->av_count = 1; + newcache[1]->ca_bucket->bucket_avail[0].av_size + = dbf->header->block_size; + newcache[1]->ca_bucket->av_count = 1; - /* Copy the avail elements in dbf->bucket to bucket[0]. */ - bucket[0]->av_count = dbf->bucket->av_count; + /* Copy the avail elements in dbf->bucket to newcache[0]->ca_bucket. */ + newcache[0]->ca_bucket->av_count = dbf->bucket->av_count; index = 0; - index1 = 0; - if (bucket[0]->av_count == BUCKET_AVAIL) + if (newcache[0]->ca_bucket->av_count == BUCKET_AVAIL) { - /* The avail is full, move the first one to bucket[1]. */ + /* The avail is full, move the first one to newcache[1]->ca_bucket.*/ _gdbm_put_av_elem (dbf->bucket->bucket_avail[0], - bucket[1]->bucket_avail, - &bucket[1]->av_count, + newcache[1]->ca_bucket->bucket_avail, + &newcache[1]->ca_bucket->av_count, dbf->coalesce_blocks); index = 1; - bucket[0]->av_count--; + newcache[0]->ca_bucket->av_count--; } + + index1 = 0; for (; index < dbf->bucket->av_count; index++) { - bucket[0]->bucket_avail[index1++] = dbf->bucket->bucket_avail[index]; + newcache[0]->ca_bucket->bucket_avail[index1++] + = dbf->bucket->bucket_avail[index]; } /* Update the directory. We have new file addresses for both buckets. */ @@ -381,10 +461,9 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert) for (index = dir_start1; index < dir_end; index++) dbf->dir[index] = adr_1; - /* Set changed flags. */ - dbf->bucket_cache[cache_0].ca_changed = TRUE; - dbf->bucket_cache[cache_1].ca_changed = TRUE; + newcache[0]->ca_changed = TRUE; + newcache[1]->ca_changed = TRUE; dbf->bucket_changed = TRUE; dbf->directory_changed = TRUE; dbf->second_changed = TRUE; @@ -395,29 +474,23 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert) /* Invalidate old cache entry. */ old_bucket.av_adr = dbf->cache_entry->ca_adr; old_bucket.av_size = dbf->header->bucket_size; - dbf->cache_entry->ca_adr = 0; - dbf->cache_entry->ca_changed = FALSE; + cache_elem_free (dbf, dbf->cache_entry); /* Set dbf->bucket to the proper bucket. */ - if (dbf->dir[dbf->bucket_dir] == adr_0) + if (dbf->dir[dbf->bucket_dir] != adr_0) { - dbf->bucket = bucket[0]; - dbf->cache_entry = &dbf->bucket_cache[cache_0]; - _gdbm_put_av_elem (old_bucket, - bucket[1]->bucket_avail, - &bucket[1]->av_count, - dbf->coalesce_blocks); - } - else - { - dbf->bucket = bucket[1]; - dbf->cache_entry = &dbf->bucket_cache[cache_1]; - _gdbm_put_av_elem (old_bucket, - bucket[0]->bucket_avail, - &bucket[0]->av_count, - dbf->coalesce_blocks); + cache_elem *t = newcache[0]; + newcache[0] = newcache[1]; + newcache[1] = t; } - + + _gdbm_put_av_elem (old_bucket, + newcache[1]->ca_bucket->bucket_avail, + &newcache[1]->ca_bucket->av_count, + dbf->coalesce_blocks); + lru_unlink_elem (dbf, newcache[0]); + lru_link_elem (dbf, newcache[0], NULL); + set_cache_entry (dbf, newcache[0]); } /* Get rid of old directories. */ @@ -460,3 +533,159 @@ _gdbm_write_bucket (GDBM_FILE dbf, cache_elem *ca_entry) ca_entry->ca_data.elem_loc = -1; return 0; } + +/* Cache manipulation functions. */ + +/* Initialize the bucket cache. */ +int +_gdbm_cache_init (GDBM_FILE dbf, size_t size) +{ + if (size < 3) + { + errno = EINVAL; + return -1; + } + + if (size == dbf->cache_size) + return 0; + + while (size < dbf->cache_num) + { + /* Flush the least recently used element */ + cache_elem *elem = dbf->cache_lru; + if (elem->ca_changed) + { + if (_gdbm_write_bucket (dbf, elem)) + return -1; + } + cache_elem_free (dbf, elem); + } + + dbf->cache_size = size; + + return 0; +} + +/* Free the bucket cache */ +void +_gdbm_cache_free (GDBM_FILE dbf) +{ + cache_elem *elem; + + while (dbf->cache_lru) + cache_elem_free (dbf, dbf->cache_lru); + _gdbm_cache_tree_destroy (dbf->cache_tree); + while ((elem = dbf->cache_avail) != NULL) + { + dbf->cache_avail = elem->ca_next; + free (elem->ca_data.dptr); + free (elem->ca_bucket); + free (elem); + } +} + +/* Flush cache content to disk. */ +int +_gdbm_cache_flush (GDBM_FILE dbf) +{ + cache_elem *elem; + for (elem = dbf->cache_lru; elem; elem = elem->ca_prev) + { + if (elem->ca_changed) + { + if (_gdbm_write_bucket (dbf, elem)) + return -1; + } + } + return 0; +} + +int +_gdbm_fetch_data (GDBM_FILE dbf, off_t off, size_t size, void *buf) +{ + off_t bucket_adr; + off_t file_pos; + int rc; + cache_elem *elem; + char *dst = buf; + + bucket_adr = (off / dbf->header->bucket_size) + * dbf->header->bucket_size; + off -= bucket_adr; + while (size) + { + size_t nbytes; + + switch (cache_lookup (dbf, bucket_adr, dbf->cache_mru, &elem)) + { + case node_found: + break; + + case node_new: + /* Position the file pointer */ + file_pos = gdbm_file_seek (dbf, bucket_adr, SEEK_SET); + if (file_pos != bucket_adr) + { + GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE); + cache_elem_free (dbf, elem); + _gdbm_fatal (dbf, _("lseek error")); + return -1; + } + + /* Read the bucket. */ + rc = _gdbm_full_read (dbf, elem->ca_bucket, + dbf->header->bucket_size); + if (rc) + { + GDBM_DEBUG (GDBM_DEBUG_ERR, + "%s: error reading data bucket: %s", + dbf->name, gdbm_db_strerror (dbf)); + dbf->need_recovery = TRUE; + cache_elem_free (dbf, elem); + _gdbm_fatal (dbf, gdbm_db_strerror (dbf)); + return -1; + } + break; + + case node_failure: + return -1; + } + + nbytes = dbf->header->bucket_size - off; + if (nbytes > size) + nbytes = size; + memcpy (dst, (char*) elem->ca_bucket + off, nbytes); + dst += nbytes; + size -= nbytes; + bucket_adr++; + off = 0; + } + return 0; +} + +void +gdbm_get_cache_stats (GDBM_FILE dbf, + size_t *access_count, + size_t *cache_count, + struct gdbm_cache_stat *bstat, + size_t nstat) +{ + if (access_count) + *access_count = dbf->cache_access_count; + if (cache_count) + *cache_count = dbf->cache_num; + if (bstat) + { + size_t i; + cache_elem *elem; + + if (nstat > dbf->cache_num) + nstat = dbf->cache_num; + + for (i = 0, elem = dbf->cache_mru; i < nstat; i++, elem = elem->ca_next) + { + bstat[i].adr = elem->ca_adr; + bstat[i].hits = elem->ca_hits; + } + } +} diff --git a/src/cachetree.c b/src/cachetree.c new file mode 100644 index 0000000..b27d8a7 --- /dev/null +++ b/src/cachetree.c @@ -0,0 +1,528 @@ +/* cachetree.c - Implementation of the red-black tree for cache lookups. */ + +/* This file is part of GDBM, the GNU data base manager. + Copyright (C) 2019 Free Software Foundation, Inc. + + GDBM is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GDBM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GDBM. If not, see <http://www.gnu.org/licenses/>. */ + +#include "autoconf.h" +#include "gdbmdefs.h" +#define NDEBUG +#include "assert.h" + +enum cache_node_color { RED, BLACK }; + +typedef struct cache_node cache_node; +struct cache_node +{ + cache_node *left, *right, *parent; + enum cache_node_color color; + cache_elem *elem; +}; + +struct cache_tree +{ + cache_node *root; /* Root of the tree */ + cache_node *avail; /* List of available nodes, linked by parent field */ + GDBM_FILE dbf; /* Database this tree belongs to */ +}; + +/* Allocate and return a new node. Pick the head item from the avail + list and update the avail pointer. If the list is empty, malloc + a new node. + All members in the returned node are filled with 0. +*/ +static cache_node * +rbt_node_alloc (cache_tree *tree) +{ + cache_node *n; + + n = tree->avail; + if (n) + tree->avail = n->parent; + else + { + n = malloc (sizeof (*n)); + if (!n) + return NULL; + } + memset (n, 0, sizeof (*n)); + return n; +} + +/* Return the node N to the avail list in TREE. */ +static void +rbt_node_dealloc (cache_tree *tree, cache_node *n) +{ + n->parent = tree->avail; + tree->avail = n; +} + +/* Red-black tree properties: + 1. Each node is either red or black. + 2. The root node is black. + 3. All leaves are black and contain no data. + 4. Every red node has two children, and both are black. + IOW, the parent of every red node is black. + 5. All paths from any given node to its leaf nodes contain the same + number of black nodes. + */ + +/* Auxiliary functions for accessing nodes. */ + +/* Return the grandparent node of N. + Prerequisite: N may not be root. +*/ +static inline cache_node * +grandparent (cache_node *n) +{ + return n->parent->parent; +} + +/* Return the sibling node of N. + Prerequisite: N may not be root. +*/ +static inline cache_node * +sibling (cache_node *n) +{ + return (n == n->parent->left) ? n->parent->right : n->parent->left; +} + +/* Return the uncle node of N. + Prerequisite: N must be at least 2 nodes away from root. +*/ +static inline cache_node * +uncle (cache_node *n) +{ + return sibling (n->parent); +} + +/* Returns the color of the node N. + Empty leaves are represented by NULL, therefore NULL is assumed to + be black (see property 3). +*/ +static inline enum cache_node_color +node_color (cache_node *n) +{ + return n == NULL ? BLACK : n->color; +} + +/* Replace the OLDN with NEWN. + Does not modify OLDN. */ +static void +replace_node (cache_tree *tree, cache_node *oldn, cache_node *newn) +{ + if (oldn->parent == NULL) + tree->root = newn; + else if (oldn == oldn->parent->left) + oldn->parent->left = newn; + else + oldn->parent->right = newn; + + if (newn != NULL) + newn->parent = oldn->parent; +} + +/* Rotate the TREE left over the node N. */ +static void +rotate_left (cache_tree *tree, cache_node *n) +{ + cache_node *right = n->right; + replace_node (tree, n, right); + n->right = right->left; + if (right->left != NULL) + right->left->parent = n; + right->left = n; + n->parent = right; +} + +/* Rotate the TREE right over the node N. */ +static void +rotate_right (cache_tree *tree, cache_node *n) +{ + cache_node *left = n->left; + replace_node (tree, n, left); + n->left = left->right; + if (left->right != NULL) + left->right->parent = n; + left->right = n; + n->parent = left; +} + +/* Node deletion */ +static void rbt_delete_fixup (cache_tree *tree, cache_node *n); + +/* Remove N from the TREE. */ +void +_gdbm_rbt_remove_node (cache_tree *tree, cache_node *n) +{ + cache_node *child; + + /* If N has both left and right children, reduce the problem to + the node with only one child. To do so, find the in-order + predecessor of N, copy its value (elem) to N and then delete + the predecessor. */ + if (n->left != NULL && n->right != NULL) + { + cache_node *p; + for (p = n->left; p->right; p = p->right) + ; + n->elem = p->elem; + n->elem->ca_node = n; + n = p; + } + + /* N has only one child. Select it. */ + child = n->left ? n->left : n->right; + if (node_color (n) == BLACK) + { + n->color = node_color (child); + rbt_delete_fixup (tree, n); + } + replace_node (tree, n, child); + if (n->parent == NULL && child != NULL) /* root should be black */ + child->color = BLACK; + + /* Return N to the avail pool */ + rbt_node_dealloc (tree, n); +} + +static void +rbt_delete_fixup (cache_tree *tree, cache_node *n) +{ + while (1) + { + if (n->parent == NULL) + { + /* If N has become the root node, deletion resulted in removing + one black node (prior root) from every path, so all properties + still hold. + */ + return; + } + else + { + /* If N has a red sibling, change the colors of the parent and + sibling and rotate about the parent. Thus, the sibling becomes + grandparent and we can proceed to the next case. + */ + if (node_color (sibling (n)) == RED) + { + n->parent->color = RED; + sibling (n)->color = BLACK; + if (n == n->parent->left) + rotate_left (tree, n->parent); + else + rotate_right (tree, n->parent); + } + + /* If the parent, sibling and nephews are all black, paint the + sibling red. This means one black node was removed from all + paths passing through the parent, so we recurse to the beginning + of the loop with parent as the argument to restore the properties. + This is the only branch that loops. + */ + if (node_color (n->parent) == BLACK + && node_color (sibling (n)) == BLACK + && node_color (sibling (n)->left) == BLACK + && node_color (sibling (n)->right) == BLACK) + { + sibling (n)->color = RED; + n = n->parent; + continue; + } + else + { + /* If the sibling and nephews are black but the parent is red, + swap the colors of the sibling and parent. The properties + are then restored. + */ + if (node_color (n->parent) == RED + && node_color (sibling (n)) == BLACK + && node_color (sibling (n)->left) == BLACK + && node_color (sibling (n)->right) == BLACK) + { + sibling (n)->color = RED; + n->parent->color = BLACK; + } + else + { + /* N is the left child of its parent, its sibling is black, + and the sibling's right child is black. Swap the colors + of the sibling and its left sibling and rotate right + over the sibling. + */ + if (n == n->parent->left + && node_color (sibling (n)) == BLACK + && node_color (sibling (n)->left) == RED + && node_color (sibling (n)->right) == BLACK) + { + sibling (n)->color = RED; + sibling (n)->left->color = BLACK; + rotate_right (tree, sibling (n)); + } + else if (n == n->parent->right + && node_color (sibling (n)) == BLACK + && node_color (sibling (n)->right) == RED + && node_color (sibling (n)->left) == BLACK) + { + /* The mirror case is handled similarly. */ + sibling (n)->color = RED; + sibling (n)->right->color = BLACK; + rotate_left (tree, sibling (n)); + } + /* N is the left child of its parent, its sibling is black + and the sibling's right child is red. Swap the colors + of the parent and sibling, paint the sibling's right + child black and rotate left at the parent. Similarly + for the mirror case. This achieves the following: + + . A black node is added to all paths passing through N; + . A black node is removed from all paths through the + sibling's red child. + . The latter is painted black which restores missing + black node in all paths through the sibling's red child. + + Another sibling's child becomes a child of N's parent + during the rotation and is therefore not affected. + */ + sibling (n)->color = node_color (n->parent); + n->parent->color = BLACK; + if (n == n->parent->left) + { + sibling (n)->right->color = BLACK; + rotate_left (tree, n->parent); + } + else + { + sibling (n)->left->color = BLACK; + rotate_right (tree, n->parent); + } + } + } + } + break; + } +} + +/* Insertion */ +static void rbt_insert_fixup (cache_tree *tree, cache_node *n); + +/* Look up the node with elem->ca_adr equal to ADR. + If found, put it in *RETVAL and return node_found. + + Otherwise, if INSERT is TRUE, create a new node and insert it in the + appropriate place in the tree. Store the address of the newly created + node in *RETVAL and return node_new. If a new node cannot be created + (memory exhausted), return node_failure. + + Otherwise, if INSERT is FALSE, store NULL in *RETVAL and return node_new. +*/ +static int +cache_tree_lookup (cache_tree *tree, off_t adr, int insert, + cache_node **retval) +{ + int res; + cache_node *node, *parent = NULL; + cache_node **nodeptr; + + nodeptr = &tree->root; + while ((node = *nodeptr) != NULL) + { + if (adr == node->elem->ca_adr) + break; + parent = node; + if (adr < node->elem->ca_adr) + nodeptr = &node->left; + else + nodeptr = &node->right; + } + + if (node) + { + res = node_found; + } + else + { + if (insert) + { + node = rbt_node_alloc (tree); + if (!node) + return node_failure; + node->elem = _gdbm_cache_elem_new (tree->dbf, adr); + if (!node->elem) + { + rbt_node_dealloc (tree, node); + return node_failure; + } + node->elem->ca_node = node; + *nodeptr = node; + node->parent = parent; + rbt_insert_fixup (tree, node); + } + res = node_new; + } + *retval = node; + return res; +} + +static void +rbt_insert_fixup (cache_tree *tree, cache_node *n) +{ + while (1) + { + if (n->parent == NULL) + { + /* Node was inserted at the root of the tree. + The root node must be black (property 2). Changing its color + to black would add one black node to every path, which means + the property 5 would remain satisfied. So we simply paint the + node black. + */ + n->color = BLACK; + } + else if (node_color (n->parent) == BLACK) + { + /* The node has black parent. + All properties are satisfied. There's no need to change anything. + */ + return; + } + else if (node_color (uncle (n)) == RED) + { + /* The uncle node is red. + Repaint the parent and uncle black and the grandparent red. This + would satisfy 4. However, if the grandparent is root, this would + violate the property 2. So we repaint the grandparent by + re-entering the fixup loop with grandparent as the node. + This is the only branch that loops. + */ + n->parent->color = BLACK; + uncle (n)->color = BLACK; + n = grandparent (n); + n->color = RED; + continue; + } + else + { + /* The new node is the right child of its parent and the parent is + the left child of the grandparent. Rotate left about the parent. + Mirror case: The new node is the left child of its parent and the + parent is the right child of the grandparent. Rotate right about + the parent. This fixes the properties for the rbt_insert_5. + */ + if (n == n->parent->right && n->parent == grandparent (n)->left) + { + rotate_left (tree, n->parent); + n = n->left; + } + else if (n == n->parent->left && n->parent == grandparent (n)->right) + { + rotate_right (tree, n->parent); + n = n->right; + } + + /* The new node is the left child of its parent and the parent is the + left child of the grandparent. Rotate right about the grandparent. + Mirror case: The new node is the right child of its parent and the + parent + is the right child of the grandparent. Rotate left. + */ + n->parent->color = BLACK; + grandparent (n)->color = RED; + if (n == n->parent->left && n->parent == grandparent (n)->left) + { + rotate_right (tree, grandparent (n)); + } + else + { + rotate_left (tree, grandparent (n)); + } + } + break; + } +} + +/* Interface functions */ + +/* Create a cache tree structure for the database file DBF. */ +cache_tree * +_gdbm_cache_tree_alloc (GDBM_FILE dbf) +{ + cache_tree *t = malloc (sizeof (*t)); + if (t) + { + t->root = NULL; + t->avail = NULL; + t->dbf = dbf; + } + return t; +} + +/* Free the memory used by the TREE. */ +void +_gdbm_cache_tree_destroy (cache_tree *tree) +{ + cache_node *n; + while ((n = tree->root) != NULL) + _gdbm_rbt_remove_node (tree, n); + while ((n = tree->avail) != NULL) + { + tree->avail = n->parent; + free (n); + } + free (tree); +} + +/* Delete the node with ELEM from the TREE. + Note, that it actually removes the node whith elem->ca_adr equal to + that of ELEM, which means that it's elem pointer could theoretically + have differ from ELEM. However, the actual algorithm ensures it is + the same. +*/ +int +_gdbm_cache_tree_delete (cache_tree *tree, cache_elem *elem) +{ + cache_node *n; + + switch (cache_tree_lookup (tree, elem->ca_adr, 0, &n)) + { + case node_found: + assert (n->elem == elem); + _gdbm_rbt_remove_node (tree, n); + break; + + case node_new: + break; + + case node_failure: + return -1; + } + return 0; +} + +/* Look up the node with elem->ca_adr equal to ADR. + If found, store its pointer in *RETVAL and return node_found. + Otherwise, return node_new and don't touch RETVAL. +*/ +int +_gdbm_cache_tree_lookup (cache_tree *tree, off_t adr, cache_elem **retval) +{ + cache_node *n; + int rc = cache_tree_lookup (tree, adr, TRUE, &n); + if (rc != node_failure) + *retval = n->elem; + return rc; +} + diff --git a/src/findkey.c b/src/findkey.c index 6e4bccd..f82b422 100644 --- a/src/findkey.c +++ b/src/findkey.c @@ -43,11 +43,9 @@ gdbm_bucket_element_valid_p (GDBM_FILE dbf, int elem_loc) char * _gdbm_read_entry (GDBM_FILE dbf, int elem_loc) { - int rc; int key_size; int data_size; size_t dsize; - off_t file_pos; data_cache_elem *data_ca; /* Is it already in the cache? */ @@ -103,26 +101,9 @@ _gdbm_read_entry (GDBM_FILE dbf, int elem_loc) } } - /* Read into the cache. */ - file_pos = gdbm_file_seek (dbf, dbf->bucket->h_table[elem_loc].data_pointer, - SEEK_SET); - if (file_pos != dbf->bucket->h_table[elem_loc].data_pointer) - { - GDBM_SET_ERRNO2 (dbf, GDBM_FILE_SEEK_ERROR, TRUE, GDBM_DEBUG_LOOKUP); - _gdbm_fatal (dbf, _("lseek error")); - return NULL; - } - - rc = _gdbm_full_read (dbf, data_ca->dptr, key_size+data_size); - if (rc) - { - GDBM_DEBUG (GDBM_DEBUG_ERR|GDBM_DEBUG_LOOKUP|GDBM_DEBUG_READ, - "%s: error reading entry: %s", - dbf->name, gdbm_db_strerror (dbf)); - dbf->need_recovery = TRUE; - _gdbm_fatal (dbf, gdbm_db_strerror (dbf)); - return NULL; - } + if (_gdbm_fetch_data (dbf, dbf->bucket->h_table[elem_loc].data_pointer, + key_size + data_size, data_ca->dptr)) + return NULL; return data_ca->dptr; } diff --git a/src/gdbm.h.in b/src/gdbm.h.in index e537d54..6a23ea6 100644 --- a/src/gdbm.h.in +++ b/src/gdbm.h.in @@ -228,9 +228,10 @@ extern int gdbm_copy_meta (GDBM_FILE dst, GDBM_FILE src); # define GDBM_FILE_CLOSE_ERROR 37 # define GDBM_FILE_SYNC_ERROR 38 # define GDBM_FILE_TRUNCATE_ERROR 39 - +# define GDBM_BUCKET_CACHE_CORRUPTED 40 + # define _GDBM_MIN_ERRNO 0 -# define _GDBM_MAX_ERRNO GDBM_FILE_TRUNCATE_ERROR +# define _GDBM_MAX_ERRNO GDBM_BUCKET_CACHE_CORRUPTED /* This one was never used and will be removed in the future */ # define GDBM_UNKNOWN_UPDATE GDBM_UNKNOWN_ERROR @@ -275,8 +276,20 @@ extern void gdbm_debug_parse_state (int (*f) (void *, int, char const *), void *d); extern void gdbm_debug_datum (datum dat, char const *pfx); - #endif + +/* Cache statistics */ +struct gdbm_cache_stat +{ + off_t adr; + size_t hits; +}; + +void gdbm_get_cache_stats (GDBM_FILE dbf, + size_t *access_count, + size_t *cache_count, + struct gdbm_cache_stat *bstat, + size_t nstat); # if defined(__cplusplus) || defined(c_plusplus) } diff --git a/src/gdbmclose.c b/src/gdbmclose.c index 6e21320..71fa128 100644 --- a/src/gdbmclose.c +++ b/src/gdbmclose.c @@ -29,7 +29,6 @@ int gdbm_close (GDBM_FILE dbf) { - int index; /* For freeing the bucket cache. */ int syserrno; gdbm_set_errno (dbf, GDBM_NO_ERROR, FALSE); @@ -58,15 +57,8 @@ gdbm_close (GDBM_FILE dbf) free (dbf->name); free (dbf->dir); - if (dbf->bucket_cache != NULL) - { - for (index = 0; index < dbf->cache_size; index++) - { - free (dbf->bucket_cache[index].ca_bucket); - free (dbf->bucket_cache[index].ca_data.dptr); - } - free (dbf->bucket_cache); - } + _gdbm_cache_free (dbf); + free (dbf->header); free (dbf); if (gdbm_errno) diff --git a/src/gdbmdefs.h b/src/gdbmdefs.h index 7dc15ee..3297cd1 100644 --- a/src/gdbmdefs.h +++ b/src/gdbmdefs.h @@ -157,13 +157,23 @@ typedef struct int elem_loc; } data_cache_elem; -typedef struct +typedef struct cache_elem_s cache_elem; + +struct cache_elem_s { hash_bucket * ca_bucket; off_t ca_adr; - char ca_changed; /* Data in the bucket changed. */ + char ca_changed; /* Data in the bucket changed. */ data_cache_elem ca_data; -} cache_elem; + cache_elem *ca_prev, *ca_next; /* Previous and next elements in LRU list. + If the item is in cache_avail list, only + ca_next is used. It points to the next + available element. */ + size_t ca_hits; /* Number of times this element was requested */ + struct cache_node *ca_node; +}; + +typedef struct cache_tree cache_tree; /* This final structure contains all main memory based information for a gdbm file. This allows multiple gdbm files to be opened at the same @@ -225,19 +235,27 @@ struct gdbm_file_info off_t *dir; /* The bucket cache. */ - cache_elem *bucket_cache; - size_t cache_size; - size_t last_read; - + size_t cache_size; /* Cache capacity */ + size_t cache_num; /* Actual number of elements in cache */ + /* Cache elements form a binary search tree. */ + cache_tree *cache_tree; + /* Cache elements are linked in a list sorted by relative access time */ + cache_elem *cache_mru; /* Most recently used element - head of the list */ + cache_elem *cache_lru; /* Last recently used element - tail of the list */ + cache_elem *cache_avail; /* Pool of available elements (linked by prev, next) + */ + /* Pointer to the current bucket's cache entry. */ + cache_elem *cache_entry; + /* Points to the current hash bucket in the cache. */ hash_bucket *bucket; - + /* The directory entry used to get the current hash bucket. */ int bucket_dir; - /* Pointer to the current bucket's cache entry. */ - cache_elem *cache_entry; - + /* Number of cache accesses */ + size_t cache_access_count; + /* Bookkeeping of things that need to be written back at the end of an update. */ unsigned header_changed :1; diff --git a/src/gdbmerrno.c b/src/gdbmerrno.c index 8aa58a4..392f23b 100644 --- a/src/gdbmerrno.c +++ b/src/gdbmerrno.c @@ -139,7 +139,8 @@ const char * const gdbm_errlist[_GDBM_MAX_ERRNO+1] = { [GDBM_BAD_DIR_ENTRY] = N_("Invalid directory entry"), [GDBM_FILE_CLOSE_ERROR] = N_("Error closing file"), [GDBM_FILE_SYNC_ERROR] = N_("Error synchronizing file"), - [GDBM_FILE_TRUNCATE_ERROR] = N_("Error truncating file") + [GDBM_FILE_TRUNCATE_ERROR] = N_("Error truncating file"), + [GDBM_BUCKET_CACHE_CORRUPTED] = N_("Bucket cache corrupted") }; const char * diff --git a/src/gdbmopen.c b/src/gdbmopen.c index c68834c..621d6c7 100644 --- a/src/gdbmopen.c +++ b/src/gdbmopen.c @@ -269,9 +269,11 @@ gdbm_fd_open (int fd, const char *file_name, int block_size, dbf->dir = NULL; dbf->bucket = NULL; dbf->header = NULL; - dbf->bucket_cache = NULL; - dbf->cache_size = 0; + /* Initialize cache */ + dbf->cache_tree = _gdbm_cache_tree_alloc (dbf); + _gdbm_cache_init (dbf, DEFAULT_CACHESIZE); + dbf->memory_mapping = FALSE; dbf->mapped_size_max = SIZE_T_MAX; dbf->mapped_region = NULL; @@ -639,10 +641,8 @@ gdbm_fd_open (int fd, const char *file_name, int block_size, #endif /* Finish initializing dbf. */ - dbf->last_read = -1; dbf->bucket = NULL; dbf->bucket_dir = 0; - dbf->cache_entry = NULL; dbf->header_changed = FALSE; dbf->directory_changed = FALSE; dbf->bucket_changed = FALSE; @@ -717,46 +717,3 @@ gdbm_open (const char *file, int block_size, int flags, int mode, fatal_func); } -/* Initialize the bucket cache. */ -int -_gdbm_init_cache (GDBM_FILE dbf, size_t size) -{ - int index; - - if (dbf->bucket_cache == NULL) - { - dbf->bucket_cache = calloc (size, sizeof(cache_elem)); - if (dbf->bucket_cache == NULL) - { - GDBM_SET_ERRNO (dbf, GDBM_MALLOC_ERROR, TRUE); - return -1; - } - dbf->cache_size = size; - - for (index = 0; index < size; index++) - { - (dbf->bucket_cache[index]).ca_bucket = - malloc (dbf->header->bucket_size); - if ((dbf->bucket_cache[index]).ca_bucket == NULL) - { - GDBM_SET_ERRNO (dbf, GDBM_MALLOC_ERROR, TRUE); - return -1; - } - dbf->bucket_cache[index].ca_data.dptr = NULL; - dbf->bucket_cache[index].ca_data.dsize = 0; - _gdbm_cache_entry_invalidate (dbf, index); - } - dbf->bucket = dbf->bucket_cache[0].ca_bucket; - dbf->cache_entry = &dbf->bucket_cache[0]; - } - return 0; -} - -void -_gdbm_cache_entry_invalidate (GDBM_FILE dbf, int index) -{ - dbf->bucket_cache[index].ca_adr = 0; - dbf->bucket_cache[index].ca_changed = FALSE; - dbf->bucket_cache[index].ca_data.hash_val = -1; - dbf->bucket_cache[index].ca_data.elem_loc = -1; -} diff --git a/src/gdbmsetopt.c b/src/gdbmsetopt.c index c132335..2e409f9 100644 --- a/src/gdbmsetopt.c +++ b/src/gdbmsetopt.c @@ -54,19 +54,13 @@ setopt_gdbm_setcachesize (GDBM_FILE dbf, void *optval, int optlen) { size_t sz; - if (dbf->bucket_cache != NULL) - { - GDBM_SET_ERRNO (dbf, GDBM_OPT_ALREADY_SET, FALSE); - return -1; - } - /* Optval will point to the new size of the cache. */ if (get_size (optval, optlen, &sz)) { GDBM_SET_ERRNO (dbf, GDBM_OPT_ILLEGAL, FALSE); return -1; } - return _gdbm_init_cache (dbf, (sz > 9) ? sz : 10); + return _gdbm_cache_init (dbf, (sz > 9) ? sz : 10); } static int diff --git a/src/gdbmtool.c b/src/gdbmtool.c index ceffaef..b2ee956 100644 --- a/src/gdbmtool.c +++ b/src/gdbmtool.c @@ -321,26 +321,25 @@ _gdbm_print_avail_list (FILE *fp, GDBM_FILE dbf) void _gdbm_print_bucket_cache (FILE *fp, GDBM_FILE dbf) { - int index; - char changed; - - if (dbf->bucket_cache != NULL) + if (dbf->cache_num) { + int i; + cache_elem *elem; + fprintf (fp, - _("Bucket Cache (size %zu):\n Index: Address Changed Data_Hash \n"), - dbf->cache_size); - for (index = 0; index < dbf->cache_size; index++) + _("Bucket Cache (size %zu/%zu):\n Index: Address Changed Data_Hash \n"), + dbf->cache_num, dbf->cache_size); + for (elem = dbf->cache_entry, i = 0; elem; elem = elem->ca_next, i++) { - changed = dbf->bucket_cache[index].ca_changed; fprintf (fp, " %5d: %15lu %7s %x\n", - index, - (unsigned long) dbf->bucket_cache[index].ca_adr, - (changed ? _("True") : _("False")), - dbf->bucket_cache[index].ca_data.hash_val); + i, + (unsigned long) elem->ca_adr, + (elem->ca_changed ? _("True") : _("False")), + elem->ca_data.hash_val); } } else - fprintf (fp, _("Bucket cache has not been initialized.\n")); + fprintf (fp, _("Bucket cache is empty.\n")); } int @@ -879,7 +878,7 @@ print_cache_begin (struct handler_param *param GDBM_ARG_UNUSED, size_t *exp_coun if (checkdb ()) return 1; if (exp_count) - *exp_count = gdbm_file->bucket_cache ? gdbm_file->cache_size + 1 : 1; + *exp_count = gdbm_file->cache_num + 1; return 0; } diff --git a/src/proto.h b/src/proto.h index 57f9e3c..2bd7956 100644 --- a/src/proto.h +++ b/src/proto.h @@ -21,11 +21,15 @@ /* From bucket.c */ void _gdbm_new_bucket (GDBM_FILE, hash_bucket *, int); int _gdbm_get_bucket (GDBM_FILE, int); -int _gdbm_read_bucket_at (GDBM_FILE dbf, off_t off, hash_bucket *bucket, - size_t size); +int _gdbm_fetch_data (GDBM_FILE dbf, off_t off, size_t size, void *buf); int _gdbm_split_bucket (GDBM_FILE, int); int _gdbm_write_bucket (GDBM_FILE, cache_elem *); +int _gdbm_cache_init (GDBM_FILE, size_t); +void _gdbm_cache_free (GDBM_FILE dbf); +int _gdbm_cache_flush (GDBM_FILE dbf); + +cache_elem *_gdbm_cache_elem_new (GDBM_FILE dbf, off_t adr); /* From falloc.c */ off_t _gdbm_alloc (GDBM_FILE, int); @@ -47,9 +51,6 @@ int _gdbm_end_update (GDBM_FILE); void _gdbm_fatal (GDBM_FILE, const char *); /* From gdbmopen.c */ -int _gdbm_init_cache (GDBM_FILE, size_t); -void _gdbm_cache_entry_invalidate (GDBM_FILE, int); - int gdbm_avail_block_validate (GDBM_FILE dbf, avail_block *avblk); int gdbm_bucket_avail_table_validate (GDBM_FILE dbf, hash_bucket *bucket); @@ -86,6 +87,22 @@ int _gdbm_dump (GDBM_FILE dbf, FILE *fp); /* From recover.c */ int _gdbm_next_bucket_dir (GDBM_FILE dbf, int bucket_dir); +/* cachetree.c */ +cache_tree *_gdbm_cache_tree_alloc (GDBM_FILE dbf); +void _gdbm_cache_tree_destroy (cache_tree *tree); +int _gdbm_cache_tree_delete (cache_tree *tree, cache_elem *elem); +void _gdbm_rbt_remove_node (cache_tree *tree, struct cache_node *n); + +/* Return codes for _gdbm_cache_tree_lookup. */ +enum + { + node_found, /* Returned element was found in cache. */ + node_new, /* Returned element has been created and inserted to cache */ + node_failure /* An error occurred. */ + }; + +int _gdbm_cache_tree_lookup (cache_tree *tree, off_t adr, cache_elem **retval); + /* I/O functions */ static inline ssize_t gdbm_file_read (GDBM_FILE dbf, void *buf, size_t size) diff --git a/src/recover.c b/src/recover.c index 8d34bec..d4da31d 100644 --- a/src/recover.c +++ b/src/recover.c @@ -91,8 +91,6 @@ static int _gdbm_finish_transfer (GDBM_FILE dbf, GDBM_FILE new_dbf, gdbm_recovery *rcvr, int flags) { - int i; - /* Write everything. */ if (_gdbm_end_update (new_dbf)) { @@ -145,15 +143,7 @@ _gdbm_finish_transfer (GDBM_FILE dbf, GDBM_FILE new_dbf, free (dbf->header); free (dbf->dir); - if (dbf->bucket_cache != NULL) - { - for (i = 0; i < dbf->cache_size; i++) - { - free (dbf->bucket_cache[i].ca_bucket); - free (dbf->bucket_cache[i].ca_data.dptr); - } - free (dbf->bucket_cache); - } + _gdbm_cache_flush (dbf); dbf->lock_type = new_dbf->lock_type; dbf->desc = new_dbf->desc; @@ -161,9 +151,14 @@ _gdbm_finish_transfer (GDBM_FILE dbf, GDBM_FILE new_dbf, dbf->dir = new_dbf->dir; dbf->bucket = new_dbf->bucket; dbf->bucket_dir = new_dbf->bucket_dir; - dbf->last_read = new_dbf->last_read; - dbf->bucket_cache = new_dbf->bucket_cache; - dbf->cache_size = new_dbf->cache_size; + + dbf->cache_size = new_dbf->cache_size; + dbf->cache_num = new_dbf->cache_num; + dbf->cache_tree = new_dbf->cache_tree; + dbf->cache_entry = new_dbf->cache_entry; + dbf->cache_lru = new_dbf->cache_lru; + dbf->cache_avail = new_dbf->cache_avail; + dbf->header_changed = new_dbf->header_changed; dbf->directory_changed = new_dbf->directory_changed; dbf->bucket_changed = new_dbf->bucket_changed; @@ -182,7 +177,6 @@ _gdbm_finish_transfer (GDBM_FILE dbf, GDBM_FILE new_dbf, gdbm_file_sync (dbf); /* Force the right stuff for a correct bucket cache. */ - dbf->cache_entry = &dbf->bucket_cache[0]; return _gdbm_get_bucket (dbf, 0); } diff --git a/src/update.c b/src/update.c index 44be33c..4c271d2 100644 --- a/src/update.c +++ b/src/update.c @@ -75,19 +75,7 @@ _gdbm_end_update (GDBM_FILE dbf) /* Write the other changed buckets if there are any. */ if (dbf->second_changed) { - if (dbf->bucket_cache != NULL) - { - int index; - - for (index = 0; index < dbf->cache_size; index++) - { - if (dbf->bucket_cache[index].ca_changed) - { - if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[index])) - return -1; - } - } - } + _gdbm_cache_flush (dbf); dbf->second_changed = FALSE; } diff --git a/tests/setopt00.at b/tests/setopt00.at index 4ef915b..a1be255 100644 --- a/tests/setopt00.at +++ b/tests/setopt00.at @@ -26,7 +26,7 @@ gtopt test.db '!MMAP' * CACHESIZE: initial GDBM_SETCACHESIZE: PASS GDBM_GETCACHESIZE: PASS -second GDBM_SETCACHESIZE: XFAIL +second GDBM_SETCACHESIZE: PASS * SYNCMODE: initial GDBM_GETSYNCMODE: PASS GDBM_SETSYNCMODE: PASS |