diff options
author | Alex Gorrod <alexg@wiredtiger.com> | 2012-08-29 16:00:39 +1000 |
---|---|---|
committer | Alex Gorrod <alexg@wiredtiger.com> | 2012-08-29 16:00:39 +1000 |
commit | efbbae44e71ae3e9b7e6944eddf4742b403a10f4 (patch) | |
tree | 3e3e5d3f7185b9050d9500ae74a4c9420d5f2e05 | |
parent | bc8ea6d32e35a264e950beb39b3c7f8603595c30 (diff) | |
download | mongo-efbbae44e71ae3e9b7e6944eddf4742b403a10f4.tar.gz |
Add bloom filter implementation and test.
The bloom filters will be used internally by the LSM implementation.
They do not form part of the public API.
-rw-r--r-- | build_posix/Make.subdirs | 1 | ||||
-rw-r--r-- | dist/filelist | 1 | ||||
-rw-r--r-- | dist/s_string.ok | 1 | ||||
-rw-r--r-- | src/bloom/bloom.c | 283 | ||||
-rw-r--r-- | src/include/bloom.h | 24 | ||||
-rw-r--r-- | src/include/extern.h | 17 | ||||
-rw-r--r-- | src/include/wt_internal.h | 2 | ||||
-rw-r--r-- | src/support/hash_city.c | 5 | ||||
-rw-r--r-- | src/support/hash_fnv.c | 5 | ||||
-rw-r--r-- | test/bloom/Makefile.am | 12 | ||||
-rw-r--r-- | test/bloom/test_bloom.c | 306 |
11 files changed, 655 insertions, 2 deletions
diff --git a/build_posix/Make.subdirs b/build_posix/Make.subdirs index 0784de07b24..1ea88099a29 100644 --- a/build_posix/Make.subdirs +++ b/build_posix/Make.subdirs @@ -12,6 +12,7 @@ ext/compressors/bzip2_compress BZIP2 ext/compressors/nop_compress ext/compressors/snappy_compress SNAPPY lang/python PYTHON +test/bloom test/fops test/format HAVE_BDB test/salvage diff --git a/dist/filelist b/dist/filelist index b8bed7968ea..1749cf047d7 100644 --- a/dist/filelist +++ b/dist/filelist @@ -13,6 +13,7 @@ src/block/block_read.c src/block/block_slvg.c src/block/block_vrfy.c src/block/block_write.c +src/bloom/bloom.c src/btree/bt_bulk.c src/btree/bt_cache.c src/btree/bt_cell.c diff --git a/dist/s_string.ok b/dist/s_string.ok index 0bde5d826fe..f65de9b7ec2 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -257,6 +257,7 @@ bzip calloc catfmt cfg +cfkos checkfrag checksum checksums diff --git a/src/bloom/bloom.c b/src/bloom/bloom.c new file mode 100644 index 00000000000..ea022c5ad1e --- /dev/null +++ b/src/bloom/bloom.c @@ -0,0 +1,283 @@ +/*- + * Copyright (c) 2008-2012 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" +#include "bloom.h" + +#define WT_BLOOM_TABLE_CONFIG "key_format=r,value_format=1t,exclusive=true" + +static int __bloom_init( + WT_SESSION_IMPL *, const char *, const char *, WT_BLOOM **); +static int __bloom_setup(WT_BLOOM *, uint64_t, uint64_t, uint32_t, uint32_t); + +static int +__bloom_init(WT_SESSION_IMPL *session, + const char *uri, const char *config, WT_BLOOM **bloomp) +{ + WT_BLOOM *bloom; + WT_DECL_RET; + size_t len; + + bloom = NULL; + WT_ERR(__wt_calloc(session, 1, sizeof(WT_BLOOM), &bloom)); + WT_ERR(__wt_strdup(session, uri, &bloom->uri)); + WT_ERR(__wt_strdup(session, config, &bloom->config)); + len = strlen(WT_BLOOM_TABLE_CONFIG) + 2; + if (config != NULL) + len += strlen(config); + WT_ERR(__wt_calloc(session, len, sizeof(char), &bloom->config)); + /* Add the standard config at the end, so it overrides user settings. */ + (void)snprintf(bloom->config, len, + "%s,%s", config == NULL ? "" : config, WT_BLOOM_TABLE_CONFIG); + + bloom->session = session; + + *bloomp = bloom; + return (0); + +err: if (bloom->uri != NULL) + __wt_free(session, bloom->uri); + if (bloom->config != NULL) + __wt_free(session, bloom->uri); + if (bloom->bitstring != NULL) + __wt_free(session, bloom->bitstring); + if (bloom != NULL) + __wt_free(session, bloom); + return (ret); +} + +/* + * Populate the bloom structure. + * Setup is passed in either the count of items expected (n), or the length + * of the bitstring (m). Depends on whether the function is called via create + * or open. + */ +static int +__bloom_setup( + WT_BLOOM *bloom, uint64_t n, uint64_t m, uint32_t factor, uint32_t k) +{ + WT_ASSERT(bloom->session, k > 1); + + bloom->k = k; + bloom->factor = factor; + if (n != 0) { + bloom->n = n; + bloom->m = bloom->n * bloom->factor; + } else { + bloom->m = m; + bloom->n = bloom->m / bloom->factor; + } + return (0); +} + +/* + * __wt_bloom_create -- + * + * Creates and configures a WT_BLOOM handle, allocates a bitstring in memory + * to use while populating the bloom filter. + * + * count - is the expected number of inserted items + * factor - is the number of bits to use per inserted item + * k - is the number of hash values to set or test per item + */ +int +__wt_bloom_create( + WT_SESSION_IMPL *session, const char *uri, const char *config, + uint64_t count, uint32_t factor, uint32_t k, WT_BLOOM **bloomp) +{ + WT_BLOOM *bloom; + WT_RET(__bloom_init(session, uri, config, &bloom)); + WT_RET(__bloom_setup(bloom, count, 0, factor, k)); + + WT_RET(__bit_alloc(session, bloom->m, &bloom->bitstring)); + + *bloomp = bloom; + return (0); +} + +/* + * __wt_bloom_open -- + * Open a Bloom filter object for use by a single session. The filter must have + * been created and finalized. + */ +int +__wt_bloom_open(WT_SESSION_IMPL *session, + const char *uri, uint32_t factor, uint32_t k, WT_BLOOM **bloomp) +{ + WT_BLOOM *bloom; + WT_CURSOR *c; + WT_SESSION *wt_session; + uint64_t size; + + wt_session = (WT_SESSION *)session; + + WT_RET(__bloom_init(session, uri, NULL, &bloom)); + + /* Find the largest key, to get the size of the filter. */ + WT_RET(wt_session->open_cursor(wt_session, bloom->uri, NULL, NULL, &c)); + WT_RET(c->prev(c)); + c->get_key(c, &size); + WT_RET(c->close(c)); + + WT_RET(__bloom_setup(bloom, 0, size, factor, k)); + + *bloomp = bloom; + return (0); +} + +/* + * __wt_bloom_insert -- + * Adds the given key to the Bloom filter. + */ +int +__wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key) +{ + uint64_t h1, h2; + int i; + + h1 = __wt_hash_fnv64(key->data, key->size); + h2 = __wt_hash_city64(key->data, key->size); + for (i = 0; i < bloom->k; i++, h1 += h2) { + __bit_set(bloom->bitstring, (uint32_t)(h1 % bloom->m)); + } + return (0); +} + +/* + * __wt_bloom_finalize -- + * Writes the Bloom filter to stable storage. After calling finalize, only + * read operations can be performed on the bloom filter. + */ +int +__wt_bloom_finalize(WT_BLOOM *bloom) +{ + WT_SESSION *wt_session; + WT_CURSOR *c; + uint64_t i; + + wt_session = (WT_SESSION *)bloom->session; + + /* + * Create a bit table to store the bloom filter in. + * TODO: should this call __wt_schema_create directly? + */ + WT_RET(wt_session->create(wt_session, bloom->uri, bloom->config)); + + wt_session->open_cursor(wt_session, bloom->uri, NULL, "bulk", &c); + /* Add the entries from the array into the table. */ + for (i = 0; i < bloom->m; i++) { + c->set_value(c, __bit_test(bloom->bitstring, i)); + WT_RET(c->insert(c)); + } + WT_RET(c->close(c)); + __wt_free(bloom->session, bloom->bitstring); + bloom->bitstring = NULL; + + return (0); +} + +/* + * __wt_bloom_get -- + * Tests whether the given key is in the Bloom filter. + * Returns zero if found, WT_NOTFOUND if not. + */ +int +__wt_bloom_get(WT_BLOOM *bloom, WT_ITEM *key) +{ + WT_CURSOR *c; + WT_DECL_RET; + WT_SESSION *wt_session; + int i; + uint64_t h1, h2; + uint8_t bit; + + WT_ASSERT(bloom->session, bloom->bitstring == NULL); + + wt_session = (WT_SESSION *)bloom->session; + + /* Create a cursor on the first time through. */ + if (bloom->c == NULL) { + WT_RET(wt_session->open_cursor( + wt_session, bloom->uri, NULL, NULL, &c)); + bloom->c = c; + } else + c = bloom->c; + + /* + * This comparison code is complex to avoid calculating the second + * hash if possible. + */ + h1 = __wt_hash_fnv64(key->data, key->size); + + /* + * Add 1 to the hash because Wired Tiger tables are 1 based, and the + * original bitstring array was 0 based. + */ + c->set_key(c, (h1 % bloom->m) + 1); + WT_RET(c->search(c)); + WT_RET(c->get_value(c, &bit)); + if (bit == 0) + return (WT_NOTFOUND); + h2 = __wt_hash_city64(key->data, key->size); + for (i = 0, h1 += h2; i < bloom->k - 1; i++, h1 += h2) { + c->set_key(c, (h1 % bloom->m) + 1); + WT_ERR(c->search(c)); + WT_ERR(c->get_value(c, &bit)); + + if (bit == 0) + return (WT_NOTFOUND); + } + return (0); + +err: /* Don't return WT_NOTFOUND from a failed search. */ + if (ret == WT_NOTFOUND) + ret = WT_ERROR; + __wt_err(bloom->session, ret, "Failed lookup in bloom filter."); + return (ret); +} + +/* + * __wt_bloom_close -- + * Close the Bloom filter, release any resources. + */ +int +__wt_bloom_close(WT_BLOOM *bloom) +{ + WT_SESSION_IMPL *session; + + session = bloom->session; + + if (bloom->c != NULL) + bloom->c->close(bloom->c); + __wt_free(session, bloom->uri); + __wt_free(session, bloom->config); + __wt_free(session, bloom->bitstring); + __wt_free(session, bloom); + + return (0); +} + +/* + * __wt_bloom_drop -- + * Drop a Bloom filter, release any resources. + */ +int +__wt_bloom_drop(WT_BLOOM *bloom, const char *config) +{ + WT_DECL_RET; + WT_SESSION *wt_session; + + wt_session = (WT_SESSION *)bloom->session; + if (bloom->c != NULL) { + bloom->c->close(bloom->c); + bloom->c = NULL; + } + WT_RET(wt_session->drop(wt_session, bloom->uri, config)); + WT_TRET(__wt_bloom_close(bloom)); + + return (ret); +} diff --git a/src/include/bloom.h b/src/include/bloom.h new file mode 100644 index 00000000000..cbe5e1eb1a9 --- /dev/null +++ b/src/include/bloom.h @@ -0,0 +1,24 @@ +/*- + * Copyright (c) 2008-2012 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ +/* + * REFERENCES: + * http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf + * http://code.google.com/p/cityhash-c/ + */ + +struct __wt_bloom { + const char *uri; + char *config; + uint8_t *bitstring; /* For in memory representation. */ + WT_SESSION_IMPL *session; + WT_CURSOR *c; + + uint8_t k; /* The number of hash functions used. */ + uint8_t factor; /* The number of bits per item inserted. */ + uint64_t m; /* The number of slots in the bit string. */ + uint64_t n; /* The number of items to be inserted. */ +}; diff --git a/src/include/extern.h b/src/include/extern.h index 8fdc0d53a89..bb39305c8f7 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -223,6 +223,23 @@ extern int __wt_block_write_off(WT_SESSION_IMPL *session, uint32_t *sizep, uint32_t *cksump, int force_extend); +extern int __wt_bloom_create( WT_SESSION_IMPL *session, + const char *uri, + const char *config, + uint64_t count, + uint32_t factor, + uint32_t k, + WT_BLOOM **bloomp); +extern int __wt_bloom_open(WT_SESSION_IMPL *session, + const char *uri, + uint32_t factor, + uint32_t k, + WT_BLOOM **bloomp); +extern int __wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key); +extern int __wt_bloom_finalize(WT_BLOOM *bloom); +extern int __wt_bloom_get(WT_BLOOM *bloom, WT_ITEM *key); +extern int __wt_bloom_close(WT_BLOOM *bloom); +extern int __wt_bloom_drop(WT_BLOOM *bloom, const char *config); extern int __wt_bulk_init(WT_CURSOR_BULK *cbulk); extern int __wt_bulk_insert(WT_CURSOR_BULK *cbulk); extern int __wt_bulk_end(WT_CURSOR_BULK *cbulk); diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h index daf0f882859..0b6287c0bed 100644 --- a/src/include/wt_internal.h +++ b/src/include/wt_internal.h @@ -55,6 +55,8 @@ struct __wt_block_desc; typedef struct __wt_block_desc WT_BLOCK_DESC; struct __wt_block_header; typedef struct __wt_block_header WT_BLOCK_HEADER; +struct __wt_bloom; + typedef struct __wt_bloom WT_BLOOM; struct __wt_btree; typedef struct __wt_btree WT_BTREE; struct __wt_btree_session; diff --git a/src/support/hash_city.c b/src/support/hash_city.c index 7e679d31cc1..2fc5869400f 100644 --- a/src/support/hash_city.c +++ b/src/support/hash_city.c @@ -60,7 +60,10 @@ #include "wt_internal.h" static inline uint64_t CityHash64(const char *, size_t); -/* Wired Tiger wrapper around third party hash implementations. */ +/* + * __wt_hash_city64 -- + * Wired Tiger wrapper around third party hash implementation. + */ uint64_t __wt_hash_city64(const void *string, uint32_t len) { diff --git a/src/support/hash_fnv.c b/src/support/hash_fnv.c index 75f52961720..457eaa2d857 100644 --- a/src/support/hash_fnv.c +++ b/src/support/hash_fnv.c @@ -111,7 +111,10 @@ static inline uint64_t fnv_64a_buf(void *, size_t , uint64_t); */ #define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL) -/* Wired Tiger wrapper around third party hash implementation. */ +/* + * __wt_hash_fnv64 -- + * Wired Tiger wrapper around third party hash implementation. + */ uint64_t __wt_hash_fnv64(const void *string, uint32_t len) { diff --git a/test/bloom/Makefile.am b/test/bloom/Makefile.am new file mode 100644 index 00000000000..12a877063cc --- /dev/null +++ b/test/bloom/Makefile.am @@ -0,0 +1,12 @@ +INCLUDES = -I$(top_builddir) -I$(top_srcdir)/src/include + +noinst_PROGRAMS = t +t_SOURCES = test_bloom.c +t_LDADD = $(top_builddir)/libwiredtiger.la +t_LDFLAGS = -static + +# Run this during a "make check" smoke test. +TESTS = $(noinst_PROGRAMS) + +clean-local: + rm -rf WiredTiger* *.core __* diff --git a/test/bloom/test_bloom.c b/test/bloom/test_bloom.c new file mode 100644 index 00000000000..efff603d203 --- /dev/null +++ b/test/bloom/test_bloom.c @@ -0,0 +1,306 @@ +/*- + * Copyright (c) 2008-2012 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +typedef struct { + char *progname; /* Program name */ + + WT_CONNECTION *wt_conn; /* WT_CONNECTION handle */ + WT_SESSION *wt_session; /* WT_SESSION handle */ + + char *config_open; /* Command-line configuration */ + + uint32_t c_bitcnt; /* Config values */ + uint32_t c_cache; + uint32_t c_key_max; + uint32_t c_ops; + uint32_t c_k; /* Number of hash iterations */ + uint32_t c_factor; /* Number of bits per item */ + uint32_t c_srand; + + uint8_t **entries; +} GLOBAL; +GLOBAL g; + +static int cleanup(void); +void die(int e, const char *fmt, ...); +static int handle_message(WT_EVENT_HANDLER *handler, const char *message); +static void onint(int signo); +static int populate_entries(void); +static int run(void); +static int setup(void); +static void usage(void); + +static WT_EVENT_HANDLER event_handler = { + NULL, + handle_message, + NULL +}; + +int +main(int argc, char *argv[]) +{ + int ch; + + if ((g.progname = strrchr(argv[0], '/')) == NULL) + g.progname = argv[0]; + else + ++g.progname; + + /* Configure the FreeBSD malloc for debugging. */ + (void)setenv("MALLOC_OPTIONS", "AJZ", 1); + + /* Set default configuration values. */ + g.c_cache = 10; + g.c_ops = 100000; + g.c_key_max = 1000; + g.c_k = 4; + g.c_factor = 8; + g.c_srand = 3233456; + + /* Set values from the command line. */ + while ((ch = getopt(argc, argv, "c:f:k:o:s:")) != EOF) + switch (ch) { + case 'c': /* Cache size */ + g.c_cache = (u_int)atoi(optarg); + break; + case 'f': /* Factor */ + g.c_factor = (u_int)atoi(optarg); + break; + case 'k': /* Number of hash functions */ + g.c_k = (u_int)atoi(optarg); + break; + case 'o': /* Number of ops */ + g.c_ops = (u_int)atoi(optarg); + break; + case 's': /* Number of ops */ + g.c_srand = (u_int)atoi(optarg); + break; + default: + usage(); + } + + argc -= optind; + argv += optind; + + /* Clean up on signal. */ + (void)signal(SIGINT, onint); + + setup(); + run(); + cleanup(); + + return (EXIT_SUCCESS); +} + +int setup(void) +{ + WT_CONNECTION *conn; + WT_SESSION *session; + int ret; + char config[512]; + + /* + * This test doesn't test public Wired Tiger functionality, it still + * needs connection and session handles. + */ + + /* + * Open configuration -- put command line configuration options at the + * end so they can override "standard" configuration. + */ + snprintf(config, sizeof(config), + "create,error_prefix=\"%s\",cache_size=%" PRIu32 "MB,%s", + g.progname, g.c_cache, g.config_open == NULL ? "" : g.config_open); + + if ((ret = wiredtiger_open(NULL, &event_handler, config, &conn)) != 0) + die(ret, "wiredtiger_open"); + + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + die(ret, "connection.open_session"); + + g.wt_conn = conn; + g.wt_session = session; + if ((ret = populate_entries()) != 0) + die(ret, "populate_entries"); + + return (0); +} + +int run(void) +{ + WT_BLOOM *bloomp; + WT_ITEM item; + WT_SESSION_IMPL *sess; + const char *uri = "table:my_bloom"; + int ret; + uint32_t fp, i; + + /* Use the internal session handle to access private APIs. */ + sess = (WT_SESSION_IMPL *)g.wt_session; + + if ((ret = __wt_bloom_create( + sess, uri, NULL, g.c_ops, g.c_factor, g.c_k, &bloomp)) != 0) + die(ret, "__wt_bloom_create"); + + memset(&item, 0, sizeof(item)); + item.size = g.c_key_max; + for (i = 0; i < g.c_ops; i++) { + item.data = g.entries[i]; + if ((ret = __wt_bloom_insert(bloomp, &item)) != 0) + die(ret, "__wt_bloom_insert: %d", i); + } + + if ((ret = __wt_bloom_finalize(bloomp)) != 0) + die(ret, "__wt_bloom_finalize"); + + for (i = 0; i < g.c_ops; i++) { + item.data = g.entries[i]; + if ((ret = __wt_bloom_get(bloomp, &item)) != 0) { + fprintf(stderr, "get failed at record: %d\n", i); + die(ret, "__wt_bloom_get"); + } + } + if ((ret = __wt_bloom_close(bloomp)) != 0) + die(ret, "__wt_bloom_close"); + + g.wt_session->checkpoint(g.wt_session, NULL); + if ((ret = __wt_bloom_open(sess, uri, g.c_factor, g.c_k, &bloomp)) != 0) + die(ret, "__wt_bloom_open"); + for (i = 0; i < g.c_ops; i++) { + item.data = g.entries[i]; + if ((ret = __wt_bloom_get(bloomp, &item)) != 0) + die(ret, "__wt_bloom_get"); + } + + /* + * Try out some values we didn't insert - choose a different size to + * ensure the value doesn't overlap with existing values. + */ + item.size = g.c_key_max + 10; + item.data = calloc(item.size, 1); + memset((void *)item.data, 'a', item.size); + for (i = 0, fp = 0; i < g.c_ops; i++) { + ((uint8_t *)item.data)[i % item.size] = + 'a' + ((uint8_t)rand() % 26); + if ((ret = __wt_bloom_get(bloomp, &item)) == 0) + ++fp; + } + free((void *)item.data); + printf("Out of %d ops, got %d false positives, %.4f%%\n", + g.c_ops, fp, (float)fp/g.c_ops); + if ((ret = __wt_bloom_drop(bloomp, NULL)) != 0) + die(ret, "__wt_bloom_drop"); + + return (0); +} + +int cleanup(void) +{ + uint32_t i; + + for (i = 0; i < g.c_ops; i++) + free(g.entries[i]); + free(g.entries); + g.wt_session->close(g.wt_session, NULL); + g.wt_conn->close(g.wt_conn, NULL); + return (0); +} + +/* + * Create and keep all the strings used to populate the bloom filter, so that + * we can do validation with the same set of entries. + */ +static int populate_entries(void) +{ + uint32_t i, j; + uint8_t **entries; + + srand(g.c_srand); + + entries = calloc(g.c_ops, sizeof(uint8_t *)); + if (entries == NULL) + die(ENOMEM, "key buffer malloc"); + + for (i = 0; i < g.c_ops; i++) { + entries[i] = calloc(g.c_key_max, sizeof(uint8_t)); + if (entries[i] == NULL) + die(ENOMEM, "key buffer malloc 2"); + for (j = 0; j < g.c_key_max; j++) + entries[i][j] = 'a' + ((uint8_t)rand() % 26); + } + + g.entries = entries; + return (0); +} + +static int +handle_message(WT_EVENT_HANDLER *handler, const char *message) +{ + (void)handler; + + return (printf("%s\n", message) < 0 ? -1 : 0); +} + +/* + * die -- + * Report an error and quit. + */ +void +die(int e, const char *fmt, ...) +{ + va_list ap; + + if (fmt != NULL) { /* Death message. */ + fprintf(stderr, "%s: ", g.progname); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + if (e != 0) + fprintf(stderr, ": %s", wiredtiger_strerror(e)); + fprintf(stderr, "\n"); + } + + exit(EXIT_FAILURE); +} + +/* + * onint -- + * Interrupt signal handler. + */ +static void +onint(int signo) +{ + (void)signo; + + /* Remove the run's files except for __rand. + (void)system("rm -rf WiredTiger WiredTiger.* __[a-qs-z]* __run");*/ + + fprintf(stderr, "\n"); + exit(EXIT_FAILURE); +} + +/* + * usage -- + * Display usage statement and exit failure. + */ +static void +usage(void) +{ + fprintf(stderr, "usage: %s [-cfkos]\n", g.progname); + fprintf(stderr, "%s", + "\t-c cache size\n" + "\t-f number of bits per item\n" + "\t-k size of entry strings\n" + "\t-o number of operations to perform\n" + "\t-s random seed for run\n"); + + fprintf(stderr, "\n"); + + exit(EXIT_FAILURE); +} |