diff options
author | Alex Gorrod <alexg@wiredtiger.com> | 2012-08-29 16:00:39 +1000 |
---|---|---|
committer | Alex Gorrod <alexg@wiredtiger.com> | 2012-08-29 16:00:39 +1000 |
commit | efbbae44e71ae3e9b7e6944eddf4742b403a10f4 (patch) | |
tree | 3e3e5d3f7185b9050d9500ae74a4c9420d5f2e05 /test/bloom | |
parent | bc8ea6d32e35a264e950beb39b3c7f8603595c30 (diff) | |
download | mongo-efbbae44e71ae3e9b7e6944eddf4742b403a10f4.tar.gz |
Add bloom filter implementation and test.
The bloom filters will be used internally by the LSM implementation.
They do not form part of the public API.
Diffstat (limited to 'test/bloom')
-rw-r--r-- | test/bloom/Makefile.am | 12 | ||||
-rw-r--r-- | test/bloom/test_bloom.c | 306 |
2 files changed, 318 insertions, 0 deletions
diff --git a/test/bloom/Makefile.am b/test/bloom/Makefile.am new file mode 100644 index 00000000000..12a877063cc --- /dev/null +++ b/test/bloom/Makefile.am @@ -0,0 +1,12 @@ +INCLUDES = -I$(top_builddir) -I$(top_srcdir)/src/include + +noinst_PROGRAMS = t +t_SOURCES = test_bloom.c +t_LDADD = $(top_builddir)/libwiredtiger.la +t_LDFLAGS = -static + +# Run this during a "make check" smoke test. +TESTS = $(noinst_PROGRAMS) + +clean-local: + rm -rf WiredTiger* *.core __* diff --git a/test/bloom/test_bloom.c b/test/bloom/test_bloom.c new file mode 100644 index 00000000000..efff603d203 --- /dev/null +++ b/test/bloom/test_bloom.c @@ -0,0 +1,306 @@ +/*- + * Copyright (c) 2008-2012 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +typedef struct { + char *progname; /* Program name */ + + WT_CONNECTION *wt_conn; /* WT_CONNECTION handle */ + WT_SESSION *wt_session; /* WT_SESSION handle */ + + char *config_open; /* Command-line configuration */ + + uint32_t c_bitcnt; /* Config values */ + uint32_t c_cache; + uint32_t c_key_max; + uint32_t c_ops; + uint32_t c_k; /* Number of hash iterations */ + uint32_t c_factor; /* Number of bits per item */ + uint32_t c_srand; + + uint8_t **entries; +} GLOBAL; +GLOBAL g; + +static int cleanup(void); +void die(int e, const char *fmt, ...); +static int handle_message(WT_EVENT_HANDLER *handler, const char *message); +static void onint(int signo); +static int populate_entries(void); +static int run(void); +static int setup(void); +static void usage(void); + +static WT_EVENT_HANDLER event_handler = { + NULL, + handle_message, + NULL +}; + +int +main(int argc, char *argv[]) +{ + int ch; + + if ((g.progname = strrchr(argv[0], '/')) == NULL) + g.progname = argv[0]; + else + ++g.progname; + + /* Configure the FreeBSD malloc for debugging. */ + (void)setenv("MALLOC_OPTIONS", "AJZ", 1); + + /* Set default configuration values. */ + g.c_cache = 10; + g.c_ops = 100000; + g.c_key_max = 1000; + g.c_k = 4; + g.c_factor = 8; + g.c_srand = 3233456; + + /* Set values from the command line. */ + while ((ch = getopt(argc, argv, "c:f:k:o:s:")) != EOF) + switch (ch) { + case 'c': /* Cache size */ + g.c_cache = (u_int)atoi(optarg); + break; + case 'f': /* Factor */ + g.c_factor = (u_int)atoi(optarg); + break; + case 'k': /* Number of hash functions */ + g.c_k = (u_int)atoi(optarg); + break; + case 'o': /* Number of ops */ + g.c_ops = (u_int)atoi(optarg); + break; + case 's': /* Number of ops */ + g.c_srand = (u_int)atoi(optarg); + break; + default: + usage(); + } + + argc -= optind; + argv += optind; + + /* Clean up on signal. */ + (void)signal(SIGINT, onint); + + setup(); + run(); + cleanup(); + + return (EXIT_SUCCESS); +} + +int setup(void) +{ + WT_CONNECTION *conn; + WT_SESSION *session; + int ret; + char config[512]; + + /* + * This test doesn't test public Wired Tiger functionality, it still + * needs connection and session handles. + */ + + /* + * Open configuration -- put command line configuration options at the + * end so they can override "standard" configuration. + */ + snprintf(config, sizeof(config), + "create,error_prefix=\"%s\",cache_size=%" PRIu32 "MB,%s", + g.progname, g.c_cache, g.config_open == NULL ? "" : g.config_open); + + if ((ret = wiredtiger_open(NULL, &event_handler, config, &conn)) != 0) + die(ret, "wiredtiger_open"); + + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + die(ret, "connection.open_session"); + + g.wt_conn = conn; + g.wt_session = session; + if ((ret = populate_entries()) != 0) + die(ret, "populate_entries"); + + return (0); +} + +int run(void) +{ + WT_BLOOM *bloomp; + WT_ITEM item; + WT_SESSION_IMPL *sess; + const char *uri = "table:my_bloom"; + int ret; + uint32_t fp, i; + + /* Use the internal session handle to access private APIs. */ + sess = (WT_SESSION_IMPL *)g.wt_session; + + if ((ret = __wt_bloom_create( + sess, uri, NULL, g.c_ops, g.c_factor, g.c_k, &bloomp)) != 0) + die(ret, "__wt_bloom_create"); + + memset(&item, 0, sizeof(item)); + item.size = g.c_key_max; + for (i = 0; i < g.c_ops; i++) { + item.data = g.entries[i]; + if ((ret = __wt_bloom_insert(bloomp, &item)) != 0) + die(ret, "__wt_bloom_insert: %d", i); + } + + if ((ret = __wt_bloom_finalize(bloomp)) != 0) + die(ret, "__wt_bloom_finalize"); + + for (i = 0; i < g.c_ops; i++) { + item.data = g.entries[i]; + if ((ret = __wt_bloom_get(bloomp, &item)) != 0) { + fprintf(stderr, "get failed at record: %d\n", i); + die(ret, "__wt_bloom_get"); + } + } + if ((ret = __wt_bloom_close(bloomp)) != 0) + die(ret, "__wt_bloom_close"); + + g.wt_session->checkpoint(g.wt_session, NULL); + if ((ret = __wt_bloom_open(sess, uri, g.c_factor, g.c_k, &bloomp)) != 0) + die(ret, "__wt_bloom_open"); + for (i = 0; i < g.c_ops; i++) { + item.data = g.entries[i]; + if ((ret = __wt_bloom_get(bloomp, &item)) != 0) + die(ret, "__wt_bloom_get"); + } + + /* + * Try out some values we didn't insert - choose a different size to + * ensure the value doesn't overlap with existing values. + */ + item.size = g.c_key_max + 10; + item.data = calloc(item.size, 1); + memset((void *)item.data, 'a', item.size); + for (i = 0, fp = 0; i < g.c_ops; i++) { + ((uint8_t *)item.data)[i % item.size] = + 'a' + ((uint8_t)rand() % 26); + if ((ret = __wt_bloom_get(bloomp, &item)) == 0) + ++fp; + } + free((void *)item.data); + printf("Out of %d ops, got %d false positives, %.4f%%\n", + g.c_ops, fp, (float)fp/g.c_ops); + if ((ret = __wt_bloom_drop(bloomp, NULL)) != 0) + die(ret, "__wt_bloom_drop"); + + return (0); +} + +int cleanup(void) +{ + uint32_t i; + + for (i = 0; i < g.c_ops; i++) + free(g.entries[i]); + free(g.entries); + g.wt_session->close(g.wt_session, NULL); + g.wt_conn->close(g.wt_conn, NULL); + return (0); +} + +/* + * Create and keep all the strings used to populate the bloom filter, so that + * we can do validation with the same set of entries. + */ +static int populate_entries(void) +{ + uint32_t i, j; + uint8_t **entries; + + srand(g.c_srand); + + entries = calloc(g.c_ops, sizeof(uint8_t *)); + if (entries == NULL) + die(ENOMEM, "key buffer malloc"); + + for (i = 0; i < g.c_ops; i++) { + entries[i] = calloc(g.c_key_max, sizeof(uint8_t)); + if (entries[i] == NULL) + die(ENOMEM, "key buffer malloc 2"); + for (j = 0; j < g.c_key_max; j++) + entries[i][j] = 'a' + ((uint8_t)rand() % 26); + } + + g.entries = entries; + return (0); +} + +static int +handle_message(WT_EVENT_HANDLER *handler, const char *message) +{ + (void)handler; + + return (printf("%s\n", message) < 0 ? -1 : 0); +} + +/* + * die -- + * Report an error and quit. + */ +void +die(int e, const char *fmt, ...) +{ + va_list ap; + + if (fmt != NULL) { /* Death message. */ + fprintf(stderr, "%s: ", g.progname); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + if (e != 0) + fprintf(stderr, ": %s", wiredtiger_strerror(e)); + fprintf(stderr, "\n"); + } + + exit(EXIT_FAILURE); +} + +/* + * onint -- + * Interrupt signal handler. + */ +static void +onint(int signo) +{ + (void)signo; + + /* Remove the run's files except for __rand. + (void)system("rm -rf WiredTiger WiredTiger.* __[a-qs-z]* __run");*/ + + fprintf(stderr, "\n"); + exit(EXIT_FAILURE); +} + +/* + * usage -- + * Display usage statement and exit failure. + */ +static void +usage(void) +{ + fprintf(stderr, "usage: %s [-cfkos]\n", g.progname); + fprintf(stderr, "%s", + "\t-c cache size\n" + "\t-f number of bits per item\n" + "\t-k size of entry strings\n" + "\t-o number of operations to perform\n" + "\t-s random seed for run\n"); + + fprintf(stderr, "\n"); + + exit(EXIT_FAILURE); +} |