summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michael Cahill <mjc@wiredtiger.com> 2012-10-10 02:43:27 -0700
committer Michael Cahill <mjc@wiredtiger.com> 2012-10-10 02:43:27 -0700
commit 2a9dc16175a54a8ecef14b1470e283a37c1e5de6 (patch)
tree 1905e0db4fae7f5c399d93f0eeaf587222dfa17e
parent 324b94333905f222df56a2412dcae9ca0820d731 (diff)
parent 633f04e2ff30450f1023663fe22c6306b928cf71 (diff)
download mongo-2a9dc16175a54a8ecef14b1470e283a37c1e5de6.tar.gz
Merge pull request #352 from wiredtiger/bloom-bulk-fastpath
Force-feed a Bloom filter bitmap into a bulk load.
-rw-r--r-- dist/api_data.py 16
-rw-r--r-- src/bloom/bloom.c 26
-rw-r--r-- src/btree/rec_write.c 23
-rw-r--r-- src/config/config_def.c 2
-rw-r--r-- src/cursor/cur_bulk.c 6
-rw-r--r-- src/cursor/cur_file.c 12
-rw-r--r-- src/include/cursor.h 3
-rw-r--r-- src/include/extern.h 2
-rw-r--r-- src/include/wiredtiger.in 8
9 files changed, 78 insertions, 20 deletions
diff --git a/dist/api_data.py b/dist/api_data.py
index caf8693f0c5..3eb98368002 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -285,11 +285,17 @@ methods = {
number key; valid only for cursors with record number keys''',
type='boolean'),
Config('bulk', 'false', r'''
- configure the cursor for bulk loads, a fast load path
- that may only be used for newly created objects. Cursors
- configured for bulk load only support the WT_CURSOR::insert
- and WT_CURSOR::close methods''',
- type='boolean'),
+ configure the cursor for bulk loads, a fast load path that may
+ only be used for newly created objects. Cursors configured for
+ bulk load only support the WT_CURSOR::insert and
+ WT_CURSOR::close methods. The value is usually a true/false
+ flag, but the special value \c "bitmap" is for use with
+ fixed-length column stores, and allows chunks of a memory
+ resident bitmap to be loaded directly into a file by passing a
+ \c WT_ITEM to WT_CURSOR::set_value where the \c size field
+ indicates the number of records in the bitmap (as specified by
+ the file's \c value_format)''',
+ type='string'),
Config('checkpoint', '', r'''
the name of a checkpoint to open (the reserved name
"WiredTigerCheckpoint" opens the most recent internal
diff --git a/src/bloom/bloom.c b/src/bloom/bloom.c
index ed69d57f77c..a6fc72cdf49 100644
--- a/src/bloom/bloom.c
+++ b/src/bloom/bloom.c
@@ -161,30 +161,40 @@ __wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key)
int
__wt_bloom_finalize(WT_BLOOM *bloom)
{
- WT_SESSION *wt_session;
WT_CURSOR *c;
+ WT_DECL_RET;
+ WT_ITEM values;
+ WT_SESSION *wt_session;
uint64_t i;
wt_session = (WT_SESSION *)bloom->session;
+ WT_CLEAR(values);
/*
* Create a bit table to store the bloom filter in.
* TODO: should this call __wt_schema_create directly?
*/
WT_RET(wt_session->create(wt_session, bloom->uri, bloom->config));
-
WT_RET(wt_session->open_cursor(
- wt_session, bloom->uri, NULL, "bulk", &c));
+ wt_session, bloom->uri, NULL, "bulk=bitmap", &c));
+
/* Add the entries from the array into the table. */
- for (i = 0; i < bloom->m; i++) {
- c->set_value(c, __bit_test(bloom->bitstring, i));
- WT_RET(c->insert(c));
+ for (i = 0; i < bloom->m; i += values.size) {
+ values.data = bloom->bitstring + i;
+ /*
+ * Shave off some bytes for pure paranoia, in case WiredTiger
+ * reserves some special sizes.
+ */
+ values.size = (uint32_t)WT_MIN(bloom->m - i, UINT32_MAX - 100);
+ c->set_value(c, &values);
+ WT_ERR(c->insert(c));
}
- WT_RET(c->close(c));
+
+err: WT_TRET(c->close(c));
__wt_free(bloom->session, bloom->bitstring);
bloom->bitstring = NULL;
- return (0);
+ return (ret);
}
/*
diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c
index 17619e27224..5dd05717011 100644
--- a/src/btree/rec_write.c
+++ b/src/btree/rec_write.c
@@ -1496,12 +1496,35 @@ __wt_rec_col_fix_bulk_insert(WT_CURSOR_BULK *cbulk)
WT_CURSOR *cursor;
WT_RECONCILE *r;
WT_SESSION_IMPL *session;
+ const uint8_t *data;
+ uint32_t entries, page_entries, page_size;
session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session;
r = cbulk->reconcile;
btree = session->btree;
cursor = &cbulk->cbt.iface;
+ if (cbulk->bitmap) {
+ for (data = cursor->value.data, entries = cursor->value.size;
+ entries > 0;
+ entries -= page_entries, data += page_size) {
+ page_entries = WT_MIN(entries,
+ r->space_avail * 8 / btree->bitcnt);
+ page_size = __bitstr_size(page_entries * btree->bitcnt);
+
+ memcpy(r->first_free, data, page_size);
+ r->recno += page_entries;
+
+ /* Leave the last page for wrapup. */
+ if (entries > page_entries) {
+ __rec_incr(session, r, page_entries, page_size);
+ WT_RET(__rec_split(session, r));
+ } else
+ cbulk->entry = page_entries;
+ }
+ return (0);
+ }
+
if (cbulk->entry == cbulk->nrecs) {
if (cbulk->entry != 0) {
/*
diff --git a/src/config/config_def.c b/src/config/config_def.c
index 7e753775550..2b19994a76b 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -292,7 +292,7 @@ __wt_confdfl_session_open_cursor =
WT_CONFIG_CHECK
__wt_confchk_session_open_cursor[] = {
{ "append", "boolean", NULL },
- { "bulk", "boolean", NULL },
+ { "bulk", "string", NULL },
{ "checkpoint", "string", NULL },
{ "dump", "string", "choices=[\"hex\",\"print\"]" },
{ "next_random", "boolean", NULL },
diff --git a/src/cursor/cur_bulk.c b/src/cursor/cur_bulk.c
index ea503edea46..6199bb015be 100644
--- a/src/cursor/cur_bulk.c
+++ b/src/cursor/cur_bulk.c
@@ -70,7 +70,7 @@ __curbulk_close(WT_CURSOR *cursor)
* Initialize a bulk cursor.
*/
int
-__wt_curbulk_init(WT_CURSOR_BULK *cbulk)
+__wt_curbulk_init(WT_CURSOR_BULK *cbulk, int bitmap)
{
WT_CURSOR *c = &cbulk->cbt.iface;
@@ -84,5 +84,9 @@ __wt_curbulk_init(WT_CURSOR_BULK *cbulk)
c->insert = __curbulk_insert;
c->close = __curbulk_close;
+ cbulk->bitmap = bitmap;
+ if (bitmap)
+ F_SET(c, WT_CURSTD_RAW);
+
return (__wt_bulk_init(cbulk));
}
diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c
index 489dc4b7dc4..1af6158d4bc 100644
--- a/src/cursor/cur_file.c
+++ b/src/cursor/cur_file.c
@@ -312,7 +312,7 @@ __wt_curfile_create(WT_SESSION_IMPL *session,
WT_CURSOR_BTREE *cbt;
WT_DECL_RET;
size_t csize;
- int bulk;
+ int bitmap, bulk;
cbt = NULL;
@@ -320,7 +320,13 @@ __wt_curfile_create(WT_SESSION_IMPL *session,
WT_ASSERT(session, btree != NULL);
WT_RET(__wt_config_gets_defno(session, cfg, "bulk", &cval));
- bulk = (cval.val != 0);
+ if ((cval.type == ITEM_ID || cval.type == ITEM_STRING) &&
+ WT_STRING_MATCH("bitmap", cval.str, cval.len))
+ bitmap = bulk = 1;
+ else {
+ bitmap = 0;
+ bulk = (cval.val != 0);
+ }
csize = bulk ? sizeof(WT_CURSOR_BULK) : sizeof(WT_CURSOR_BTREE);
WT_RET(__wt_calloc(session, 1, csize, &cbt));
@@ -334,7 +340,7 @@ __wt_curfile_create(WT_SESSION_IMPL *session,
cbt->btree = session->btree;
if (bulk)
- WT_ERR(__wt_curbulk_init((WT_CURSOR_BULK *)cbt));
+ WT_ERR(__wt_curbulk_init((WT_CURSOR_BULK *)cbt, bitmap));
/*
* no_cache
diff --git a/src/include/cursor.h b/src/include/cursor.h
index ed926a04df4..e83412eaf8b 100644
--- a/src/include/cursor.h
+++ b/src/include/cursor.h
@@ -137,6 +137,9 @@ struct __wt_cursor_bulk {
uint32_t entry; /* Entry count */
uint32_t nrecs; /* Max records per chunk */
+ /* Special bitmap bulk load for fixed-length column stores. */
+ int bitmap;
+
void *reconcile; /* Reconciliation information */
};
diff --git a/src/include/extern.h b/src/include/extern.h
index b783ff4400f..a8ef1594153 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -600,7 +600,7 @@ extern int __wt_curbackup_open(WT_SESSION_IMPL *session,
const char *uri,
const char *cfg[],
WT_CURSOR **cursorp);
-extern int __wt_curbulk_init(WT_CURSOR_BULK *cbulk);
+extern int __wt_curbulk_init(WT_CURSOR_BULK *cbulk, int bitmap);
extern int __wt_curconfig_open(WT_SESSION_IMPL *session,
const char *uri,
const char *cfg[],
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 048efdd0b74..8870b5927b9 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -525,7 +525,13 @@ struct __wt_session {
* @config{bulk, configure the cursor for bulk loads\, a fast load path
* that may only be used for newly created objects. Cursors configured
* for bulk load only support the WT_CURSOR::insert and WT_CURSOR::close
- * methods.,a boolean flag; default \c false.}
+ * methods. The value is usually a true/false flag\, but the
+ * special value \c "bitmap" is for use with fixed-length column
+ * stores\, and allows chunks of a memory resident bitmap to be loaded
+ * directly into a file by passing a \c WT_ITEM to WT_CURSOR::set_value
+ * where the \c size field indicates the number of records in the bitmap
+ * (as specified by the file's \c value_format).,a string; default \c
+ * false.}
* @config{checkpoint, the name of a checkpoint to open (the reserved
* name "WiredTigerCheckpoint" opens the most recent internal checkpoint
* taken for the object). The cursor does not support data