diff options
author | Michael Cahill <mjc@wiredtiger.com> | 2012-10-10 02:43:27 -0700 |
---|---|---|
committer | Michael Cahill <mjc@wiredtiger.com> | 2012-10-10 02:43:27 -0700 |
commit | 2a9dc16175a54a8ecef14b1470e283a37c1e5de6 (patch) | |
tree | 1905e0db4fae7f5c399d93f0eeaf587222dfa17e | |
parent | 324b94333905f222df56a2412dcae9ca0820d731 (diff) | |
parent | 633f04e2ff30450f1023663fe22c6306b928cf71 (diff) | |
download | mongo-2a9dc16175a54a8ecef14b1470e283a37c1e5de6.tar.gz |
Merge pull request #352 from wiredtiger/bloom-bulk-fastpath
Force-feed a Bloom filter bitmap into a bulk load.
-rw-r--r-- | dist/api_data.py | 16 | ||||
-rw-r--r-- | src/bloom/bloom.c | 26 | ||||
-rw-r--r-- | src/btree/rec_write.c | 23 | ||||
-rw-r--r-- | src/config/config_def.c | 2 | ||||
-rw-r--r-- | src/cursor/cur_bulk.c | 6 | ||||
-rw-r--r-- | src/cursor/cur_file.c | 12 | ||||
-rw-r--r-- | src/include/cursor.h | 3 | ||||
-rw-r--r-- | src/include/extern.h | 2 | ||||
-rw-r--r-- | src/include/wiredtiger.in | 8 |
9 files changed, 78 insertions, 20 deletions
diff --git a/dist/api_data.py b/dist/api_data.py index caf8693f0c5..3eb98368002 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -285,11 +285,17 @@ methods = { number key; valid only for cursors with record number keys''', type='boolean'), Config('bulk', 'false', r''' - configure the cursor for bulk loads, a fast load path - that may only be used for newly created objects. Cursors - configured for bulk load only support the WT_CURSOR::insert - and WT_CURSOR::close methods''', - type='boolean'), + configure the cursor for bulk loads, a fast load path that may + only be used for newly created objects. Cursors configured for + bulk load only support the WT_CURSOR::insert and + WT_CURSOR::close methods. The value is usually a true/false + flag, but the special value \c "bitmap" is for use with + fixed-length column stores, and allows chunks of a memory + resident bitmap to be loaded directly into a file by passing a + \c WT_ITEM to WT_CURSOR::set_value where the \c size field + indicates the number of records in the bitmap (as specified by + the file's \c value_format)''', + type='string'), Config('checkpoint', '', r''' the name of a checkpoint to open (the reserved name "WiredTigerCheckpoint" opens the most recent internal diff --git a/src/bloom/bloom.c b/src/bloom/bloom.c index ed69d57f77c..a6fc72cdf49 100644 --- a/src/bloom/bloom.c +++ b/src/bloom/bloom.c @@ -161,30 +161,40 @@ __wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key) int __wt_bloom_finalize(WT_BLOOM *bloom) { - WT_SESSION *wt_session; WT_CURSOR *c; + WT_DECL_RET; + WT_ITEM values; + WT_SESSION *wt_session; uint64_t i; wt_session = (WT_SESSION *)bloom->session; + WT_CLEAR(values); /* * Create a bit table to store the bloom filter in. * TODO: should this call __wt_schema_create directly? 
*/ WT_RET(wt_session->create(wt_session, bloom->uri, bloom->config)); - WT_RET(wt_session->open_cursor( - wt_session, bloom->uri, NULL, "bulk", &c)); + wt_session, bloom->uri, NULL, "bulk=bitmap", &c)); + /* Add the entries from the array into the table. */ - for (i = 0; i < bloom->m; i++) { - c->set_value(c, __bit_test(bloom->bitstring, i)); - WT_RET(c->insert(c)); + for (i = 0; i < bloom->m; i += values.size) { + values.data = bloom->bitstring + i; + /* + * Shave off some bytes for pure paranoia, in case WiredTiger + * reserves some special sizes. + */ + values.size = (uint32_t)WT_MIN(bloom->m - i, UINT32_MAX - 100); + c->set_value(c, &values); + WT_ERR(c->insert(c)); } - WT_RET(c->close(c)); + +err: WT_TRET(c->close(c)); __wt_free(bloom->session, bloom->bitstring); bloom->bitstring = NULL; - return (0); + return (ret); } /* diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c index 17619e27224..5dd05717011 100644 --- a/src/btree/rec_write.c +++ b/src/btree/rec_write.c @@ -1496,12 +1496,35 @@ __wt_rec_col_fix_bulk_insert(WT_CURSOR_BULK *cbulk) WT_CURSOR *cursor; WT_RECONCILE *r; WT_SESSION_IMPL *session; + const uint8_t *data; + uint32_t entries, page_entries, page_size; session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session; r = cbulk->reconcile; btree = session->btree; cursor = &cbulk->cbt.iface; + if (cbulk->bitmap) { + for (data = cursor->value.data, entries = cursor->value.size; + entries > 0; + entries -= page_entries, data += page_size) { + page_entries = WT_MIN(entries, + r->space_avail * 8 / btree->bitcnt); + page_size = __bitstr_size(page_entries * btree->bitcnt); + + memcpy(r->first_free, data, page_size); + r->recno += page_entries; + + /* Leave the last page for wrapup. 
*/ + if (entries > page_entries) { + __rec_incr(session, r, page_entries, page_size); + WT_RET(__rec_split(session, r)); + } else + cbulk->entry = page_entries; + } + return (0); + } + if (cbulk->entry == cbulk->nrecs) { if (cbulk->entry != 0) { /* diff --git a/src/config/config_def.c b/src/config/config_def.c index 7e753775550..2b19994a76b 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -292,7 +292,7 @@ __wt_confdfl_session_open_cursor = WT_CONFIG_CHECK __wt_confchk_session_open_cursor[] = { { "append", "boolean", NULL }, - { "bulk", "boolean", NULL }, + { "bulk", "string", NULL }, { "checkpoint", "string", NULL }, { "dump", "string", "choices=[\"hex\",\"print\"]" }, { "next_random", "boolean", NULL }, diff --git a/src/cursor/cur_bulk.c b/src/cursor/cur_bulk.c index ea503edea46..6199bb015be 100644 --- a/src/cursor/cur_bulk.c +++ b/src/cursor/cur_bulk.c @@ -70,7 +70,7 @@ __curbulk_close(WT_CURSOR *cursor) * Initialize a bulk cursor. */ int -__wt_curbulk_init(WT_CURSOR_BULK *cbulk) +__wt_curbulk_init(WT_CURSOR_BULK *cbulk, int bitmap) { WT_CURSOR *c = &cbulk->cbt.iface; @@ -84,5 +84,9 @@ __wt_curbulk_init(WT_CURSOR_BULK *cbulk) c->insert = __curbulk_insert; c->close = __curbulk_close; + cbulk->bitmap = bitmap; + if (bitmap) + F_SET(c, WT_CURSTD_RAW); + return (__wt_bulk_init(cbulk)); } diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index 489dc4b7dc4..1af6158d4bc 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -312,7 +312,7 @@ __wt_curfile_create(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt; WT_DECL_RET; size_t csize; - int bulk; + int bitmap, bulk; cbt = NULL; @@ -320,7 +320,13 @@ __wt_curfile_create(WT_SESSION_IMPL *session, WT_ASSERT(session, btree != NULL); WT_RET(__wt_config_gets_defno(session, cfg, "bulk", &cval)); - bulk = (cval.val != 0); + if ((cval.type == ITEM_ID || cval.type == ITEM_STRING) && + WT_STRING_MATCH("bitmap", cval.str, cval.len)) + bitmap = bulk = 1; + else { + bitmap = 0; + bulk = 
(cval.val != 0); + } csize = bulk ? sizeof(WT_CURSOR_BULK) : sizeof(WT_CURSOR_BTREE); WT_RET(__wt_calloc(session, 1, csize, &cbt)); @@ -334,7 +340,7 @@ __wt_curfile_create(WT_SESSION_IMPL *session, cbt->btree = session->btree; if (bulk) - WT_ERR(__wt_curbulk_init((WT_CURSOR_BULK *)cbt)); + WT_ERR(__wt_curbulk_init((WT_CURSOR_BULK *)cbt, bitmap)); /* * no_cache diff --git a/src/include/cursor.h b/src/include/cursor.h index ed926a04df4..e83412eaf8b 100644 --- a/src/include/cursor.h +++ b/src/include/cursor.h @@ -137,6 +137,9 @@ struct __wt_cursor_bulk { uint32_t entry; /* Entry count */ uint32_t nrecs; /* Max records per chunk */ + /* Special bitmap bulk load for fixed-length column stores. */ + int bitmap; + void *reconcile; /* Reconciliation information */ }; diff --git a/src/include/extern.h b/src/include/extern.h index b783ff4400f..a8ef1594153 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -600,7 +600,7 @@ extern int __wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp); -extern int __wt_curbulk_init(WT_CURSOR_BULK *cbulk); +extern int __wt_curbulk_init(WT_CURSOR_BULK *cbulk, int bitmap); extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 048efdd0b74..8870b5927b9 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -525,7 +525,13 @@ struct __wt_session { * @config{bulk, configure the cursor for bulk loads\, a fast load path * that may only be used for newly created objects. Cursors configured * for bulk load only support the WT_CURSOR::insert and WT_CURSOR::close - * methods.,a boolean flag; default \c false.} + * methods. 
The value is usually a true/false flag\, but the + * special value \c "bitmap" is for use with fixed-length column + * stores\, and allows chunks of a memory resident bitmap to be loaded + * directly into a file by passing a \c WT_ITEM to WT_CURSOR::set_value + * where the \c size field indicates the number of records in the bitmap + * (as specified by the file's \c value_format).,a string; default \c + * false.} * @config{checkpoint, the name of a checkpoint to open (the reserved * name "WiredTigerCheckpoint" opens the most recent internal checkpoint * taken for the object). The cursor does not support data |