diff options
author | Alex Gorrod <agorrod@wiredtiger.com> | 2012-10-25 03:59:17 +0000 |
---|---|---|
committer | Alex Gorrod <agorrod@wiredtiger.com> | 2012-10-25 03:59:17 +0000 |
commit | e62e67f00c81d7f6c50108d9df2842093553427a (patch) | |
tree | be434f46910931fd54313dc0ac727a6fc983ea30 | |
parent | 1579fcf1b14b835e2607953363c61b9a5c31890f (diff) | |
download | mongo-e62e67f00c81d7f6c50108d9df2842093553427a.tar.gz |
Fix a bug in bulk load of bitmap files.
Fix a related bug in the bloom code that uses bitmap stores.
-rw-r--r-- | dist/api_data.py | 4 | ||||
-rw-r--r-- | src/bloom/bloom.c | 4 | ||||
-rw-r--r-- | src/btree/rec_write.c | 70 | ||||
-rw-r--r-- | src/include/wiredtiger.in | 5 |
4 files changed, 51 insertions, 32 deletions
diff --git a/dist/api_data.py b/dist/api_data.py index ed4898bda7e..d63d17228a8 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -313,7 +313,9 @@ methods = { resident bitmap to be loaded directly into a file by passing a \c WT_ITEM to WT_CURSOR::set_value where the \c size field indicates the number of records in the bitmap (as specified by - the file's \c value_format)''', + the file's \c value_format). Bulk load bitmap values must end + on a byte boundary relative to the bit count - except for the + last set of values loaded.''', type='string'), Config('checkpoint', '', r''' the name of a checkpoint to open (the reserved name diff --git a/src/bloom/bloom.c b/src/bloom/bloom.c index b53bd3663e0..3ac0329a22f 100644 --- a/src/bloom/bloom.c +++ b/src/bloom/bloom.c @@ -180,12 +180,12 @@ __wt_bloom_finalize(WT_BLOOM *bloom) /* Add the entries from the array into the table. */ for (i = 0; i < bloom->m; i += values.size) { - values.data = bloom->bitstring + i; + values.data = bloom->bitstring + (i >> 3); /* * Shave off some bytes for pure paranoia, in case WiredTiger * reserves some special sizes. */ - values.size = (uint32_t)WT_MIN(bloom->m - i, UINT32_MAX - 100); + values.size = (uint32_t)WT_MIN(bloom->m - i, UINT32_MAX - 128); c->set_value(c, &values); WT_ERR(c->insert(c)); } diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c index a23afca13d9..c3d04b7f765 100644 --- a/src/btree/rec_write.c +++ b/src/btree/rec_write.c @@ -1452,6 +1452,35 @@ __wt_rec_row_bulk_insert(WT_CURSOR_BULK *cbulk) #define WT_FIX_ENTRIES(btree, bytes) (((bytes) * 8) / (btree)->bitcnt) +static inline int +__rec_bulk_insert_split_check(WT_CURSOR_BULK *cbulk) +{ + WT_BTREE *btree; + WT_RECONCILE *r; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session; + r = cbulk->reconcile; + btree = session->btree; + + if (cbulk->entry == cbulk->nrecs) { + if (cbulk->entry != 0) { + /* + * If everything didn't fit, update the counters and + * split. + * + * Boundary: split or write the page. + */ + __rec_incr(session, r, cbulk->entry, + __bitstr_size(cbulk->entry * btree->bitcnt)); + WT_RET(__rec_split(session, r)); + } + cbulk->entry = 0; + cbulk->nrecs = WT_FIX_ENTRIES(btree, r->space_avail); + } + return (0); +} + /* * __wt_rec_col_fix_bulk_insert -- * Fixed-length column-store bulk insert. @@ -1464,7 +1493,7 @@ __wt_rec_col_fix_bulk_insert(WT_CURSOR_BULK *cbulk) WT_RECONCILE *r; WT_SESSION_IMPL *session; const uint8_t *data; - uint32_t entries, page_entries, page_size; + uint32_t entries, offset, page_entries, page_size; session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session; r = cbulk->reconcile; @@ -1472,41 +1501,28 @@ __wt_rec_col_fix_bulk_insert(WT_CURSOR_BULK *cbulk) cursor = &cbulk->cbt.iface; if (cbulk->bitmap) { + if (((r->recno - 1) * btree->bitcnt) & 0x7) { + WT_RET_MSG(session, EINVAL, + "Bulk bitmap load not aligned on a byte boundary"); + return (EINVAL); + } for (data = cursor->value.data, entries = cursor->value.size; entries > 0; entries -= page_entries, data += page_size) { - page_entries = WT_MIN(entries, - WT_FIX_ENTRIES(btree, r->space_avail)); - page_size = __bitstr_size(page_entries * btree->bitcnt); + WT_RET(__rec_bulk_insert_split_check(cbulk)); - memcpy(r->first_free, data, page_size); + page_entries = + WT_MIN(entries, cbulk->nrecs - cbulk->entry); + page_size = __bitstr_size(page_entries * btree->bitcnt); + offset = __bitstr_size(cbulk->entry * btree->bitcnt); + memcpy(r->first_free + offset, data, page_size); + cbulk->entry += page_entries; r->recno += page_entries; - - /* Leave the last page for wrapup. */ - if (entries > page_entries) { - __rec_incr(session, r, page_entries, page_size); - WT_RET(__rec_split(session, r)); - } else - cbulk->entry = page_entries; } return (0); } - if (cbulk->entry == cbulk->nrecs) { - if (cbulk->entry != 0) { - /* - * If everything didn't fit, update the counters and - * split. - * - * Boundary: split or write the page. - */ - __rec_incr(session, r, cbulk->entry, - __bitstr_size(cbulk->entry * btree->bitcnt)); - WT_RET(__rec_split(session, r)); - } - cbulk->entry = 0; - cbulk->nrecs = WT_FIX_ENTRIES(btree, r->space_avail); - } + WT_RET(__rec_bulk_insert_split_check(cbulk)); __bit_setv(r->first_free, cbulk->entry, btree->bitcnt, ((uint8_t *)cursor->value.data)[0]); diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index deccb7ed980..d6a39ddfb89 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -530,8 +530,9 @@ struct __wt_session { * stores\, and allows chunks of a memory resident bitmap to be loaded * directly into a file by passing a \c WT_ITEM to WT_CURSOR::set_value * where the \c size field indicates the number of records in the bitmap - * (as specified by the file's \c value_format).,a string; default \c - * false.} + * (as specified by the file's \c value_format). Bulk load bitmap values + * must end on a byte boundary relative to the bit count - except for + * the last set of values loaded..,a string; default \c false.} * @config{checkpoint, the name of a checkpoint to open (the reserved * name "WiredTigerCheckpoint" opens the most recent internal checkpoint * taken for the object). The cursor does not support data |