diff options
author | Alex Gorrod <alexander.gorrod@mongodb.com> | 2015-05-08 14:12:59 +1000 |
---|---|---|
committer | Alex Gorrod <alexg@wiredtiger.com> | 2015-07-03 06:18:05 +0000 |
commit | 10eb756c7bb8cc1a6847a2f2fec5fcb2ee883d91 (patch) | |
tree | de0be96737440fe355bb60cc16fbcb161de181be | |
parent | 7a9e1bdade725fd59a0fd87ca77c7dd66aeba1ec (diff) | |
download | mongo-10eb756c7bb8cc1a6847a2f2fec5fcb2ee883d91.tar.gz |
WT-1922 Add support for bulk load in LSM trees. Also references
SERVER-18321
(cherry picked from commit 4d37a27896872dc5d280f5e85666e1d8431ec33b)
-rw-r--r-- | build_win/filelist.win | 1 | ||||
-rw-r--r-- | dist/filelist | 1 | ||||
-rw-r--r-- | src/include/extern.h | 4 | ||||
-rw-r--r-- | src/include/lsm.h | 17 | ||||
-rw-r--r-- | src/lsm/lsm_cursor.c | 104 | ||||
-rw-r--r-- | src/lsm/lsm_cursor_bulk.c | 116 |
6 files changed, 205 insertions, 38 deletions
diff --git a/build_win/filelist.win b/build_win/filelist.win index e297ca16b06..8655c0eda8e 100644 --- a/build_win/filelist.win +++ b/build_win/filelist.win @@ -85,6 +85,7 @@ src/log/log.c src/log/log_auto.c src/log/log_slot.c src/lsm/lsm_cursor.c +src/lsm/lsm_cursor_bulk.c src/lsm/lsm_manager.c src/lsm/lsm_merge.c src/lsm/lsm_meta.c diff --git a/dist/filelist b/dist/filelist index ee70ccf765e..af72bab6718 100644 --- a/dist/filelist +++ b/dist/filelist @@ -85,6 +85,7 @@ src/log/log.c src/log/log_auto.c src/log/log_slot.c src/lsm/lsm_cursor.c +src/lsm/lsm_cursor_bulk.c src/lsm/lsm_manager.c src/lsm/lsm_merge.c src/lsm/lsm_meta.c diff --git a/src/include/extern.h b/src/include/extern.h index af50ca9180f..63b6bb2cbc5 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -364,8 +364,12 @@ extern int __wt_log_slot_wait(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); extern int64_t __wt_log_slot_release(WT_LOGSLOT *slot, uint64_t size); extern int __wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); extern int __wt_log_slot_grow_buffers(WT_SESSION_IMPL *session, size_t newsize); +extern int __wt_clsm_request_switch(WT_CURSOR_LSM *clsm); +extern int __wt_clsm_await_switch(WT_CURSOR_LSM *clsm); extern int __wt_clsm_init_merge( WT_CURSOR *cursor, u_int start_chunk, uint32_t start_id, u_int nchunks); +extern int __wt_clsm_close(WT_CURSOR *cursor); extern int __wt_clsm_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); +extern int __wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[]); extern int __wt_lsm_manager_config(WT_SESSION_IMPL *session, const char **cfg); extern int __wt_lsm_manager_reconfig(WT_SESSION_IMPL *session, const char **cfg); extern int __wt_lsm_manager_start(WT_SESSION_IMPL *session); diff --git a/src/include/lsm.h b/src/include/lsm.h index aa1d797e3b5..dc6a0d7e027 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -57,15 +57,16 @@ struct __wt_cursor_lsm { u_int 
update_count; /* Updates performed. */ -#define WT_CLSM_ACTIVE 0x01 /* Incremented the session count */ -#define WT_CLSM_ITERATE_NEXT 0x02 /* Forward iteration */ -#define WT_CLSM_ITERATE_PREV 0x04 /* Backward iteration */ -#define WT_CLSM_MERGE 0x08 /* Merge cursor, don't update */ -#define WT_CLSM_MINOR_MERGE 0x10 /* Minor merge, include tombstones */ -#define WT_CLSM_MULTIPLE 0x20 /* Multiple cursors have values for the +#define WT_CLSM_ACTIVE 0x001 /* Incremented the session count */ +#define WT_CLSM_BULK 0x002 /* Open for bulk load */ +#define WT_CLSM_ITERATE_NEXT 0x004 /* Forward iteration */ +#define WT_CLSM_ITERATE_PREV 0x008 /* Backward iteration */ +#define WT_CLSM_MERGE 0x010 /* Merge cursor, don't update */ +#define WT_CLSM_MINOR_MERGE 0x020 /* Minor merge, include tombstones */ +#define WT_CLSM_MULTIPLE 0x040 /* Multiple cursors have values for the current key */ -#define WT_CLSM_OPEN_READ 0x40 /* Open for reads */ -#define WT_CLSM_OPEN_SNAPSHOT 0x80 /* Open for snapshot isolation */ +#define WT_CLSM_OPEN_READ 0x080 /* Open for reads */ +#define WT_CLSM_OPEN_SNAPSHOT 0x100 /* Open for snapshot isolation */ uint32_t flags; }; diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 7665e417722..56126a4b724 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -20,11 +20,11 @@ static int __clsm_open_cursors(WT_CURSOR_LSM *, int, u_int, uint32_t); static int __clsm_reset_cursors(WT_CURSOR_LSM *, WT_CURSOR *); /* - * __clsm_request_switch -- + * __wt_clsm_request_switch -- * Request an LSM tree switch for a cursor operation. 
*/ -static inline int -__clsm_request_switch(WT_CURSOR_LSM *clsm) +int +__wt_clsm_request_switch(WT_CURSOR_LSM *clsm) { WT_DECL_RET; WT_LSM_TREE *lsm_tree; @@ -44,9 +44,9 @@ __clsm_request_switch(WT_CURSOR_LSM *clsm) if (lsm_tree->nchunks == 0 || (clsm->dsk_gen == lsm_tree->dsk_gen && !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH))) { + F_SET(lsm_tree, WT_LSM_TREE_NEED_SWITCH); ret = __wt_lsm_manager_push_entry( session, WT_LSM_WORK_SWITCH, 0, lsm_tree); - F_SET(lsm_tree, WT_LSM_TREE_NEED_SWITCH); } WT_TRET(__wt_lsm_tree_readunlock(session, lsm_tree)); } @@ -55,6 +55,41 @@ __clsm_request_switch(WT_CURSOR_LSM *clsm) } /* + * __wt_clsm_await_switch -- + * Wait for a switch to have completed in the LSM tree + */ +int +__wt_clsm_await_switch(WT_CURSOR_LSM *clsm) +{ + WT_LSM_TREE *lsm_tree; + WT_SESSION_IMPL *session; + int waited; + + lsm_tree = clsm->lsm_tree; + session = (WT_SESSION_IMPL *)clsm->iface.session; + + /* + * If there is no primary chunk, or a chunk has overflowed the hard + * limit, which either means a worker thread has fallen behind or there + * has just been a user-level checkpoint, wait until the tree changes. + * + * We used to switch chunks in the application thread here, but that is + * problematic because there is a transaction in progress and it could + * roll back, leaving the metadata inconsistent. + */ + for (waited = 0; + lsm_tree->nchunks == 0 || + clsm->dsk_gen == lsm_tree->dsk_gen; + ++waited) { + if (waited % 1000 == 0) + WT_RET(__wt_lsm_manager_push_entry( + session, WT_LSM_WORK_SWITCH, 0, lsm_tree)); + __wt_sleep(0, 10); + } + return (0); +} + +/* * __clsm_enter_update -- * Make sure an LSM cursor is ready to perform an update. 
*/ @@ -65,7 +100,7 @@ __clsm_enter_update(WT_CURSOR_LSM *clsm) WT_LSM_CHUNK *primary_chunk; WT_LSM_TREE *lsm_tree; WT_SESSION_IMPL *session; - int hard_limit, have_primary, ovfl, waited; + int hard_limit, have_primary, ovfl; lsm_tree = clsm->lsm_tree; ovfl = 0; @@ -108,30 +143,13 @@ __clsm_enter_update(WT_CURSOR_LSM *clsm) } /* Request a switch. */ - WT_RET(__clsm_request_switch(clsm)); + WT_RET(__wt_clsm_request_switch(clsm)); /* If we only overflowed the soft limit, we're done. */ if (have_primary && !hard_limit) return (0); - /* - * If there is no primary chunk, or it has overflowed the hard limit, - * which either means a worker thread has fallen behind or there has - * just been a user-level checkpoint, wait until the tree changes. - * - * We used to switch chunks in the application thread if we got to - * here, but that is problematic because there is a transaction in - * progress and it could roll back, leaving the metadata inconsistent. - */ - for (waited = 0; - lsm_tree->nchunks == 0 || - clsm->dsk_gen == lsm_tree->dsk_gen; - ++waited) { - if (waited % 1000 == 0) - WT_RET(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_SWITCH, 0, lsm_tree)); - __wt_sleep(0, 10); - } + WT_RET(__wt_clsm_await_switch(clsm)); return (0); } @@ -1423,11 +1441,11 @@ err: __clsm_leave(clsm); } /* - * __clsm_close -- + * __wt_clsm_close -- * WT_CURSOR->close method for the LSM cursor type. 
*/ -static int -__clsm_close(WT_CURSOR *cursor) +int +__wt_clsm_close(WT_CURSOR *cursor) { WT_CURSOR_LSM *clsm; WT_DECL_RET; @@ -1481,14 +1499,17 @@ __wt_clsm_open(WT_SESSION_IMPL *session, __clsm_update, /* update */ __clsm_remove, /* remove */ __wt_cursor_reconfigure, /* reconfigure */ - __clsm_close); /* close */ + __wt_clsm_close); /* close */ WT_CURSOR *cursor; WT_CURSOR_LSM *clsm; WT_DECL_RET; WT_LSM_TREE *lsm_tree; + int bulk; + bulk = 0; clsm = NULL; cursor = NULL; + lsm_tree = NULL; if (!WT_PREFIX_MATCH(uri, "lsm:")) return (EINVAL); @@ -1498,9 +1519,21 @@ __wt_clsm_open(WT_SESSION_IMPL *session, WT_RET_MSG(session, EINVAL, "LSM does not support opening by checkpoint"); + WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval)); + if (cval.val != 0) + bulk = 1; + /* Get the LSM tree. */ WT_WITH_DHANDLE_LOCK(session, - ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree)); + ret = __wt_lsm_tree_get(session, uri, bulk, &lsm_tree)); + /* + * Check whether the exclusive open for a bulk load succeeded, and + * if it did ensure that it's safe to bulk load into the tree. + */ + if (bulk && (ret == EBUSY || (ret == 0 && lsm_tree->nchunks > 1))) + WT_ERR_MSG(session, EINVAL, + "bulk-load is only supported on newly created LSM trees"); + /* Flag any errors from the tree get. */ WT_RET(ret); WT_ERR(__wt_calloc_one(session, &clsm)); @@ -1523,9 +1556,20 @@ __wt_clsm_open(WT_SESSION_IMPL *session, WT_STATIC_ASSERT(offsetof(WT_CURSOR_LSM, iface) == 0); WT_ERR(__wt_cursor_init(cursor, cursor->uri, owner, cfg, cursorp)); + if (bulk) + WT_ERR(__wt_clsm_open_bulk(clsm, cfg)); + if (0) { err: if (clsm != NULL) - WT_TRET(__clsm_close(cursor)); + WT_TRET(__wt_clsm_close(cursor)); + else if (lsm_tree != NULL) + __wt_lsm_tree_release(session, lsm_tree); + + /* + * We open bulk cursors after setting the returned cursor. + * Fix that here. 
+ */ + *cursorp = NULL; } return (ret); diff --git a/src/lsm/lsm_cursor_bulk.c b/src/lsm/lsm_cursor_bulk.c new file mode 100644 index 00000000000..6b51a070e47 --- /dev/null +++ b/src/lsm/lsm_cursor_bulk.c @@ -0,0 +1,116 @@ +/*- + * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __clsm_close_bulk -- + * WT_CURSOR->close method for LSM bulk cursors. + */ +static int +__clsm_close_bulk(WT_CURSOR *cursor) +{ + WT_CURSOR_LSM *clsm; + WT_LSM_TREE *lsm_tree; + + clsm = (WT_CURSOR_LSM *)cursor; + lsm_tree = clsm->lsm_tree; + F_SET(lsm_tree->chunk[0], WT_LSM_CHUNK_ONDISK); + + WT_RET(__wt_clsm_close(cursor)); + return (0); +} +/* + * __clsm_insert_bulk -- + * WT_CURSOR->insert method for LSM bulk cursors. + */ +static int +__clsm_insert_bulk(WT_CURSOR *cursor) +{ + WT_CURSOR *bulk_cursor; + WT_CURSOR_LSM *clsm; + WT_LSM_TREE *lsm_tree; + WT_SESSION_IMPL *session; + + clsm = (WT_CURSOR_LSM *)cursor; + lsm_tree = clsm->lsm_tree; + session = (WT_SESSION_IMPL *)clsm->iface.session; + + WT_ASSERT(session, lsm_tree->nchunks == 1 && clsm->nchunks == 1); + ++lsm_tree->chunk[0]->count; + bulk_cursor = *clsm->cursors; + bulk_cursor->set_key(bulk_cursor, &cursor->key); + bulk_cursor->set_value(bulk_cursor, &cursor->value); + WT_RET(bulk_cursor->insert(bulk_cursor)); + + return (0); +} + +/* + * __wt_clsm_open_bulk -- + * WT_SESSION->open_cursor method for LSM bulk cursors. + */ +int +__wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[]) +{ + WT_CURSOR *cursor, *bulk_cursor; + WT_LSM_TREE *lsm_tree; + WT_SESSION_IMPL *session; + + bulk_cursor = NULL; + cursor = &clsm->iface; + lsm_tree = clsm->lsm_tree; + session = (WT_SESSION_IMPL *)clsm->iface.session; + + F_SET(clsm, WT_CLSM_BULK); + + /* Bulk cursors are limited to insert and close. 
*/ + __wt_cursor_set_notsup(cursor); + cursor->insert = __clsm_insert_bulk; + cursor->close = __clsm_close_bulk; + + /* Set up the first chunk in the tree: request a switch and wait for it. */ + WT_RET(__wt_clsm_request_switch(clsm)); + WT_RET(__wt_clsm_await_switch(clsm)); + + /* + * Grab and release the LSM tree lock to ensure that the first chunk + * has been fully created before proceeding. We have the LSM tree + * open exclusive, so that saves us from needing the lock generally. + */ + WT_RET(__wt_lsm_tree_readlock(session, lsm_tree)); + WT_RET(__wt_lsm_tree_readunlock(session, lsm_tree)); + + /* + * Allocate single-entry cursor and bloom filter arrays. The bulk cursor + * opened below isn't a regular LSM chunk cursor, but it uses the + * standard storage locations. Allocating space for a bloom filter + * makes cleanup simpler. Cleaned up by cursor close on error. + */ + WT_RET(__wt_calloc_one(session, &clsm->blooms)); + clsm->bloom_alloc = 1; + WT_RET(__wt_calloc_one(session, &clsm->cursors)); + clsm->cursor_alloc = 1; + clsm->nchunks = 1; + + /* + * Open a bulk cursor on the first chunk in the tree. No LSM tree lock + * is held here: the read lock was taken and released above to ensure + * that the first chunk has been fully created. Pass through the + * application config so the chunk cursor is opened for bulk + * access. + */ + WT_RET(__wt_open_cursor(session, + lsm_tree->chunk[0]->uri, &clsm->iface, cfg, &bulk_cursor)); + clsm->cursors[0] = bulk_cursor; + /* LSM cursors are always raw */ + F_SET(bulk_cursor, WT_CURSTD_RAW); + + return (0); +} + |