summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Gorrod <alexander.gorrod@mongodb.com>2015-05-08 14:12:59 +1000
committerAlex Gorrod <alexg@wiredtiger.com>2015-07-03 06:18:05 +0000
commit10eb756c7bb8cc1a6847a2f2fec5fcb2ee883d91 (patch)
treede0be96737440fe355bb60cc16fbcb161de181be
parent7a9e1bdade725fd59a0fd87ca77c7dd66aeba1ec (diff)
downloadmongo-10eb756c7bb8cc1a6847a2f2fec5fcb2ee883d91.tar.gz
WT-1922 Add support for bulk load in LSM trees. Also references
SERVER-18321 (cherry picked from commit 4d37a27896872dc5d280f5e85666e1d8431ec33b)
-rw-r--r--build_win/filelist.win1
-rw-r--r--dist/filelist1
-rw-r--r--src/include/extern.h4
-rw-r--r--src/include/lsm.h17
-rw-r--r--src/lsm/lsm_cursor.c104
-rw-r--r--src/lsm/lsm_cursor_bulk.c116
6 files changed, 205 insertions, 38 deletions
diff --git a/build_win/filelist.win b/build_win/filelist.win
index e297ca16b06..8655c0eda8e 100644
--- a/build_win/filelist.win
+++ b/build_win/filelist.win
@@ -85,6 +85,7 @@ src/log/log.c
src/log/log_auto.c
src/log/log_slot.c
src/lsm/lsm_cursor.c
+src/lsm/lsm_cursor_bulk.c
src/lsm/lsm_manager.c
src/lsm/lsm_merge.c
src/lsm/lsm_meta.c
diff --git a/dist/filelist b/dist/filelist
index ee70ccf765e..af72bab6718 100644
--- a/dist/filelist
+++ b/dist/filelist
@@ -85,6 +85,7 @@ src/log/log.c
src/log/log_auto.c
src/log/log_slot.c
src/lsm/lsm_cursor.c
+src/lsm/lsm_cursor_bulk.c
src/lsm/lsm_manager.c
src/lsm/lsm_merge.c
src/lsm/lsm_meta.c
diff --git a/src/include/extern.h b/src/include/extern.h
index af50ca9180f..63b6bb2cbc5 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -364,8 +364,12 @@ extern int __wt_log_slot_wait(WT_SESSION_IMPL *session, WT_LOGSLOT *slot);
extern int64_t __wt_log_slot_release(WT_LOGSLOT *slot, uint64_t size);
extern int __wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot);
extern int __wt_log_slot_grow_buffers(WT_SESSION_IMPL *session, size_t newsize);
+extern int __wt_clsm_request_switch(WT_CURSOR_LSM *clsm);
+extern int __wt_clsm_await_switch(WT_CURSOR_LSM *clsm);
extern int __wt_clsm_init_merge( WT_CURSOR *cursor, u_int start_chunk, uint32_t start_id, u_int nchunks);
+extern int __wt_clsm_close(WT_CURSOR *cursor);
extern int __wt_clsm_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
+extern int __wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[]);
extern int __wt_lsm_manager_config(WT_SESSION_IMPL *session, const char **cfg);
extern int __wt_lsm_manager_reconfig(WT_SESSION_IMPL *session, const char **cfg);
extern int __wt_lsm_manager_start(WT_SESSION_IMPL *session);
diff --git a/src/include/lsm.h b/src/include/lsm.h
index aa1d797e3b5..dc6a0d7e027 100644
--- a/src/include/lsm.h
+++ b/src/include/lsm.h
@@ -57,15 +57,16 @@ struct __wt_cursor_lsm {
u_int update_count; /* Updates performed. */
-#define WT_CLSM_ACTIVE 0x01 /* Incremented the session count */
-#define WT_CLSM_ITERATE_NEXT 0x02 /* Forward iteration */
-#define WT_CLSM_ITERATE_PREV 0x04 /* Backward iteration */
-#define WT_CLSM_MERGE 0x08 /* Merge cursor, don't update */
-#define WT_CLSM_MINOR_MERGE 0x10 /* Minor merge, include tombstones */
-#define WT_CLSM_MULTIPLE 0x20 /* Multiple cursors have values for the
+#define WT_CLSM_ACTIVE 0x001 /* Incremented the session count */
+#define WT_CLSM_BULK 0x002 /* Open for bulk load */
+#define WT_CLSM_ITERATE_NEXT 0x004 /* Forward iteration */
+#define WT_CLSM_ITERATE_PREV 0x008 /* Backward iteration */
+#define WT_CLSM_MERGE 0x010 /* Merge cursor, don't update */
+#define WT_CLSM_MINOR_MERGE 0x020 /* Minor merge, include tombstones */
+#define WT_CLSM_MULTIPLE 0x040 /* Multiple cursors have values for the
current key */
-#define WT_CLSM_OPEN_READ 0x40 /* Open for reads */
-#define WT_CLSM_OPEN_SNAPSHOT 0x80 /* Open for snapshot isolation */
+#define WT_CLSM_OPEN_READ 0x080 /* Open for reads */
+#define WT_CLSM_OPEN_SNAPSHOT 0x100 /* Open for snapshot isolation */
uint32_t flags;
};
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index 7665e417722..56126a4b724 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -20,11 +20,11 @@ static int __clsm_open_cursors(WT_CURSOR_LSM *, int, u_int, uint32_t);
static int __clsm_reset_cursors(WT_CURSOR_LSM *, WT_CURSOR *);
/*
- * __clsm_request_switch --
+ * __wt_clsm_request_switch --
* Request an LSM tree switch for a cursor operation.
*/
-static inline int
-__clsm_request_switch(WT_CURSOR_LSM *clsm)
+int
+__wt_clsm_request_switch(WT_CURSOR_LSM *clsm)
{
WT_DECL_RET;
WT_LSM_TREE *lsm_tree;
@@ -44,9 +44,9 @@ __clsm_request_switch(WT_CURSOR_LSM *clsm)
if (lsm_tree->nchunks == 0 ||
(clsm->dsk_gen == lsm_tree->dsk_gen &&
!F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH))) {
+ F_SET(lsm_tree, WT_LSM_TREE_NEED_SWITCH);
ret = __wt_lsm_manager_push_entry(
session, WT_LSM_WORK_SWITCH, 0, lsm_tree);
- F_SET(lsm_tree, WT_LSM_TREE_NEED_SWITCH);
}
WT_TRET(__wt_lsm_tree_readunlock(session, lsm_tree));
}
@@ -55,6 +55,41 @@ __clsm_request_switch(WT_CURSOR_LSM *clsm)
}
/*
+ * __wt_clsm_await_switch --
+ * Wait until a chunk switch has completed in the LSM tree.
+ */
+int
+__wt_clsm_await_switch(WT_CURSOR_LSM *clsm)
+{
+ WT_LSM_TREE *lsm_tree;
+ WT_SESSION_IMPL *session;
+ int waited;
+
+ lsm_tree = clsm->lsm_tree;
+ session = (WT_SESSION_IMPL *)clsm->iface.session;
+
+ /*
+ * Poll while the tree has no chunks or its disk generation still
+ * matches this cursor's. The switch work unit is queued on the first
+ * pass and every 1000th pass after; a failure to queue is returned.
+ *
+ * We used to switch chunks in the application thread here, but that is
+ * problematic because there is a transaction in progress and it could
+ * roll back, leaving the metadata inconsistent.
+ */
+ for (waited = 0;
+ lsm_tree->nchunks == 0 ||
+ clsm->dsk_gen == lsm_tree->dsk_gen;
+ ++waited) {
+ if (waited % 1000 == 0)
+ WT_RET(__wt_lsm_manager_push_entry(
+ session, WT_LSM_WORK_SWITCH, 0, lsm_tree));
+ __wt_sleep(0, 10);
+ }
+ return (0);
+}
+
+/*
* __clsm_enter_update --
* Make sure an LSM cursor is ready to perform an update.
*/
@@ -65,7 +100,7 @@ __clsm_enter_update(WT_CURSOR_LSM *clsm)
WT_LSM_CHUNK *primary_chunk;
WT_LSM_TREE *lsm_tree;
WT_SESSION_IMPL *session;
- int hard_limit, have_primary, ovfl, waited;
+ int hard_limit, have_primary, ovfl;
lsm_tree = clsm->lsm_tree;
ovfl = 0;
@@ -108,30 +143,13 @@ __clsm_enter_update(WT_CURSOR_LSM *clsm)
}
/* Request a switch. */
- WT_RET(__clsm_request_switch(clsm));
+ WT_RET(__wt_clsm_request_switch(clsm));
/* If we only overflowed the soft limit, we're done. */
if (have_primary && !hard_limit)
return (0);
- /*
- * If there is no primary chunk, or it has overflowed the hard limit,
- * which either means a worker thread has fallen behind or there has
- * just been a user-level checkpoint, wait until the tree changes.
- *
- * We used to switch chunks in the application thread if we got to
- * here, but that is problematic because there is a transaction in
- * progress and it could roll back, leaving the metadata inconsistent.
- */
- for (waited = 0;
- lsm_tree->nchunks == 0 ||
- clsm->dsk_gen == lsm_tree->dsk_gen;
- ++waited) {
- if (waited % 1000 == 0)
- WT_RET(__wt_lsm_manager_push_entry(
- session, WT_LSM_WORK_SWITCH, 0, lsm_tree));
- __wt_sleep(0, 10);
- }
+ WT_RET(__wt_clsm_await_switch(clsm));
return (0);
}
@@ -1423,11 +1441,11 @@ err: __clsm_leave(clsm);
}
/*
- * __clsm_close --
+ * __wt_clsm_close --
* WT_CURSOR->close method for the LSM cursor type.
*/
-static int
-__clsm_close(WT_CURSOR *cursor)
+int
+__wt_clsm_close(WT_CURSOR *cursor)
{
WT_CURSOR_LSM *clsm;
WT_DECL_RET;
@@ -1481,14 +1499,17 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
__clsm_update, /* update */
__clsm_remove, /* remove */
__wt_cursor_reconfigure, /* reconfigure */
- __clsm_close); /* close */
+ __wt_clsm_close); /* close */
WT_CURSOR *cursor;
WT_CURSOR_LSM *clsm;
WT_DECL_RET;
WT_LSM_TREE *lsm_tree;
+ int bulk;
+ bulk = 0;
clsm = NULL;
cursor = NULL;
+ lsm_tree = NULL;
if (!WT_PREFIX_MATCH(uri, "lsm:"))
return (EINVAL);
@@ -1498,9 +1519,21 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
WT_RET_MSG(session, EINVAL,
"LSM does not support opening by checkpoint");
+ WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
+ if (cval.val != 0)
+ bulk = 1;
+
/* Get the LSM tree. */
WT_WITH_DHANDLE_LOCK(session,
- ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree));
+ ret = __wt_lsm_tree_get(session, uri, bulk, &lsm_tree));
+ /*
+ * Check whether the exclusive open for a bulk load succeeded, and
+ * if it did ensure that it's safe to bulk load into the tree.
+ */
+ if (bulk && (ret == EBUSY || (ret == 0 && lsm_tree->nchunks > 1)))
+ WT_ERR_MSG(session, EINVAL,
+ "bulk-load is only supported on newly created LSM trees");
+ /* Flag any errors from the tree get. */
WT_RET(ret);
WT_ERR(__wt_calloc_one(session, &clsm));
@@ -1523,9 +1556,20 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
WT_STATIC_ASSERT(offsetof(WT_CURSOR_LSM, iface) == 0);
WT_ERR(__wt_cursor_init(cursor, cursor->uri, owner, cfg, cursorp));
+ if (bulk)
+ WT_ERR(__wt_clsm_open_bulk(clsm, cfg));
+
if (0) {
err: if (clsm != NULL)
- WT_TRET(__clsm_close(cursor));
+ WT_TRET(__wt_clsm_close(cursor));
+ else if (lsm_tree != NULL)
+ __wt_lsm_tree_release(session, lsm_tree);
+
+ /*
+ * Bulk cursors are opened after the returned cursor has been set, so
+ * clear the caller's cursor pointer on failure.
+ */
+ *cursorp = NULL;
}
return (ret);
diff --git a/src/lsm/lsm_cursor_bulk.c b/src/lsm/lsm_cursor_bulk.c
new file mode 100644
index 00000000000..6b51a070e47
--- /dev/null
+++ b/src/lsm/lsm_cursor_bulk.c
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 2014-2015 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __clsm_close_bulk --
+ * WT_CURSOR->close for LSM bulk cursors: flag chunk 0 on disk, then close.
+ */
+static int
+__clsm_close_bulk(WT_CURSOR *cursor)
+{
+ WT_CURSOR_LSM *clsm;
+ WT_LSM_TREE *lsm_tree;
+
+ clsm = (WT_CURSOR_LSM *)cursor;
+ lsm_tree = clsm->lsm_tree;
+ F_SET(lsm_tree->chunk[0], WT_LSM_CHUNK_ONDISK);
+
+ WT_RET(__wt_clsm_close(cursor));
+ return (0);
+}
+/*
+ * __clsm_insert_bulk --
+ * WT_CURSOR->insert for LSM bulk cursors: hand the pair to the chunk cursor.
+ */
+static int
+__clsm_insert_bulk(WT_CURSOR *cursor)
+{
+ WT_CURSOR *bulk_cursor;
+ WT_CURSOR_LSM *clsm;
+ WT_LSM_TREE *lsm_tree;
+ WT_SESSION_IMPL *session;
+
+ clsm = (WT_CURSOR_LSM *)cursor;
+ lsm_tree = clsm->lsm_tree;
+ session = (WT_SESSION_IMPL *)clsm->iface.session;
+
+ WT_ASSERT(session, lsm_tree->nchunks == 1 && clsm->nchunks == 1);
+ ++lsm_tree->chunk[0]->count; /* Bumped before the insert is tried. */
+ bulk_cursor = *clsm->cursors; /* The only entry: chunk 0's cursor. */
+ bulk_cursor->set_key(bulk_cursor, &cursor->key);
+ bulk_cursor->set_value(bulk_cursor, &cursor->value);
+ WT_RET(bulk_cursor->insert(bulk_cursor));
+
+ return (0);
+}
+
+/*
+ * __wt_clsm_open_bulk --
+ * Turn a just-opened LSM cursor into a bulk-load cursor on chunk 0.
+ */
+int
+__wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[])
+{
+ WT_CURSOR *cursor, *bulk_cursor;
+ WT_LSM_TREE *lsm_tree;
+ WT_SESSION_IMPL *session;
+
+ bulk_cursor = NULL;
+ cursor = &clsm->iface;
+ lsm_tree = clsm->lsm_tree;
+ session = (WT_SESSION_IMPL *)clsm->iface.session;
+
+ F_SET(clsm, WT_CLSM_BULK);
+
+ /* Bulk cursors are limited to insert and close. */
+ __wt_cursor_set_notsup(cursor);
+ cursor->insert = __clsm_insert_bulk;
+ cursor->close = __clsm_close_bulk;
+
+ /* Set up the first chunk: request a switch and wait for it. */
+ WT_RET(__wt_clsm_request_switch(clsm));
+ WT_RET(__wt_clsm_await_switch(clsm));
+
+ /*
+ * Grab and release the LSM tree lock to ensure that the first chunk
+ * has been fully created before proceeding. We have the LSM tree
+ * open exclusive, so that saves us from needing the lock generally.
+ */
+ WT_RET(__wt_lsm_tree_readlock(session, lsm_tree));
+ WT_RET(__wt_lsm_tree_readunlock(session, lsm_tree));
+
+ /*
+ * Allocate single-entry bloom and cursor arrays. The chunk cursor is
+ * a bulk cursor, not a regular LSM chunk cursor, but uses the standard
+ * storage locations; the bloom slot just makes cleanup simpler.
+ * Cleaned up by cursor close on error.
+ */
+ WT_RET(__wt_calloc_one(session, &clsm->blooms));
+ clsm->bloom_alloc = 1;
+ WT_RET(__wt_calloc_one(session, &clsm->cursors));
+ clsm->cursor_alloc = 1;
+ clsm->nchunks = 1;
+
+ /*
+ * Open a bulk cursor on the first chunk in the tree. No tree lock is
+ * held at this point: the lock and unlock above are what ensure the
+ * first chunk has been fully created before we get here.
+ * Pass through the application config to ensure the tree is open
+ * for bulk access.
+ */
+ WT_RET(__wt_open_cursor(session,
+ lsm_tree->chunk[0]->uri, &clsm->iface, cfg, &bulk_cursor));
+ clsm->cursors[0] = bulk_cursor;
+ /* LSM cursors are always raw. */
+ F_SET(bulk_cursor, WT_CURSTD_RAW);
+
+ return (0);
+}
+