summaryrefslogtreecommitdiff
path: root/src/lsm
diff options
context:
space:
mode:
author: Michael Cahill <michael.cahill@mongodb.com> 2016-03-25 08:19:12 +1100
committer: Michael Cahill <michael.cahill@mongodb.com> 2016-03-25 08:19:12 +1100
commit: 43e885a0f9a3ad046eae1726b005ca1280624be3 (patch)
tree: f03d31a7b36c476484f26ea5259777aeb7b13201 /src/lsm
parent: 5cdd3e320cb19cd54111c2572a3d6e33d3009ad4 (diff)
parent: 9cf8eb2f15c6df7da90c19c86ccf7516ed126183 (diff)
download: mongo-26e03281a42dc78fa9cc24535cd8e589eea9a5ad.tar.gz
Merge branch 'mongodb-3.4' into mongodb-3.2 [mongodb-3.2.5]
Diffstat (limited to 'src/lsm')
-rw-r--r-- src/lsm/lsm_cursor.c 34
-rw-r--r-- src/lsm/lsm_manager.c 17
-rw-r--r-- src/lsm/lsm_merge.c 13
-rw-r--r-- src/lsm/lsm_meta.c 336
-rw-r--r-- src/lsm/lsm_tree.c 257
-rw-r--r-- src/lsm/lsm_work_unit.c 22
-rw-r--r-- src/lsm/lsm_worker.c 11
7 files changed, 432 insertions, 258 deletions
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index f76b2bfd9ac..e023b2b407e 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -1501,22 +1501,22 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
{
WT_CONFIG_ITEM cval;
WT_CURSOR_STATIC_INIT(iface,
- __wt_cursor_get_key, /* get-key */
- __wt_cursor_get_value, /* get-value */
- __wt_cursor_set_key, /* set-key */
- __wt_cursor_set_value, /* set-value */
- __clsm_compare, /* compare */
- __wt_cursor_equals, /* equals */
- __clsm_next, /* next */
- __clsm_prev, /* prev */
- __clsm_reset, /* reset */
- __clsm_search, /* search */
- __clsm_search_near, /* search-near */
- __clsm_insert, /* insert */
- __clsm_update, /* update */
- __clsm_remove, /* remove */
- __wt_cursor_reconfigure, /* reconfigure */
- __wt_clsm_close); /* close */
+ __wt_cursor_get_key, /* get-key */
+ __wt_cursor_get_value, /* get-value */
+ __wt_cursor_set_key, /* set-key */
+ __wt_cursor_set_value, /* set-value */
+ __clsm_compare, /* compare */
+ __wt_cursor_equals, /* equals */
+ __clsm_next, /* next */
+ __clsm_prev, /* prev */
+ __clsm_reset, /* reset */
+ __clsm_search, /* search */
+ __clsm_search_near, /* search-near */
+ __clsm_insert, /* insert */
+ __clsm_update, /* update */
+ __clsm_remove, /* remove */
+ __wt_cursor_reconfigure, /* reconfigure */
+ __wt_clsm_close); /* close */
WT_CURSOR *cursor;
WT_CURSOR_LSM *clsm;
WT_DECL_RET;
@@ -1556,7 +1556,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
WT_ERR(ret);
/* Make sure we have exclusive access if and only if we want it */
- WT_ASSERT(session, !bulk || lsm_tree->exclusive);
+ WT_ASSERT(session, !bulk || lsm_tree->excl_session != NULL);
WT_ERR(__wt_calloc_one(session, &clsm));
diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c
index dac8d987328..943a5894ab3 100644
--- a/src/lsm/lsm_manager.c
+++ b/src/lsm/lsm_manager.c
@@ -212,6 +212,10 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session)
conn = S2C(session);
manager = &conn->lsm_manager;
+ if (F_ISSET(conn, WT_CONN_READONLY)) {
+ manager->lsm_workers = 0;
+ return (0);
+ }
/*
* We need at least a manager, a switch thread and a generic
* worker.
@@ -284,6 +288,8 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session)
manager = &conn->lsm_manager;
removed = 0;
+ WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) ||
+ manager->lsm_workers == 0);
if (manager->lsm_workers > 0) {
/*
* Stop the main LSM manager thread first.
@@ -384,7 +390,7 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST);
dhandle_locked = true;
TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) {
- if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE))
+ if (!lsm_tree->active)
continue;
WT_ERR(__wt_epoch(session, &now));
pushms = lsm_tree->work_push_ts.tv_sec == 0 ? 0 :
@@ -427,8 +433,10 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
session, WT_LSM_WORK_BLOOM, 0, lsm_tree));
WT_ERR(__wt_verbose(session,
WT_VERB_LSM_MANAGER,
- "MGR %s: queue %d mod %d nchunks %d"
- " flags 0x%x aggressive %d pushms %" PRIu64
+ "MGR %s: queue %" PRIu32 " mod %d "
+ "nchunks %" PRIu32
+ " flags %#" PRIx32 " aggressive %" PRIu32
+ " pushms %" PRIu64
" fillms %" PRIu64,
lsm_tree->name, lsm_tree->queue_ref,
lsm_tree->modified, lsm_tree->nchunks,
@@ -616,6 +624,7 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session,
manager = &S2C(session)->lsm_manager;
+ WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
/*
* Don't add merges or bloom filter creates if merges
* or bloom filters are disabled in the tree.
@@ -641,7 +650,7 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session,
* is checked.
*/
(void)__wt_atomic_add32(&lsm_tree->queue_ref, 1);
- if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) {
+ if (!lsm_tree->active) {
(void)__wt_atomic_sub32(&lsm_tree->queue_ref, 1);
return (0);
}
diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c
index 29325066da7..6d907284546 100644
--- a/src/lsm/lsm_merge.c
+++ b/src/lsm/lsm_merge.c
@@ -60,10 +60,11 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
struct timespec now;
uint64_t msec_since_last_merge, msec_to_create_merge;
- u_int new_aggressive;
+ uint32_t new_aggressive;
new_aggressive = 0;
+ WT_ASSERT(session, lsm_tree->merge_min != 0);
/*
* If the tree is open read-only or we are compacting, be very
* aggressive. Otherwise, we can spend a long time waiting for merges
@@ -124,8 +125,9 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
if (new_aggressive > lsm_tree->merge_aggressiveness) {
WT_RET(__wt_verbose(session, WT_VERB_LSM,
- "LSM merge %s got aggressive (old %u new %u), "
- "merge_min %d, %u / %" PRIu64,
+ "LSM merge %s got aggressive "
+ "(old %" PRIu32 " new %" PRIu32 "), "
+ "merge_min %u, %" PRIu64 " / %" PRIu64,
lsm_tree->name, lsm_tree->merge_aggressiveness,
new_aggressive, lsm_tree->merge_min,
msec_since_last_merge, lsm_tree->chunk_fill_ms));
@@ -410,7 +412,8 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id)
start_chunk, end_chunk, dest_id, record_count, generation));
for (verb = start_chunk; verb <= end_chunk; verb++)
WT_ERR(__wt_verbose(session, WT_VERB_LSM,
- "Merging %s: Chunk[%u] id %u, gen: %" PRIu32
+ "Merging %s: Chunk[%u] id %" PRIu32
+ ", gen: %" PRIu32
", size: %" PRIu64 ", records: %" PRIu64,
lsm_tree->name, verb, lsm_tree->chunk[verb]->id,
lsm_tree->chunk[verb]->generation,
@@ -460,7 +463,7 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id)
#define LSM_MERGE_CHECK_INTERVAL WT_THOUSAND
for (insert_count = 0; (ret = src->next(src)) == 0; insert_count++) {
if (insert_count % LSM_MERGE_CHECK_INTERVAL == 0) {
- if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE))
+ if (!lsm_tree->active)
WT_ERR(EINTR);
WT_STAT_FAST_CONN_INCRV(session,
diff --git a/src/lsm/lsm_meta.c b/src/lsm/lsm_meta.c
index d76b2a48aa7..e19e2cd0126 100644
--- a/src/lsm/lsm_meta.c
+++ b/src/lsm/lsm_meta.c
@@ -9,17 +9,17 @@
#include "wt_internal.h"
/*
- * __wt_lsm_meta_read --
- * Read the metadata for an LSM tree.
+ * __lsm_meta_read_v0 --
+ * Read v0 of LSM metadata.
*/
-int
-__wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+static int
+__lsm_meta_read_v0(
+ WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *lsmconf)
{
WT_CONFIG cparser, lparser;
WT_CONFIG_ITEM ck, cv, fileconf, lk, lv, metadata;
WT_DECL_RET;
WT_LSM_CHUNK *chunk;
- char *lsmconfig;
u_int nchunks;
chunk = NULL; /* -Wconditional-uninitialized */
@@ -28,8 +28,7 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
F_SET(lsm_tree, WT_LSM_TREE_MERGES);
- WT_RET(__wt_metadata_search(session, lsm_tree->name, &lsmconfig));
- WT_ERR(__wt_config_init(session, &cparser, lsmconfig));
+ WT_ERR(__wt_config_init(session, &cparser, lsmconf));
while ((ret = __wt_config_next(&cparser, &ck, &cv)) == 0) {
if (WT_STRING_MATCH("key_format", ck.str, ck.len)) {
__wt_free(session, lsm_tree->key_format);
@@ -48,7 +47,7 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
* from the file configuration.
*/
WT_ERR(__wt_config_getones(
- session, lsmconfig, "file_config", &fileconf));
+ session, lsmconf, "file_config", &fileconf));
WT_CLEAR(metadata);
WT_ERR_NOTFOUND_OK(__wt_config_subgets(
session, &fileconf, "app_metadata", &metadata));
@@ -160,16 +159,292 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
*/
}
WT_ERR_NOTFOUND_OK(ret);
+err: return (ret);
+}
+
+/*
+ * __lsm_meta_read_v1 --
+ * Read v1 of LSM metadata.
+ *
+ * Populates lsm_tree from the configuration string lsmconf: the key and
+ * value formats, the optional collator (with any app_metadata), the
+ * "lsm.*" settings, the "last" value, the "chunks" and "old_chunks"
+ * lists, and finally the collapsed per-chunk file configuration, which
+ * is stored in lsm_tree->file_config.
+ *
+ * Failures reported through WT_ERR/WT_ERR_MSG end up at the err label
+ * and the error code is returned; EINVAL is used for the inconsistent
+ * bloom, chunk size and merge settings checked below. The scratch
+ * buffer is released on both the success and the error path.
+ *
+ * NOTE(review): lsm_tree->nchunks and lsm_tree->nold_chunks are only
+ * assigned after their loops complete, so chunks allocated before a
+ * mid-loop failure are not yet counted -- confirm the caller's cleanup
+ * copes with that.
+ * NOTE(review): the per-chunk keys other than "id" dereference chunk
+ * without checking it; this presumably relies on well-formed metadata
+ * where "id" always comes first -- confirm.
+ */
+static int
+__lsm_meta_read_v1(
+ WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *lsmconf)
+{
+ WT_CONFIG lparser;
+ WT_CONFIG_ITEM cv, lk, lv, metadata;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ const char *file_cfg[] = {
+ WT_CONFIG_BASE(session, file_config), NULL, NULL, NULL };
+ char *fileconf;
+ u_int nchunks;
+
+ chunk = NULL; /* -Wconditional-uninitialized */
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "key_format", &cv));
+ WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->key_format));
+ WT_ERR(__wt_config_getones(session, lsmconf, "value_format", &cv));
+ WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->value_format));
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "collator", &cv));
+ if (cv.len != 0 && !WT_STRING_MATCH("none", cv.str, cv.len)) {
+ /* Extract the application-supplied metadata (if any). */
+ WT_CLEAR(metadata);
+ WT_ERR_NOTFOUND_OK(__wt_config_getones(
+ session, lsmconf, "app_metadata", &metadata));
+ WT_ERR(__wt_collator_config(session, lsm_tree->name,
+ &cv, &metadata,
+ &lsm_tree->collator, &lsm_tree->collator_owned));
+ WT_ERR(__wt_strndup(session,
+ cv.str, cv.len, &lsm_tree->collator_name));
+ }
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.auto_throttle", &cv));
+ if (cv.val)
+ F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
+ else
+ F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom", &cv));
+ FLD_SET(lsm_tree->bloom,
+ (cv.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom_oldest", &cv));
+ if (cv.val != 0)
+ FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);
+
+ if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
+ FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))
+ WT_ERR_MSG(session, EINVAL,
+ "Bloom filters can only be created on newest and oldest "
+ "chunks if bloom filters are enabled");
+
+ WT_ERR(__wt_config_getones(
+ session, lsmconf, "lsm.bloom_bit_count", &cv));
+ lsm_tree->bloom_bit_count = (uint32_t)cv.val;
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom_config", &cv));
+ /* Don't include the brackets. */
+ if (cv.type == WT_CONFIG_ITEM_STRUCT) {
+ cv.str++;
+ cv.len -= 2;
+ }
+ WT_ERR(__wt_config_check(session,
+ WT_CONFIG_REF(session, WT_SESSION_create), cv.str, cv.len));
+ WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->bloom_config));
+ WT_ERR(__wt_config_getones(
+ session, lsmconf, "lsm.bloom_hash_count", &cv));
+ lsm_tree->bloom_hash_count = (uint32_t)cv.val;
+
+ WT_ERR(__wt_config_getones(
+ session, lsmconf, "lsm.chunk_count_limit", &cv));
+ lsm_tree->chunk_count_limit = (uint32_t)cv.val;
+ if (cv.val == 0) /* merging is switched on only when no chunk count limit is set */
+ F_SET(lsm_tree, WT_LSM_TREE_MERGES);
+ else
+ F_CLR(lsm_tree, WT_LSM_TREE_MERGES);
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.chunk_max", &cv));
+ lsm_tree->chunk_max = (uint64_t)cv.val;
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.chunk_size", &cv));
+ lsm_tree->chunk_size = (uint64_t)cv.val;
+
+ if (lsm_tree->chunk_size > lsm_tree->chunk_max)
+ WT_ERR_MSG(session, EINVAL,
+ "Chunk size (chunk_size) must be smaller than or equal to "
+ "the maximum chunk size (chunk_max)");
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.merge_max", &cv));
+ lsm_tree->merge_max = (uint32_t)cv.val;
+ WT_ERR(__wt_config_getones(session, lsmconf, "lsm.merge_min", &cv));
+ lsm_tree->merge_min = (uint32_t)cv.val;
+
+ if (lsm_tree->merge_min > lsm_tree->merge_max)
+ WT_ERR_MSG(session, EINVAL,
+ "LSM merge_min must be less than or equal to merge_max");
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "last", &cv));
+ lsm_tree->last = (u_int)cv.val;
+ WT_ERR(__wt_config_getones(session, lsmconf, "chunks", &cv));
+ WT_ERR(__wt_config_subinit(session, &lparser, &cv));
+ for (nchunks = 0; (ret =
+ __wt_config_next(&lparser, &lk, &lv)) == 0; ) {
+ if (WT_STRING_MATCH("id", lk.str, lk.len)) { /* "id" begins a new chunk entry */
+ WT_ERR(__wt_realloc_def(session,
+ &lsm_tree->chunk_alloc,
+ nchunks + 1, &lsm_tree->chunk));
+ WT_ERR(__wt_calloc_one(session, &chunk));
+ lsm_tree->chunk[nchunks++] = chunk;
+ chunk->id = (uint32_t)lv.val;
+ WT_ERR(__wt_lsm_tree_chunk_name(session,
+ lsm_tree, chunk->id, &chunk->uri));
+ F_SET(chunk,
+ WT_LSM_CHUNK_ONDISK |
+ WT_LSM_CHUNK_STABLE);
+ } else if (WT_STRING_MATCH("bloom", lk.str, lk.len)) { /* value unused: the URI is derived from chunk->id */
+ WT_ERR(__wt_lsm_tree_bloom_name(
+ session, lsm_tree, chunk->id, &chunk->bloom_uri));
+ F_SET(chunk, WT_LSM_CHUNK_BLOOM);
+ continue;
+ } else if (WT_STRING_MATCH("chunk_size", lk.str, lk.len)) {
+ chunk->size = (uint64_t)lv.val;
+ continue;
+ } else if (WT_STRING_MATCH("count", lk.str, lk.len)) {
+ chunk->count = (uint64_t)lv.val;
+ continue;
+ } else if (WT_STRING_MATCH("generation", lk.str, lk.len)) {
+ chunk->generation = (uint32_t)lv.val;
+ continue;
+ }
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ lsm_tree->nchunks = nchunks;
+
+ WT_ERR(__wt_config_getones(session, lsmconf, "old_chunks", &cv));
+ WT_ERR(__wt_config_subinit(session, &lparser, &cv));
+ for (nchunks = 0; (ret =
+ __wt_config_next(&lparser, &lk, &lv)) == 0; ) {
+ if (WT_STRING_MATCH("bloom", lk.str, lk.len)) { /* attaches to the chunk allocated last */
+ WT_ERR(__wt_strndup(session,
+ lv.str, lv.len, &chunk->bloom_uri));
+ F_SET(chunk, WT_LSM_CHUNK_BLOOM);
+ continue;
+ }
+ WT_ERR(__wt_realloc_def(session,
+ &lsm_tree->old_alloc, nchunks + 1,
+ &lsm_tree->old_chunks));
+ WT_ERR(__wt_calloc_one(session, &chunk));
+ lsm_tree->old_chunks[nchunks++] = chunk;
+ WT_ERR(__wt_strndup(session,
+ lk.str, lk.len, &chunk->uri));
+ F_SET(chunk, WT_LSM_CHUNK_ONDISK);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+ lsm_tree->nold_chunks = nchunks;
+
+ /*
+ * Set up the config for each chunk.
+ *
+ * Make the memory_page_max double the chunk size, so application
+ * threads don't immediately try to force evict the chunk when the
+ * worker thread clears the NO_EVICTION flag.
+ */
+ file_cfg[1] = lsmconf;
+ WT_ERR(__wt_scr_alloc(session, 0, &buf));
+ WT_ERR(__wt_buf_fmt(session, buf,
+ "key_format=u,value_format=u,memory_page_max=%" PRIu64,
+ 2 * lsm_tree->chunk_max));
+ file_cfg[2] = buf->data;
+ WT_ERR(__wt_config_collapse(session, file_cfg, &fileconf));
+ lsm_tree->file_config = fileconf;
+
+ /*
+ * Ignore any other values: the metadata entry might have been
+ * created by a future release, with unknown options.
+ */
+err: __wt_scr_free(session, &buf);
+ return (ret);
+}
+
+/*
+ * __lsm_meta_upgrade_v1 --
+ * Upgrade to v1 of LSM metadata.
+ *
+ * Formats the settings already held in lsm_tree (key and value formats,
+ * collator name and an "lsm=(...)" group) into a scratch buffer, then
+ * passes the lsm_meta base configuration, the tree's file_config and
+ * that buffer to __wt_config_merge, storing the result in
+ * lsm_tree->config. This function does not itself write the metadata.
+ *
+ * The scratch buffer is released before returning, on success or error.
+ *
+ * NOTE(review): auto_throttle, bloom and bloom_oldest are formatted
+ * with "%d" straight from F_ISSET/FLD_ISSET; confirm those macros yield
+ * 0/1 rather than the raw masked flag bits.
+ */
+static int
+__lsm_meta_upgrade_v1(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+{
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ const char *new_cfg[] = {
+ WT_CONFIG_BASE(session, lsm_meta), NULL, NULL, NULL };
+
+ /* Include the custom config that used to be embedded in file_config. */
+ new_cfg[1] = lsm_tree->file_config;
+
+ WT_ERR(__wt_scr_alloc(session, 0, &buf));
+ WT_ERR(__wt_buf_fmt(session, buf,
+ "key_format=%s,value_format=%s",
+ lsm_tree->key_format, lsm_tree->value_format));
+
+ WT_ERR(__wt_buf_catfmt(session, buf, ",collator=%s",
+ lsm_tree->collator_name != NULL ? lsm_tree->collator_name : ""));
+
+ WT_ERR(__wt_buf_catfmt(session, buf, ",lsm=("));
+
+ WT_ERR(__wt_buf_catfmt(session, buf, "auto_throttle=%d",
+ F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE)));
+
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom=%d",
+ FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_MERGED)));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_oldest=%d",
+ FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST)));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_bit_count=%" PRIu32,
+ lsm_tree->bloom_bit_count));
+ if (lsm_tree->bloom_config != NULL &&
+ strlen(lsm_tree->bloom_config) > 0)
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_config=(%s)",
+ lsm_tree->bloom_config));
+ else
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_config="));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_hash_count=%" PRIu32,
+ lsm_tree->bloom_hash_count));
+
+ WT_ERR(__wt_buf_catfmt(session, buf, ",chunk_count_limit=%" PRIu32,
+ lsm_tree->chunk_count_limit));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",chunk_max=%" PRIu64,
+ lsm_tree->chunk_max));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",merge_max=%" PRIu32,
+ lsm_tree->merge_max));
+ WT_ERR(__wt_buf_catfmt(session, buf, ",merge_min=%" PRIu32,
+ lsm_tree->merge_min));
+
+ WT_ERR(__wt_buf_catfmt(session, buf, ")"));
+
+ new_cfg[2] = buf->data;
+ WT_ERR(__wt_config_merge(session, new_cfg, NULL, &lsm_tree->config));
+
+err: __wt_scr_free(session, &buf);
+ return (ret);
+}
+/*
+ * __wt_lsm_meta_read --
+ * Read the metadata for an LSM tree.
+ *
+ * Looks up the metadata string for lsm_tree->name and picks a reader by
+ * format: if the string has a "file_config" key it is read as v0 and
+ * flagged for upgrade; if that key is not found it is read as v1 and
+ * the string is kept as lsm_tree->config. A merge_min below 2 is then
+ * replaced with max(2, merge_max / 2) and, for v0 metadata, a v1
+ * configuration is generated from the loaded settings.
+ *
+ * NOTE(review): if the "file_config" lookup fails with an error other
+ * than WT_NOTFOUND, neither branch runs: the error is returned at the
+ * end but lsmconf is neither freed nor handed to lsm_tree -- looks like
+ * a leak, confirm.
+ */
+int
+__wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+{
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ char *lsmconf;
+ bool upgrade;
+
+ /* LSM trees inherit the merge setting from the connection. */
+ if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
+ F_SET(lsm_tree, WT_LSM_TREE_MERGES);
+
+ WT_RET(__wt_metadata_search(session, lsm_tree->name, &lsmconf));
+ upgrade = false;
+ ret = __wt_config_getones(session, lsmconf, "file_config", &cval);
+ if (ret == 0) {
+ ret = __lsm_meta_read_v0(session, lsm_tree, lsmconf);
+ __wt_free(session, lsmconf); /* the v0 string is not retained */
+ WT_RET(ret);
+ upgrade = true;
+ } else if (ret == WT_NOTFOUND) {
+ lsm_tree->config = lsmconf; /* the tree keeps the v1 string */
+ ret = 0;
+ WT_RET(__lsm_meta_read_v1(session, lsm_tree, lsmconf));
+ }
/*
- * If the default merge_min was not overridden, calculate it now. We
- * do this here so that trees created before merge_min was added get a
- * sane value.
+ * If the default merge_min was not overridden, calculate it now.
*/
if (lsm_tree->merge_min < 2)
lsm_tree->merge_min = WT_MAX(2, lsm_tree->merge_max / 2);
-
-err: __wt_free(session, lsmconfig);
+ /*
+ * If needed, upgrade the configuration. We need to do this after
+ * we have fixed the merge_min value.
+ */
+ if (upgrade)
+ WT_RET(__lsm_meta_upgrade_v1(session, lsm_tree));
return (ret);
}
@@ -184,32 +459,15 @@ __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
WT_DECL_RET;
WT_LSM_CHUNK *chunk;
u_int i;
+ const char *new_cfg[] = { NULL, NULL, NULL };
+ char *new_metadata;
bool first;
+ new_metadata = NULL;
+
WT_RET(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_buf_fmt(session, buf,
- "key_format=%s,value_format=%s,bloom_config=(%s),file_config=(%s)",
- lsm_tree->key_format, lsm_tree->value_format,
- lsm_tree->bloom_config, lsm_tree->file_config));
- if (lsm_tree->collator_name != NULL)
- WT_ERR(__wt_buf_catfmt(
- session, buf, ",collator=%s", lsm_tree->collator_name));
WT_ERR(__wt_buf_catfmt(session, buf,
- ",last=%" PRIu32
- ",chunk_count_limit=%" PRIu32
- ",chunk_max=%" PRIu64
- ",chunk_size=%" PRIu64
- ",auto_throttle=%" PRIu32
- ",merge_max=%" PRIu32
- ",merge_min=%" PRIu32
- ",bloom=%" PRIu32
- ",bloom_bit_count=%" PRIu32
- ",bloom_hash_count=%" PRIu32,
- lsm_tree->last, lsm_tree->chunk_count_limit,
- lsm_tree->chunk_max, lsm_tree->chunk_size,
- F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE) ? 1 : 0,
- lsm_tree->merge_max, lsm_tree->merge_min, lsm_tree->bloom,
- lsm_tree->bloom_bit_count, lsm_tree->bloom_hash_count));
+ ",last=%" PRIu32, lsm_tree->last));
WT_ERR(__wt_buf_catfmt(session, buf, ",chunks=["));
for (i = 0; i < lsm_tree->nchunks; i++) {
chunk = lsm_tree->chunk[i];
@@ -243,9 +501,15 @@ __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
session, buf, ",bloom=\"%s\"", chunk->bloom_uri));
}
WT_ERR(__wt_buf_catfmt(session, buf, "]"));
- ret = __wt_metadata_update(session, lsm_tree->name, buf->data);
+
+ /* Update the existing configuration with the new values. */
+ new_cfg[0] = lsm_tree->config;
+ new_cfg[1] = buf->data;
+ WT_ERR(__wt_config_collapse(session, new_cfg, &new_metadata));
+ ret = __wt_metadata_update(session, lsm_tree->name, new_metadata);
WT_ERR(ret);
err: __wt_scr_free(session, &buf);
+ __wt_free(session, new_metadata);
return (ret);
}
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index ab18e41a2f5..cb1ddf22f84 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -27,6 +27,7 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
WT_UNUSED(final); /* Only used in diagnostic builds */
+ WT_ASSERT(session, !lsm_tree->active);
/*
* The work unit queue should be empty, but it's worth checking
* since work units use a different locking scheme to regular tree
@@ -85,19 +86,27 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
* Close an LSM tree structure.
*/
static int
-__lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+__lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
{
WT_DECL_RET;
int i;
- /* Stop any active merges. */
- F_CLR(lsm_tree, WT_LSM_TREE_ACTIVE);
+ /*
+ * Stop any new work units being added. The barrier is necessary
+ * because we rely on the state change being visible before checking
+ * the tree queue state.
+ */
+ lsm_tree->active = false;
+ WT_READ_BARRIER();
/*
- * Wait for all LSM operations and work units that were in flight to
- * finish.
+ * Wait for all LSM operations to drain. If WiredTiger is shutting
+ * down also wait for the tree reference count to go to zero, otherwise
+ * we know a user is holding a reference to the tree, so exclusive
+ * access is not available.
*/
- for (i = 0; lsm_tree->refcnt > 1 || lsm_tree->queue_ref > 0; ++i) {
+ for (i = 0;
+ lsm_tree->queue_ref > 0 || (final && lsm_tree->refcnt > 1); ++i) {
/*
* Remove any work units from the manager queues. Do this step
* repeatedly in case a work unit was in the process of being
@@ -114,11 +123,14 @@ __lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
if (i % WT_THOUSAND == 0) {
WT_WITHOUT_LOCKS(session, ret =
__wt_lsm_manager_clear_tree(session, lsm_tree));
- WT_RET(ret);
+ WT_ERR(ret);
}
__wt_yield();
}
return (0);
+
+err: lsm_tree->active = true;
+ return (ret);
}
/*
@@ -142,7 +154,7 @@ __wt_lsm_tree_close_all(WT_SESSION_IMPL *session)
* is unconditional.
*/
(void)__wt_atomic_add32(&lsm_tree->refcnt, 1);
- WT_TRET(__lsm_tree_close(session, lsm_tree));
+ WT_TRET(__lsm_tree_close(session, lsm_tree, true));
WT_TRET(__lsm_tree_discard(session, lsm_tree, true));
}
@@ -157,9 +169,12 @@ static int
__lsm_tree_set_name(WT_SESSION_IMPL *session,
WT_LSM_TREE *lsm_tree, const char *uri)
{
- if (lsm_tree->name != NULL)
- __wt_free(session, lsm_tree->name);
- WT_RET(__wt_strdup(session, uri, &lsm_tree->name));
+ void *p;
+
+ WT_RET(__wt_strdup(session, uri, &p));
+
+ __wt_free(session, lsm_tree->name);
+ lsm_tree->name = p;
lsm_tree->filename = lsm_tree->name + strlen("lsm:");
return (0);
}
@@ -306,15 +321,15 @@ int
__wt_lsm_tree_create(WT_SESSION_IMPL *session,
const char *uri, bool exclusive, const char *config)
{
- WT_CONFIG_ITEM cval;
- WT_DECL_ITEM(buf);
WT_DECL_RET;
WT_LSM_TREE *lsm_tree;
const char *cfg[] =
- { WT_CONFIG_BASE(session, WT_SESSION_create), config, NULL };
- char *tmpconfig;
+ { WT_CONFIG_BASE(session, lsm_meta), config, NULL };
+ const char *metadata;
- /* If the tree is open, it already exists. */
+ metadata = NULL;
+
+ /* If the tree can be opened, it already exists. */
WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree));
if (ret == 0) {
@@ -323,139 +338,22 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session,
}
WT_RET_NOTFOUND_OK(ret);
- /*
- * If the tree has metadata, it already exists.
- *
- * !!!
- * Use a local variable: we don't care what the existing configuration
- * is, but we don't want to overwrite the real config.
- */
- if (__wt_metadata_search(session, uri, &tmpconfig) == 0) {
- __wt_free(session, tmpconfig);
- return (exclusive ? EEXIST : 0);
+ if (!F_ISSET(S2C(session), WT_CONN_READONLY)) {
+ WT_ERR(__wt_config_merge(session, cfg, NULL, &metadata));
+ WT_ERR(__wt_metadata_insert(session, uri, metadata));
}
- WT_RET_NOTFOUND_OK(ret);
-
- /* In-memory configurations don't make sense for LSM. */
- if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- WT_RET_MSG(session, EINVAL,
- "LSM trees not supported by in-memory configurations");
-
- WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
- if (WT_STRING_MATCH("r", cval.str, cval.len))
- WT_RET_MSG(session, EINVAL,
- "LSM trees cannot be configured as column stores");
-
- WT_RET(__wt_calloc_one(session, &lsm_tree));
-
- WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));
-
- WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval));
- WT_ERR(__wt_strndup(
- session, cval.str, cval.len, &lsm_tree->key_format));
- WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval));
- WT_ERR(__wt_strndup(
- session, cval.str, cval.len, &lsm_tree->value_format));
-
- WT_ERR(__wt_config_gets_none(session, cfg, "collator", &cval));
- WT_ERR(__wt_strndup(
- session, cval.str, cval.len, &lsm_tree->collator_name));
-
- WT_ERR(__wt_config_gets(session, cfg, "cache_resident", &cval));
- if (cval.val != 0)
- WT_ERR_MSG(session, EINVAL,
- "The cache_resident flag is not compatible with LSM");
-
- WT_ERR(__wt_config_gets(session, cfg, "lsm.auto_throttle", &cval));
- if (cval.val)
- F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
- else
- F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);
- WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom", &cval));
- FLD_SET(lsm_tree->bloom,
- (cval.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
- WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_oldest", &cval));
- if (cval.val != 0)
- FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);
-
- if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
- FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))
- WT_ERR_MSG(session, EINVAL,
- "Bloom filters can only be created on newest and oldest "
- "chunks if bloom filters are enabled");
-
- WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_config", &cval));
- if (cval.type == WT_CONFIG_ITEM_STRUCT) {
- cval.str++;
- cval.len -= 2;
- }
- WT_ERR(__wt_config_check(session,
- WT_CONFIG_REF(session, WT_SESSION_create), cval.str, cval.len));
- WT_ERR(__wt_strndup(
- session, cval.str, cval.len, &lsm_tree->bloom_config));
-
- WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_bit_count", &cval));
- lsm_tree->bloom_bit_count = (uint32_t)cval.val;
- WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_hash_count", &cval));
- lsm_tree->bloom_hash_count = (uint32_t)cval.val;
- WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_count_limit", &cval));
- lsm_tree->chunk_count_limit = (uint32_t)cval.val;
- if (cval.val == 0)
- F_SET(lsm_tree, WT_LSM_TREE_MERGES);
- else
- F_CLR(lsm_tree, WT_LSM_TREE_MERGES);
- WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_max", &cval));
- lsm_tree->chunk_max = (uint64_t)cval.val;
- WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_size", &cval));
- lsm_tree->chunk_size = (uint64_t)cval.val;
- if (lsm_tree->chunk_size > lsm_tree->chunk_max)
- WT_ERR_MSG(session, EINVAL,
- "Chunk size (chunk_size) must be smaller than or equal to "
- "the maximum chunk size (chunk_max)");
- WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_max", &cval));
- lsm_tree->merge_max = (uint32_t)cval.val;
- WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_min", &cval));
- lsm_tree->merge_min = (uint32_t)cval.val;
- if (lsm_tree->merge_min > lsm_tree->merge_max)
- WT_ERR_MSG(session, EINVAL,
- "LSM merge_min must be less than or equal to merge_max");
-
- /*
- * Set up the config for each chunk.
- *
- * Make the memory_page_max double the chunk size, so application
- * threads don't immediately try to force evict the chunk when the
- * worker thread clears the NO_EVICTION flag.
- */
- WT_ERR(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_buf_fmt(session, buf,
- "%s,key_format=u,value_format=u,memory_page_max=%" PRIu64,
- config, 2 * lsm_tree->chunk_max));
- WT_ERR(__wt_strndup(
- session, buf->data, buf->size, &lsm_tree->file_config));
-
- /* Create the first chunk and flush the metadata. */
- WT_ERR(__wt_lsm_meta_write(session, lsm_tree));
-
- /* Discard our partially populated handle. */
- ret = __lsm_tree_discard(session, lsm_tree, false);
- lsm_tree = NULL;
/*
* Open our new tree and add it to the handle cache. Don't discard on
* error: the returned handle is NULL on error, and the metadata
* tracking macros handle cleaning up on failure.
*/
- if (ret == 0)
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __lsm_tree_open(session, uri, true, &lsm_tree));
+ WT_WITH_HANDLE_LIST_LOCK(session,
+ ret = __lsm_tree_open(session, uri, true, &lsm_tree));
if (ret == 0)
__wt_lsm_tree_release(session, lsm_tree);
- if (0) {
-err: WT_TRET(__lsm_tree_discard(session, lsm_tree, false));
- }
- __wt_scr_free(session, &buf);
+err: __wt_free(session, metadata);
return (ret);
}
@@ -477,27 +375,26 @@ __lsm_tree_find(WT_SESSION_IMPL *session,
/* See if the tree is already open. */
TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q)
if (strcmp(uri, lsm_tree->name) == 0) {
- /*
- * Short circuit if the handle is already held
- * exclusively or exclusive access is requested and
- * there are references held.
- */
- if ((exclusive && lsm_tree->refcnt > 0) ||
- lsm_tree->exclusive)
- return (EBUSY);
-
if (exclusive) {
/*
* Make sure we win the race to switch on the
* exclusive flag.
*/
- if (!__wt_atomic_cas8(
- &lsm_tree->exclusive, 0, 1))
+ if (!__wt_atomic_cas_ptr(
+ &lsm_tree->excl_session, NULL, session))
return (EBUSY);
- /* Make sure there are no readers */
- if (!__wt_atomic_cas32(
- &lsm_tree->refcnt, 0, 1)) {
- lsm_tree->exclusive = 0;
+
+ /*
+ * Drain the work queue before checking for
+ * open cursors - otherwise we can generate
+ * spurious busy returns.
+ */
+ (void)__wt_atomic_add32(&lsm_tree->refcnt, 1);
+ if (__lsm_tree_close(
+ session, lsm_tree, false) != 0 ||
+ lsm_tree->refcnt != 1) {
+ __wt_lsm_tree_release(
+ session, lsm_tree);
return (EBUSY);
}
} else {
@@ -507,11 +404,11 @@ __lsm_tree_find(WT_SESSION_IMPL *session,
* We got a reference, check if an exclusive
* lock beat us to it.
*/
- if (lsm_tree->exclusive) {
+ if (lsm_tree->excl_session != NULL) {
WT_ASSERT(session,
lsm_tree->refcnt > 0);
- (void)__wt_atomic_sub32(
- &lsm_tree->refcnt, 1);
+ __wt_lsm_tree_release(
+ session, lsm_tree);
return (EBUSY);
}
}
@@ -603,7 +500,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session,
* with getting handles exclusive.
*/
lsm_tree->refcnt = 1;
- lsm_tree->exclusive = exclusive ? 1 : 0;
+ lsm_tree->excl_session = exclusive ? session : NULL;
lsm_tree->queue_ref = 0;
/* Set a flush timestamp as a baseline. */
@@ -611,7 +508,9 @@ __lsm_tree_open(WT_SESSION_IMPL *session,
/* Now the tree is setup, make it visible to others. */
TAILQ_INSERT_HEAD(&S2C(session)->lsmqh, lsm_tree, q);
- F_SET(lsm_tree, WT_LSM_TREE_ACTIVE | WT_LSM_TREE_OPEN);
+ if (!exclusive)
+ lsm_tree->active = true;
+ F_SET(lsm_tree, WT_LSM_TREE_OPEN);
*treep = lsm_tree;
@@ -638,7 +537,7 @@ __wt_lsm_tree_get(WT_SESSION_IMPL *session,
ret = __lsm_tree_open(session, uri, exclusive, treep);
WT_ASSERT(session, ret != 0 ||
- (exclusive ? 1 : 0) == (*treep)->exclusive);
+ (*treep)->excl_session == (exclusive ? session : NULL));
return (ret);
}
@@ -650,8 +549,11 @@ void
__wt_lsm_tree_release(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
WT_ASSERT(session, lsm_tree->refcnt > 0);
- if (lsm_tree->exclusive)
- lsm_tree->exclusive = 0;
+ if (lsm_tree->excl_session == session) {
+ /* We cleared the active flag when getting exclusive access. */
+ lsm_tree->active = true;
+ lsm_tree->excl_session = NULL;
+ }
(void)__wt_atomic_sub32(&lsm_tree->refcnt, 1);
}
@@ -868,7 +770,7 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
F_CLR(lsm_tree, WT_LSM_TREE_NEED_SWITCH);
++lsm_tree->dsk_gen;
- lsm_tree->modified = 1;
+ lsm_tree->modified = true;
/*
* Set the switch transaction in the previous chunk unless this is
@@ -964,9 +866,7 @@ __wt_lsm_tree_drop(
WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_lsm_tree_get(session, name, true, &lsm_tree));
WT_RET(ret);
-
- /* Shut down the LSM worker. */
- WT_ERR(__lsm_tree_close(session, lsm_tree));
+ WT_ASSERT(session, !lsm_tree->active);
/* Prevent any new opens. */
WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree));
@@ -995,6 +895,7 @@ __wt_lsm_tree_drop(
WT_ERR(__wt_lsm_tree_writeunlock(session, lsm_tree));
ret = __wt_metadata_remove(session, name);
+ WT_ASSERT(session, !lsm_tree->active);
err: if (locked)
WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree));
WT_WITH_HANDLE_LIST_LOCK(session,
@@ -1027,9 +928,6 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session,
ret = __wt_lsm_tree_get(session, olduri, true, &lsm_tree));
WT_RET(ret);
- /* Shut down the LSM worker. */
- WT_ERR(__lsm_tree_close(session, lsm_tree));
-
/* Prevent any new opens. */
WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree));
locked = true;
@@ -1067,8 +965,8 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session,
err: if (locked)
WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree));
- if (old != NULL)
- __wt_free(session, old);
+ __wt_free(session, old);
+
/*
* Discard this LSM tree structure. The first operation on the renamed
* tree will create a new one.
@@ -1102,9 +1000,6 @@ __wt_lsm_tree_truncate(
ret = __wt_lsm_tree_get(session, name, true, &lsm_tree));
WT_RET(ret);
- /* Shut down the LSM worker. */
- WT_ERR(__lsm_tree_close(session, lsm_tree));
-
/* Prevent any new opens. */
WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree));
locked = true;
@@ -1308,8 +1203,8 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
if (chunk != NULL) {
WT_ERR(__wt_verbose(session, WT_VERB_LSM,
"Compact force flush %s flags 0x%" PRIx32
- " chunk %u flags 0x%"
- PRIx32, name, lsm_tree->flags, chunk->id, chunk->flags));
+ " chunk %" PRIu32 " flags 0x%" PRIx32,
+ name, lsm_tree->flags, chunk->id, chunk->flags));
flushing = true;
/*
* Make sure the in-memory chunk gets flushed; do not push a
@@ -1331,7 +1226,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
}
/* Wait for the work unit queues to drain. */
- while (F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) {
+ while (lsm_tree->active) {
/*
* The flush flag is cleared when the chunk has been flushed.
* Continue to push forced flushes until the chunk is on disk.
@@ -1342,7 +1237,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
WT_ERR(__wt_verbose(session,
WT_VERB_LSM,
- "Compact flush done %s chunk %u. "
+ "Compact flush done %s chunk %" PRIu32 ". "
"Start compacting progress %" PRIu64,
name, chunk->id,
lsm_tree->merge_progressing));
@@ -1353,7 +1248,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
progress = lsm_tree->merge_progressing;
} else {
WT_ERR(__wt_verbose(session, WT_VERB_LSM,
- "Compact flush retry %s chunk %u",
+ "Compact flush retry %s chunk %" PRIu32,
name, chunk->id));
WT_ERR(__wt_lsm_manager_push_entry(session,
WT_LSM_WORK_FLUSH, WT_LSM_WORK_FORCE,
@@ -1413,7 +1308,6 @@ err:
__wt_lsm_tree_release(session, lsm_tree);
return (ret);
-
}
/*
@@ -1455,8 +1349,7 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session,
continue;
WT_ERR(__wt_schema_worker(session, chunk->uri,
file_func, name_func, cfg, open_flags));
- if (name_func == __wt_backup_list_uri_append &&
- F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
+ if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
WT_ERR(__wt_schema_worker(session, chunk->bloom_uri,
file_func, name_func, cfg, open_flags));
}
diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c
index d5d81df6785..87771e2cb6c 100644
--- a/src/lsm/lsm_work_unit.c
+++ b/src/lsm/lsm_work_unit.c
@@ -29,7 +29,7 @@ __lsm_copy_chunks(WT_SESSION_IMPL *session,
cookie->nchunks = 0;
WT_RET(__wt_lsm_tree_readlock(session, lsm_tree));
- if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE))
+ if (!lsm_tree->active)
return (__wt_lsm_tree_readunlock(session, lsm_tree));
/* Take a copy of the current state of the LSM tree. */
@@ -72,14 +72,14 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session,
{
WT_DECL_RET;
WT_LSM_CHUNK *chunk, *evict_chunk, *flush_chunk;
- u_int i;
+ uint32_t i;
*chunkp = NULL;
chunk = evict_chunk = flush_chunk = NULL;
WT_ASSERT(session, lsm_tree->queue_ref > 0);
WT_RET(__wt_lsm_tree_readlock(session, lsm_tree));
- if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE) || lsm_tree->nchunks == 0)
+ if (!lsm_tree->active || lsm_tree->nchunks == 0)
return (__wt_lsm_tree_readunlock(session, lsm_tree));
/* Search for a chunk to evict and/or a chunk to flush. */
@@ -118,7 +118,7 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session,
if (chunk != NULL) {
WT_ERR(__wt_verbose(session, WT_VERB_LSM,
- "Flush%s: return chunk %u of %u: %s",
+ "Flush%s: return chunk %" PRIu32 " of %" PRIu32 ": %s",
force ? " w/ force" : "",
i, lsm_tree->nchunks, chunk->uri));
@@ -322,7 +322,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
*/
saved_isolation = session->txn.isolation;
session->txn.isolation = WT_ISO_READ_UNCOMMITTED;
- ret = __wt_cache_op(session, NULL, WT_SYNC_WRITE_LEAVES);
+ ret = __wt_cache_op(session, WT_SYNC_WRITE_LEAVES);
session->txn.isolation = saved_isolation;
WT_TRET(__wt_session_release_btree(session));
}
@@ -334,11 +334,17 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
/*
* Turn on metadata tracking to ensure the checkpoint gets the
* necessary handle locks.
+ *
+ * Ensure that we don't race with a running checkpoint: the checkpoint
+ * lock protects against us racing with an application checkpoint in
+ * this chunk. Don't wait for it, though: checkpoints can take a long
+ * time, and our checkpoint operation should be very quick.
*/
WT_ERR(__wt_meta_track_on(session));
- WT_WITH_SCHEMA_LOCK(session, ret,
- ret = __wt_schema_worker(
- session, chunk->uri, __wt_checkpoint, NULL, NULL, 0));
+ WT_WITH_CHECKPOINT_LOCK(session, ret,
+ WT_WITH_SCHEMA_LOCK(session, ret,
+ ret = __wt_schema_worker(
+ session, chunk->uri, __wt_checkpoint, NULL, NULL, 0)));
WT_TRET(__wt_meta_track_off(session, false, ret != 0));
if (ret != 0)
WT_ERR_MSG(session, ret, "LSM checkpoint");
diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c
index 7562cb1cae3..0874da8db13 100644
--- a/src/lsm/lsm_worker.c
+++ b/src/lsm/lsm_worker.c
@@ -20,7 +20,7 @@ int
__wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args)
{
WT_RET(__wt_verbose(session, WT_VERB_LSM_MANAGER,
- "Start LSM worker %d type 0x%x", args->id, args->type));
+ "Start LSM worker %u type %#" PRIx32, args->id, args->type));
return (__wt_thread_create(session, &args->tid, __lsm_worker, args));
}
@@ -59,9 +59,8 @@ __lsm_worker_general_op(
*/
if (chunk != NULL) {
WT_ERR(__wt_verbose(session, WT_VERB_LSM,
- "Flush%s chunk %d %s",
- force ? " w/ force" : "",
- chunk->id, chunk->uri));
+ "Flush%s chunk %" PRIu32 " %s",
+ force ? " w/ force" : "", chunk->id, chunk->uri));
ret = __wt_lsm_checkpoint_chunk(
session, entry->lsm_tree, chunk);
WT_ASSERT(session, chunk->refcnt > 0);
@@ -140,7 +139,7 @@ __lsm_worker(void *arg)
if (ret == WT_NOTFOUND) {
F_CLR(entry->lsm_tree, WT_LSM_TREE_COMPACTING);
ret = 0;
- } else if (ret == EBUSY)
+ } else if (ret == EBUSY || ret == EINTR)
ret = 0;
/* Paranoia: clear session state. */
@@ -164,7 +163,7 @@ __lsm_worker(void *arg)
if (ret != 0) {
err: __wt_lsm_manager_free_work_unit(session, entry);
WT_PANIC_MSG(session, ret,
- "Error in LSM worker thread %d", cookie->id);
+ "Error in LSM worker thread %u", cookie->id);
}
return (WT_THREAD_RET_VALUE);
}