diff options
author | Michael Cahill <michael.cahill@mongodb.com> | 2016-03-25 08:19:12 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2016-03-25 08:19:12 +1100 |
commit | 43e885a0f9a3ad046eae1726b005ca1280624be3 (patch) | |
tree | f03d31a7b36c476484f26ea5259777aeb7b13201 /src/lsm | |
parent | 5cdd3e320cb19cd54111c2572a3d6e33d3009ad4 (diff) | |
parent | 9cf8eb2f15c6df7da90c19c86ccf7516ed126183 (diff) | |
download | mongo-26e03281a42dc78fa9cc24535cd8e589eea9a5ad.tar.gz |
Merge branch 'mongodb-3.4' into mongodb-3.2mongodb-3.2.5
Diffstat (limited to 'src/lsm')
-rw-r--r-- | src/lsm/lsm_cursor.c | 34 | ||||
-rw-r--r-- | src/lsm/lsm_manager.c | 17 | ||||
-rw-r--r-- | src/lsm/lsm_merge.c | 13 | ||||
-rw-r--r-- | src/lsm/lsm_meta.c | 336 | ||||
-rw-r--r-- | src/lsm/lsm_tree.c | 257 | ||||
-rw-r--r-- | src/lsm/lsm_work_unit.c | 22 | ||||
-rw-r--r-- | src/lsm/lsm_worker.c | 11 |
7 files changed, 432 insertions, 258 deletions
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index f76b2bfd9ac..e023b2b407e 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -1501,22 +1501,22 @@ __wt_clsm_open(WT_SESSION_IMPL *session, { WT_CONFIG_ITEM cval; WT_CURSOR_STATIC_INIT(iface, - __wt_cursor_get_key, /* get-key */ - __wt_cursor_get_value, /* get-value */ - __wt_cursor_set_key, /* set-key */ - __wt_cursor_set_value, /* set-value */ - __clsm_compare, /* compare */ - __wt_cursor_equals, /* equals */ - __clsm_next, /* next */ - __clsm_prev, /* prev */ - __clsm_reset, /* reset */ - __clsm_search, /* search */ - __clsm_search_near, /* search-near */ - __clsm_insert, /* insert */ - __clsm_update, /* update */ - __clsm_remove, /* remove */ - __wt_cursor_reconfigure, /* reconfigure */ - __wt_clsm_close); /* close */ + __wt_cursor_get_key, /* get-key */ + __wt_cursor_get_value, /* get-value */ + __wt_cursor_set_key, /* set-key */ + __wt_cursor_set_value, /* set-value */ + __clsm_compare, /* compare */ + __wt_cursor_equals, /* equals */ + __clsm_next, /* next */ + __clsm_prev, /* prev */ + __clsm_reset, /* reset */ + __clsm_search, /* search */ + __clsm_search_near, /* search-near */ + __clsm_insert, /* insert */ + __clsm_update, /* update */ + __clsm_remove, /* remove */ + __wt_cursor_reconfigure, /* reconfigure */ + __wt_clsm_close); /* close */ WT_CURSOR *cursor; WT_CURSOR_LSM *clsm; WT_DECL_RET; @@ -1556,7 +1556,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session, WT_ERR(ret); /* Make sure we have exclusive access if and only if we want it */ - WT_ASSERT(session, !bulk || lsm_tree->exclusive); + WT_ASSERT(session, !bulk || lsm_tree->excl_session != NULL); WT_ERR(__wt_calloc_one(session, &clsm)); diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index dac8d987328..943a5894ab3 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -212,6 +212,10 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session) conn = S2C(session); manager = &conn->lsm_manager; + if (F_ISSET(conn, WT_CONN_READONLY)) { + manager->lsm_workers = 0; + return (0); + } /* * We need at least a manager, a switch thread and a generic * worker. @@ -284,6 +288,8 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) manager = &conn->lsm_manager; removed = 0; + WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) || + manager->lsm_workers == 0); if (manager->lsm_workers > 0) { /* * Stop the main LSM manager thread first. @@ -384,7 +390,7 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST); dhandle_locked = true; TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) { - if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) + if (!lsm_tree->active) continue; WT_ERR(__wt_epoch(session, &now)); pushms = lsm_tree->work_push_ts.tv_sec == 0 ? 0 : @@ -427,8 +433,10 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) session, WT_LSM_WORK_BLOOM, 0, lsm_tree)); WT_ERR(__wt_verbose(session, WT_VERB_LSM_MANAGER, - "MGR %s: queue %d mod %d nchunks %d" - " flags 0x%x aggressive %d pushms %" PRIu64 + "MGR %s: queue %" PRIu32 " mod %d " + "nchunks %" PRIu32 + " flags %#" PRIx32 " aggressive %" PRIu32 + " pushms %" PRIu64 " fillms %" PRIu64, lsm_tree->name, lsm_tree->queue_ref, lsm_tree->modified, lsm_tree->nchunks, @@ -616,6 +624,7 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, manager = &S2C(session)->lsm_manager; + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); /* * Don't add merges or bloom filter creates if merges * or bloom filters are disabled in the tree. @@ -641,7 +650,7 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, * is checked. */ (void)__wt_atomic_add32(&lsm_tree->queue_ref, 1); - if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) { + if (!lsm_tree->active) { (void)__wt_atomic_sub32(&lsm_tree->queue_ref, 1); return (0); } diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index 29325066da7..6d907284546 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -60,10 +60,11 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) { struct timespec now; uint64_t msec_since_last_merge, msec_to_create_merge; - u_int new_aggressive; + uint32_t new_aggressive; new_aggressive = 0; + WT_ASSERT(session, lsm_tree->merge_min != 0); /* * If the tree is open read-only or we are compacting, be very * aggressive. Otherwise, we can spend a long time waiting for merges @@ -124,8 +125,9 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (new_aggressive > lsm_tree->merge_aggressiveness) { WT_RET(__wt_verbose(session, WT_VERB_LSM, - "LSM merge %s got aggressive (old %u new %u), " - "merge_min %d, %u / %" PRIu64, + "LSM merge %s got aggressive " + "(old %" PRIu32 " new %" PRIu32 "), " + "merge_min %u, %" PRIu64 " / %" PRIu64, lsm_tree->name, lsm_tree->merge_aggressiveness, new_aggressive, lsm_tree->merge_min, msec_since_last_merge, lsm_tree->chunk_fill_ms)); @@ -410,7 +412,8 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) start_chunk, end_chunk, dest_id, record_count, generation)); for (verb = start_chunk; verb <= end_chunk; verb++) WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Merging %s: Chunk[%u] id %u, gen: %" PRIu32 + "Merging %s: Chunk[%u] id %" PRIu32 + ", gen: %" PRIu32 ", size: %" PRIu64 ", records: %" PRIu64, lsm_tree->name, verb, lsm_tree->chunk[verb]->id, lsm_tree->chunk[verb]->generation, @@ -460,7 +463,7 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) #define LSM_MERGE_CHECK_INTERVAL WT_THOUSAND for (insert_count = 0; (ret = src->next(src)) == 0; insert_count++) { if (insert_count % LSM_MERGE_CHECK_INTERVAL == 0) { - if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) + if (!lsm_tree->active) WT_ERR(EINTR); WT_STAT_FAST_CONN_INCRV(session, diff --git a/src/lsm/lsm_meta.c b/src/lsm/lsm_meta.c index d76b2a48aa7..e19e2cd0126 100644 --- a/src/lsm/lsm_meta.c +++ b/src/lsm/lsm_meta.c @@ -9,17 +9,17 @@ #include "wt_internal.h" /* - * __wt_lsm_meta_read -- - * Read the metadata for an LSM tree. + * __lsm_meta_read_v0 -- + * Read v0 of LSM metadata. */ -int -__wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) +static int +__lsm_meta_read_v0( + WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *lsmconf) { WT_CONFIG cparser, lparser; WT_CONFIG_ITEM ck, cv, fileconf, lk, lv, metadata; WT_DECL_RET; WT_LSM_CHUNK *chunk; - char *lsmconfig; u_int nchunks; chunk = NULL; /* -Wconditional-uninitialized */ @@ -28,8 +28,7 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE)) F_SET(lsm_tree, WT_LSM_TREE_MERGES); - WT_RET(__wt_metadata_search(session, lsm_tree->name, &lsmconfig)); - WT_ERR(__wt_config_init(session, &cparser, lsmconfig)); + WT_ERR(__wt_config_init(session, &cparser, lsmconf)); while ((ret = __wt_config_next(&cparser, &ck, &cv)) == 0) { if (WT_STRING_MATCH("key_format", ck.str, ck.len)) { __wt_free(session, lsm_tree->key_format); @@ -48,7 +47,7 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) * from the file configuration. */ WT_ERR(__wt_config_getones( - session, lsmconfig, "file_config", &fileconf)); + session, lsmconf, "file_config", &fileconf)); WT_CLEAR(metadata); WT_ERR_NOTFOUND_OK(__wt_config_subgets( session, &fileconf, "app_metadata", &metadata)); @@ -160,16 +159,292 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) */ } WT_ERR_NOTFOUND_OK(ret); +err: return (ret); +} + +/* + * __lsm_meta_read_v1 -- + * Read v1 of LSM metadata. + */ +static int +__lsm_meta_read_v1( + WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *lsmconf) +{ + WT_CONFIG lparser; + WT_CONFIG_ITEM cv, lk, lv, metadata; + WT_DECL_ITEM(buf); + WT_DECL_RET; + WT_LSM_CHUNK *chunk; + const char *file_cfg[] = { + WT_CONFIG_BASE(session, file_config), NULL, NULL, NULL }; + char *fileconf; + u_int nchunks; + + chunk = NULL; /* -Wconditional-uninitialized */ + + WT_ERR(__wt_config_getones(session, lsmconf, "key_format", &cv)); + WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->key_format)); + WT_ERR(__wt_config_getones(session, lsmconf, "value_format", &cv)); + WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->value_format)); + + WT_ERR(__wt_config_getones(session, lsmconf, "collator", &cv)); + if (cv.len != 0 && !WT_STRING_MATCH("none", cv.str, cv.len)) { + /* Extract the application-supplied metadata (if any). */ + WT_CLEAR(metadata); + WT_ERR_NOTFOUND_OK(__wt_config_getones( + session, lsmconf, "app_metadata", &metadata)); + WT_ERR(__wt_collator_config(session, lsm_tree->name, + &cv, &metadata, + &lsm_tree->collator, &lsm_tree->collator_owned)); + WT_ERR(__wt_strndup(session, + cv.str, cv.len, &lsm_tree->collator_name)); + } + + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.auto_throttle", &cv)); + if (cv.val) + F_SET(lsm_tree, WT_LSM_TREE_THROTTLE); + else + F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE); + + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom", &cv)); + FLD_SET(lsm_tree->bloom, + (cv.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED)); + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom_oldest", &cv)); + if (cv.val != 0) + FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST); + + if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) && + FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST)) + WT_ERR_MSG(session, EINVAL, + "Bloom filters can only be created on newest and oldest " + "chunks if bloom filters are enabled"); + + WT_ERR(__wt_config_getones( + session, lsmconf, "lsm.bloom_bit_count", &cv)); + lsm_tree->bloom_bit_count = (uint32_t)cv.val; + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom_config", &cv)); + /* Don't include the brackets. */ + if (cv.type == WT_CONFIG_ITEM_STRUCT) { + cv.str++; + cv.len -= 2; + } + WT_ERR(__wt_config_check(session, + WT_CONFIG_REF(session, WT_SESSION_create), cv.str, cv.len)); + WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->bloom_config)); + WT_ERR(__wt_config_getones( + session, lsmconf, "lsm.bloom_hash_count", &cv)); + lsm_tree->bloom_hash_count = (uint32_t)cv.val; + + WT_ERR(__wt_config_getones( + session, lsmconf, "lsm.chunk_count_limit", &cv)); + lsm_tree->chunk_count_limit = (uint32_t)cv.val; + if (cv.val == 0) + F_SET(lsm_tree, WT_LSM_TREE_MERGES); + else + F_CLR(lsm_tree, WT_LSM_TREE_MERGES); + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.chunk_max", &cv)); + lsm_tree->chunk_max = (uint64_t)cv.val; + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.chunk_size", &cv)); + lsm_tree->chunk_size = (uint64_t)cv.val; + + if (lsm_tree->chunk_size > lsm_tree->chunk_max) + WT_ERR_MSG(session, EINVAL, + "Chunk size (chunk_size) must be smaller than or equal to " + "the maximum chunk size (chunk_max)"); + + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.merge_max", &cv)); + lsm_tree->merge_max = (uint32_t)cv.val; + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.merge_min", &cv)); + lsm_tree->merge_min = (uint32_t)cv.val; + + if (lsm_tree->merge_min > lsm_tree->merge_max) + WT_ERR_MSG(session, EINVAL, + "LSM merge_min must be less than or equal to merge_max"); + + WT_ERR(__wt_config_getones(session, lsmconf, "last", &cv)); + lsm_tree->last = (u_int)cv.val; + WT_ERR(__wt_config_getones(session, lsmconf, "chunks", &cv)); + WT_ERR(__wt_config_subinit(session, &lparser, &cv)); + for (nchunks = 0; (ret = + __wt_config_next(&lparser, &lk, &lv)) == 0; ) { + if (WT_STRING_MATCH("id", lk.str, lk.len)) { + WT_ERR(__wt_realloc_def(session, + &lsm_tree->chunk_alloc, + nchunks + 1, &lsm_tree->chunk)); + WT_ERR(__wt_calloc_one(session, &chunk)); + lsm_tree->chunk[nchunks++] = chunk; + chunk->id = (uint32_t)lv.val; + WT_ERR(__wt_lsm_tree_chunk_name(session, + lsm_tree, chunk->id, &chunk->uri)); + F_SET(chunk, + WT_LSM_CHUNK_ONDISK | + WT_LSM_CHUNK_STABLE); + } else if (WT_STRING_MATCH("bloom", lk.str, lk.len)) { + WT_ERR(__wt_lsm_tree_bloom_name( + session, lsm_tree, chunk->id, &chunk->bloom_uri)); + F_SET(chunk, WT_LSM_CHUNK_BLOOM); + continue; + } else if (WT_STRING_MATCH("chunk_size", lk.str, lk.len)) { + chunk->size = (uint64_t)lv.val; + continue; + } else if (WT_STRING_MATCH("count", lk.str, lk.len)) { + chunk->count = (uint64_t)lv.val; + continue; + } else if (WT_STRING_MATCH("generation", lk.str, lk.len)) { + chunk->generation = (uint32_t)lv.val; + continue; + } + } + WT_ERR_NOTFOUND_OK(ret); + lsm_tree->nchunks = nchunks; + + WT_ERR(__wt_config_getones(session, lsmconf, "old_chunks", &cv)); + WT_ERR(__wt_config_subinit(session, &lparser, &cv)); + for (nchunks = 0; (ret = + __wt_config_next(&lparser, &lk, &lv)) == 0; ) { + if (WT_STRING_MATCH("bloom", lk.str, lk.len)) { + WT_ERR(__wt_strndup(session, + lv.str, lv.len, &chunk->bloom_uri)); + F_SET(chunk, WT_LSM_CHUNK_BLOOM); + continue; + } + WT_ERR(__wt_realloc_def(session, + &lsm_tree->old_alloc, nchunks + 1, + &lsm_tree->old_chunks)); + WT_ERR(__wt_calloc_one(session, &chunk)); + lsm_tree->old_chunks[nchunks++] = chunk; + WT_ERR(__wt_strndup(session, + lk.str, lk.len, &chunk->uri)); + F_SET(chunk, WT_LSM_CHUNK_ONDISK); + } + WT_ERR_NOTFOUND_OK(ret); + lsm_tree->nold_chunks = nchunks; + + /* + * Set up the config for each chunk. + * + * Make the memory_page_max double the chunk size, so application + * threads don't immediately try to force evict the chunk when the + * worker thread clears the NO_EVICTION flag. + */ + file_cfg[1] = lsmconf; + WT_ERR(__wt_scr_alloc(session, 0, &buf)); + WT_ERR(__wt_buf_fmt(session, buf, + "key_format=u,value_format=u,memory_page_max=%" PRIu64, + 2 * lsm_tree->chunk_max)); + file_cfg[2] = buf->data; + WT_ERR(__wt_config_collapse(session, file_cfg, &fileconf)); + lsm_tree->file_config = fileconf; + + /* + * Ignore any other values: the metadata entry might have been + * created by a future release, with unknown options. + */ +err: __wt_scr_free(session, &buf); + return (ret); +} + +/* + * __lsm_meta_upgrade_v1 -- + * Upgrade to v1 of LSM metadata. + */ +static int +__lsm_meta_upgrade_v1(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) +{ + WT_DECL_ITEM(buf); + WT_DECL_RET; + const char *new_cfg[] = { + WT_CONFIG_BASE(session, lsm_meta), NULL, NULL, NULL }; + + /* Include the custom config that used to be embedded in file_config. */ + new_cfg[1] = lsm_tree->file_config; + + WT_ERR(__wt_scr_alloc(session, 0, &buf)); + WT_ERR(__wt_buf_fmt(session, buf, + "key_format=%s,value_format=%s", + lsm_tree->key_format, lsm_tree->value_format)); + + WT_ERR(__wt_buf_catfmt(session, buf, ",collator=%s", + lsm_tree->collator_name != NULL ? lsm_tree->collator_name : "")); + + WT_ERR(__wt_buf_catfmt(session, buf, ",lsm=(")); + + WT_ERR(__wt_buf_catfmt(session, buf, "auto_throttle=%d", + F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE))); + + WT_ERR(__wt_buf_catfmt(session, buf, ",bloom=%d", + FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_MERGED))); + WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_oldest=%d", + FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))); + WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_bit_count=%" PRIu32, + lsm_tree->bloom_bit_count)); + if (lsm_tree->bloom_config != NULL && + strlen(lsm_tree->bloom_config) > 0) + WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_config=(%s)", + lsm_tree->bloom_config)); + else + WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_config=")); + WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_hash_count=%" PRIu32, + lsm_tree->bloom_hash_count)); + + WT_ERR(__wt_buf_catfmt(session, buf, ",chunk_count_limit=%" PRIu32, + lsm_tree->chunk_count_limit)); + WT_ERR(__wt_buf_catfmt(session, buf, ",chunk_max=%" PRIu64, + lsm_tree->chunk_max)); + WT_ERR(__wt_buf_catfmt(session, buf, ",merge_max=%" PRIu32, + lsm_tree->merge_max)); + WT_ERR(__wt_buf_catfmt(session, buf, ",merge_min=%" PRIu32, + lsm_tree->merge_min)); + + WT_ERR(__wt_buf_catfmt(session, buf, ")")); + + new_cfg[2] = buf->data; + WT_ERR(__wt_config_merge(session, new_cfg, NULL, &lsm_tree->config)); + +err: __wt_scr_free(session, &buf); + return (ret); +} +/* + * __wt_lsm_meta_read -- + * Read the metadata for an LSM tree. + */ +int +__wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) +{ + WT_CONFIG_ITEM cval; + WT_DECL_RET; + char *lsmconf; + bool upgrade; + + /* LSM trees inherit the merge setting from the connection. */ + if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE)) + F_SET(lsm_tree, WT_LSM_TREE_MERGES); + + WT_RET(__wt_metadata_search(session, lsm_tree->name, &lsmconf)); + upgrade = false; + ret = __wt_config_getones(session, lsmconf, "file_config", &cval); + if (ret == 0) { + ret = __lsm_meta_read_v0(session, lsm_tree, lsmconf); + __wt_free(session, lsmconf); + WT_RET(ret); + upgrade = true; + } else if (ret == WT_NOTFOUND) { + lsm_tree->config = lsmconf; + ret = 0; + WT_RET(__lsm_meta_read_v1(session, lsm_tree, lsmconf)); + } /* - * If the default merge_min was not overridden, calculate it now. We - * do this here so that trees created before merge_min was added get a - * sane value. + * If the default merge_min was not overridden, calculate it now. */ if (lsm_tree->merge_min < 2) lsm_tree->merge_min = WT_MAX(2, lsm_tree->merge_max / 2); - -err: __wt_free(session, lsmconfig); + /* + * If needed, upgrade the configuration. We need to do this after + * we have fixed the merge_min value. + */ + if (upgrade) + WT_RET(__lsm_meta_upgrade_v1(session, lsm_tree)); return (ret); } @@ -184,32 +459,15 @@ __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_DECL_RET; WT_LSM_CHUNK *chunk; u_int i; + const char *new_cfg[] = { NULL, NULL, NULL }; + char *new_metadata; bool first; + new_metadata = NULL; + WT_RET(__wt_scr_alloc(session, 0, &buf)); - WT_ERR(__wt_buf_fmt(session, buf, - "key_format=%s,value_format=%s,bloom_config=(%s),file_config=(%s)", - lsm_tree->key_format, lsm_tree->value_format, - lsm_tree->bloom_config, lsm_tree->file_config)); - if (lsm_tree->collator_name != NULL) - WT_ERR(__wt_buf_catfmt( - session, buf, ",collator=%s", lsm_tree->collator_name)); WT_ERR(__wt_buf_catfmt(session, buf, - ",last=%" PRIu32 - ",chunk_count_limit=%" PRIu32 - ",chunk_max=%" PRIu64 - ",chunk_size=%" PRIu64 - ",auto_throttle=%" PRIu32 - ",merge_max=%" PRIu32 - ",merge_min=%" PRIu32 - ",bloom=%" PRIu32 - ",bloom_bit_count=%" PRIu32 - ",bloom_hash_count=%" PRIu32, - lsm_tree->last, lsm_tree->chunk_count_limit, - lsm_tree->chunk_max, lsm_tree->chunk_size, - F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE) ? 1 : 0, - lsm_tree->merge_max, lsm_tree->merge_min, lsm_tree->bloom, - lsm_tree->bloom_bit_count, lsm_tree->bloom_hash_count)); + ",last=%" PRIu32, lsm_tree->last)); WT_ERR(__wt_buf_catfmt(session, buf, ",chunks=[")); for (i = 0; i < lsm_tree->nchunks; i++) { chunk = lsm_tree->chunk[i]; @@ -243,9 +501,15 @@ __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) session, buf, ",bloom=\"%s\"", chunk->bloom_uri)); } WT_ERR(__wt_buf_catfmt(session, buf, "]")); - ret = __wt_metadata_update(session, lsm_tree->name, buf->data); + + /* Update the existing configuration with the new values. */ + new_cfg[0] = lsm_tree->config; + new_cfg[1] = buf->data; + WT_ERR(__wt_config_collapse(session, new_cfg, &new_metadata)); + ret = __wt_metadata_update(session, lsm_tree->name, new_metadata); WT_ERR(ret); err: __wt_scr_free(session, &buf); + __wt_free(session, new_metadata); return (ret); } diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index ab18e41a2f5..cb1ddf22f84 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -27,6 +27,7 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final) WT_UNUSED(final); /* Only used in diagnostic builds */ + WT_ASSERT(session, !lsm_tree->active); /* * The work unit queue should be empty, but it's worth checking * since work units use a different locking scheme to regular tree @@ -85,19 +86,27 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final) * Close an LSM tree structure. */ static int -__lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) +__lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final) { WT_DECL_RET; int i; - /* Stop any active merges. */ - F_CLR(lsm_tree, WT_LSM_TREE_ACTIVE); + /* + * Stop any new work units being added. The barrier is necessary + * because we rely on the state change being visible before checking + * the tree queue state. + */ + lsm_tree->active = false; + WT_READ_BARRIER(); /* - * Wait for all LSM operations and work units that were in flight to - * finish. + * Wait for all LSM operations to drain. If WiredTiger is shutting + * down also wait for the tree reference count to go to zero, otherwise + * we know a user is holding a reference to the tree, so exclusive + * access is not available. */ - for (i = 0; lsm_tree->refcnt > 1 || lsm_tree->queue_ref > 0; ++i) { + for (i = 0; + lsm_tree->queue_ref > 0 || (final && lsm_tree->refcnt > 1); ++i) { /* * Remove any work units from the manager queues. Do this step * repeatedly in case a work unit was in the process of being @@ -114,11 +123,14 @@ __lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (i % WT_THOUSAND == 0) { WT_WITHOUT_LOCKS(session, ret = __wt_lsm_manager_clear_tree(session, lsm_tree)); - WT_RET(ret); + WT_ERR(ret); } __wt_yield(); } return (0); + +err: lsm_tree->active = true; + return (ret); } /* @@ -142,7 +154,7 @@ __wt_lsm_tree_close_all(WT_SESSION_IMPL *session) * is unconditional. */ (void)__wt_atomic_add32(&lsm_tree->refcnt, 1); - WT_TRET(__lsm_tree_close(session, lsm_tree)); + WT_TRET(__lsm_tree_close(session, lsm_tree, true)); WT_TRET(__lsm_tree_discard(session, lsm_tree, true)); } @@ -157,9 +169,12 @@ static int __lsm_tree_set_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *uri) { - if (lsm_tree->name != NULL) - __wt_free(session, lsm_tree->name); - WT_RET(__wt_strdup(session, uri, &lsm_tree->name)); + void *p; + + WT_RET(__wt_strdup(session, uri, &p)); + + __wt_free(session, lsm_tree->name); + lsm_tree->name = p; lsm_tree->filename = lsm_tree->name + strlen("lsm:"); return (0); } @@ -306,15 +321,15 @@ int __wt_lsm_tree_create(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config) { - WT_CONFIG_ITEM cval; - WT_DECL_ITEM(buf); WT_DECL_RET; WT_LSM_TREE *lsm_tree; const char *cfg[] = - { WT_CONFIG_BASE(session, WT_SESSION_create), config, NULL }; - char *tmpconfig; + { WT_CONFIG_BASE(session, lsm_meta), config, NULL }; + const char *metadata; - /* If the tree is open, it already exists. */ + metadata = NULL; + + /* If the tree can be opened, it already exists. */ WT_WITH_HANDLE_LIST_LOCK(session, ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree)); if (ret == 0) { @@ -323,139 +338,22 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session, } WT_RET_NOTFOUND_OK(ret); - /* - * If the tree has metadata, it already exists. - * - * !!! - * Use a local variable: we don't care what the existing configuration - * is, but we don't want to overwrite the real config. - */ - if (__wt_metadata_search(session, uri, &tmpconfig) == 0) { - __wt_free(session, tmpconfig); - return (exclusive ? EEXIST : 0); + if (!F_ISSET(S2C(session), WT_CONN_READONLY)) { + WT_ERR(__wt_config_merge(session, cfg, NULL, &metadata)); + WT_ERR(__wt_metadata_insert(session, uri, metadata)); } - WT_RET_NOTFOUND_OK(ret); - - /* In-memory configurations don't make sense for LSM. */ - if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) - WT_RET_MSG(session, EINVAL, - "LSM trees not supported by in-memory configurations"); - - WT_RET(__wt_config_gets(session, cfg, "key_format", &cval)); - if (WT_STRING_MATCH("r", cval.str, cval.len)) - WT_RET_MSG(session, EINVAL, - "LSM trees cannot be configured as column stores"); - - WT_RET(__wt_calloc_one(session, &lsm_tree)); - - WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri)); - - WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval)); - WT_ERR(__wt_strndup( - session, cval.str, cval.len, &lsm_tree->key_format)); - WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval)); - WT_ERR(__wt_strndup( - session, cval.str, cval.len, &lsm_tree->value_format)); - - WT_ERR(__wt_config_gets_none(session, cfg, "collator", &cval)); - WT_ERR(__wt_strndup( - session, cval.str, cval.len, &lsm_tree->collator_name)); - - WT_ERR(__wt_config_gets(session, cfg, "cache_resident", &cval)); - if (cval.val != 0) - WT_ERR_MSG(session, EINVAL, - "The cache_resident flag is not compatible with LSM"); - - WT_ERR(__wt_config_gets(session, cfg, "lsm.auto_throttle", &cval)); - if (cval.val) - F_SET(lsm_tree, WT_LSM_TREE_THROTTLE); - else - F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE); - WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom", &cval)); - FLD_SET(lsm_tree->bloom, - (cval.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED)); - WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_oldest", &cval)); - if (cval.val != 0) - FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST); - - if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) && - FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST)) - WT_ERR_MSG(session, EINVAL, - "Bloom filters can only be created on newest and oldest " - "chunks if bloom filters are enabled"); - - WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_config", &cval)); - if (cval.type == WT_CONFIG_ITEM_STRUCT) { - cval.str++; - cval.len -= 2; - } - WT_ERR(__wt_config_check(session, - WT_CONFIG_REF(session, WT_SESSION_create), cval.str, cval.len)); - WT_ERR(__wt_strndup( - session, cval.str, cval.len, &lsm_tree->bloom_config)); - - WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_bit_count", &cval)); - lsm_tree->bloom_bit_count = (uint32_t)cval.val; - WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_hash_count", &cval)); - lsm_tree->bloom_hash_count = (uint32_t)cval.val; - WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_count_limit", &cval)); - lsm_tree->chunk_count_limit = (uint32_t)cval.val; - if (cval.val == 0) - F_SET(lsm_tree, WT_LSM_TREE_MERGES); - else - F_CLR(lsm_tree, WT_LSM_TREE_MERGES); - WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_max", &cval)); - lsm_tree->chunk_max = (uint64_t)cval.val; - WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_size", &cval)); - lsm_tree->chunk_size = (uint64_t)cval.val; - if (lsm_tree->chunk_size > lsm_tree->chunk_max) - WT_ERR_MSG(session, EINVAL, - "Chunk size (chunk_size) must be smaller than or equal to " - "the maximum chunk size (chunk_max)"); - WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_max", &cval)); - lsm_tree->merge_max = (uint32_t)cval.val; - WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_min", &cval)); - lsm_tree->merge_min = (uint32_t)cval.val; - if (lsm_tree->merge_min > lsm_tree->merge_max) - WT_ERR_MSG(session, EINVAL, - "LSM merge_min must be less than or equal to merge_max"); - - /* - * Set up the config for each chunk. - * - * Make the memory_page_max double the chunk size, so application - * threads don't immediately try to force evict the chunk when the - * worker thread clears the NO_EVICTION flag. - */ - WT_ERR(__wt_scr_alloc(session, 0, &buf)); - WT_ERR(__wt_buf_fmt(session, buf, - "%s,key_format=u,value_format=u,memory_page_max=%" PRIu64, - config, 2 * lsm_tree->chunk_max)); - WT_ERR(__wt_strndup( - session, buf->data, buf->size, &lsm_tree->file_config)); - - /* Create the first chunk and flush the metadata. */ - WT_ERR(__wt_lsm_meta_write(session, lsm_tree)); - - /* Discard our partially populated handle. */ - ret = __lsm_tree_discard(session, lsm_tree, false); - lsm_tree = NULL; /* * Open our new tree and add it to the handle cache. Don't discard on * error: the returned handle is NULL on error, and the metadata * tracking macros handle cleaning up on failure. */ - if (ret == 0) - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __lsm_tree_open(session, uri, true, &lsm_tree)); + WT_WITH_HANDLE_LIST_LOCK(session, + ret = __lsm_tree_open(session, uri, true, &lsm_tree)); if (ret == 0) __wt_lsm_tree_release(session, lsm_tree); - if (0) { -err: WT_TRET(__lsm_tree_discard(session, lsm_tree, false)); - } - __wt_scr_free(session, &buf); +err: __wt_free(session, metadata); return (ret); } @@ -477,27 +375,26 @@ __lsm_tree_find(WT_SESSION_IMPL *session, /* See if the tree is already open. */ TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) if (strcmp(uri, lsm_tree->name) == 0) { - /* - * Short circuit if the handle is already held - * exclusively or exclusive access is requested and - * there are references held. - */ - if ((exclusive && lsm_tree->refcnt > 0) || - lsm_tree->exclusive) - return (EBUSY); - if (exclusive) { /* * Make sure we win the race to switch on the * exclusive flag. */ - if (!__wt_atomic_cas8( - &lsm_tree->exclusive, 0, 1)) + if (!__wt_atomic_cas_ptr( + &lsm_tree->excl_session, NULL, session)) return (EBUSY); - /* Make sure there are no readers */ - if (!__wt_atomic_cas32( - &lsm_tree->refcnt, 0, 1)) { - lsm_tree->exclusive = 0; + + /* + * Drain the work queue before checking for + * open cursors - otherwise we can generate + * spurious busy returns. + */ + (void)__wt_atomic_add32(&lsm_tree->refcnt, 1); + if (__lsm_tree_close( + session, lsm_tree, false) != 0 || + lsm_tree->refcnt != 1) { + __wt_lsm_tree_release( + session, lsm_tree); return (EBUSY); } } else { @@ -507,11 +404,11 @@ __lsm_tree_find(WT_SESSION_IMPL *session, * We got a reference, check if an exclusive * lock beat us to it. */ - if (lsm_tree->exclusive) { + if (lsm_tree->excl_session != NULL) { WT_ASSERT(session, lsm_tree->refcnt > 0); - (void)__wt_atomic_sub32( - &lsm_tree->refcnt, 1); + __wt_lsm_tree_release( + session, lsm_tree); return (EBUSY); } } @@ -603,7 +500,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session, * with getting handles exclusive. */ lsm_tree->refcnt = 1; - lsm_tree->exclusive = exclusive ? 1 : 0; + lsm_tree->excl_session = exclusive ? session : NULL; lsm_tree->queue_ref = 0; /* Set a flush timestamp as a baseline. */ @@ -611,7 +508,9 @@ __lsm_tree_open(WT_SESSION_IMPL *session, /* Now the tree is setup, make it visible to others. */ TAILQ_INSERT_HEAD(&S2C(session)->lsmqh, lsm_tree, q); - F_SET(lsm_tree, WT_LSM_TREE_ACTIVE | WT_LSM_TREE_OPEN); + if (!exclusive) + lsm_tree->active = true; + F_SET(lsm_tree, WT_LSM_TREE_OPEN); *treep = lsm_tree; @@ -638,7 +537,7 @@ __wt_lsm_tree_get(WT_SESSION_IMPL *session, ret = __lsm_tree_open(session, uri, exclusive, treep); WT_ASSERT(session, ret != 0 || - (exclusive ? 1 : 0) == (*treep)->exclusive); + (*treep)->excl_session == (exclusive ? session : NULL)); return (ret); } @@ -650,8 +549,11 @@ void __wt_lsm_tree_release(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) { WT_ASSERT(session, lsm_tree->refcnt > 0); - if (lsm_tree->exclusive) - lsm_tree->exclusive = 0; + if (lsm_tree->excl_session == session) { + /* We cleared the active flag when getting exclusive access. */ + lsm_tree->active = true; + lsm_tree->excl_session = NULL; + } (void)__wt_atomic_sub32(&lsm_tree->refcnt, 1); } @@ -868,7 +770,7 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) F_CLR(lsm_tree, WT_LSM_TREE_NEED_SWITCH); ++lsm_tree->dsk_gen; - lsm_tree->modified = 1; + lsm_tree->modified = true; /* * Set the switch transaction in the previous chunk unless this is @@ -964,9 +866,7 @@ __wt_lsm_tree_drop( WT_WITH_HANDLE_LIST_LOCK(session, ret = __wt_lsm_tree_get(session, name, true, &lsm_tree)); WT_RET(ret); - - /* Shut down the LSM worker. */ - WT_ERR(__lsm_tree_close(session, lsm_tree)); + WT_ASSERT(session, !lsm_tree->active); /* Prevent any new opens. */ WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree)); @@ -995,6 +895,7 @@ __wt_lsm_tree_drop( WT_ERR(__wt_lsm_tree_writeunlock(session, lsm_tree)); ret = __wt_metadata_remove(session, name); + WT_ASSERT(session, !lsm_tree->active); err: if (locked) WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree)); WT_WITH_HANDLE_LIST_LOCK(session, @@ -1027,9 +928,6 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session, ret = __wt_lsm_tree_get(session, olduri, true, &lsm_tree)); WT_RET(ret); - /* Shut down the LSM worker. */ - WT_ERR(__lsm_tree_close(session, lsm_tree)); - /* Prevent any new opens. */ WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree)); locked = true; @@ -1067,8 +965,8 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session, err: if (locked) WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree)); - if (old != NULL) - __wt_free(session, old); + __wt_free(session, old); + /* * Discard this LSM tree structure. The first operation on the renamed * tree will create a new one. @@ -1102,9 +1000,6 @@ __wt_lsm_tree_truncate( ret = __wt_lsm_tree_get(session, name, true, &lsm_tree)); WT_RET(ret); - /* Shut down the LSM worker. */ - WT_ERR(__lsm_tree_close(session, lsm_tree)); - /* Prevent any new opens. */ WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree)); locked = true; @@ -1308,8 +1203,8 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) if (chunk != NULL) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Compact force flush %s flags 0x%" PRIx32 - " chunk %u flags 0x%" - PRIx32, name, lsm_tree->flags, chunk->id, chunk->flags)); + " chunk %" PRIu32 " flags 0x%" PRIx32, + name, lsm_tree->flags, chunk->id, chunk->flags)); flushing = true; /* * Make sure the in-memory chunk gets flushed do not push a @@ -1331,7 +1226,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) } /* Wait for the work unit queues to drain. */ - while (F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) { + while (lsm_tree->active) { /* * The flush flag is cleared when the chunk has been flushed. * Continue to push forced flushes until the chunk is on disk. @@ -1342,7 +1237,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Compact flush done %s chunk %u. " + "Compact flush done %s chunk %" PRIu32 ". " "Start compacting progress %" PRIu64, name, chunk->id, lsm_tree->merge_progressing)); @@ -1353,7 +1248,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) progress = lsm_tree->merge_progressing; } else { WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Compact flush retry %s chunk %u", + "Compact flush retry %s chunk %" PRIu32, name, chunk->id)); WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_FLUSH, WT_LSM_WORK_FORCE, @@ -1413,7 +1308,6 @@ err: __wt_lsm_tree_release(session, lsm_tree); return (ret); - } /* @@ -1455,8 +1349,7 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session, continue; WT_ERR(__wt_schema_worker(session, chunk->uri, file_func, name_func, cfg, open_flags)); - if (name_func == __wt_backup_list_uri_append && - F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) + if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) WT_ERR(__wt_schema_worker(session, chunk->bloom_uri, file_func, name_func, cfg, open_flags)); } diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index d5d81df6785..87771e2cb6c 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -29,7 +29,7 @@ __lsm_copy_chunks(WT_SESSION_IMPL *session, cookie->nchunks = 0; WT_RET(__wt_lsm_tree_readlock(session, lsm_tree)); - if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) + if (!lsm_tree->active) return (__wt_lsm_tree_readunlock(session, lsm_tree)); /* Take a copy of the current state of the LSM tree. */ @@ -72,14 +72,14 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, { WT_DECL_RET; WT_LSM_CHUNK *chunk, *evict_chunk, *flush_chunk; - u_int i; + uint32_t i; *chunkp = NULL; chunk = evict_chunk = flush_chunk = NULL; WT_ASSERT(session, lsm_tree->queue_ref > 0); WT_RET(__wt_lsm_tree_readlock(session, lsm_tree)); - if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE) || lsm_tree->nchunks == 0) + if (!lsm_tree->active || lsm_tree->nchunks == 0) return (__wt_lsm_tree_readunlock(session, lsm_tree)); /* Search for a chunk to evict and/or a chunk to flush. */ @@ -118,7 +118,7 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, if (chunk != NULL) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Flush%s: return chunk %u of %u: %s", + "Flush%s: return chunk %" PRIu32 " of %" PRIu32 ": %s", force ? " w/ force" : "", i, lsm_tree->nchunks, chunk->uri)); @@ -322,7 +322,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, */ saved_isolation = session->txn.isolation; session->txn.isolation = WT_ISO_READ_UNCOMMITTED; - ret = __wt_cache_op(session, NULL, WT_SYNC_WRITE_LEAVES); + ret = __wt_cache_op(session, WT_SYNC_WRITE_LEAVES); session->txn.isolation = saved_isolation; WT_TRET(__wt_session_release_btree(session)); } @@ -334,11 +334,17 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, /* * Turn on metadata tracking to ensure the checkpoint gets the * necessary handle locks. + * + * Ensure that we don't race with a running checkpoint: the checkpoint + * lock protects against us racing with an application checkpoint in + * this chunk. Don't wait for it, though: checkpoints can take a long + * time, and our checkpoint operation should be very quick. */ WT_ERR(__wt_meta_track_on(session)); - WT_WITH_SCHEMA_LOCK(session, ret, - ret = __wt_schema_worker( - session, chunk->uri, __wt_checkpoint, NULL, NULL, 0)); + WT_WITH_CHECKPOINT_LOCK(session, ret, + WT_WITH_SCHEMA_LOCK(session, ret, + ret = __wt_schema_worker( + session, chunk->uri, __wt_checkpoint, NULL, NULL, 0))); WT_TRET(__wt_meta_track_off(session, false, ret != 0)); if (ret != 0) WT_ERR_MSG(session, ret, "LSM checkpoint"); diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index 7562cb1cae3..0874da8db13 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -20,7 +20,7 @@ int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) { WT_RET(__wt_verbose(session, WT_VERB_LSM_MANAGER, - "Start LSM worker %d type 0x%x", args->id, args->type)); + "Start LSM worker %u type %#" PRIx32, args->id, args->type)); return (__wt_thread_create(session, &args->tid, __lsm_worker, args)); } @@ -59,9 +59,8 @@ __lsm_worker_general_op( */ if (chunk != NULL) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Flush%s chunk %d %s", - force ? " w/ force" : "", - chunk->id, chunk->uri)); + "Flush%s chunk %" PRIu32 " %s", + force ? " w/ force" : "", chunk->id, chunk->uri)); ret = __wt_lsm_checkpoint_chunk( session, entry->lsm_tree, chunk); WT_ASSERT(session, chunk->refcnt > 0); @@ -140,7 +139,7 @@ __lsm_worker(void *arg) if (ret == WT_NOTFOUND) { F_CLR(entry->lsm_tree, WT_LSM_TREE_COMPACTING); ret = 0; - } else if (ret == EBUSY) + } else if (ret == EBUSY || ret == EINTR) ret = 0; /* Paranoia: clear session state. */ @@ -164,7 +163,7 @@ __lsm_worker(void *arg) if (ret != 0) { err: __wt_lsm_manager_free_work_unit(session, entry); WT_PANIC_MSG(session, ret, - "Error in LSM worker thread %d", cookie->id); + "Error in LSM worker thread %u", cookie->id); } return (WT_THREAD_RET_VALUE); } |