summary | refs | log | tree | commit | diff
path: root/src/third_party/wiredtiger/src/lsm/lsm_tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/lsm/lsm_tree.c')
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_tree.c 135
1 files changed, 66 insertions, 69 deletions
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
index da106ae2089..db9fd581110 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
@@ -55,7 +55,7 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
__wt_free(session, lsm_tree->bloom_config);
__wt_free(session, lsm_tree->file_config);
- WT_TRET(__wt_rwlock_destroy(session, &lsm_tree->rwlock));
+ __wt_rwlock_destroy(session, &lsm_tree->rwlock);
for (i = 0; i < lsm_tree->nchunks; i++) {
if ((chunk = lsm_tree->chunk[i]) == NULL)
@@ -85,10 +85,9 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
* __lsm_tree_close --
* Close an LSM tree structure.
*/
-static int
+static void
__lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
{
- WT_DECL_RET;
int i;
/*
@@ -97,7 +96,7 @@ __lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
* the tree queue state.
*/
lsm_tree->active = false;
- WT_READ_BARRIER();
+ WT_FULL_BARRIER();
/*
* Wait for all LSM operations to drain. If WiredTiger is shutting
@@ -120,17 +119,11 @@ __lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
* other schema level operations will return EBUSY, even though
* we're dropping the schema lock here.
*/
- if (i % WT_THOUSAND == 0) {
- WT_WITHOUT_LOCKS(session, ret =
+ if (i % WT_THOUSAND == 0)
+ WT_WITHOUT_LOCKS(session,
__wt_lsm_manager_clear_tree(session, lsm_tree));
- WT_ERR(ret);
- }
__wt_yield();
}
- return (0);
-
-err: lsm_tree->active = true;
- return (ret);
}
/*
@@ -154,7 +147,7 @@ __wt_lsm_tree_close_all(WT_SESSION_IMPL *session)
* is unconditional.
*/
(void)__wt_atomic_add32(&lsm_tree->refcnt, 1);
- WT_TRET(__lsm_tree_close(session, lsm_tree, true));
+ __lsm_tree_close(session, lsm_tree, true);
WT_TRET(__lsm_tree_discard(session, lsm_tree, true));
}
@@ -390,9 +383,8 @@ __lsm_tree_find(WT_SESSION_IMPL *session,
* spurious busy returns.
*/
(void)__wt_atomic_add32(&lsm_tree->refcnt, 1);
- if (__lsm_tree_close(
- session, lsm_tree, false) != 0 ||
- lsm_tree->refcnt != 1) {
+ __lsm_tree_close(session, lsm_tree, false);
+ if (lsm_tree->refcnt != 1) {
__wt_lsm_tree_release(
session, lsm_tree);
return (EBUSY);
@@ -730,7 +722,7 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
uint32_t chunks_moved, nchunks, new_id;
bool first_switch;
- WT_RET(__wt_lsm_tree_writelock(session, lsm_tree));
+ __wt_lsm_tree_writelock(session, lsm_tree);
nchunks = lsm_tree->nchunks;
@@ -755,10 +747,10 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
WT_ERR(__wt_realloc_def(session, &lsm_tree->chunk_alloc,
nchunks + 1, &lsm_tree->chunk));
- WT_ERR(__wt_verbose(session, WT_VERB_LSM,
+ __wt_verbose(session, WT_VERB_LSM,
"Tree %s switch to: %" PRIu32 ", checkpoint throttle %" PRIu64
", merge throttle %" PRIu64, lsm_tree->name,
- new_id, lsm_tree->ckpt_throttle, lsm_tree->merge_throttle));
+ new_id, lsm_tree->ckpt_throttle, lsm_tree->merge_throttle);
WT_ERR(__wt_calloc_one(session, &chunk));
chunk->id = new_id;
@@ -771,6 +763,11 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
++lsm_tree->dsk_gen;
lsm_tree->modified = true;
+ /*
+ * Ensure the updated disk generation is visible to all other threads
+ * before updating the transaction ID.
+ */
+ WT_FULL_BARRIER();
/*
* Set the switch transaction in the previous chunk unless this is
@@ -806,7 +803,7 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
session, WT_LSM_WORK_DROP, 0, lsm_tree));
}
-err: WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree));
+err: __wt_lsm_tree_writeunlock(session, lsm_tree);
/*
* Errors that happen during a tree switch leave the tree in a state
* where we can't make progress. Error out of WiredTiger.
@@ -869,7 +866,7 @@ __wt_lsm_tree_drop(
WT_ASSERT(session, !lsm_tree->active);
/* Prevent any new opens. */
- WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree));
+ __wt_lsm_tree_writelock(session, lsm_tree);
locked = true;
/* Drop the chunks. */
@@ -892,12 +889,12 @@ __wt_lsm_tree_drop(
}
locked = false;
- WT_ERR(__wt_lsm_tree_writeunlock(session, lsm_tree));
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
ret = __wt_metadata_remove(session, name);
WT_ASSERT(session, !lsm_tree->active);
err: if (locked)
- WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree));
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
WT_WITH_HANDLE_LIST_LOCK(session,
tret = __lsm_tree_discard(session, lsm_tree, false));
WT_TRET(tret);
@@ -929,7 +926,7 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session,
WT_RET(ret);
/* Prevent any new opens. */
- WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree));
+ __wt_lsm_tree_writelock(session, lsm_tree);
locked = true;
/* Set the new name. */
@@ -960,11 +957,11 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session,
WT_ERR(__wt_lsm_meta_write(session, lsm_tree));
locked = false;
- WT_ERR(__wt_lsm_tree_writeunlock(session, lsm_tree));
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
WT_ERR(__wt_metadata_remove(session, olduri));
err: if (locked)
- WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree));
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
__wt_free(session, old);
/*
@@ -1001,7 +998,7 @@ __wt_lsm_tree_truncate(
WT_RET(ret);
/* Prevent any new opens. */
- WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree));
+ __wt_lsm_tree_writelock(session, lsm_tree);
locked = true;
/* Create the new chunk. */
@@ -1016,14 +1013,14 @@ __wt_lsm_tree_truncate(
WT_ERR(__wt_lsm_meta_write(session, lsm_tree));
locked = false;
- WT_ERR(__wt_lsm_tree_writeunlock(session, lsm_tree));
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
__wt_lsm_tree_release(session, lsm_tree);
err: if (locked)
- WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree));
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
if (ret != 0) {
if (chunk != NULL) {
- (void)__wt_schema_drop(session, chunk->uri, NULL);
+ WT_TRET(__wt_schema_drop(session, chunk->uri, NULL));
__wt_free(session, chunk);
}
/*
@@ -1043,66 +1040,56 @@ err: if (locked)
* __wt_lsm_tree_readlock --
* Acquire a shared lock on an LSM tree.
*/
-int
+void
__wt_lsm_tree_readlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- WT_RET(__wt_readlock(session, lsm_tree->rwlock));
+ __wt_readlock(session, lsm_tree->rwlock);
/*
* Diagnostic: avoid deadlocks with the schema lock: if we need it for
* an operation, we should already have it.
*/
F_SET(session, WT_SESSION_NO_EVICTION | WT_SESSION_NO_SCHEMA_LOCK);
- return (0);
}
/*
* __wt_lsm_tree_readunlock --
* Release a shared lock on an LSM tree.
*/
-int
+void
__wt_lsm_tree_readunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- WT_DECL_RET;
-
F_CLR(session, WT_SESSION_NO_EVICTION | WT_SESSION_NO_SCHEMA_LOCK);
- if ((ret = __wt_readunlock(session, lsm_tree->rwlock)) != 0)
- WT_PANIC_RET(session, ret, "Unlocking an LSM tree");
- return (0);
+ __wt_readunlock(session, lsm_tree->rwlock);
}
/*
* __wt_lsm_tree_writelock --
* Acquire an exclusive lock on an LSM tree.
*/
-int
+void
__wt_lsm_tree_writelock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- WT_RET(__wt_writelock(session, lsm_tree->rwlock));
+ __wt_writelock(session, lsm_tree->rwlock);
/*
* Diagnostic: avoid deadlocks with the schema lock: if we need it for
* an operation, we should already have it.
*/
F_SET(session, WT_SESSION_NO_EVICTION | WT_SESSION_NO_SCHEMA_LOCK);
- return (0);
}
/*
* __wt_lsm_tree_writeunlock --
* Release an exclusive lock on an LSM tree.
*/
-int
+void
__wt_lsm_tree_writeunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- WT_DECL_RET;
-
F_CLR(session, WT_SESSION_NO_EVICTION | WT_SESSION_NO_SCHEMA_LOCK);
- if ((ret = __wt_writeunlock(session, lsm_tree->rwlock)) != 0)
- WT_PANIC_RET(session, ret, "Unlocking an LSM tree");
- return (0);
+ __wt_writeunlock(session, lsm_tree->rwlock);
}
/*
@@ -1168,7 +1155,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
*/
/* Lock the tree: single-thread compaction. */
- WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree));
+ __wt_lsm_tree_writelock(session, lsm_tree);
locked = true;
/* Clear any merge throttle: compact throws out that calculation. */
@@ -1187,8 +1174,15 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
*/
if (lsm_tree->nchunks > 0 &&
(chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL) {
- if (chunk->switch_txn == WT_TXN_NONE)
+ if (chunk->switch_txn == WT_TXN_NONE) {
+ /*
+ * Make sure any cursors open on the tree see the
+ * new switch generation before updating.
+ */
+ ++lsm_tree->dsk_gen;
+ WT_FULL_BARRIER();
chunk->switch_txn = __wt_txn_id_alloc(session, false);
+ }
/*
* If we have a chunk, we want to look for it to be on-disk.
* So we need to add a reference to keep it available.
@@ -1198,13 +1192,13 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
}
locked = false;
- WT_ERR(__wt_lsm_tree_writeunlock(session, lsm_tree));
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
if (chunk != NULL) {
- WT_ERR(__wt_verbose(session, WT_VERB_LSM,
+ __wt_verbose(session, WT_VERB_LSM,
"Compact force flush %s flags 0x%" PRIx32
" chunk %" PRIu32 " flags 0x%" PRIx32,
- name, lsm_tree->flags, chunk->id, chunk->flags));
+ name, lsm_tree->flags, chunk->id, chunk->flags);
flushing = true;
/*
* Make sure the in-memory chunk gets flushed do not push a
@@ -1221,8 +1215,8 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
compacting = true;
progress = lsm_tree->merge_progressing;
F_SET(lsm_tree, WT_LSM_TREE_COMPACTING);
- WT_ERR(__wt_verbose(session, WT_VERB_LSM,
- "COMPACT: Start compacting %s", lsm_tree->name));
+ __wt_verbose(session, WT_VERB_LSM,
+ "COMPACT: Start compacting %s", lsm_tree->name);
}
/* Wait for the work unit queues to drain. */
@@ -1235,21 +1229,21 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
if (flushing) {
WT_ASSERT(session, chunk != NULL);
if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
- WT_ERR(__wt_verbose(session,
+ __wt_verbose(session,
WT_VERB_LSM,
"Compact flush done %s chunk %" PRIu32 ". "
"Start compacting progress %" PRIu64,
name, chunk->id,
- lsm_tree->merge_progressing));
+ lsm_tree->merge_progressing);
(void)__wt_atomic_sub32(&chunk->refcnt, 1);
flushing = ref = false;
compacting = true;
F_SET(lsm_tree, WT_LSM_TREE_COMPACTING);
progress = lsm_tree->merge_progressing;
} else {
- WT_ERR(__wt_verbose(session, WT_VERB_LSM,
+ __wt_verbose(session, WT_VERB_LSM,
"Compact flush retry %s chunk %" PRIu32,
- name, chunk->id));
+ name, chunk->id);
WT_ERR(__wt_lsm_manager_push_entry(session,
WT_LSM_WORK_FLUSH, WT_LSM_WORK_FORCE,
lsm_tree));
@@ -1301,10 +1295,10 @@ err:
lsm_tree->merge_aggressiveness = 0;
}
if (locked)
- WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree));
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
- WT_TRET(__wt_verbose(session, WT_VERB_LSM,
- "Compact %s complete, return %d", name, ret));
+ __wt_verbose(session, WT_VERB_LSM,
+ "Compact %s complete, return %d", name, ret);
__wt_lsm_tree_release(session, lsm_tree);
return (ret);
@@ -1338,9 +1332,10 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session,
* with merges so that merging doesn't change the chunk
* array out from underneath us.
*/
- WT_ERR(exclusive ?
- __wt_lsm_tree_writelock(session, lsm_tree) :
- __wt_lsm_tree_readlock(session, lsm_tree));
+ if (exclusive)
+ __wt_lsm_tree_writelock(session, lsm_tree);
+ else
+ __wt_lsm_tree_readlock(session, lsm_tree);
locked = true;
for (i = 0; i < lsm_tree->nchunks; i++) {
chunk = lsm_tree->chunk[i];
@@ -1359,10 +1354,12 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session,
WT_ERR(__wt_schema_worker(session, chunk->bloom_uri,
file_func, name_func, cfg, open_flags));
}
-err: if (locked)
- WT_TRET(exclusive ?
- __wt_lsm_tree_writeunlock(session, lsm_tree) :
- __wt_lsm_tree_readunlock(session, lsm_tree));
+err: if (locked) {
+ if (exclusive)
+ __wt_lsm_tree_writeunlock(session, lsm_tree);
+ else
+ __wt_lsm_tree_readunlock(session, lsm_tree);
+ }
__wt_lsm_tree_release(session, lsm_tree);
return (ret);
}