diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/lsm/lsm_tree.c')
-rw-r--r-- | src/third_party/wiredtiger/src/lsm/lsm_tree.c | 135 |
1 files changed, 66 insertions, 69 deletions
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c index da106ae2089..db9fd581110 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c @@ -55,7 +55,7 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final) __wt_free(session, lsm_tree->bloom_config); __wt_free(session, lsm_tree->file_config); - WT_TRET(__wt_rwlock_destroy(session, &lsm_tree->rwlock)); + __wt_rwlock_destroy(session, &lsm_tree->rwlock); for (i = 0; i < lsm_tree->nchunks; i++) { if ((chunk = lsm_tree->chunk[i]) == NULL) @@ -85,10 +85,9 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final) * __lsm_tree_close -- * Close an LSM tree structure. */ -static int +static void __lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final) { - WT_DECL_RET; int i; /* @@ -97,7 +96,7 @@ __lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final) * the tree queue state. */ lsm_tree->active = false; - WT_READ_BARRIER(); + WT_FULL_BARRIER(); /* * Wait for all LSM operations to drain. If WiredTiger is shutting @@ -120,17 +119,11 @@ __lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final) * other schema level operations will return EBUSY, even though * we're dropping the schema lock here. */ - if (i % WT_THOUSAND == 0) { - WT_WITHOUT_LOCKS(session, ret = + if (i % WT_THOUSAND == 0) + WT_WITHOUT_LOCKS(session, __wt_lsm_manager_clear_tree(session, lsm_tree)); - WT_ERR(ret); - } __wt_yield(); } - return (0); - -err: lsm_tree->active = true; - return (ret); } /* @@ -154,7 +147,7 @@ __wt_lsm_tree_close_all(WT_SESSION_IMPL *session) * is unconditional. */ (void)__wt_atomic_add32(&lsm_tree->refcnt, 1); - WT_TRET(__lsm_tree_close(session, lsm_tree, true)); + __lsm_tree_close(session, lsm_tree, true); WT_TRET(__lsm_tree_discard(session, lsm_tree, true)); } @@ -390,9 +383,8 @@ __lsm_tree_find(WT_SESSION_IMPL *session, * spurious busy returns. */ (void)__wt_atomic_add32(&lsm_tree->refcnt, 1); - if (__lsm_tree_close( - session, lsm_tree, false) != 0 || - lsm_tree->refcnt != 1) { + __lsm_tree_close(session, lsm_tree, false); + if (lsm_tree->refcnt != 1) { __wt_lsm_tree_release( session, lsm_tree); return (EBUSY); @@ -730,7 +722,7 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) uint32_t chunks_moved, nchunks, new_id; bool first_switch; - WT_RET(__wt_lsm_tree_writelock(session, lsm_tree)); + __wt_lsm_tree_writelock(session, lsm_tree); nchunks = lsm_tree->nchunks; @@ -755,10 +747,10 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_ERR(__wt_realloc_def(session, &lsm_tree->chunk_alloc, nchunks + 1, &lsm_tree->chunk)); - WT_ERR(__wt_verbose(session, WT_VERB_LSM, + __wt_verbose(session, WT_VERB_LSM, "Tree %s switch to: %" PRIu32 ", checkpoint throttle %" PRIu64 ", merge throttle %" PRIu64, lsm_tree->name, - new_id, lsm_tree->ckpt_throttle, lsm_tree->merge_throttle)); + new_id, lsm_tree->ckpt_throttle, lsm_tree->merge_throttle); WT_ERR(__wt_calloc_one(session, &chunk)); chunk->id = new_id; @@ -771,6 +763,11 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) ++lsm_tree->dsk_gen; lsm_tree->modified = true; + /* + * Ensure the updated disk generation is visible to all other threads + * before updating the transaction ID. + */ + WT_FULL_BARRIER(); /* * Set the switch transaction in the previous chunk unless this is @@ -806,7 +803,7 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) session, WT_LSM_WORK_DROP, 0, lsm_tree)); } -err: WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree)); +err: __wt_lsm_tree_writeunlock(session, lsm_tree); /* * Errors that happen during a tree switch leave the tree in a state * where we can't make progress. Error out of WiredTiger. @@ -869,7 +866,7 @@ __wt_lsm_tree_drop( WT_ASSERT(session, !lsm_tree->active); /* Prevent any new opens. */ - WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree)); + __wt_lsm_tree_writelock(session, lsm_tree); locked = true; /* Drop the chunks. */ @@ -892,12 +889,12 @@ __wt_lsm_tree_drop( } locked = false; - WT_ERR(__wt_lsm_tree_writeunlock(session, lsm_tree)); + __wt_lsm_tree_writeunlock(session, lsm_tree); ret = __wt_metadata_remove(session, name); WT_ASSERT(session, !lsm_tree->active); err: if (locked) - WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree)); + __wt_lsm_tree_writeunlock(session, lsm_tree); WT_WITH_HANDLE_LIST_LOCK(session, tret = __lsm_tree_discard(session, lsm_tree, false)); WT_TRET(tret); @@ -929,7 +926,7 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session, WT_RET(ret); /* Prevent any new opens. */ - WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree)); + __wt_lsm_tree_writelock(session, lsm_tree); locked = true; /* Set the new name. */ @@ -960,11 +957,11 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session, WT_ERR(__wt_lsm_meta_write(session, lsm_tree)); locked = false; - WT_ERR(__wt_lsm_tree_writeunlock(session, lsm_tree)); + __wt_lsm_tree_writeunlock(session, lsm_tree); WT_ERR(__wt_metadata_remove(session, olduri)); err: if (locked) - WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree)); + __wt_lsm_tree_writeunlock(session, lsm_tree); __wt_free(session, old); /* @@ -1001,7 +998,7 @@ __wt_lsm_tree_truncate( WT_RET(ret); /* Prevent any new opens. */ - WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree)); + __wt_lsm_tree_writelock(session, lsm_tree); locked = true; /* Create the new chunk. */ @@ -1016,14 +1013,14 @@ __wt_lsm_tree_truncate( WT_ERR(__wt_lsm_meta_write(session, lsm_tree)); locked = false; - WT_ERR(__wt_lsm_tree_writeunlock(session, lsm_tree)); + __wt_lsm_tree_writeunlock(session, lsm_tree); __wt_lsm_tree_release(session, lsm_tree); err: if (locked) - WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree)); + __wt_lsm_tree_writeunlock(session, lsm_tree); if (ret != 0) { if (chunk != NULL) { - (void)__wt_schema_drop(session, chunk->uri, NULL); + WT_TRET(__wt_schema_drop(session, chunk->uri, NULL)); __wt_free(session, chunk); } /* @@ -1043,66 +1040,56 @@ err: if (locked) * __wt_lsm_tree_readlock -- * Acquire a shared lock on an LSM tree. */ -int +void __wt_lsm_tree_readlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) { - WT_RET(__wt_readlock(session, lsm_tree->rwlock)); + __wt_readlock(session, lsm_tree->rwlock); /* * Diagnostic: avoid deadlocks with the schema lock: if we need it for * an operation, we should already have it. */ F_SET(session, WT_SESSION_NO_EVICTION | WT_SESSION_NO_SCHEMA_LOCK); - return (0); } /* * __wt_lsm_tree_readunlock -- * Release a shared lock on an LSM tree. */ -int +void __wt_lsm_tree_readunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) { - WT_DECL_RET; - F_CLR(session, WT_SESSION_NO_EVICTION | WT_SESSION_NO_SCHEMA_LOCK); - if ((ret = __wt_readunlock(session, lsm_tree->rwlock)) != 0) - WT_PANIC_RET(session, ret, "Unlocking an LSM tree"); - return (0); + __wt_readunlock(session, lsm_tree->rwlock); } /* * __wt_lsm_tree_writelock -- * Acquire an exclusive lock on an LSM tree. */ -int +void __wt_lsm_tree_writelock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) { - WT_RET(__wt_writelock(session, lsm_tree->rwlock)); + __wt_writelock(session, lsm_tree->rwlock); /* * Diagnostic: avoid deadlocks with the schema lock: if we need it for * an operation, we should already have it. */ F_SET(session, WT_SESSION_NO_EVICTION | WT_SESSION_NO_SCHEMA_LOCK); - return (0); } /* * __wt_lsm_tree_writeunlock -- * Release an exclusive lock on an LSM tree. */ -int +void __wt_lsm_tree_writeunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) { - WT_DECL_RET; - F_CLR(session, WT_SESSION_NO_EVICTION | WT_SESSION_NO_SCHEMA_LOCK); - if ((ret = __wt_writeunlock(session, lsm_tree->rwlock)) != 0) - WT_PANIC_RET(session, ret, "Unlocking an LSM tree"); - return (0); + __wt_writeunlock(session, lsm_tree->rwlock); } /* @@ -1168,7 +1155,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) */ /* Lock the tree: single-thread compaction. */ - WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree)); + __wt_lsm_tree_writelock(session, lsm_tree); locked = true; /* Clear any merge throttle: compact throws out that calculation. */ @@ -1187,8 +1174,15 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) */ if (lsm_tree->nchunks > 0 && (chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL) { - if (chunk->switch_txn == WT_TXN_NONE) + if (chunk->switch_txn == WT_TXN_NONE) { + /* + * Make sure any cursors open on the tree see the + * new switch generation before updating. + */ + ++lsm_tree->dsk_gen; + WT_FULL_BARRIER(); chunk->switch_txn = __wt_txn_id_alloc(session, false); + } /* * If we have a chunk, we want to look for it to be on-disk. * So we need to add a reference to keep it available. @@ -1198,13 +1192,13 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) } locked = false; - WT_ERR(__wt_lsm_tree_writeunlock(session, lsm_tree)); + __wt_lsm_tree_writeunlock(session, lsm_tree); if (chunk != NULL) { - WT_ERR(__wt_verbose(session, WT_VERB_LSM, + __wt_verbose(session, WT_VERB_LSM, "Compact force flush %s flags 0x%" PRIx32 " chunk %" PRIu32 " flags 0x%" PRIx32, - name, lsm_tree->flags, chunk->id, chunk->flags)); + name, lsm_tree->flags, chunk->id, chunk->flags); flushing = true; /* * Make sure the in-memory chunk gets flushed do not push a @@ -1221,8 +1215,8 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) compacting = true; progress = lsm_tree->merge_progressing; F_SET(lsm_tree, WT_LSM_TREE_COMPACTING); - WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "COMPACT: Start compacting %s", lsm_tree->name)); + __wt_verbose(session, WT_VERB_LSM, + "COMPACT: Start compacting %s", lsm_tree->name); } /* Wait for the work unit queues to drain. */ @@ -1235,21 +1229,21 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) if (flushing) { WT_ASSERT(session, chunk != NULL); if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) { - WT_ERR(__wt_verbose(session, + __wt_verbose(session, WT_VERB_LSM, "Compact flush done %s chunk %" PRIu32 ". " "Start compacting progress %" PRIu64, name, chunk->id, - lsm_tree->merge_progressing)); + lsm_tree->merge_progressing); (void)__wt_atomic_sub32(&chunk->refcnt, 1); flushing = ref = false; compacting = true; F_SET(lsm_tree, WT_LSM_TREE_COMPACTING); progress = lsm_tree->merge_progressing; } else { - WT_ERR(__wt_verbose(session, WT_VERB_LSM, + __wt_verbose(session, WT_VERB_LSM, "Compact flush retry %s chunk %" PRIu32, - name, chunk->id)); + name, chunk->id); WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_FLUSH, WT_LSM_WORK_FORCE, lsm_tree)); @@ -1301,10 +1295,10 @@ err: lsm_tree->merge_aggressiveness = 0; } if (locked) - WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree)); + __wt_lsm_tree_writeunlock(session, lsm_tree); - WT_TRET(__wt_verbose(session, WT_VERB_LSM, - "Compact %s complete, return %d", name, ret)); + __wt_verbose(session, WT_VERB_LSM, + "Compact %s complete, return %d", name, ret); __wt_lsm_tree_release(session, lsm_tree); return (ret); @@ -1338,9 +1332,10 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session, * with merges so that merging doesn't change the chunk * array out from underneath us. */ - WT_ERR(exclusive ? - __wt_lsm_tree_writelock(session, lsm_tree) : - __wt_lsm_tree_readlock(session, lsm_tree)); + if (exclusive) + __wt_lsm_tree_writelock(session, lsm_tree); + else + __wt_lsm_tree_readlock(session, lsm_tree); locked = true; for (i = 0; i < lsm_tree->nchunks; i++) { chunk = lsm_tree->chunk[i]; @@ -1359,10 +1354,12 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session, WT_ERR(__wt_schema_worker(session, chunk->bloom_uri, file_func, name_func, cfg, open_flags)); } -err: if (locked) - WT_TRET(exclusive ? - __wt_lsm_tree_writeunlock(session, lsm_tree) : - __wt_lsm_tree_readunlock(session, lsm_tree)); +err: if (locked) { + if (exclusive) + __wt_lsm_tree_writeunlock(session, lsm_tree); + else + __wt_lsm_tree_readunlock(session, lsm_tree); + } __wt_lsm_tree_release(session, lsm_tree); return (ret); } |