diff options
author | Alex Gorrod <alexander.gorrod@mongodb.com> | 2017-08-18 11:59:27 +1000 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2017-08-18 11:59:27 +1000 |
commit | 3ab33869ab17a87797c8fee66d0a54b58c2fabd5 (patch) | |
tree | af077514b4a7f4582f7d9d4d33e7799112c20ed9 /src | |
parent | 0a56c3021b78a84b61a534eafcca367b08bcd74c (diff) | |
download | mongo-3ab33869ab17a87797c8fee66d0a54b58c2fabd5.tar.gz |
WT-3365 Fix LSM tree visibility rules when timestamps are used. (#3592)
Diffstat (limited to 'src')
-rw-r--r-- | src/include/extern.h | 1 |
-rw-r--r-- | src/include/lsm.h | 14 |
-rw-r--r-- | src/lsm/lsm_cursor.c | 3 |
-rw-r--r-- | src/lsm/lsm_tree.c | 4 |
-rw-r--r-- | src/lsm/lsm_work_unit.c | 61 |
5 files changed, 74 insertions, 9 deletions
diff --git a/src/include/extern.h b/src/include/extern.h index 6c1e50c3d4c..58de2f98030 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -478,6 +478,7 @@ extern int __wt_lsm_tree_worker(WT_SESSION_IMPL *session, const char *uri, int ( extern int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool force, WT_LSM_CHUNK **chunkp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_lsm_work_switch( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, bool *ran) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_lsm_chunk_visible_all( WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk); extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/include/lsm.h b/src/include/lsm.h index df7d6c8d5ca..397f17400de 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -97,6 +97,11 @@ struct __wt_lsm_chunk { * out, or by compact to get the most * recent chunk flushed. */ + WT_DECL_TIMESTAMP(switch_timestamp)/* + * The timestamp used to decide when + * updates need to detect conflicts. 
+ */ + WT_SPINLOCK timestamp_spinlock; uint32_t id; /* ID used to generate URIs */ uint32_t generation; /* Merge generation */ @@ -107,10 +112,11 @@ struct __wt_lsm_chunk { int8_t evicted; /* 1/0: in-memory chunk was evicted */ uint8_t flushing; /* 1/0: chunk flush in progress */ -#define WT_LSM_CHUNK_BLOOM 0x01 -#define WT_LSM_CHUNK_MERGING 0x02 -#define WT_LSM_CHUNK_ONDISK 0x04 -#define WT_LSM_CHUNK_STABLE 0x08 +#define WT_LSM_CHUNK_BLOOM 0x01 +#define WT_LSM_CHUNK_HAS_TIMESTAMP 0x02 +#define WT_LSM_CHUNK_MERGING 0x04 +#define WT_LSM_CHUNK_ONDISK 0x08 +#define WT_LSM_CHUNK_STABLE 0x10 uint32_t flags; }; diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 02f2f4952ed..39656c17ee0 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -538,8 +538,7 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) { chunk = lsm_tree->chunk[ngood - 1]; clsm->chunks[ngood - 1]->switch_txn = chunk->switch_txn; - if (__wt_txn_visible_all( - session, chunk->switch_txn, NULL)) + if (__wt_lsm_chunk_visible_all(session, chunk)) break; } } else { diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 33d9e472df6..e6eccf96467 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -35,6 +35,7 @@ __lsm_tree_discard_state(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if ((chunk = lsm_tree->chunk[i]) == NULL) continue; + __wt_spin_destroy(session, &chunk->timestamp_spinlock); __wt_free(session, chunk->bloom_uri); __wt_free(session, chunk->uri); __wt_free(session, chunk); @@ -44,6 +45,7 @@ __lsm_tree_discard_state(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) chunk = lsm_tree->old_chunks[i]; WT_ASSERT(session, chunk != NULL); + __wt_spin_destroy(session, &chunk->timestamp_spinlock); __wt_free(session, chunk->bloom_uri); __wt_free(session, chunk->uri); __wt_free(session, chunk); @@ -280,6 +282,8 @@ __wt_lsm_tree_setup_chunk( WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)); __wt_epoch(session, &chunk->create_time); + __wt_spin_init(session, + 
&chunk->timestamp_spinlock, "LSM chunk timestamp"); WT_RET(__wt_lsm_tree_chunk_name( session, lsm_tree, chunk->id, &chunk->uri)); diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index 2f21e8acdc3..816eafebe99 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -256,6 +256,63 @@ err: } /* + * __wt_lsm_chunk_visible_all -- + * Setup a timestamp and check visibility for a chunk, can be called + * from multiple threads in parallel + */ +bool +__wt_lsm_chunk_visible_all( + WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk) +{ + /* Once a chunk has been flushed it's contents must be visible */ + if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK | WT_LSM_CHUNK_STABLE)) + return (true); + + if (chunk->switch_txn == WT_TXN_NONE || + !__wt_txn_visible_all(session, chunk->switch_txn, NULL)) + return (false); + +#ifdef HAVE_TIMESTAMPS + { + WT_TXN_GLOBAL *txn_global; + + txn_global = &S2C(session)->txn_global; + + /* + * Once all transactions with updates in the chunk are visible all + * timestamps associated with those updates are assigned so setup a + * timestamp for visibility checking. + */ + if (txn_global->has_commit_timestamp || + txn_global->has_pinned_timestamp) { + if (!F_ISSET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP)) { + __wt_spin_lock(session, &chunk->timestamp_spinlock); + /* Set the timestamp if we won the race */ + if (!F_ISSET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP)) { + __wt_readlock(session, &txn_global->rwlock); + __wt_timestamp_set(&chunk->switch_timestamp, + &txn_global->commit_timestamp); + __wt_readunlock(session, &txn_global->rwlock); + F_SET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP); + } + __wt_spin_unlock(session, &chunk->timestamp_spinlock); + } + if (!__wt_txn_visible_all( + session, chunk->switch_txn, &chunk->switch_timestamp)) + return (false); + } else + /* + * If timestamps aren't in use when the chunk becomes visible + * use the zero timestamp for visibility checks. Otherwise + * there could be confusion if timestamps start being used. 
+ */ + F_SET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP); + } +#endif + return (true); +} + +/* * __wt_lsm_checkpoint_chunk -- * Flush a single LSM chunk to disk. */ @@ -295,14 +352,12 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, /* Stop if a running transaction needs the chunk. */ WT_RET(__wt_txn_update_oldest( session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT)); - if (chunk->switch_txn == WT_TXN_NONE || - !__wt_txn_visible_all(session, chunk->switch_txn, NULL)) { + if (!__wt_lsm_chunk_visible_all(session, chunk)) { __wt_verbose(session, WT_VERB_LSM, "LSM worker %s: running transaction, return", chunk->uri); return (0); } - if (!__wt_atomic_cas8(&chunk->flushing, 0, 1)) return (0); flush_set = true; |