summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alex Gorrod <alexander.gorrod@mongodb.com> 2017-08-18 11:59:27 +1000
committer Michael Cahill <michael.cahill@mongodb.com> 2017-08-18 11:59:27 +1000
commit 3ab33869ab17a87797c8fee66d0a54b58c2fabd5 (patch)
tree af077514b4a7f4582f7d9d4d33e7799112c20ed9
parent 0a56c3021b78a84b61a534eafcca367b08bcd74c (diff)
download mongo-3ab33869ab17a87797c8fee66d0a54b58c2fabd5.tar.gz
WT-3365 Fix LSM tree visibility rules when timestamps are used. (#3592)
-rw-r--r-- src/include/extern.h 1
-rw-r--r-- src/include/lsm.h 14
-rw-r--r-- src/lsm/lsm_cursor.c 3
-rw-r--r-- src/lsm/lsm_tree.c 4
-rw-r--r-- src/lsm/lsm_work_unit.c 61
5 files changed, 74 insertions, 9 deletions
diff --git a/src/include/extern.h b/src/include/extern.h
index 6c1e50c3d4c..58de2f98030 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -478,6 +478,7 @@ extern int __wt_lsm_tree_worker(WT_SESSION_IMPL *session, const char *uri, int (
extern int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool force, WT_LSM_CHUNK **chunkp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_work_switch( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, bool *ran) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_lsm_chunk_visible_all( WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk);
extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/include/lsm.h b/src/include/lsm.h
index df7d6c8d5ca..397f17400de 100644
--- a/src/include/lsm.h
+++ b/src/include/lsm.h
@@ -97,6 +97,11 @@ struct __wt_lsm_chunk {
* out, or by compact to get the most
* recent chunk flushed.
*/
+ WT_DECL_TIMESTAMP(switch_timestamp)/*
+ * The timestamp used to decide when
+ * updates need to detect conflicts.
+ */
+ WT_SPINLOCK timestamp_spinlock;
uint32_t id; /* ID used to generate URIs */
uint32_t generation; /* Merge generation */
@@ -107,10 +112,11 @@ struct __wt_lsm_chunk {
int8_t evicted; /* 1/0: in-memory chunk was evicted */
uint8_t flushing; /* 1/0: chunk flush in progress */
-#define WT_LSM_CHUNK_BLOOM 0x01
-#define WT_LSM_CHUNK_MERGING 0x02
-#define WT_LSM_CHUNK_ONDISK 0x04
-#define WT_LSM_CHUNK_STABLE 0x08
+#define WT_LSM_CHUNK_BLOOM 0x01
+#define WT_LSM_CHUNK_HAS_TIMESTAMP 0x02
+#define WT_LSM_CHUNK_MERGING 0x04
+#define WT_LSM_CHUNK_ONDISK 0x08
+#define WT_LSM_CHUNK_STABLE 0x10
uint32_t flags;
};
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index 02f2f4952ed..39656c17ee0 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -538,8 +538,7 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
chunk = lsm_tree->chunk[ngood - 1];
clsm->chunks[ngood - 1]->switch_txn =
chunk->switch_txn;
- if (__wt_txn_visible_all(
- session, chunk->switch_txn, NULL))
+ if (__wt_lsm_chunk_visible_all(session, chunk))
break;
}
} else {
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index 33d9e472df6..e6eccf96467 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -35,6 +35,7 @@ __lsm_tree_discard_state(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
if ((chunk = lsm_tree->chunk[i]) == NULL)
continue;
+ __wt_spin_destroy(session, &chunk->timestamp_spinlock);
__wt_free(session, chunk->bloom_uri);
__wt_free(session, chunk->uri);
__wt_free(session, chunk);
@@ -44,6 +45,7 @@ __lsm_tree_discard_state(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
chunk = lsm_tree->old_chunks[i];
WT_ASSERT(session, chunk != NULL);
+ __wt_spin_destroy(session, &chunk->timestamp_spinlock);
__wt_free(session, chunk->bloom_uri);
__wt_free(session, chunk->uri);
__wt_free(session, chunk);
@@ -280,6 +282,8 @@ __wt_lsm_tree_setup_chunk(
WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
__wt_epoch(session, &chunk->create_time);
+ __wt_spin_init(session,
+ &chunk->timestamp_spinlock, "LSM chunk timestamp");
WT_RET(__wt_lsm_tree_chunk_name(
session, lsm_tree, chunk->id, &chunk->uri));
diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c
index 2f21e8acdc3..816eafebe99 100644
--- a/src/lsm/lsm_work_unit.c
+++ b/src/lsm/lsm_work_unit.c
@@ -256,6 +256,63 @@ err:
}
/*
+ * __wt_lsm_chunk_visible_all --
+ * Set up the chunk's switch timestamp if needed and check whether the chunk
+ * is visible to all; can be called from multiple threads in parallel.
+ */
+bool
+__wt_lsm_chunk_visible_all(
+ WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk)
+{
+ /* Once a chunk has been flushed, its contents must be visible. */
+ if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK | WT_LSM_CHUNK_STABLE))
+ return (true);
+
+ if (chunk->switch_txn == WT_TXN_NONE ||
+ !__wt_txn_visible_all(session, chunk->switch_txn, NULL))
+ return (false);
+
+#ifdef HAVE_TIMESTAMPS
+ {
+ WT_TXN_GLOBAL *txn_global;
+
+ txn_global = &S2C(session)->txn_global;
+
+ /*
+ * Once all transactions with updates in the chunk are visible, all
+ * timestamps associated with those updates are assigned, so set up a
+ * timestamp for visibility checking.
+ */
+ if (txn_global->has_commit_timestamp ||
+ txn_global->has_pinned_timestamp) {
+ if (!F_ISSET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP)) {
+ __wt_spin_lock(session, &chunk->timestamp_spinlock);
+ /* Re-check under the lock: set the timestamp only if we won the race. */
+ if (!F_ISSET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP)) {
+ __wt_readlock(session, &txn_global->rwlock);
+ __wt_timestamp_set(&chunk->switch_timestamp,
+ &txn_global->commit_timestamp);
+ __wt_readunlock(session, &txn_global->rwlock);
+ F_SET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP);
+ }
+ __wt_spin_unlock(session, &chunk->timestamp_spinlock);
+ }
+ if (!__wt_txn_visible_all(
+ session, chunk->switch_txn, &chunk->switch_timestamp))
+ return (false);
+ } else
+ /*
+ * If timestamps aren't in use when the chunk becomes visible,
+ * use the zero timestamp for visibility checks. Otherwise,
+ * there could be confusion if timestamps start being used.
+ */
+ F_SET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP);
+ }
+#endif
+ return (true);
+}
+
+/*
* __wt_lsm_checkpoint_chunk --
* Flush a single LSM chunk to disk.
*/
@@ -295,14 +352,12 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
/* Stop if a running transaction needs the chunk. */
WT_RET(__wt_txn_update_oldest(
session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
- if (chunk->switch_txn == WT_TXN_NONE ||
- !__wt_txn_visible_all(session, chunk->switch_txn, NULL)) {
+ if (!__wt_lsm_chunk_visible_all(session, chunk)) {
__wt_verbose(session, WT_VERB_LSM,
"LSM worker %s: running transaction, return",
chunk->uri);
return (0);
}
-
if (!__wt_atomic_cas8(&chunk->flushing, 0, 1))
return (0);
flush_set = true;