summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ramon Fernandez <ramon@mongodb.com> 2015-11-24 11:35:39 -0500
committer: Ramon Fernandez <ramon@mongodb.com> 2015-11-24 11:36:31 -0500
commit: 327660ff3324dfcb3ea38fc63a4fc7f1cdcb9078 (patch)
tree: 8b235079567e54a013a66210f1f6371ea41af785
parent: 4f4cf16be11d2820974c1d6a0e95c9af3281e2d1 (diff)
download: mongo-327660ff3324dfcb3ea38fc63a4fc7f1cdcb9078.tar.gz
Import wiredtiger-wiredtiger-mongodb-3.2.0-rc3-206-gb65381f.tar.gz from wiredtiger branch mongodb-3.2
ref: 4d72349..b65381f
e90b590 WT-2237 Avoid yields if we race allocating transaction IDs.
0a52a80 WT-2237 Have threads publish unique transaction IDs so that updates always become visible immediately on commit
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c 4
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.h 1
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.i 122
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_tree.c 4
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_track.c 2
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c 4
6 files changed, 72 insertions, 65 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index c025ae62bb3..92ed2b3e559 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -408,7 +408,7 @@ __split_ref_move_final(
* until all threads are known to have exited the index of the page that
* previously "owned" the WT_REF. Set that field to a safe value.
*/
- txn_new_id = __wt_txn_new_id(session);
+ txn_new_id = __wt_txn_id_alloc(session, false);
/*
* The WT_REF structures moved to newly allocated child pages reference
@@ -1822,7 +1822,7 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
* threads will not try to forcibly evict the page again until
* all concurrent transactions commit.
*/
- page->modify->inmem_split_txn = __wt_txn_new_id(session);
+ page->modify->inmem_split_txn = __wt_txn_id_alloc(session, false);
/*
* Update the page accounting.
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index f5a2c1c7dda..b199252a1dc 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -70,6 +70,7 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_txn_state {
};
struct __wt_txn_global {
+ uint64_t alloc; /* Transaction ID to allocate. */
volatile uint64_t current; /* Current transaction ID. */
/* The oldest running transaction ID (may race). */
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index e49e3d1257b..ef9d5a273cf 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -289,23 +289,6 @@ __wt_txn_autocommit_check(WT_SESSION_IMPL *session)
}
/*
- * __wt_txn_new_id --
- * Allocate a new transaction ID.
- */
-static inline uint64_t
-__wt_txn_new_id(WT_SESSION_IMPL *session)
-{
- /*
- * We want the global value to lead the allocated values, so that any
- * allocated transaction ID eventually becomes globally visible. When
- * there are no transactions running, the oldest_id will reach the
- * global current ID, so we want post-increment semantics. Our atomic
- * add primitive does pre-increment, so adjust the result here.
- */
- return (__wt_atomic_addv64(&S2C(session)->txn_global.current, 1) - 1);
-}
-
-/*
* __wt_txn_idle_cache_check --
* If there is no transaction active in this thread and we haven't checked
* if the cache is full, do it now. If we have to block for eviction,
@@ -332,6 +315,59 @@ __wt_txn_idle_cache_check(WT_SESSION_IMPL *session)
}
/*
+ * __wt_txn_id_alloc --
+ * Allocate a new transaction ID.
+ */
+static inline uint64_t
+__wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish)
+{
+ WT_TXN_GLOBAL *txn_global;
+ uint64_t id;
+ u_int i;
+
+ txn_global = &S2C(session)->txn_global;
+
+ /*
+ * Allocating transaction IDs involves several steps.
+ *
+ * Firstly, we do an atomic increment to allocate a unique ID. The
+ * field we increment is not used anywhere else.
+ *
+ * Then we optionally publish the allocated ID into the global
+ * transaction table. It is critical that this becomes visible before
+ * the global current value moves past our ID, or some concurrent
+ * reader could get a snapshot that makes our changes visible before we
+ * commit.
+ *
+ * Lastly, we spin to update the current ID. This is the only place
+ * that the current ID is updated, and it is in the same cache line as
+ * the field we allocate from, so we should usually succeed on the
+ * first try.
+ *
+ * We want the global value to lead the allocated values, so that any
+ * allocated transaction ID eventually becomes globally visible. When
+ * there are no transactions running, the oldest_id will reach the
+ * global current ID, so we want post-increment semantics. Our atomic
+ * add primitive does pre-increment, so adjust the result here.
+ */
+ id = __wt_atomic_addv64(&S2C(session)->txn_global.alloc, 1) - 1;
+
+ if (publish) {
+ session->txn.id = id;
+ WT_SESSION_TXN_STATE(session)->id = id;
+ }
+
+ for (i = 0; txn_global->current != id; i++)
+ if (i < 100)
+ WT_PAUSE();
+ else
+ __wt_yield();
+
+ WT_PUBLISH(txn_global->current, id + 1);
+ return (id);
+}
+
+/*
* __wt_txn_id_check --
* A transaction is going to do an update, start an auto commit
* transaction if required and allocate a transaction ID.
@@ -339,57 +375,27 @@ __wt_txn_idle_cache_check(WT_SESSION_IMPL *session)
static inline int
__wt_txn_id_check(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *txn_state;
txn = &session->txn;
WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
+ if (F_ISSET(txn, WT_TXN_HAS_ID))
+ return (0);
+
/* If the transaction is idle, check that the cache isn't full. */
WT_RET(__wt_txn_idle_cache_check(session));
- if (!F_ISSET(txn, WT_TXN_HAS_ID)) {
- conn = S2C(session);
- txn_global = &conn->txn_global;
- txn_state = WT_SESSION_TXN_STATE(session);
+ (void)__wt_txn_id_alloc(session, true);
- WT_ASSERT(session, txn_state->id == WT_TXN_NONE);
-
- /*
- * Allocate a transaction ID.
- *
- * We use an atomic compare and swap to ensure that we get a
- * unique ID that is published before the global counter is
- * updated.
- *
- * If two threads race to allocate an ID, only the latest ID
- * will proceed. The winning thread can be sure its snapshot
- * contains all of the earlier active IDs. Threads that race
- * and get an earlier ID may not appear in the snapshot, but
- * they will loop and allocate a new ID before proceeding to
- * make any updates.
- *
- * This potentially wastes transaction IDs when threads race to
- * begin transactions: that is the price we pay to keep this
- * path latch free.
- */
- do {
- txn_state->id = txn->id = txn_global->current;
- } while (!__wt_atomic_casv64(
- &txn_global->current, txn->id, txn->id + 1) ||
- WT_TXNID_LT(txn->id, txn_global->last_running));
-
- /*
- * If we have used 64-bits of transaction IDs, there is nothing
- * more we can do.
- */
- if (txn->id == WT_TXN_ABORTED)
- WT_RET_MSG(session, ENOMEM, "Out of transaction IDs");
- F_SET(txn, WT_TXN_HAS_ID);
- }
+ /*
+ * If we have used 64-bits of transaction IDs, there is nothing
+ * more we can do.
+ */
+ if (txn->id == WT_TXN_ABORTED)
+ WT_RET_MSG(session, ENOMEM, "Out of transaction IDs");
+ F_SET(txn, WT_TXN_HAS_ID);
return (0);
}
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
index 0c3642e70e8..5d819607413 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
@@ -876,7 +876,7 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
*/
if (last_chunk != NULL && last_chunk->switch_txn == WT_TXN_NONE &&
!F_ISSET(last_chunk, WT_LSM_CHUNK_ONDISK))
- last_chunk->switch_txn = __wt_txn_new_id(session);
+ last_chunk->switch_txn = __wt_txn_id_alloc(session, false);
/*
* If a maximum number of chunks are configured, drop the any chunks
@@ -1287,7 +1287,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
if (lsm_tree->nchunks > 0 &&
(chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL) {
if (chunk->switch_txn == WT_TXN_NONE)
- chunk->switch_txn = __wt_txn_new_id(session);
+ chunk->switch_txn = __wt_txn_id_alloc(session, false);
/*
* If we have a chunk, we want to look for it to be on-disk.
* So we need to add a reference to keep it available.
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_track.c b/src/third_party/wiredtiger/src/reconcile/rec_track.c
index 5d8870dc5ae..18ed5c6b551 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_track.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_track.c
@@ -820,7 +820,7 @@ __wt_ovfl_txnc_add(WT_SESSION_IMPL *session, WT_PAGE *page,
txnc->value_offset = WT_PTRDIFF32(p, txnc);
txnc->value_size = WT_STORE_SIZE(value_size);
memcpy(p, value, value_size);
- txnc->current = __wt_txn_new_id(session);
+ txnc->current = __wt_txn_id_alloc(session, false);
__wt_cache_page_inmem_incr(
session, page, WT_OVFL_SIZE(txnc, WT_OVFL_TXNC));
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 2079410a4d1..850e7e83803 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -712,8 +712,8 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
conn = S2C(session);
txn_global = &conn->txn_global;
- txn_global->current = txn_global->last_running =
- txn_global->oldest_id = WT_TXN_FIRST;
+ txn_global->alloc = txn_global->current =
+ txn_global->last_running = txn_global->oldest_id = WT_TXN_FIRST;
WT_RET(__wt_rwlock_alloc(session,
&txn_global->nsnap_rwlock, "named snapshot lock"));