diff options
author | Alex Gorrod <alexander.gorrod@mongodb.com> | 2015-11-24 16:47:56 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2015-12-02 14:56:01 +1100 |
commit | 16a418b471953ad04973f5b013b1647e940ab769 (patch) | |
tree | 52bd3121205c09494a7453b9a949f62094a9d139 | |
parent | deb2d8109ca59cc9e223fd4f5be19915b949c628 (diff) | |
download | mongo-16a418b471953ad04973f5b013b1647e940ab769.tar.gz |
Merge pull request #2331 from wiredtiger/WT-2237
WT-2237 Have threads publish unique transaction IDs so that updates always become visible immediately on commit.
(cherry picked from commit 0a52a80a39fc47145fb755d792792ae820b266ed)
-rw-r--r-- | src/btree/bt_split.c | 4 |
-rw-r--r-- | src/include/txn.h | 1 |
-rw-r--r-- | src/include/txn.i | 118 |
-rw-r--r-- | src/lsm/lsm_tree.c | 4 |
-rw-r--r-- | src/reconcile/rec_track.c | 2 |
-rw-r--r-- | src/txn/txn.c | 4 |
6 files changed, 68 insertions, 65 deletions
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 6f31ff89aa7..9d8e463feb0 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -502,7 +502,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent) * array, a thread might see a freed WT_REF. Set the eviction * transaction requirement for the newly created internal pages. */ - child->modify->mod_split_txn = __wt_txn_new_id(session); + child->modify->mod_split_txn = __wt_txn_id_alloc(session, false); /* * The newly allocated child's page index references the same @@ -1383,7 +1383,7 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) * threads will not try to forcibly evict the page again until * all concurrent transactions commit. */ - page->modify->inmem_split_txn = __wt_txn_new_id(session); + page->modify->inmem_split_txn = __wt_txn_id_alloc(session, false); /* * Update the page accounting. diff --git a/src/include/txn.h b/src/include/txn.h index 0d5f337fff0..634363b9216 100644 --- a/src/include/txn.h +++ b/src/include/txn.h @@ -34,6 +34,7 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_txn_state { }; struct __wt_txn_global { + uint64_t alloc; /* Transaction ID to allocate. */ volatile uint64_t current; /* Current transaction ID. */ /* The oldest running transaction ID (may race). */ diff --git a/src/include/txn.i b/src/include/txn.i index 73d7f1f0518..3701d81522a 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -280,23 +280,6 @@ __wt_txn_autocommit_check(WT_SESSION_IMPL *session) } /* - * __wt_txn_new_id -- - * Allocate a new transaction ID. - */ -static inline uint64_t -__wt_txn_new_id(WT_SESSION_IMPL *session) -{ - /* - * We want the global value to lead the allocated values, so that any - * allocated transaction ID eventually becomes globally visible. When - * there are no transactions running, the oldest_id will reach the - * global current ID, so we want post-increment semantics. 
Our atomic - * add primitive does pre-increment, so adjust the result here. - */ - return (__wt_atomic_addv64(&S2C(session)->txn_global.current, 1) - 1); -} - -/* * __wt_txn_idle_cache_check -- * If there is no transaction active in this thread and we haven't checked * if the cache is full, do it now. If we have to block for eviction, @@ -323,6 +306,55 @@ __wt_txn_idle_cache_check(WT_SESSION_IMPL *session) } /* + * __wt_txn_id_alloc -- + * Allocate a new transaction ID. + */ +static inline uint64_t +__wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish) +{ + WT_TXN_GLOBAL *txn_global; + uint64_t id; + + txn_global = &S2C(session)->txn_global; + + /* + * Allocating transaction IDs involves several steps. + * + * Firstly, we do an atomic increment to allocate a unique ID. The + * field we increment is not used anywhere else. + * + * Then we optionally publish the allocated ID into the global + * transaction table. It is critical that this becomes visible before + * the global current value moves past our ID, or some concurrent + * reader could get a snapshot that makes our changes visible before we + * commit. + * + * Lastly, we spin to update the current ID. This is the only place + * that the current ID is updated, and it is in the same cache line as + * the field we allocate from, so we should usually succeed on the + * first try. + * + * We want the global value to lead the allocated values, so that any + * allocated transaction ID eventually becomes globally visible. When + * there are no transactions running, the oldest_id will reach the + * global current ID, so we want post-increment semantics. Our atomic + * add primitive does pre-increment, so adjust the result here. 
+ */ + id = __wt_atomic_addv64(&S2C(session)->txn_global.alloc, 1) - 1; + + if (publish) { + session->txn.id = id; + WT_PUBLISH(WT_SESSION_TXN_STATE(session)->id, id); + } + + while (txn_global->current != id || + !__wt_atomic_casv64(&txn_global->current, id, id + 1)) + __wt_yield(); + + return (id); +} + +/* * __wt_txn_id_check -- * A transaction is going to do an update, start an auto commit * transaction if required and allocate a transaction ID. @@ -330,57 +362,27 @@ __wt_txn_idle_cache_check(WT_SESSION_IMPL *session) static inline int __wt_txn_id_check(WT_SESSION_IMPL *session) { - WT_CONNECTION_IMPL *conn; WT_TXN *txn; - WT_TXN_GLOBAL *txn_global; - WT_TXN_STATE *txn_state; txn = &session->txn; WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING)); + if (F_ISSET(txn, WT_TXN_HAS_ID)) + return (0); + /* If the transaction is idle, check that the cache isn't full. */ WT_RET(__wt_txn_idle_cache_check(session)); - if (!F_ISSET(txn, WT_TXN_HAS_ID)) { - conn = S2C(session); - txn_global = &conn->txn_global; - txn_state = WT_SESSION_TXN_STATE(session); + (void)__wt_txn_id_alloc(session, true); - WT_ASSERT(session, txn_state->id == WT_TXN_NONE); - - /* - * Allocate a transaction ID. - * - * We use an atomic compare and swap to ensure that we get a - * unique ID that is published before the global counter is - * updated. - * - * If two threads race to allocate an ID, only the latest ID - * will proceed. The winning thread can be sure its snapshot - * contains all of the earlier active IDs. Threads that race - * and get an earlier ID may not appear in the snapshot, but - * they will loop and allocate a new ID before proceeding to - * make any updates. - * - * This potentially wastes transaction IDs when threads race to - * begin transactions: that is the price we pay to keep this - * path latch free. 
- */ - do { - txn_state->id = txn->id = txn_global->current; - } while (!__wt_atomic_casv64( - &txn_global->current, txn->id, txn->id + 1) || - WT_TXNID_LT(txn->id, txn_global->last_running)); - - /* - * If we have used 64-bits of transaction IDs, there is nothing - * more we can do. - */ - if (txn->id == WT_TXN_ABORTED) - WT_RET_MSG(session, ENOMEM, "Out of transaction IDs"); - F_SET(txn, WT_TXN_HAS_ID); - } + /* + * If we have used 64-bits of transaction IDs, there is nothing + * more we can do. + */ + if (txn->id == WT_TXN_ABORTED) + WT_RET_MSG(session, ENOMEM, "Out of transaction IDs"); + F_SET(txn, WT_TXN_HAS_ID); return (0); } diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 0680d98bdde..d3979da0da1 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -858,7 +858,7 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) */ if (last_chunk != NULL && last_chunk->switch_txn == WT_TXN_NONE && !F_ISSET(last_chunk, WT_LSM_CHUNK_ONDISK)) - last_chunk->switch_txn = __wt_txn_new_id(session); + last_chunk->switch_txn = __wt_txn_id_alloc(session, false); /* * If a maximum number of chunks are configured, drop the any chunks @@ -1257,7 +1257,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) if (lsm_tree->nchunks > 0 && (chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL) { if (chunk->switch_txn == WT_TXN_NONE) - chunk->switch_txn = __wt_txn_new_id(session); + chunk->switch_txn = __wt_txn_id_alloc(session, false); /* * If we have a chunk, we want to look for it to be on-disk. * So we need to add a reference to keep it available. 
diff --git a/src/reconcile/rec_track.c b/src/reconcile/rec_track.c index 36e85713421..17ad1c5fdc4 100644 --- a/src/reconcile/rec_track.c +++ b/src/reconcile/rec_track.c @@ -820,7 +820,7 @@ __wt_ovfl_txnc_add(WT_SESSION_IMPL *session, WT_PAGE *page, txnc->value_offset = WT_PTRDIFF32(p, txnc); txnc->value_size = WT_STORE_SIZE(value_size); memcpy(p, value, value_size); - txnc->current = __wt_txn_new_id(session); + txnc->current = __wt_txn_id_alloc(session, false); __wt_cache_page_inmem_incr( session, page, WT_OVFL_SIZE(txnc, WT_OVFL_TXNC)); diff --git a/src/txn/txn.c b/src/txn/txn.c index eb2b621f315..f89a8ae1b53 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -594,8 +594,8 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) conn = S2C(session); txn_global = &conn->txn_global; - txn_global->current = txn_global->last_running = - txn_global->oldest_id = WT_TXN_FIRST; + txn_global->alloc = txn_global->current = + txn_global->last_running = txn_global->oldest_id = WT_TXN_FIRST; WT_RET(__wt_calloc_def( session, conn->session_size, &txn_global->states)); |