author    Michael Cahill <michael.cahill@wiredtiger.com>  2013-05-28 15:46:08 +1000
committer Michael Cahill <michael.cahill@wiredtiger.com>  2013-05-28 15:46:08 +1000
commit    86e90668908d40b480c0148cd4a1caf9056f4c7b (patch)
tree      10d0de63da8f42f99270906686039659bbebd53e
parent    2b2ca70375063a536b9e3102571358644c8e5314 (diff)
download  mongo-86e90668908d40b480c0148cd4a1caf9056f4c7b.tar.gz
Avoid a race when updating the shared table of transaction IDs and calculating the oldest active transaction ID.
-rw-r--r--  src/include/txn.h |   2
-rw-r--r--  src/txn/txn.c     | 170
2 files changed, 91 insertions, 81 deletions
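The heart of the change is the latch-free transaction ID allocation and the new scan generation described in the comments added to __wt_txn_refresh below. As a rough illustration only (not the WiredTiger code itself), the ID-allocation loop can be sketched with C11 atomics standing in for the WT_ATOMIC_CAS macro; txnid_t, global_current, alloc_txn_id, TXN_NONE and TXN_ABORTED are invented names for this sketch.

#include <stdatomic.h>
#include <stdint.h>

typedef uint64_t txnid_t;

#define TXN_NONE    ((txnid_t)0)            /* Stand-in for WT_TXN_NONE. */
#define TXN_ABORTED ((txnid_t)UINT64_MAX)   /* Stand-in for WT_TXN_ABORTED. */

static _Atomic txnid_t global_current;      /* Last allocated ID (shared). */

/*
 * Allocate a unique transaction ID without taking a lock.  The ID is
 * published through *my_id before the global counter is advanced, so a
 * concurrent scan either sees the new ID or sees a counter that does not
 * yet include it.  IDs wasted by losing the CAS are the price of staying
 * latch free.
 */
static txnid_t
alloc_txn_id(_Atomic txnid_t *my_id)
{
	txnid_t current, new_id;

	do {
		current = atomic_load(&global_current);
		new_id = current + 1;
		atomic_store(my_id, new_id);    /* Publish before the CAS. */
	} while (!atomic_compare_exchange_strong(&global_current,
	    &current, new_id) ||
	    new_id == TXN_NONE || new_id == TXN_ABORTED);

	return (new_id);
}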
diff --git a/src/include/txn.h b/src/include/txn.h
index 4d32fccbbd0..165c5b705dd 100644
--- a/src/include/txn.h
+++ b/src/include/txn.h
@@ -57,6 +57,7 @@ struct __wt_txn_global {
volatile wt_txnid_t oldest_id;
volatile uint32_t gen; /* Completed transaction generation */
+ volatile uint32_t scan_gen; /* Snapshot scan generation */
WT_TXN_STATE *states; /* Per-session transaction states */
};
@@ -85,6 +86,7 @@ struct __wt_txn {
/* Saved global state, to avoid repeating scans. */
wt_txnid_t last_id;
uint32_t last_gen;
+ uint32_t last_scan_gen;
/*
* Arrays of txn IDs in WT_UPDATE or WT_REF structures created or
diff --git a/src/txn/txn.c b/src/txn/txn.c
index ca454f8266a..ef803864695 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -27,34 +27,19 @@ __wt_txnid_cmp(const void *v1, const void *v2)
* Sort a snapshot for faster searching and set the min/max bounds.
*/
static void
-__txn_sort_snapshot(WT_SESSION_IMPL *session,
- uint32_t n, wt_txnid_t id, wt_txnid_t oldest_id)
+__txn_sort_snapshot(WT_SESSION_IMPL *session, uint32_t n, wt_txnid_t id)
{
WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- wt_txnid_t id_copy;
txn = &session->txn;
- txn_global = &S2C(session)->txn_global;
if (n > 1)
qsort(txn->snapshot, n, sizeof(wt_txnid_t), __wt_txnid_cmp);
txn->snapshot_count = n;
- txn->snap_min = (n == 0) ? id : txn->snapshot[0];
txn->snap_max = id;
+ txn->snap_min = (n == 0 || TXNID_LT(id, txn->snapshot[0])) ?
+ id : txn->snapshot[0];
WT_ASSERT(session, n == 0 || txn->snap_min != WT_TXN_NONE);
- if (TXNID_LT(txn->snap_min, oldest_id))
- oldest_id = txn->snap_min;
-
- /*
- * Update the oldest snapshot if we have a newer value. We're not
- * holding a lock, so copy then swap into place if our oldest version
- * is newer.
- */
- do {
- id_copy = txn_global->oldest_id;
- } while (TXNID_LT(id_copy, oldest_id) &&
- !WT_ATOMIC_CAS(txn_global->oldest_id, id_copy, oldest_id));
}
/*
@@ -82,70 +67,69 @@ __wt_txn_refresh(
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *s, *txn_state;
- wt_txnid_t current_id, id, oldest_id;
+ wt_txnid_t current_id, id, snap_min, oldest_id;
uint32_t i, n, session_cnt;
conn = S2C(session);
txn = &session->txn;
txn_global = &conn->txn_global;
txn_state = &txn_global->states[session->id];
- oldest_id = txn_global->oldest_id;
- /* If nothing has changed since last time, we're done. */
- if (!alloc_id &&
+ if (alloc_id) {
+ /*
+ * Allocate a transaction ID.
+ *
+ * We use an atomic compare and swap to ensure that we get a
+ * unique ID that is published before the global counter is
+ * updated.
+ *
+ * If two threads race to allocate an ID, only the latest ID
+ * will proceed. The winning thread can be sure its snapshot
+ * contains all of the earlier active IDs. Threads that race
+ * and get an earlier ID may not appear in the snapshot, but
+ * they will loop and allocate a new ID before proceeding to
+ * make any updates.
+ *
+ * This potentially wastes transaction IDs when threads race to
+ * begin transactions: that is the price we pay to keep this
+ * path latch free.
+ */
+ do {
+ current_id = txn_global->current;
+ txn_state->id = txn->id = current_id + 1;
+ } while (!WT_ATOMIC_CAS(txn_global->current,
+ current_id, txn->id) ||
+ txn->id == WT_TXN_NONE ||
+ txn->id == WT_TXN_ABORTED);
+
+ if (!get_snapshot)
+ return;
+ } else if (!alloc_id && get_snapshot &&
txn->id == max_id &&
txn->last_id == txn_global->current &&
txn->last_gen == txn_global->gen &&
- TXNID_LE(oldest_id, txn->snap_min))
- goto done;
+ TXNID_LE(txn_global->oldest_id, txn->snap_min)) {
+ /* If nothing has changed since last time, we're done. */
+ txn_state->snap_min = txn->snap_min;
+ return;
+ }
do {
- /* Take a copy of the current transaction generation. */
+ /* Take a copy of the current generation numbers. */
+ txn->last_scan_gen = txn_global->scan_gen;
txn->last_gen = txn_global->gen;
+ txn->last_id = current_id = txn_global->current;
- if (alloc_id) {
- /*
- * Allocate a transaction ID.
- *
- * We use an atomic compare and swap to ensure that we
- * get a unique ID that is published before the global
- * counter is updated.
- *
- * If two threads race to allocate an ID, only the
- * latest ID will proceed. The winning thread can be
- * sure its snapshot contains all of the earlier active
- * IDs. Threads that race and get an earlier ID may
- * not appear in the snapshot, but they will loop and
- * allocate a new ID before proceeding to make any
- * updates.
- *
- * This potentially wastes transaction IDs when threads
- * race to begin transactions: that is the price we pay
- * to keep this path latch free.
- */
- do {
- current_id = txn_global->current;
- txn_state->id = txn->id = current_id + 1;
- } while (!WT_ATOMIC_CAS(txn_global->current,
- current_id, txn->id) ||
- txn->id == WT_TXN_NONE ||
- txn->id == WT_TXN_ABORTED);
- txn->last_id = oldest_id = max_id = txn->id;
- } else {
- txn->last_id = current_id = txn_global->current;
- oldest_id =
- (max_id != WT_TXN_NONE) ? max_id : current_id + 1;
- }
-
- if (!get_snapshot)
- return;
+ if (alloc_id)
+ snap_min = txn->id;
+ else
+ snap_min = current_id + 1;
/*
- * Publish a new snap_min value that we refine below. This
- * prevents the global oldest value from moving forward
- * underneath us.
+ * Constrain the oldest ID we calculate to be less than the
+ * specified value.
*/
- txn_state->snap_min = txn_global->oldest_id;
+ oldest_id = (max_id != WT_TXN_NONE) ? max_id : snap_min;
/* Copy the array of concurrent transactions. */
WT_ORDERED_READ(session_cnt, conn->session_cnt);
@@ -154,34 +138,57 @@ __wt_txn_refresh(
i++, s++) {
/*
* Ignore everything about the session's own
- * transaction: we are in the process of updating it.
+ * transaction if we are in the process of updating it.
*/
- if (s == txn_state)
+ if (get_snapshot && s == txn_state)
continue;
if ((id = s->id) != WT_TXN_NONE) {
txn->snapshot[n++] = id;
- if (TXNID_LT(id, oldest_id))
- oldest_id = id;
+ if (TXNID_LT(id, snap_min))
+ snap_min = id;
}
if ((id = s->snap_min) != WT_TXN_NONE &&
TXNID_LT(id, oldest_id))
oldest_id = id;
}
- __txn_sort_snapshot(session, n, current_id + 1, oldest_id);
+ if (TXNID_LT(snap_min, oldest_id))
+ oldest_id = snap_min;
/*
* Ensure the snapshot reads are scheduled before re-checking
- * the global current ID.
+ * the global generation.
*/
WT_READ_BARRIER();
- } while (txn->last_id != txn_global->current ||
- txn->last_gen != txn_global->gen ||
- TXNID_LT(txn->snap_min, txn_global->oldest_id));
-done: WT_ASSERT(session,
- TXNID_LE(txn_global->oldest_id, txn->snap_min));
- txn_state->snap_min = txn->snap_min;
+ /*
+ * When getting an ordinary snapshot, it is sufficient to
+ * unconditionally bump the scan generation. Otherwise, we're
+ * trying to update the oldest ID, so require that the scan
+ * generation has not changed while we have been scanning.
+ */
+ if (get_snapshot) {
+ txn_state->snap_min = snap_min;
+ WT_ATOMIC_ADD(txn_global->scan_gen, 1);
+ }
+ } while (txn->last_gen != txn_global->gen ||
+ (!get_snapshot && !WT_ATOMIC_CAS(txn_global->scan_gen,
+ txn->last_scan_gen, txn->last_scan_gen + 1)));
+
+ ++txn->last_scan_gen;
+
+ /* Update the oldest ID if another thread hasn't beat us to it. */
+ do {
+ id = txn_global->oldest_id;
+ } while ((!get_snapshot ||
+ txn->last_scan_gen == txn_global->scan_gen) &&
+ TXNID_LT(id, oldest_id) &&
+ !WT_ATOMIC_CAS(txn_global->oldest_id, id, oldest_id));
+
+ if (get_snapshot) {
+ __txn_sort_snapshot(session, n, current_id + 1);
+ WT_ASSERT(session, n == 0 || snap_min == txn->snap_min);
+ }
}
/*
@@ -198,15 +205,16 @@ __wt_txn_get_evict_snapshot(WT_SESSION_IMPL *session)
txn_global = &S2C(session)->txn_global;
/*
- * The oldest active snapshot ID in the system should *not* be visible
- * to eviction. Create a snapshot containing that ID.
+ * The oldest active snapshot ID in the system that should *not* be
+ * visible to eviction. Create a snapshot containing that ID.
*/
+ __wt_txn_refresh(session, WT_TXN_NONE, 0, 0);
oldest_id = txn_global->oldest_id;
- __txn_sort_snapshot(session, 0, oldest_id, oldest_id);
+ __txn_sort_snapshot(session, 0, oldest_id);
/*
* Note that we carefully don't update the global table with this
- * snap_min value: there is already a running transaction in this
+ * snap_min value: there may already be a running transaction in this
* session with its own value in the global table.
*/
}
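For reference, the shape of the race-avoidance scheme introduced above (a scan generation that is bumped whenever a snapshot is published, plus a compare-and-swap on the shared oldest ID) can be sketched in isolation. This is an illustrative approximation using C11 atomics rather than the WT_ATOMIC_* macros; it omits the get_snapshot fast path handled in __wt_txn_refresh, uses a simplified TXNID_LT that ignores wraparound, and the names scan_gen, oldest_id and try_advance_oldest are stand-ins for the fields this commit adds to WT_TXN_GLOBAL.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef uint64_t txnid_t;

#define TXNID_LT(a, b)  ((a) < (b))     /* Simplified: ignores wraparound. */

static _Atomic uint32_t scan_gen;       /* Bumped when a snapshot is published. */
static _Atomic txnid_t oldest_id;       /* Oldest ID any reader may still need. */

/*
 * Try to advance the shared oldest_id to my_oldest, which was computed by a
 * scan that began at generation start_gen.  The advance is only permitted if
 * no snapshot was published (no scan_gen bump) while the scan was running;
 * otherwise the computed value may already be stale and the caller rescans.
 */
static bool
try_advance_oldest(uint32_t start_gen, txnid_t my_oldest)
{
	uint32_t expected_gen = start_gen;
	txnid_t id;

	/* Claim this scan generation; fail if it moved underneath us. */
	if (!atomic_compare_exchange_strong(&scan_gen,
	    &expected_gen, start_gen + 1))
		return (false);

	/* Push oldest_id forward unless another thread already beat us to it. */
	do {
		id = atomic_load(&oldest_id);
		if (!TXNID_LT(id, my_oldest))
			break;
	} while (!atomic_compare_exchange_weak(&oldest_id, &id, my_oldest));

	return (true);
}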