summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Alex Gorrod <alexg@wiredtiger.com> 2013-05-28 18:10:55 +1000
committer Alex Gorrod <alexg@wiredtiger.com> 2013-05-28 18:10:55 +1000
commit 1cb684c03fce26d93f143b1637b4366003fc258e (patch)
tree 0cb99c87709937adfc8882dd9e60166ff54c2a72 /src
parent 81bd4412d2510049431201235fc89194b4ca4f1a (diff)
parent e85144c61c1a18e1d31506e3b948cf593c1df7ac (diff)
download mongo-1cb684c03fce26d93f143b1637b4366003fc258e.tar.gz
Merge branch 'develop' into hot-backup
Diffstat (limited to 'src')
-rw-r--r-- src/btree/bt_evict.c 16
-rw-r--r-- src/btree/bt_ovfl.c 12
-rw-r--r-- src/btree/bt_page.c 16
-rw-r--r-- src/config/config_def.c 5
-rw-r--r-- src/conn/conn_cache_pool.c 5
-rw-r--r-- src/include/btree.i 7
-rw-r--r-- src/include/extern.h 7
-rw-r--r-- src/include/misc.h 7
-rw-r--r-- src/include/txn.h 21
-rw-r--r-- src/include/txn.i 10
-rw-r--r-- src/include/wiredtiger.in 4
-rw-r--r-- src/txn/txn.c 278
12 files changed, 166 insertions, 222 deletions
diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c
index c6b3eb17588..91a102c8d0a 100644
--- a/src/btree/bt_evict.c
+++ b/src/btree/bt_evict.c
@@ -361,9 +361,6 @@ __wt_evict_page(WT_SESSION_IMPL *session, WT_PAGE *page)
} else
__wt_txn_release_snapshot(session);
- /* If the oldest transaction was updated, keep the newer value. */
- saved_txn.oldest_snap_min = txn->oldest_snap_min;
-
*txn = saved_txn;
return (ret);
}
@@ -527,8 +524,8 @@ __wt_sync_file(WT_SESSION_IMPL *session, int syncop)
/* Write dirty pages if nobody beat us to it. */
if (__wt_page_is_modified(page)) {
if (txn->isolation == TXN_ISO_READ_COMMITTED)
- __wt_txn_get_snapshot(session,
- WT_TXN_NONE, WT_TXN_NONE, 0);
+ __wt_txn_refresh(
+ session, WT_TXN_NONE, 0, 1);
ret = __wt_rec_write(session, page, NULL, 0);
if (txn->isolation == TXN_ISO_READ_COMMITTED)
__wt_txn_release_snapshot(session);
@@ -708,9 +705,6 @@ __evict_walk(WT_SESSION_IMPL *session, u_int *entriesp, int clean)
cache = S2C(session)->cache;
retries = 0;
- /* Update the oldest transaction ID -- we use it to filter pages. */
- __wt_txn_get_oldest(session);
-
/*
* NOTE: we don't hold the schema lock, so we have to take care
* that the handles we see are open and valid.
@@ -804,7 +798,6 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, int clean)
WT_DECL_RET;
WT_EVICT_ENTRY *end, *evict, *start;
WT_PAGE *page;
- wt_txnid_t oldest_txn;
int modified, restarts, levels;
btree = S2BT(session);
@@ -813,7 +806,6 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, int clean)
end = start + WT_EVICT_WALK_PER_FILE;
if (end > cache->evict + cache->evict_slots)
end = cache->evict + cache->evict_slots;
- oldest_txn = session->txn.oldest_snap_min;
WT_ASSERT(session, btree->evict_page == NULL ||
WT_PAGE_IS_ROOT(btree->evict_page) ||
@@ -924,8 +916,8 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, int clean)
* transaction that were running last time we wrote the
* page has since rolled back.
*/
- if (modified &&
- TXNID_LE(oldest_txn, page->modify->disk_txn) &&
+ if (modified && !__wt_txn_visible_all(session,
+ page->modify->disk_txn) &&
!F_ISSET(cache, WT_EVICT_STUCK))
continue;
}
diff --git a/src/btree/bt_ovfl.c b/src/btree/bt_ovfl.c
index dee629aea0b..9e08762a5c8 100644
--- a/src/btree/bt_ovfl.c
+++ b/src/btree/bt_ovfl.c
@@ -165,19 +165,11 @@ __ovfl_cache_row_visible(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip)
first = WT_ROW_UPDATE(page, rip);
WT_ASSERT(session, first != NULL);
- /*
- * Check to see if there's a globally visible update. If there's no
- * globally visible update using our cached copy of the oldest ID
- * required in the system, refresh that ID and rescan, it's better
- * than doing I/O and caching copies of an overflow record.
- */
- for (upd = first; upd != NULL; upd = upd->next)
- if (__wt_txn_visible_all(session, upd->txnid))
- return (1);
- __wt_txn_get_oldest(session);
+ /* Check to see if there's a globally visible update. */
for (upd = first; upd != NULL; upd = upd->next)
if (__wt_txn_visible_all(session, upd->txnid))
return (1);
+
return (0);
}
diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c
index ace833f8e26..5e43c53d6cd 100644
--- a/src/btree/bt_page.c
+++ b/src/btree/bt_page.c
@@ -30,8 +30,12 @@ __wt_page_in_func(
{
WT_DECL_RET;
WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
+ WT_TXN *txn;
int busy, oldgen;
+ txn = &session->txn;
+
for (oldgen = 0;;) {
switch (ref->state) {
case WT_REF_DISK:
@@ -79,9 +83,17 @@ __wt_page_in_func(
* updates. This should be extremely unlikely in real
* applications, wait for eviction of the page to avoid
* the issue.
+ *
+ * Also, make sure the page isn't too big. Only do
+ * this check once per transaction: it is not a common
+ * case, and we don't want to get stuck if it isn't
+ * possible to evict the page.
*/
- if (page->modify != NULL &&
- __wt_txn_ancient(session, page->modify->first_id)) {
+ if ((mod = page->modify) != NULL &&
+ (__wt_txn_ancient(session, mod->first_id) ||
+ (!F_ISSET(txn, TXN_FORCE_EVICT) &&
+ __wt_eviction_page_force(session, page)))) {
+ F_SET(txn, TXN_FORCE_EVICT);
page->read_gen = WT_READ_GEN_OLDEST;
WT_RET(__wt_page_release(session, page));
break;
diff --git a/src/config/config_def.c b/src/config/config_def.c
index b17767d9550..0e07db35b05 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -25,6 +25,7 @@ static const WT_CONFIG_CHECK confchk_connection_open_session[] = {
static const WT_CONFIG_CHECK confchk_shared_cache_subconfigs[] = {
{ "chunk", "int", "min=1MB,max=10TB", NULL },
+ { "enable", "boolean", NULL, NULL },
{ "name", "string", NULL, NULL },
{ "reserve", "int", NULL, NULL },
{ "size", "int", "min=1MB,max=10TB", NULL },
@@ -289,7 +290,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
{ "connection.reconfigure",
"cache_size=100MB,error_prefix=,eviction_dirty_target=80,"
"eviction_target=80,eviction_trigger=95,shared_cache=(chunk=10MB,"
- "name=pool,reserve=0,size=500MB),statistics=0,verbose=",
+ "enable=0,name=pool,reserve=0,size=500MB),statistics=0,verbose=",
confchk_connection_reconfigure
},
{ "cursor.close",
@@ -394,7 +395,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"direct_io=,error_prefix=,eviction_dirty_target=80,eviction_target=80"
",eviction_trigger=95,extensions=,file_extend=,hazard_max=1000,"
"logging=0,lsm_merge=,mmap=,multiprocess=0,session_max=50,"
- "shared_cache=(chunk=10MB,name=pool,reserve=0,size=500MB),"
+ "shared_cache=(chunk=10MB,enable=0,name=pool,reserve=0,size=500MB),"
"statistics=0,statistics_log=(clear=,path=\"WiredTigerStat.%H\","
"sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),sync=,transactional=,"
"use_environment_priv=0,verbose=",
diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c
index fc22e437b64..8270865d784 100644
--- a/src/conn/conn_cache_pool.c
+++ b/src/conn/conn_cache_pool.c
@@ -48,8 +48,9 @@ __wt_conn_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg)
reconfiguring = 1;
else {
/* Only setup if a shared cache was explicitly configured. */
- if (__wt_config_gets(session, WT_SKIP_DEFAULT_CONFIG(cfg),
- "shared_cache", &cval) == WT_NOTFOUND)
+ WT_RET(__wt_config_gets(
+ session, cfg, "shared_cache.enable", &cval));
+ if (!cval.val)
return (0);
WT_RET_NOTFOUND_OK(
__wt_config_gets(session, cfg, "shared_cache.name", &cval));
diff --git a/src/include/btree.i b/src/include/btree.i
index ed3031471bc..3d49972043f 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -445,11 +445,10 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_PAGE *page)
return (0);
/*
- * Try to immediately evict pages if they require forced eviction or
- * have the special "oldest" read generation.
+ * Try to immediately evict pages if they have the special "oldest"
+ * read generation.
*/
- if ((page->read_gen == WT_READ_GEN_OLDEST ||
- __wt_eviction_page_force(session, page)) &&
+ if (page->read_gen == WT_READ_GEN_OLDEST &&
WT_ATOMIC_CAS(page->ref->state, WT_REF_MEM, WT_REF_LOCKED)) {
if ((ret = __wt_hazard_clear(session, page)) != 0) {
page->ref->state = WT_REF_MEM;
diff --git a/src/include/extern.h b/src/include/extern.h
index 6a1ef920c7b..13b83371c76 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -1250,11 +1250,10 @@ extern void __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats);
extern void __wt_stat_clear_connection_stats(void *stats_arg);
extern int __wt_txnid_cmp(const void *v1, const void *v2);
extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session);
-extern void __wt_txn_get_oldest(WT_SESSION_IMPL *session);
-extern void __wt_txn_get_snapshot( WT_SESSION_IMPL *session,
- wt_txnid_t my_id,
+extern void __wt_txn_refresh( WT_SESSION_IMPL *session,
wt_txnid_t max_id,
- int force);
+ int alloc_id,
+ int get_snapshot);
extern void __wt_txn_get_evict_snapshot(WT_SESSION_IMPL *session);
extern int __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]);
extern void __wt_txn_release(WT_SESSION_IMPL *session);
diff --git a/src/include/misc.h b/src/include/misc.h
index b4aa47df7af..1d22922ac0c 100644
--- a/src/include/misc.h
+++ b/src/include/misc.h
@@ -160,13 +160,6 @@
#define WT_DECL_RET int ret = 0
/*
- * Skip the default configuration string in an list of configurations. The
- * default config is always the first entry in the array, and the array always
- * has an explicit NULL terminator, so this is safe.
- */
-#define WT_SKIP_DEFAULT_CONFIG(c) &(c)[1]
-
-/*
* In diagnostic mode we track the locations from which hazard pointers and
* scratch buffers were acquired.
*/
diff --git a/src/include/txn.h b/src/include/txn.h
index 80e0a05668a..165c5b705dd 100644
--- a/src/include/txn.h
+++ b/src/include/txn.h
@@ -49,7 +49,16 @@ struct __wt_txn_state {
struct __wt_txn_global {
volatile wt_txnid_t current; /* Current transaction ID. */
+
+ /*
+ * The oldest transaction ID that is not yet visible to some
+ * transaction in the system.
+ */
+ volatile wt_txnid_t oldest_id;
+
volatile uint32_t gen; /* Completed transaction generation */
+ volatile uint32_t scan_gen; /* Snapshot scan generation */
+
WT_TXN_STATE *states; /* Per-session transaction states */
};
@@ -74,15 +83,10 @@ struct __wt_txn {
wt_txnid_t *snapshot;
uint32_t snapshot_count;
- /*
- * When this transaction started, the oldest transaction ID that was
- * not yet visible to some transaction in the system.
- */
- wt_txnid_t oldest_snap_min;
-
/* Saved global state, to avoid repeating scans. */
wt_txnid_t last_id;
uint32_t last_gen;
+ uint32_t last_scan_gen;
/*
* Arrays of txn IDs in WT_UPDATE or WT_REF structures created or
@@ -98,7 +102,8 @@ struct __wt_txn {
#define TXN_AUTOCOMMIT 0x01
#define TXN_ERROR 0x02
-#define TXN_OLDEST 0x04
-#define TXN_RUNNING 0x08
+#define TXN_FORCE_EVICT 0x04
+#define TXN_OLDEST 0x08
+#define TXN_RUNNING 0x10
uint32_t flags;
};
diff --git a/src/include/txn.i b/src/include/txn.i
index 8d003184e7b..4e597853694 100644
--- a/src/include/txn.i
+++ b/src/include/txn.i
@@ -140,10 +140,12 @@ __wt_txn_visible(WT_SESSION_IMPL *session, wt_txnid_t id)
static inline int
__wt_txn_visible_all(WT_SESSION_IMPL *session, wt_txnid_t id)
{
- WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ wt_txnid_t oldest_id;
- txn = &session->txn;
- return (TXNID_LT(id, txn->oldest_snap_min));
+ txn_global = &S2C(session)->txn_global;
+ oldest_id = txn_global->oldest_id;
+ return (TXNID_LT(id, oldest_id));
}
/*
@@ -276,7 +278,7 @@ __wt_txn_read_first(WT_SESSION_IMPL *session)
if (txn->isolation == TXN_ISO_READ_COMMITTED ||
(!F_ISSET(txn, TXN_RUNNING) &&
txn->isolation == TXN_ISO_SNAPSHOT))
- __wt_txn_get_snapshot(session, WT_TXN_NONE, WT_TXN_NONE, 0);
+ __wt_txn_refresh(session, WT_TXN_NONE, 0, 1);
else if (!F_ISSET(txn, TXN_RUNNING))
txn_state->snap_min = txn_global->current;
}
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 466d1232848..2e510b76bcb 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -1096,6 +1096,8 @@ struct __wt_connection {
* @config{shared_cache = (, shared cache configuration options. A
* database should configure either a cache_size or a shared_cache not
* both., a set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;enable, whether the connection is
+ * using a shared cache., a boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk, the granularity that a shared
* cache is redistributed., an integer between 1MB and 10TB; default \c
* 10MB.}
@@ -1377,6 +1379,8 @@ struct __wt_connection {
* @config{shared_cache = (, shared cache configuration options. A database
* should configure either a cache_size or a shared_cache not both., a set of
* related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;enable, whether the connection is using a
+ * shared cache., a boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk, the granularity that a shared cache is
* redistributed., an integer between 1MB and 10TB; default \c 10MB.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;reserve, amount of cache this database is
diff --git a/src/txn/txn.c b/src/txn/txn.c
index 70c7bd2bdfb..c1f1ed89cdc 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -27,8 +27,7 @@ __wt_txnid_cmp(const void *v1, const void *v2)
* Sort a snapshot for faster searching and set the min/max bounds.
*/
static void
-__txn_sort_snapshot(WT_SESSION_IMPL *session,
- uint32_t n, wt_txnid_t id, wt_txnid_t oldest_snap_min)
+__txn_sort_snapshot(WT_SESSION_IMPL *session, uint32_t n, wt_txnid_t id)
{
WT_TXN *txn;
@@ -37,13 +36,10 @@ __txn_sort_snapshot(WT_SESSION_IMPL *session,
if (n > 1)
qsort(txn->snapshot, n, sizeof(wt_txnid_t), __wt_txnid_cmp);
txn->snapshot_count = n;
- txn->snap_min = (n == 0) ? id : txn->snapshot[0];
txn->snap_max = id;
+ txn->snap_min = (n == 0 || TXNID_LT(id, txn->snapshot[0])) ?
+ id : txn->snapshot[0];
WT_ASSERT(session, n == 0 || txn->snap_min != WT_TXN_NONE);
- if (TXNID_LT(txn->snap_min, oldest_snap_min))
- oldest_snap_min = txn->snap_min;
-
- txn->oldest_snap_min = oldest_snap_min;
}
/*
@@ -60,70 +56,18 @@ __wt_txn_release_snapshot(WT_SESSION_IMPL *session)
}
/*
- * __wt_txn_get_oldest --
- * Update the current transaction's cached copy of the oldest possible
- * snap_min value.
+ * __wt_txn_refresh --
+ * Allocate a transaction ID and/or a snapshot.
*/
void
-__wt_txn_get_oldest(WT_SESSION_IMPL *session)
-{
- WT_CONNECTION_IMPL *conn;
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *s;
- wt_txnid_t current_id, id, oldest_snap_min;
- uint32_t i, session_cnt;
-
- conn = S2C(session);
- txn = &session->txn;
- txn_global = &conn->txn_global;
-
- do {
- current_id = txn_global->current;
- oldest_snap_min =
- (txn->id != WT_TXN_NONE) ? txn->id : current_id + 1;
-
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (i = 0, s = txn_global->states;
- i < session_cnt;
- i++, s++) {
- if ((id = s->snap_min) != WT_TXN_NONE &&
- TXNID_LT(id, oldest_snap_min))
- oldest_snap_min = id;
- /*
- * It is possible that there is no snapshot active,
- * even though there are transactions running (at
- * isolation levels lower than snapshot isolation). If
- * a new snapshot is taken, it will have a snap_min
- * value of the lowest running transaction.
- *
- * We need to make sure that the oldest snap_min we
- * calculate won't be made invalid in that case, so
- * make sure it is at least as old as the oldest
- * running transaction.
- */
- if ((id = s->id) != WT_TXN_NONE &&
- TXNID_LT(id, oldest_snap_min))
- oldest_snap_min = id;
- }
- } while (current_id != txn_global->current);
-
- txn->oldest_snap_min = oldest_snap_min;
-}
-
-/*
- * __wt_txn_get_snapshot --
- * Set up a snapshot in the current transaction, without allocating an ID.
- */
-void
-__wt_txn_get_snapshot(
- WT_SESSION_IMPL *session, wt_txnid_t my_id, wt_txnid_t max_id, int force)
+__wt_txn_refresh(
+ WT_SESSION_IMPL *session, wt_txnid_t max_id, int alloc_id, int get_snapshot)
{
WT_CONNECTION_IMPL *conn;
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *s, *txn_state;
- wt_txnid_t current_id, id, oldest_snap_min;
+ wt_txnid_t current_id, id, snap_min, oldest_id;
uint32_t i, n, session_cnt;
conn = S2C(session);
@@ -131,59 +75,118 @@ __wt_txn_get_snapshot(
txn_global = &conn->txn_global;
txn_state = &txn_global->states[session->id];
- /* If nothing has changed since last time, we're done. */
- if (!force && txn->last_id == txn_global->current &&
- txn->last_gen == txn_global->gen) {
- WT_ASSERT(session,
- TXNID_LE(txn->oldest_snap_min, txn->snap_min));
+ if (alloc_id) {
+ /*
+ * Allocate a transaction ID.
+ *
+ * We use an atomic compare and swap to ensure that we get a
+ * unique ID that is published before the global counter is
+ * updated.
+ *
+ * If two threads race to allocate an ID, only the latest ID
+ * will proceed. The winning thread can be sure its snapshot
+ * contains all of the earlier active IDs. Threads that race
+ * and get an earlier ID may not appear in the snapshot, but
+ * they will loop and allocate a new ID before proceeding to
+ * make any updates.
+ *
+ * This potentially wastes transaction IDs when threads race to
+ * begin transactions: that is the price we pay to keep this
+ * path latch free.
+ */
+ do {
+ current_id = txn_global->current;
+ txn_state->id = txn->id = current_id + 1;
+ } while (!WT_ATOMIC_CAS(txn_global->current,
+ current_id, txn->id) ||
+ txn->id == WT_TXN_NONE ||
+ txn->id == WT_TXN_ABORTED);
+
+ if (!get_snapshot)
+ return;
+ } else if (!alloc_id && get_snapshot &&
+ txn->id == max_id &&
+ txn->last_id == txn_global->current &&
+ txn->last_gen == txn_global->gen &&
+ TXNID_LE(txn_global->oldest_id, txn->snap_min)) {
+ /* If nothing has changed since last time, we're done. */
txn_state->snap_min = txn->snap_min;
return;
}
do {
- /* Take a copy of the current session ID. */
+ /* Take a copy of the current generation numbers. */
+ txn->last_scan_gen = txn_global->scan_gen;
txn->last_gen = txn_global->gen;
txn->last_id = current_id = txn_global->current;
- oldest_snap_min = current_id + 1;
+
+ if (alloc_id)
+ snap_min = txn->id;
+ else
+ snap_min = current_id + 1;
+
+ /*
+ * Constrain the oldest ID we calculate to be no greater than
+ * the specified value.
+ */
+ oldest_id = (max_id != WT_TXN_NONE) ? max_id : snap_min;
/* Copy the array of concurrent transactions. */
WT_ORDERED_READ(session_cnt, conn->session_cnt);
for (i = n = 0, s = txn_global->states;
i < session_cnt;
i++, s++) {
- if ((id = s->id) != WT_TXN_NONE &&
- TXNID_LT(id, oldest_snap_min))
- oldest_snap_min = id;
/*
- * Ignore everything else about the session's own
- * transaction: we are in the process of updating it.
+ * Ignore everything about the session's own
+ * transaction if we are in the process of updating it.
*/
- if (i == session->id)
+ if (get_snapshot && s == txn_state)
continue;
- if (id != WT_TXN_NONE &&
- (max_id == WT_TXN_NONE || TXNID_LT(id, max_id)))
+ if ((id = s->id) != WT_TXN_NONE) {
txn->snapshot[n++] = id;
- /* Ignore the session's own transaction. */
+ if (TXNID_LT(id, snap_min))
+ snap_min = id;
+ }
if ((id = s->snap_min) != WT_TXN_NONE &&
- TXNID_LT(id, oldest_snap_min))
- oldest_snap_min = id;
+ TXNID_LT(id, oldest_id))
+ oldest_id = id;
}
+ if (TXNID_LT(snap_min, oldest_id))
+ oldest_id = snap_min;
+
/*
* Ensure the snapshot reads are scheduled before re-checking
- * the global current ID.
+ * the global generation.
*/
WT_READ_BARRIER();
- } while (current_id != txn_global->current ||
- txn->last_gen != txn_global->gen);
-
- __txn_sort_snapshot(session, n,
- (max_id != WT_TXN_NONE) ? max_id : current_id + 1,
- oldest_snap_min);
- id = (my_id == WT_TXN_NONE || TXNID_LT(txn->snap_min, my_id)) ?
- txn->snap_min : my_id;
- WT_ASSERT(session, TXNID_LE(oldest_snap_min, id));
- txn_state->snap_min = id;
+
+ /*
+ * When getting an ordinary snapshot, it is sufficient to
+ * unconditionally bump the scan generation. Otherwise, we're
+ * trying to update the oldest ID, so require that the scan
+ * generation has not changed while we have been scanning.
+ */
+ if (get_snapshot) {
+ txn_state->snap_min = snap_min;
+ WT_ATOMIC_ADD(txn_global->scan_gen, 1);
+ }
+ } while (txn->last_gen != txn_global->gen ||
+ (!get_snapshot && !WT_ATOMIC_CAS(txn_global->scan_gen,
+ txn->last_scan_gen, txn->last_scan_gen + 1)));
+
+ ++txn->last_scan_gen;
+
+ /* Update the oldest ID if another thread hasn't beat us to it. */
+ do {
+ id = txn_global->oldest_id;
+ } while ((!get_snapshot ||
+ txn->last_scan_gen == txn_global->scan_gen) &&
+ TXNID_LT(id, oldest_id) &&
+ !WT_ATOMIC_CAS(txn_global->oldest_id, id, oldest_id));
+
+ if (get_snapshot)
+ __txn_sort_snapshot(session, n, current_id + 1);
}
/*
@@ -194,21 +197,22 @@ __wt_txn_get_snapshot(
void
__wt_txn_get_evict_snapshot(WT_SESSION_IMPL *session)
{
- WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ wt_txnid_t oldest_id;
- txn = &session->txn;
+ txn_global = &S2C(session)->txn_global;
/*
- * The oldest active snapshot ID in the system should *not* be visible
- * to eviction. Create a snapshot containing that ID.
+ * Find the oldest active snapshot ID in the system that should *not* be
+ * visible to eviction, and create a snapshot containing that ID.
*/
- __wt_txn_get_oldest(session);
- __txn_sort_snapshot(
- session, 0, txn->oldest_snap_min, txn->oldest_snap_min);
+ __wt_txn_refresh(session, WT_TXN_NONE, 0, 0);
+ oldest_id = txn_global->oldest_id;
+ __txn_sort_snapshot(session, 0, oldest_id);
/*
* Note that we carefully don't update the global table with this
- * snap_min value: there is already a running transaction in this
+ * snap_min value: there may already be a running transaction in this
* session with its own value in the global table.
*/
}
@@ -224,9 +228,7 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
WT_CONNECTION_IMPL *conn;
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *s, *txn_state;
- wt_txnid_t id, oldest_snap_min;
- uint32_t i, n, session_cnt;
+ WT_TXN_STATE *txn_state;
conn = S2C(session);
txn = &session->txn;
@@ -246,66 +248,8 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
TXN_ISO_READ_COMMITTED : TXN_ISO_READ_UNCOMMITTED;
F_SET(txn, TXN_RUNNING);
-
- do {
- /*
- * Allocate a transaction ID.
- *
- * We use an atomic increment to ensure that we get a unique
- * ID, then publish that to the global state table.
- *
- * If two threads race to allocate an ID, only the latest ID
- * will proceed. The winning thread can be sure its snapshot
- * contains all of the earlier active IDs. Threads that race
- * and get an earlier ID may not appear in the snapshot,
- * but they will loop and allocate a new ID before proceeding
- * to make any updates.
- *
- * This potentially wastes transaction IDs when threads race to
- * begin transactions, but that is the price we pay to keep
- * this path latch free.
- */
- do {
- txn->id = WT_ATOMIC_ADD(txn_global->current, 1);
- } while (txn->id == WT_TXN_NONE || txn->id == WT_TXN_ABORTED);
- WT_PUBLISH(txn_state->id, txn->id);
-
- /*
- * If we are starting a snapshot isolation transaction, get
- * a snapshot of the running transactions.
- *
- * If we already have a snapshot (e.g., for an auto-commit
- * operation), update it so that the newly-allocated ID is
- * visible.
- */
- if (txn->isolation == TXN_ISO_SNAPSHOT) {
- txn->last_gen = txn_global->gen;
- oldest_snap_min = txn->id;
-
- /* Copy the array of concurrent transactions. */
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (i = n = 0, s = txn_global->states;
- i < session_cnt;
- i++, s++) {
- if ((id = s->snap_min) != WT_TXN_NONE &&
- TXNID_LT(id, oldest_snap_min))
- oldest_snap_min = id;
- if ((id = s->id) != WT_TXN_NONE)
- txn->snapshot[n++] = id;
- }
-
- __txn_sort_snapshot(
- session, n, txn->id, oldest_snap_min);
- txn_state->snap_min = txn->snap_min;
- }
-
- /*
- * Ensure the snapshot reads are complete before re-checking
- * the global current ID.
- */
- WT_READ_BARRIER();
- } while (txn->id != txn_global->current);
-
+ __wt_txn_refresh(
+ session, WT_TXN_NONE, 1, txn->isolation == TXN_ISO_SNAPSHOT);
return (0);
}
@@ -340,7 +284,7 @@ __wt_txn_release(WT_SESSION_IMPL *session)
if (session->ncursors == 0)
__wt_txn_release_snapshot(session);
txn->isolation = session->isolation;
- F_CLR(txn, TXN_ERROR | TXN_OLDEST | TXN_RUNNING);
+ F_CLR(txn, TXN_ERROR | TXN_FORCE_EVICT | TXN_OLDEST | TXN_RUNNING);
/* Update the global generation number. */
++txn_global->gen;
@@ -371,8 +315,8 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
* the cursor. Get the new snapshot before releasing the ID for the
* commit.
*/
- if (session->ncursors > 0)
- __wt_txn_get_snapshot(session, txn->id, WT_TXN_NONE, 1);
+ if (session->ncursors > 0 && txn->isolation != TXN_ISO_READ_UNCOMMITTED)
+ __wt_txn_refresh(session, txn->id + 1, 0, 1);
__wt_txn_release(session);
return (0);
}