diff options
author | Alex Gorrod <alexg@wiredtiger.com> | 2013-05-28 18:10:55 +1000 |
---|---|---|
committer | Alex Gorrod <alexg@wiredtiger.com> | 2013-05-28 18:10:55 +1000 |
commit | 1cb684c03fce26d93f143b1637b4366003fc258e (patch) | |
tree | 0cb99c87709937adfc8882dd9e60166ff54c2a72 /src | |
parent | 81bd4412d2510049431201235fc89194b4ca4f1a (diff) | |
parent | e85144c61c1a18e1d31506e3b948cf593c1df7ac (diff) | |
download | mongo-1cb684c03fce26d93f143b1637b4366003fc258e.tar.gz |
Merge branch 'develop' into hot-backup
Diffstat (limited to 'src')
-rw-r--r-- | src/btree/bt_evict.c | 16 | ||||
-rw-r--r-- | src/btree/bt_ovfl.c | 12 | ||||
-rw-r--r-- | src/btree/bt_page.c | 16 | ||||
-rw-r--r-- | src/config/config_def.c | 5 | ||||
-rw-r--r-- | src/conn/conn_cache_pool.c | 5 | ||||
-rw-r--r-- | src/include/btree.i | 7 | ||||
-rw-r--r-- | src/include/extern.h | 7 | ||||
-rw-r--r-- | src/include/misc.h | 7 | ||||
-rw-r--r-- | src/include/txn.h | 21 | ||||
-rw-r--r-- | src/include/txn.i | 10 | ||||
-rw-r--r-- | src/include/wiredtiger.in | 4 | ||||
-rw-r--r-- | src/txn/txn.c | 278 |
12 files changed, 166 insertions, 222 deletions
diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c index c6b3eb17588..91a102c8d0a 100644 --- a/src/btree/bt_evict.c +++ b/src/btree/bt_evict.c @@ -361,9 +361,6 @@ __wt_evict_page(WT_SESSION_IMPL *session, WT_PAGE *page) } else __wt_txn_release_snapshot(session); - /* If the oldest transaction was updated, keep the newer value. */ - saved_txn.oldest_snap_min = txn->oldest_snap_min; - *txn = saved_txn; return (ret); } @@ -527,8 +524,8 @@ __wt_sync_file(WT_SESSION_IMPL *session, int syncop) /* Write dirty pages if nobody beat us to it. */ if (__wt_page_is_modified(page)) { if (txn->isolation == TXN_ISO_READ_COMMITTED) - __wt_txn_get_snapshot(session, - WT_TXN_NONE, WT_TXN_NONE, 0); + __wt_txn_refresh( + session, WT_TXN_NONE, 0, 1); ret = __wt_rec_write(session, page, NULL, 0); if (txn->isolation == TXN_ISO_READ_COMMITTED) __wt_txn_release_snapshot(session); @@ -708,9 +705,6 @@ __evict_walk(WT_SESSION_IMPL *session, u_int *entriesp, int clean) cache = S2C(session)->cache; retries = 0; - /* Update the oldest transaction ID -- we use it to filter pages. */ - __wt_txn_get_oldest(session); - /* * NOTE: we don't hold the schema lock, so we have to take care * that the handles we see are open and valid. @@ -804,7 +798,6 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, int clean) WT_DECL_RET; WT_EVICT_ENTRY *end, *evict, *start; WT_PAGE *page; - wt_txnid_t oldest_txn; int modified, restarts, levels; btree = S2BT(session); @@ -813,7 +806,6 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, int clean) end = start + WT_EVICT_WALK_PER_FILE; if (end > cache->evict + cache->evict_slots) end = cache->evict + cache->evict_slots; - oldest_txn = session->txn.oldest_snap_min; WT_ASSERT(session, btree->evict_page == NULL || WT_PAGE_IS_ROOT(btree->evict_page) || @@ -924,8 +916,8 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, int clean) * transaction that were running last time we wrote the * page has since rolled back. */ - if (modified && - TXNID_LE(oldest_txn, page->modify->disk_txn) && + if (modified && !__wt_txn_visible_all(session, + page->modify->disk_txn) && !F_ISSET(cache, WT_EVICT_STUCK)) continue; } diff --git a/src/btree/bt_ovfl.c b/src/btree/bt_ovfl.c index dee629aea0b..9e08762a5c8 100644 --- a/src/btree/bt_ovfl.c +++ b/src/btree/bt_ovfl.c @@ -165,19 +165,11 @@ __ovfl_cache_row_visible(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip) first = WT_ROW_UPDATE(page, rip); WT_ASSERT(session, first != NULL); - /* - * Check to see if there's a globally visible update. If there's no - * globally visible update using our cached copy of the oldest ID - * required in the system, refresh that ID and rescan, it's better - * than doing I/O and caching copies of an overflow record. - */ - for (upd = first; upd != NULL; upd = upd->next) - if (__wt_txn_visible_all(session, upd->txnid)) - return (1); - __wt_txn_get_oldest(session); + /* Check to see if there's a globally visible update. */ for (upd = first; upd != NULL; upd = upd->next) if (__wt_txn_visible_all(session, upd->txnid)) return (1); + return (0); } diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c index ace833f8e26..5e43c53d6cd 100644 --- a/src/btree/bt_page.c +++ b/src/btree/bt_page.c @@ -30,8 +30,12 @@ __wt_page_in_func( { WT_DECL_RET; WT_PAGE *page; + WT_PAGE_MODIFY *mod; + WT_TXN *txn; int busy, oldgen; + txn = &session->txn; + for (oldgen = 0;;) { switch (ref->state) { case WT_REF_DISK: @@ -79,9 +83,17 @@ __wt_page_in_func( * updates. This should be extremely unlikely in real * applications, wait for eviction of the page to avoid * the issue. + * + * Also, make sure the page isn't too big. Only do + * this check once per transaction: it is not a common + * case, and we don't want to get stuck if it isn't + * possible to evict the page. */ - if (page->modify != NULL && - __wt_txn_ancient(session, page->modify->first_id)) { + if ((mod = page->modify) != NULL && + (__wt_txn_ancient(session, mod->first_id) || + (!F_ISSET(txn, TXN_FORCE_EVICT) && + __wt_eviction_page_force(session, page)))) { + F_SET(txn, TXN_FORCE_EVICT); page->read_gen = WT_READ_GEN_OLDEST; WT_RET(__wt_page_release(session, page)); break; diff --git a/src/config/config_def.c b/src/config/config_def.c index b17767d9550..0e07db35b05 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -25,6 +25,7 @@ static const WT_CONFIG_CHECK confchk_connection_open_session[] = { static const WT_CONFIG_CHECK confchk_shared_cache_subconfigs[] = { { "chunk", "int", "min=1MB,max=10TB", NULL }, + { "enable", "boolean", NULL, NULL }, { "name", "string", NULL, NULL }, { "reserve", "int", NULL, NULL }, { "size", "int", "min=1MB,max=10TB", NULL }, @@ -289,7 +290,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { { "connection.reconfigure", "cache_size=100MB,error_prefix=,eviction_dirty_target=80," "eviction_target=80,eviction_trigger=95,shared_cache=(chunk=10MB," - "name=pool,reserve=0,size=500MB),statistics=0,verbose=", + "enable=0,name=pool,reserve=0,size=500MB),statistics=0,verbose=", confchk_connection_reconfigure }, { "cursor.close", @@ -394,7 +395,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "direct_io=,error_prefix=,eviction_dirty_target=80,eviction_target=80" ",eviction_trigger=95,extensions=,file_extend=,hazard_max=1000," "logging=0,lsm_merge=,mmap=,multiprocess=0,session_max=50," - "shared_cache=(chunk=10MB,name=pool,reserve=0,size=500MB)," + "shared_cache=(chunk=10MB,enable=0,name=pool,reserve=0,size=500MB)," "statistics=0,statistics_log=(clear=,path=\"WiredTigerStat.%H\"," "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),sync=,transactional=," "use_environment_priv=0,verbose=", diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c index fc22e437b64..8270865d784 100644 --- a/src/conn/conn_cache_pool.c +++ b/src/conn/conn_cache_pool.c @@ -48,8 +48,9 @@ __wt_conn_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) reconfiguring = 1; else { /* Only setup if a shared cache was explicitly configured. */ - if (__wt_config_gets(session, WT_SKIP_DEFAULT_CONFIG(cfg), - "shared_cache", &cval) == WT_NOTFOUND) + WT_RET(__wt_config_gets( + session, cfg, "shared_cache.enable", &cval)); + if (!cval.val) return (0); WT_RET_NOTFOUND_OK( __wt_config_gets(session, cfg, "shared_cache.name", &cval)); diff --git a/src/include/btree.i b/src/include/btree.i index ed3031471bc..3d49972043f 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -445,11 +445,10 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_PAGE *page) return (0); /* - * Try to immediately evict pages if they require forced eviction or - * have the special "oldest" read generation. + * Try to immediately evict pages if they have the special "oldest" + * read generation. */ - if ((page->read_gen == WT_READ_GEN_OLDEST || - __wt_eviction_page_force(session, page)) && + if (page->read_gen == WT_READ_GEN_OLDEST && WT_ATOMIC_CAS(page->ref->state, WT_REF_MEM, WT_REF_LOCKED)) { if ((ret = __wt_hazard_clear(session, page)) != 0) { page->ref->state = WT_REF_MEM; diff --git a/src/include/extern.h b/src/include/extern.h index 6a1ef920c7b..13b83371c76 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1250,11 +1250,10 @@ extern void __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats); extern void __wt_stat_clear_connection_stats(void *stats_arg); extern int __wt_txnid_cmp(const void *v1, const void *v2); extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session); -extern void __wt_txn_get_oldest(WT_SESSION_IMPL *session); -extern void __wt_txn_get_snapshot( WT_SESSION_IMPL *session, - wt_txnid_t my_id, +extern void __wt_txn_refresh( WT_SESSION_IMPL *session, wt_txnid_t max_id, - int force); + int alloc_id, + int get_snapshot); extern void __wt_txn_get_evict_snapshot(WT_SESSION_IMPL *session); extern int __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]); extern void __wt_txn_release(WT_SESSION_IMPL *session); diff --git a/src/include/misc.h b/src/include/misc.h index b4aa47df7af..1d22922ac0c 100644 --- a/src/include/misc.h +++ b/src/include/misc.h @@ -160,13 +160,6 @@ #define WT_DECL_RET int ret = 0 /* - * Skip the default configuration string in an list of configurations. The - * default config is always the first entry in the array, and the array always - * has an explicit NULL terminator, so this is safe. - */ -#define WT_SKIP_DEFAULT_CONFIG(c) &(c)[1] - -/* * In diagnostic mode we track the locations from which hazard pointers and * scratch buffers were acquired. */ diff --git a/src/include/txn.h b/src/include/txn.h index 80e0a05668a..165c5b705dd 100644 --- a/src/include/txn.h +++ b/src/include/txn.h @@ -49,7 +49,16 @@ struct __wt_txn_state { struct __wt_txn_global { volatile wt_txnid_t current; /* Current transaction ID. */ + + /* + * The oldest transaction ID that is not yet visible to some + * transaction in the system. + */ + volatile wt_txnid_t oldest_id; + volatile uint32_t gen; /* Completed transaction generation */ + volatile uint32_t scan_gen; /* Snapshot scan generation */ + WT_TXN_STATE *states; /* Per-session transaction states */ }; @@ -74,15 +83,10 @@ struct __wt_txn { wt_txnid_t *snapshot; uint32_t snapshot_count; - /* - * When this transaction started, the oldest transaction ID that was - * not yet visible to some transaction in the system. - */ - wt_txnid_t oldest_snap_min; - /* Saved global state, to avoid repeating scans. */ wt_txnid_t last_id; uint32_t last_gen; + uint32_t last_scan_gen; /* * Arrays of txn IDs in WT_UPDATE or WT_REF structures created or @@ -98,7 +102,8 @@ struct __wt_txn { #define TXN_AUTOCOMMIT 0x01 #define TXN_ERROR 0x02 -#define TXN_OLDEST 0x04 -#define TXN_RUNNING 0x08 +#define TXN_FORCE_EVICT 0x04 +#define TXN_OLDEST 0x08 +#define TXN_RUNNING 0x10 uint32_t flags; }; diff --git a/src/include/txn.i b/src/include/txn.i index 8d003184e7b..4e597853694 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -140,10 +140,12 @@ __wt_txn_visible(WT_SESSION_IMPL *session, wt_txnid_t id) static inline int __wt_txn_visible_all(WT_SESSION_IMPL *session, wt_txnid_t id) { - WT_TXN *txn; + WT_TXN_GLOBAL *txn_global; + wt_txnid_t oldest_id; - txn = &session->txn; - return (TXNID_LT(id, txn->oldest_snap_min)); + txn_global = &S2C(session)->txn_global; + oldest_id = txn_global->oldest_id; + return (TXNID_LT(id, oldest_id)); } /* @@ -276,7 +278,7 @@ __wt_txn_read_first(WT_SESSION_IMPL *session) if (txn->isolation == TXN_ISO_READ_COMMITTED || (!F_ISSET(txn, TXN_RUNNING) && txn->isolation == TXN_ISO_SNAPSHOT)) - __wt_txn_get_snapshot(session, WT_TXN_NONE, WT_TXN_NONE, 0); + __wt_txn_refresh(session, WT_TXN_NONE, 0, 1); else if (!F_ISSET(txn, TXN_RUNNING)) txn_state->snap_min = txn_global->current; } diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 466d1232848..2e510b76bcb 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1096,6 +1096,8 @@ struct __wt_connection { * @config{shared_cache = (, shared cache configuration options. A * database should configure either a cache_size or a shared_cache not * both., a set of related configuration options defined below.} + * @config{ enable, whether the connection is + * using a shared cache., a boolean flag; default \c false.} * @config{ chunk, the granularity that a shared * cache is redistributed., an integer between 1MB and 10TB; default \c * 10MB.} @@ -1377,6 +1379,8 @@ struct __wt_connection { * @config{shared_cache = (, shared cache configuration options. A database * should configure either a cache_size or a shared_cache not both., a set of * related configuration options defined below.} + * @config{ enable, whether the connection is using a + * shared cache., a boolean flag; default \c false.} * @config{ chunk, the granularity that a shared cache is * redistributed., an integer between 1MB and 10TB; default \c 10MB.} * @config{ reserve, amount of cache this database is diff --git a/src/txn/txn.c b/src/txn/txn.c index 70c7bd2bdfb..c1f1ed89cdc 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -27,8 +27,7 @@ __wt_txnid_cmp(const void *v1, const void *v2) * Sort a snapshot for faster searching and set the min/max bounds. */ static void -__txn_sort_snapshot(WT_SESSION_IMPL *session, - uint32_t n, wt_txnid_t id, wt_txnid_t oldest_snap_min) +__txn_sort_snapshot(WT_SESSION_IMPL *session, uint32_t n, wt_txnid_t id) { WT_TXN *txn; @@ -37,13 +36,10 @@ __txn_sort_snapshot(WT_SESSION_IMPL *session, if (n > 1) qsort(txn->snapshot, n, sizeof(wt_txnid_t), __wt_txnid_cmp); txn->snapshot_count = n; - txn->snap_min = (n == 0) ? id : txn->snapshot[0]; txn->snap_max = id; + txn->snap_min = (n == 0 || TXNID_LT(id, txn->snapshot[0])) ? + id : txn->snapshot[0]; WT_ASSERT(session, n == 0 || txn->snap_min != WT_TXN_NONE); - if (TXNID_LT(txn->snap_min, oldest_snap_min)) - oldest_snap_min = txn->snap_min; - - txn->oldest_snap_min = oldest_snap_min; } /* @@ -60,70 +56,18 @@ __wt_txn_release_snapshot(WT_SESSION_IMPL *session) } /* - * __wt_txn_get_oldest -- - * Update the current transaction's cached copy of the oldest possible - * snap_min value. + * __wt_txn_refresh -- + * Allocate a transaction ID and/or a snapshot. */ void -__wt_txn_get_oldest(WT_SESSION_IMPL *session) -{ - WT_CONNECTION_IMPL *conn; - WT_TXN *txn; - WT_TXN_GLOBAL *txn_global; - WT_TXN_STATE *s; - wt_txnid_t current_id, id, oldest_snap_min; - uint32_t i, session_cnt; - - conn = S2C(session); - txn = &session->txn; - txn_global = &conn->txn_global; - - do { - current_id = txn_global->current; - oldest_snap_min = - (txn->id != WT_TXN_NONE) ? txn->id : current_id + 1; - - WT_ORDERED_READ(session_cnt, conn->session_cnt); - for (i = 0, s = txn_global->states; - i < session_cnt; - i++, s++) { - if ((id = s->snap_min) != WT_TXN_NONE && - TXNID_LT(id, oldest_snap_min)) - oldest_snap_min = id; - /* - * It is possible that there is no snapshot active, - * even though there are transactions running (at - * isolation levels lower than snapshot isolation). If - * a new snapshot is taken, it will have a snap_min - * value of the lowest running transaction. - * - * We need to make sure that the oldest snap_min we - * calculate won't be made invalid in that case, so - * make sure it is at least as old as the oldest - * running transaction. - */ - if ((id = s->id) != WT_TXN_NONE && - TXNID_LT(id, oldest_snap_min)) - oldest_snap_min = id; - } - } while (current_id != txn_global->current); - - txn->oldest_snap_min = oldest_snap_min; -} - -/* - * __wt_txn_get_snapshot -- - * Set up a snapshot in the current transaction, without allocating an ID. - */ -void -__wt_txn_get_snapshot( - WT_SESSION_IMPL *session, wt_txnid_t my_id, wt_txnid_t max_id, int force) +__wt_txn_refresh( + WT_SESSION_IMPL *session, wt_txnid_t max_id, int alloc_id, int get_snapshot) { WT_CONNECTION_IMPL *conn; WT_TXN *txn; WT_TXN_GLOBAL *txn_global; WT_TXN_STATE *s, *txn_state; - wt_txnid_t current_id, id, oldest_snap_min; + wt_txnid_t current_id, id, snap_min, oldest_id; uint32_t i, n, session_cnt; conn = S2C(session); @@ -131,59 +75,118 @@ __wt_txn_get_snapshot( txn_global = &conn->txn_global; txn_state = &txn_global->states[session->id]; - /* If nothing has changed since last time, we're done. */ - if (!force && txn->last_id == txn_global->current && - txn->last_gen == txn_global->gen) { - WT_ASSERT(session, - TXNID_LE(txn->oldest_snap_min, txn->snap_min)); + if (alloc_id) { + /* + * Allocate a transaction ID. + * + * We use an atomic compare and swap to ensure that we get a + * unique ID that is published before the global counter is + * updated. + * + * If two threads race to allocate an ID, only the latest ID + * will proceed. The winning thread can be sure its snapshot + * contains all of the earlier active IDs. Threads that race + * and get an earlier ID may not appear in the snapshot, but + * they will loop and allocate a new ID before proceeding to + * make any updates. + * + * This potentially wastes transaction IDs when threads race to + * begin transactions: that is the price we pay to keep this + * path latch free. + */ + do { + current_id = txn_global->current; + txn_state->id = txn->id = current_id + 1; + } while (!WT_ATOMIC_CAS(txn_global->current, + current_id, txn->id) || + txn->id == WT_TXN_NONE || + txn->id == WT_TXN_ABORTED); + + if (!get_snapshot) + return; + } else if (!alloc_id && get_snapshot && + txn->id == max_id && + txn->last_id == txn_global->current && + txn->last_gen == txn_global->gen && + TXNID_LE(txn_global->oldest_id, txn->snap_min)) { + /* If nothing has changed since last time, we're done. */ txn_state->snap_min = txn->snap_min; return; } do { - /* Take a copy of the current session ID. */ + /* Take a copy of the current generation numbers. */ + txn->last_scan_gen = txn_global->scan_gen; txn->last_gen = txn_global->gen; txn->last_id = current_id = txn_global->current; - oldest_snap_min = current_id + 1; + + if (alloc_id) + snap_min = txn->id; + else + snap_min = current_id + 1; + + /* + * Constrain the oldest ID we calculate to be less than the + * specified value. + */ + oldest_id = (max_id != WT_TXN_NONE) ? max_id : snap_min; /* Copy the array of concurrent transactions. */ WT_ORDERED_READ(session_cnt, conn->session_cnt); for (i = n = 0, s = txn_global->states; i < session_cnt; i++, s++) { - if ((id = s->id) != WT_TXN_NONE && - TXNID_LT(id, oldest_snap_min)) - oldest_snap_min = id; /* - * Ignore everything else about the session's own - * transaction: we are in the process of updating it. + * Ignore everything about the session's own + * transaction if we are in the process of updating it. */ - if (i == session->id) + if (get_snapshot && s == txn_state) continue; - if (id != WT_TXN_NONE && - (max_id == WT_TXN_NONE || TXNID_LT(id, max_id))) + if ((id = s->id) != WT_TXN_NONE) { txn->snapshot[n++] = id; - /* Ignore the session's own transaction. */ + if (TXNID_LT(id, snap_min)) + snap_min = id; + } if ((id = s->snap_min) != WT_TXN_NONE && - TXNID_LT(id, oldest_snap_min)) - oldest_snap_min = id; + TXNID_LT(id, oldest_id)) + oldest_id = id; } + if (TXNID_LT(snap_min, oldest_id)) + oldest_id = snap_min; + /* * Ensure the snapshot reads are scheduled before re-checking - * the global current ID. + * the global generation. */ WT_READ_BARRIER(); - } while (current_id != txn_global->current || - txn->last_gen != txn_global->gen); - - __txn_sort_snapshot(session, n, - (max_id != WT_TXN_NONE) ? max_id : current_id + 1, - oldest_snap_min); - id = (my_id == WT_TXN_NONE || TXNID_LT(txn->snap_min, my_id)) ? - txn->snap_min : my_id; - WT_ASSERT(session, TXNID_LE(oldest_snap_min, id)); - txn_state->snap_min = id; + + /* + * When getting an ordinary snapshot, it is sufficient to + * unconditionally bump the scan generation. Otherwise, we're + * trying to update the oldest ID, so require that the scan + * generation has not changed while we have been scanning. + */ + if (get_snapshot) { + txn_state->snap_min = snap_min; + WT_ATOMIC_ADD(txn_global->scan_gen, 1); + } + } while (txn->last_gen != txn_global->gen || + (!get_snapshot && !WT_ATOMIC_CAS(txn_global->scan_gen, + txn->last_scan_gen, txn->last_scan_gen + 1))); + + ++txn->last_scan_gen; + + /* Update the oldest ID if another thread hasn't beat us to it. */ + do { + id = txn_global->oldest_id; + } while ((!get_snapshot || + txn->last_scan_gen == txn_global->scan_gen) && + TXNID_LT(id, oldest_id) && + !WT_ATOMIC_CAS(txn_global->oldest_id, id, oldest_id)); + + if (get_snapshot) + __txn_sort_snapshot(session, n, current_id + 1); } /* @@ -194,21 +197,22 @@ __wt_txn_get_snapshot( void __wt_txn_get_evict_snapshot(WT_SESSION_IMPL *session) { - WT_TXN *txn; + WT_TXN_GLOBAL *txn_global; + wt_txnid_t oldest_id; - txn = &session->txn; + txn_global = &S2C(session)->txn_global; /* - * The oldest active snapshot ID in the system should *not* be visible - * to eviction. Create a snapshot containing that ID. + * The oldest active snapshot ID in the system that should *not* be + * visible to eviction. Create a snapshot containing that ID. */ - __wt_txn_get_oldest(session); - __txn_sort_snapshot( - session, 0, txn->oldest_snap_min, txn->oldest_snap_min); + __wt_txn_refresh(session, WT_TXN_NONE, 0, 0); + oldest_id = txn_global->oldest_id; + __txn_sort_snapshot(session, 0, oldest_id); /* * Note that we carefully don't update the global table with this - * snap_min value: there is already a running transaction in this + * snap_min value: there may already be a running transaction in this * session with its own value in the global table. */ } @@ -224,9 +228,7 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]) WT_CONNECTION_IMPL *conn; WT_TXN *txn; WT_TXN_GLOBAL *txn_global; - WT_TXN_STATE *s, *txn_state; - wt_txnid_t id, oldest_snap_min; - uint32_t i, n, session_cnt; + WT_TXN_STATE *txn_state; conn = S2C(session); txn = &session->txn; @@ -246,66 +248,8 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]) TXN_ISO_READ_COMMITTED : TXN_ISO_READ_UNCOMMITTED; F_SET(txn, TXN_RUNNING); - - do { - /* - * Allocate a transaction ID. - * - * We use an atomic increment to ensure that we get a unique - * ID, then publish that to the global state table. - * - * If two threads race to allocate an ID, only the latest ID - * will proceed. The winning thread can be sure its snapshot - * contains all of the earlier active IDs. Threads that race - * and get an earlier ID may not appear in the snapshot, - * but they will loop and allocate a new ID before proceeding - * to make any updates. - * - * This potentially wastes transaction IDs when threads race to - * begin transactions, but that is the price we pay to keep - * this path latch free. - */ - do { - txn->id = WT_ATOMIC_ADD(txn_global->current, 1); - } while (txn->id == WT_TXN_NONE || txn->id == WT_TXN_ABORTED); - WT_PUBLISH(txn_state->id, txn->id); - - /* - * If we are starting a snapshot isolation transaction, get - * a snapshot of the running transactions. - * - * If we already have a snapshot (e.g., for an auto-commit - * operation), update it so that the newly-allocated ID is - * visible. - */ - if (txn->isolation == TXN_ISO_SNAPSHOT) { - txn->last_gen = txn_global->gen; - oldest_snap_min = txn->id; - - /* Copy the array of concurrent transactions. */ - WT_ORDERED_READ(session_cnt, conn->session_cnt); - for (i = n = 0, s = txn_global->states; - i < session_cnt; - i++, s++) { - if ((id = s->snap_min) != WT_TXN_NONE && - TXNID_LT(id, oldest_snap_min)) - oldest_snap_min = id; - if ((id = s->id) != WT_TXN_NONE) - txn->snapshot[n++] = id; - } - - __txn_sort_snapshot( - session, n, txn->id, oldest_snap_min); - txn_state->snap_min = txn->snap_min; - } - - /* - * Ensure the snapshot reads are complete before re-checking - * the global current ID. - */ - WT_READ_BARRIER(); - } while (txn->id != txn_global->current); - + __wt_txn_refresh( + session, WT_TXN_NONE, 1, txn->isolation == TXN_ISO_SNAPSHOT); return (0); } @@ -340,7 +284,7 @@ __wt_txn_release(WT_SESSION_IMPL *session) if (session->ncursors == 0) __wt_txn_release_snapshot(session); txn->isolation = session->isolation; - F_CLR(txn, TXN_ERROR | TXN_OLDEST | TXN_RUNNING); + F_CLR(txn, TXN_ERROR | TXN_FORCE_EVICT | TXN_OLDEST | TXN_RUNNING); /* Update the global generation number. */ ++txn_global->gen; @@ -371,8 +315,8 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) * the cursor. Get the new snapshot before releasing the ID for the * commit. */ - if (session->ncursors > 0) - __wt_txn_get_snapshot(session, txn->id, WT_TXN_NONE, 1); + if (session->ncursors > 0 && txn->isolation != TXN_ISO_READ_UNCOMMITTED) + __wt_txn_refresh(session, txn->id + 1, 0, 1); __wt_txn_release(session); return (0); } |