diff options
author | Michael Cahill <michael.cahill@wiredtiger.com> | 2015-01-14 04:54:08 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@wiredtiger.com> | 2015-01-14 04:54:08 +1100 |
commit | 16b057c89ce15ce6d37224ab04a826c0eac14896 (patch) | |
tree | f7d2274b31b46f80b55037f6beda73589b3f24cf | |
parent | 53cfcbcc48c857cfbfb08a043c1476a02bccb459 (diff) | |
parent | 3090b47b76f6876e18fabe41b52926099f5243d8 (diff) | |
download | mongo-16b057c89ce15ce6d37224ab04a826c0eac14896.tar.gz |
Merge branch 'develop' into slow-deepen-split
Conflicts:
src/btree/bt_split.c
-rw-r--r-- | dist/stat_data.py | 2 | ||||
-rw-r--r-- | src/btree/bt_delete.c | 9 | ||||
-rw-r--r-- | src/btree/bt_page.c | 15 | ||||
-rw-r--r-- | src/btree/bt_split.c | 32 | ||||
-rw-r--r-- | src/btree/bt_walk.c | 9 | ||||
-rw-r--r-- | src/evict/evict_lru.c | 13 | ||||
-rw-r--r-- | src/include/btree.i | 97 | ||||
-rw-r--r-- | src/include/cache.i | 59 | ||||
-rw-r--r-- | src/include/cursor.i | 12 | ||||
-rw-r--r-- | src/include/stat.h | 1 | ||||
-rw-r--r-- | src/include/txn.i | 10 | ||||
-rw-r--r-- | src/include/wiredtiger.in | 202 | ||||
-rw-r--r-- | src/include/wt_internal.h | 2 | ||||
-rw-r--r-- | src/lsm/lsm_cursor.c | 2 | ||||
-rw-r--r-- | src/session/session_api.c | 7 | ||||
-rw-r--r-- | src/support/stat.c | 3 | ||||
-rw-r--r-- | src/txn/txn.c | 9 | ||||
-rw-r--r-- | src/txn/txn_log.c | 10 |
18 files changed, 295 insertions, 199 deletions
diff --git a/dist/stat_data.py b/dist/stat_data.py index ae442bcc463..69e8d2ed21e 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -162,6 +162,8 @@ connection_stats = [ 'pages selected for eviction unable to be evicted'), CacheStat('cache_eviction_force', 'pages evicted because they exceeded the in-memory maximum'), + CacheStat('cache_eviction_force_delete', + 'pages evicted because they had chains of deleted items'), CacheStat('cache_eviction_force_fail', 'failed eviction of pages that exceeded the in-memory maximum'), CacheStat('cache_eviction_hazard', 'hazard pointer blocked page eviction'), diff --git a/src/btree/bt_delete.c b/src/btree/bt_delete.c index c97ea176c97..570b7f80742 100644 --- a/src/btree/bt_delete.c +++ b/src/btree/bt_delete.c @@ -207,6 +207,9 @@ __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref) { int skip; + if (ref->state != WT_REF_DELETED) + return (0); + /* * Deleted pages come from two sources: either it's a fast-delete as * described above, or the page has been emptied by other operations @@ -225,11 +228,13 @@ __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref) * the page could switch to an in-memory state at any time. Lock down * the structure, just to be safe. */ + if (ref->page_del == NULL) + return (1); + if (!WT_ATOMIC_CAS4(ref->state, WT_REF_DELETED, WT_REF_LOCKED)) return (0); - skip = ref->page_del == NULL || - __wt_txn_visible(session, ref->page_del->txnid) ? 1 : 0; + skip = __wt_txn_visible(session, ref->page_del->txnid) ? 1 : 0; WT_PUBLISH(ref->state, WT_REF_DELETED); return (skip); diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c index 181ffdb3736..561e1c19218 100644 --- a/src/btree/bt_page.c +++ b/src/btree/bt_page.c @@ -37,8 +37,11 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page) page->type != WT_PAGE_ROW_LEAF) return (0); - /* Eviction may be turned off, although that's rare. 
*/ - if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) + /* + * Eviction may be turned off (although that's rare), or we may be in + * the middle of a checkpoint. + */ + if (F_ISSET(btree, WT_BTREE_NO_EVICTION) || btree->checkpointing) return (0); /* @@ -128,7 +131,13 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags force_attempts < 10 && __evict_force_check(session, page)) { ++force_attempts; - WT_RET(__wt_page_release(session, ref, flags)); + if ((ret = __wt_page_release_busy( + session, ref, flags)) == EBUSY) { + /* If forced eviction fails, stall. */ + ret = 0; + wait_cnt += 1000; + } else + WT_RET(ret); WT_STAT_FAST_CONN_INCR( session, page_forcible_evict_blocked); break; diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 2bae34b620b..9a3186a0015 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -812,9 +812,10 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, WT_DECL_RET; WT_PAGE *parent; WT_PAGE_INDEX *alloc_index, *pindex; - WT_REF **alloc_refp, *parent_ref; + WT_REF **alloc_refp, *next_ref, *parent_ref; size_t size; - uint32_t children, i, j, parent_entries, result_entries; + uint32_t children, i, j; + uint32_t deleted_entries, parent_entries, result_entries; int complete, hazard, locked; parent = NULL; /* -Wconditional-uninitialized */ @@ -861,7 +862,22 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, pindex = WT_INTL_INDEX_COPY(parent); parent_entries = pindex->entries; - result_entries = (parent_entries - 1) + new_entries; + + /* + * Remove any refs to deleted pages while we are splitting, we have + * the internal page locked down, and are copying the refs into a new + * array anyway. 
+ */ + for (i = 0, deleted_entries = 0; i < parent_entries; ++i) + if (pindex->index[i]->state == WT_REF_DELETED) + deleted_entries++; + + /* + * The final entry count consists of: The original count, plus any + * new pages, less any refs we are removing because they only + * contained deleted items, less 1 for the page being replaced. + */ + result_entries = (parent_entries + new_entries) - (deleted_entries + 1); /* * Allocate and initialize a new page index array for the parent, then @@ -873,8 +889,9 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, WT_MEMSIZE_ADD(parent_incr, size); alloc_index->index = (WT_REF **)(alloc_index + 1); alloc_index->entries = result_entries; - for (alloc_refp = alloc_index->index, i = 0; i < parent_entries; ++i) - if (pindex->index[i] == ref) + for (alloc_refp = alloc_index->index, i = 0; i < parent_entries; ++i) { + next_ref = pindex->index[i]; + if (next_ref == ref) for (j = 0; j < new_entries; ++j) { ref_new[j]->home = parent; *alloc_refp++ = ref_new[j]; @@ -886,8 +903,9 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, */ ref_new[j] = NULL; } - else - *alloc_refp++ = pindex->index[i]; + else if (next_ref->state != WT_REF_DELETED) + *alloc_refp++ = next_ref; + } /* * Update the parent page's index: this update makes the split visible diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c index c74a7177401..a2b2a6bb7c8 100644 --- a/src/btree/bt_walk.c +++ b/src/btree/bt_walk.c @@ -208,6 +208,12 @@ restart: /* break; } else if (LF_ISSET(WT_READ_TRUNCATE)) { /* + * Avoid pulling a deleted page back in to try + * to delete it again. + */ + if (__wt_delete_page_skip(session, ref)) + break; + /* * If deleting a range, try to delete the page * without instantiating it. */ @@ -242,8 +248,7 @@ restart: /* * If iterating a cursor, try to skip deleted * pages that are visible to us. 
*/ - if (ref->state == WT_REF_DELETED && - __wt_delete_page_skip(session, ref)) + if (__wt_delete_page_skip(session, ref)) break; } diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 60a5f82f233..a4ae0aaf55b 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -437,7 +437,7 @@ __evict_pass(WT_SESSION_IMPL *session) WT_EVICT_WORKER *worker; int loop; uint32_t flags; - uint64_t bytes_inuse, pages_evicted; + uint64_t bytes_inuse, dirty_target_size, pages_evicted, target_size; conn = S2C(session); cache = conn->cache; @@ -465,9 +465,16 @@ __evict_pass(WT_SESSION_IMPL *session) if (loop > 10) LF_SET(WT_EVICT_PASS_AGGRESSIVE); - /* Start a worker if we have capacity and the cache is full. */ + /* + * Start a worker if we have capacity and we haven't reached + * the eviction targets. + */ bytes_inuse = __wt_cache_bytes_inuse(cache); - if (bytes_inuse > conn->cache_size && + target_size = (conn->cache_size * cache->eviction_target) / 100; + dirty_target_size = + (conn->cache_size * cache->eviction_dirty_target) / 100; + if ((bytes_inuse > target_size || + cache->bytes_dirty > dirty_target_size) && conn->evict_workers < conn->evict_workers_max) { WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER, "Starting evict worker: %"PRIu32"\n", diff --git a/src/include/btree.i b/src/include/btree.i index a333e4af565..6955b672926 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -165,65 +165,6 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) } /* - * __wt_cache_read_gen -- - * Get the current read generation number. - */ -static inline uint64_t -__wt_cache_read_gen(WT_SESSION_IMPL *session) -{ - return (S2C(session)->cache->read_gen); -} - -/* - * __wt_cache_read_gen_incr -- - * Increment the current read generation number. - */ -static inline void -__wt_cache_read_gen_incr(WT_SESSION_IMPL *session) -{ - ++S2C(session)->cache->read_gen; -} - -/* - * __wt_cache_read_gen_set -- - * Get the read generation to store in a page. 
- */ -static inline uint64_t -__wt_cache_read_gen_set(WT_SESSION_IMPL *session) -{ - /* - * We return read-generations from the future (where "the future" is - * measured by increments of the global read generation). The reason - * is because when acquiring a new hazard pointer for a page, we can - * check its read generation, and if the read generation isn't less - * than the current global generation, we don't bother updating the - * page. In other words, the goal is to avoid some number of updates - * immediately after each update we have to make. - */ - return (__wt_cache_read_gen(session) + WT_READGEN_STEP); -} - -/* - * __wt_cache_pages_inuse -- - * Return the number of pages in use. - */ -static inline uint64_t -__wt_cache_pages_inuse(WT_CACHE *cache) -{ - return (cache->pages_inmem - cache->pages_evict); -} - -/* - * __wt_cache_bytes_inuse -- - * Return the number of bytes in use. - */ -static inline uint64_t -__wt_cache_bytes_inuse(WT_CACHE *cache) -{ - return (cache->bytes_inmem - cache->bytes_evict); -} - -/* * __wt_page_evict_soon -- * Set a page to be evicted as soon as possible. */ @@ -917,16 +858,16 @@ __wt_ref_info(WT_SESSION_IMPL *session, } /* - * __wt_page_release -- - * Release a reference to a page. + * __wt_page_release_busy -- + * Release a reference to a page, fail if busy during forced eviction. */ static inline int -__wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) +__wt_page_release_busy(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) { WT_BTREE *btree; WT_DECL_RET; WT_PAGE *page; - int locked; + int locked, too_big; btree = S2BT(session); @@ -938,6 +879,8 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) return (0); page = ref->page; + too_big = (page->memory_footprint < btree->maxmempage) ? 0 : 1; + /* * Attempt to evict pages with the special "oldest" read generation. 
* @@ -970,12 +913,19 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) return (ret); (void)WT_ATOMIC_ADD4(btree->evict_busy, 1); - if ((ret = __wt_evict_page(session, ref)) == 0) - WT_STAT_FAST_CONN_INCR(session, cache_eviction_force); - else { + if ((ret = __wt_evict_page(session, ref)) == 0) { + if (too_big) + WT_STAT_FAST_CONN_INCR(session, cache_eviction_force); + else + /* + * If the page isn't too big, we are evicting it + * because it had a chain of deleted entries that make + * traversal expensive. + */ + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_force_delete); + } else { WT_STAT_FAST_CONN_INCR(session, cache_eviction_force_fail); - if (ret == EBUSY) - ret = 0; } (void)WT_ATOMIC_SUB4(btree->evict_busy, 1); @@ -983,6 +933,17 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) } /* + * __wt_page_release -- + * Release a reference to a page. + */ +static inline int +__wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) +{ + WT_RET_BUSY_OK(__wt_page_release_busy(session, ref, flags)); + return (0); +} + +/* * __wt_page_swap_func -- * Swap one page's hazard pointer for another one when hazard pointer * coupling up/down the tree. diff --git a/src/include/cache.i b/src/include/cache.i index b997781272a..ee969255241 100644 --- a/src/include/cache.i +++ b/src/include/cache.i @@ -7,6 +7,65 @@ */ /* + * __wt_cache_read_gen -- + * Get the current read generation number. + */ +static inline uint64_t +__wt_cache_read_gen(WT_SESSION_IMPL *session) +{ + return (S2C(session)->cache->read_gen); +} + +/* + * __wt_cache_read_gen_incr -- + * Increment the current read generation number. + */ +static inline void +__wt_cache_read_gen_incr(WT_SESSION_IMPL *session) +{ + ++S2C(session)->cache->read_gen; +} + +/* + * __wt_cache_read_gen_set -- + * Get the read generation to store in a page. 
+ */ +static inline uint64_t +__wt_cache_read_gen_set(WT_SESSION_IMPL *session) +{ + /* + * We return read-generations from the future (where "the future" is + * measured by increments of the global read generation). The reason + * is because when acquiring a new hazard pointer for a page, we can + * check its read generation, and if the read generation isn't less + * than the current global generation, we don't bother updating the + * page. In other words, the goal is to avoid some number of updates + * immediately after each update we have to make. + */ + return (__wt_cache_read_gen(session) + WT_READGEN_STEP); +} + +/* + * __wt_cache_pages_inuse -- + * Return the number of pages in use. + */ +static inline uint64_t +__wt_cache_pages_inuse(WT_CACHE *cache) +{ + return (cache->pages_inmem - cache->pages_evict); +} + +/* + * __wt_cache_bytes_inuse -- + * Return the number of bytes in use. + */ +static inline uint64_t +__wt_cache_bytes_inuse(WT_CACHE *cache) +{ + return (cache->bytes_inmem - cache->bytes_evict); +} + +/* * __wt_eviction_check -- * Wake the eviction server if necessary. */ diff --git a/src/include/cursor.i b/src/include/cursor.i index ae6aafdd638..8fa9790e096 100644 --- a/src/include/cursor.i +++ b/src/include/cursor.i @@ -176,11 +176,23 @@ static inline int __cursor_func_init(WT_CURSOR_BTREE *cbt, int reenter) { WT_SESSION_IMPL *session; + WT_TXN *txn; session = (WT_SESSION_IMPL *)cbt->iface.session; + txn = &session->txn; if (reenter) WT_RET(__curfile_leave(cbt)); + + /* + * If there is no transaction active in this thread and we haven't + * checked if the cache is full, do it now. If we have to block for + * eviction, this is the best time to do it. 
+ */ + if (F_ISSET(txn, TXN_RUNNING) && + !F_ISSET(txn, TXN_HAS_ID) && !F_ISSET(txn, TXN_HAS_SNAPSHOT)) + WT_RET(__wt_cache_full_check(session)); + if (!F_ISSET(cbt, WT_CBT_ACTIVE)) WT_RET(__curfile_enter(cbt)); __wt_txn_cursor_op(session); diff --git a/src/include/stat.h b/src/include/stat.h index cbd22c7b9d0..6efb9970065 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -164,6 +164,7 @@ struct __wt_connection_stats { WT_STATS cache_eviction_dirty; WT_STATS cache_eviction_fail; WT_STATS cache_eviction_force; + WT_STATS cache_eviction_force_delete; WT_STATS cache_eviction_force_fail; WT_STATS cache_eviction_hazard; WT_STATS cache_eviction_internal; diff --git a/src/include/txn.i b/src/include/txn.i index 745a8f75a99..656181790ed 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -227,6 +227,16 @@ __wt_txn_id_check(WT_SESSION_IMPL *session) txn = &session->txn; WT_ASSERT(session, F_ISSET(txn, TXN_RUNNING)); + + /* + * If there is no transaction active in this thread and we haven't + * checked if the cache is full, do it now. If we have to block for + * eviction, this is the best time to do it. + */ + if (F_ISSET(txn, TXN_RUNNING) && + !F_ISSET(txn, TXN_HAS_ID) && !F_ISSET(txn, TXN_HAS_SNAPSHOT)) + WT_RET(__wt_cache_full_check(session)); + if (!F_ISSET(txn, TXN_HAS_ID)) { conn = S2C(session); txn_global = &conn->txn_global; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index c731c107651..91eb41af4f3 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -3165,206 +3165,208 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_EVICTION_FAIL 1030 /*! cache: pages evicted because they exceeded the in-memory maximum */ #define WT_STAT_CONN_CACHE_EVICTION_FORCE 1031 +/*! cache: pages evicted because they had chains of deleted items */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1032 /*! 
cache: failed eviction of pages that exceeded the in-memory maximum */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1032 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1033 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1033 +#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1034 /*! cache: internal pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1034 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1035 /*! cache: maximum page size at eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1035 +#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1036 /*! cache: eviction server candidate queue empty when topping up */ -#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1036 +#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1037 /*! cache: eviction server candidate queue not empty when topping up */ -#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1037 +#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1038 /*! cache: eviction server evicting pages */ -#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1038 +#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1039 /*! cache: eviction server populating queue, but not evicting pages */ -#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1039 +#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1040 /*! cache: eviction server unable to reach eviction goal */ -#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1040 +#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1041 /*! cache: pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT 1041 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT 1042 /*! cache: pages walked for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK 1042 +#define WT_STAT_CONN_CACHE_EVICTION_WALK 1043 /*! cache: in-memory page splits */ -#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1043 +#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1044 /*! 
cache: tracked dirty pages in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1044 +#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1045 /*! cache: pages currently held in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_INUSE 1045 +#define WT_STAT_CONN_CACHE_PAGES_INUSE 1046 /*! cache: pages read into cache */ -#define WT_STAT_CONN_CACHE_READ 1046 +#define WT_STAT_CONN_CACHE_READ 1047 /*! cache: pages written from cache */ -#define WT_STAT_CONN_CACHE_WRITE 1047 +#define WT_STAT_CONN_CACHE_WRITE 1048 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1048 +#define WT_STAT_CONN_COND_WAIT 1049 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1049 +#define WT_STAT_CONN_CURSOR_CREATE 1050 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1050 +#define WT_STAT_CONN_CURSOR_INSERT 1051 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1051 +#define WT_STAT_CONN_CURSOR_NEXT 1052 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1052 +#define WT_STAT_CONN_CURSOR_PREV 1053 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1053 +#define WT_STAT_CONN_CURSOR_REMOVE 1054 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1054 +#define WT_STAT_CONN_CURSOR_RESET 1055 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1055 +#define WT_STAT_CONN_CURSOR_SEARCH 1056 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1056 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1057 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1057 +#define WT_STAT_CONN_CURSOR_UPDATE 1058 /*! data-handle: connection dhandles swept */ -#define WT_STAT_CONN_DH_CONN_HANDLES 1058 +#define WT_STAT_CONN_DH_CONN_HANDLES 1059 /*! data-handle: connection candidate referenced */ -#define WT_STAT_CONN_DH_CONN_REF 1059 +#define WT_STAT_CONN_DH_CONN_REF 1060 /*! 
data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_CONN_SWEEPS 1060 +#define WT_STAT_CONN_DH_CONN_SWEEPS 1061 /*! data-handle: connection time-of-death sets */ -#define WT_STAT_CONN_DH_CONN_TOD 1061 +#define WT_STAT_CONN_DH_CONN_TOD 1062 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1062 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1063 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1063 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1064 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1064 +#define WT_STAT_CONN_FILE_OPEN 1065 /*! log: log buffer size increases */ -#define WT_STAT_CONN_LOG_BUFFER_GROW 1065 +#define WT_STAT_CONN_LOG_BUFFER_GROW 1066 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1066 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1067 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1067 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1068 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1068 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1069 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1069 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1070 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1070 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1071 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1071 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1072 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1072 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1073 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1073 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1074 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1074 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1075 /*! 
log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1075 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1076 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1076 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1077 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1077 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1078 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1078 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1079 /*! log: log read operations */ -#define WT_STAT_CONN_LOG_READS 1079 +#define WT_STAT_CONN_LOG_READS 1080 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1080 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1081 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1081 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1082 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1082 +#define WT_STAT_CONN_LOG_SCANS 1083 /*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1083 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1084 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1084 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1085 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1085 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1086 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1086 +#define WT_STAT_CONN_LOG_SLOT_RACES 1087 /*! log: slots selected for switching that were unavailable */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1087 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1088 /*! log: record size exceeded maximum */ -#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1088 +#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1089 /*! log: failed to find a slot large enough for record */ -#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1089 +#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1090 /*! 
log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1090 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1091 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1091 +#define WT_STAT_CONN_LOG_SYNC 1092 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1092 +#define WT_STAT_CONN_LOG_WRITES 1093 /*! LSM: sleep for LSM checkpoint throttle */ -#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1093 +#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1094 /*! LSM: sleep for LSM merge throttle */ -#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1094 +#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1095 /*! LSM: rows merged in an LSM tree */ -#define WT_STAT_CONN_LSM_ROWS_MERGED 1095 +#define WT_STAT_CONN_LSM_ROWS_MERGED 1096 /*! LSM: application work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1096 +#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1097 /*! LSM: merge work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1097 +#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1098 /*! LSM: tree queue hit maximum */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1098 +#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1099 /*! LSM: switch work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1099 +#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1100 /*! LSM: tree maintenance operations scheduled */ -#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1100 +#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1101 /*! LSM: tree maintenance operations discarded */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1101 +#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1102 /*! LSM: tree maintenance operations executed */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1102 +#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1103 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1103 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1104 /*! 
connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1104 +#define WT_STAT_CONN_MEMORY_FREE 1105 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1105 +#define WT_STAT_CONN_MEMORY_GROW 1106 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1106 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1107 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1107 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1108 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1108 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1109 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1109 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1110 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1110 +#define WT_STAT_CONN_PAGE_SLEEP 1111 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1111 +#define WT_STAT_CONN_READ_IO 1112 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1112 +#define WT_STAT_CONN_REC_PAGES 1113 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1113 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1114 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1114 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1115 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1115 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1116 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1116 +#define WT_STAT_CONN_RWLOCK_READ 1117 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1117 +#define WT_STAT_CONN_RWLOCK_WRITE 1118 /*! 
session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1118 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1119 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1119 +#define WT_STAT_CONN_SESSION_OPEN 1120 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1120 +#define WT_STAT_CONN_TXN_BEGIN 1121 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1121 +#define WT_STAT_CONN_TXN_CHECKPOINT 1122 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1122 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1123 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1123 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1124 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1124 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1125 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1125 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1126 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1126 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1127 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1127 +#define WT_STAT_CONN_TXN_COMMIT 1128 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1128 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1129 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1129 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1130 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1130 +#define WT_STAT_CONN_TXN_ROLLBACK 1131 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1131 +#define WT_STAT_CONN_WRITE_IO 1132 /*! 
* @} diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h index 138b64a6e27..1b3a9b62626 100644 --- a/src/include/wt_internal.h +++ b/src/include/wt_internal.h @@ -322,13 +322,13 @@ struct __wt_update; #include "misc.i" #include "intpack.i" /* required by cell.i, packing.i */ #include "packing.i" +#include "cache.i" /* required by txn.i */ #include "cell.i" /* required by btree.i */ #include "mutex.i" /* required by btree.i */ #include "txn.i" /* required by btree.i */ #include "btree.i" /* required by cursor.i */ -#include "cache.i" /* required by cursor.i */ #include "cursor.i" #include "bitstring.i" diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 52bd3e9373d..0d44b16d85c 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -171,8 +171,6 @@ __clsm_enter(WT_CURSOR_LSM *clsm, int reset, int update) lsm_tree->nchunks != 0) goto open; - WT_RET(__wt_cache_full_check(session)); - if (clsm->dsk_gen != lsm_tree->dsk_gen && lsm_tree->nchunks != 0) goto open; diff --git a/src/session/session_api.c b/src/session/session_api.c index 3ab5e0acab1..8ee143133ae 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -736,13 +736,6 @@ __session_begin_transaction(WT_SESSION *wt_session, const char *config) if (F_ISSET(&session->txn, TXN_RUNNING)) WT_ERR_MSG(session, EINVAL, "Transaction already running"); - /* - * There is no transaction active in this thread; check if the cache is - * full, if we have to block for eviction, this is the best time to do - * it. 
- */ - WT_ERR(__wt_cache_full_check(session)); - ret = __wt_txn_begin(session, cfg); err: API_END_RET(session, ret); diff --git a/src/support/stat.c b/src/support/stat.c index f4ae082add3..223d62d0559 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -376,6 +376,8 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats) "cache: pages currently held in the cache"; stats->cache_eviction_force.desc = "cache: pages evicted because they exceeded the in-memory maximum"; + stats->cache_eviction_force_delete.desc = + "cache: pages evicted because they had chains of deleted items"; stats->cache_eviction_app.desc = "cache: pages evicted by application threads"; stats->cache_read.desc = "cache: pages read into cache"; @@ -554,6 +556,7 @@ __wt_stat_refresh_connection_stats(void *stats_arg) stats->cache_eviction_dirty.v = 0; stats->cache_eviction_deepen.v = 0; stats->cache_eviction_force.v = 0; + stats->cache_eviction_force_delete.v = 0; stats->cache_eviction_app.v = 0; stats->cache_read.v = 0; stats->cache_eviction_fail.v = 0; diff --git a/src/txn/txn.c b/src/txn/txn.c index fd80efd5ebd..5b8f11a88a5 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -361,8 +361,15 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) /* If we are logging, write a commit log record. */ if (ret == 0 && txn->mod_count > 0 && FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED) && - !F_ISSET(session, WT_SESSION_NO_LOGGING)) + !F_ISSET(session, WT_SESSION_NO_LOGGING)) { + /* + * We are about to block on I/O writing the log. + * Release our snapshot in case it is keeping data pinned. + * This is particularly important for checkpoints. + */ + __wt_txn_release_snapshot(session); ret = __wt_txn_log_commit(session, cfg); + } /* * If anything went wrong, roll back. 
diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index f66bd7e09c8..f706efa8a70 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -270,6 +270,7 @@ __wt_txn_checkpoint_log( { WT_DECL_ITEM(logrec); WT_DECL_RET; + WT_ITEM *ckpt_snapshot, empty; WT_LSN *ckpt_lsn; WT_TXN *txn; uint8_t *end, *p; @@ -319,19 +320,22 @@ __wt_txn_checkpoint_log( */ if (!txn->full_ckpt) { txn->ckpt_nsnapshot = 0; + WT_CLEAR(empty); + ckpt_snapshot = &empty; *ckpt_lsn = S2C(session)->log->alloc_lsn; - } + } else + ckpt_snapshot = txn->ckpt_snapshot; /* Write the checkpoint log record. */ WT_ERR(__wt_struct_size(session, &recsize, fmt, rectype, ckpt_lsn->file, ckpt_lsn->offset, - txn->ckpt_nsnapshot, &txn->ckpt_snapshot)); + txn->ckpt_nsnapshot, ckpt_snapshot)); WT_ERR(__wt_logrec_alloc(session, recsize, &logrec)); WT_ERR(__wt_struct_pack(session, (uint8_t *)logrec->data + logrec->size, recsize, fmt, rectype, ckpt_lsn->file, ckpt_lsn->offset, - txn->ckpt_nsnapshot, &txn->ckpt_snapshot)); + txn->ckpt_nsnapshot, ckpt_snapshot)); logrec->size += (uint32_t)recsize; WT_ERR(__wt_log_write(session, logrec, lsnp, 0)); |