diff options
author | Michael Cahill <michael.cahill@wiredtiger.com> | 2015-02-17 21:48:44 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@wiredtiger.com> | 2015-02-17 21:48:44 +1100 |
commit | 788265ed273c63183053e6325a9aa03c89c02860 (patch) | |
tree | 50c87aa3c025d5ea57fad396944fdf6dbd2b5347 | |
parent | 748e7b0c58b358b14340bacae41f9c46f3c06f7e (diff) | |
download | mongo-788265ed273c63183053e6325a9aa03c89c02860.tar.gz |
Combine the various checks for whether a page can be evicted into one place.
-rw-r--r-- | src/btree/bt_page.c | 18 | ||||
-rw-r--r-- | src/evict/evict_lru.c | 22 | ||||
-rw-r--r-- | src/evict/evict_page.c | 43 | ||||
-rw-r--r-- | src/include/btree.i | 88 |
4 files changed, 101 insertions, 70 deletions
diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c index d1da615dafe..c64ad6bae8d 100644 --- a/src/btree/bt_page.c +++ b/src/btree/bt_page.c @@ -37,12 +37,9 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags) page->type != WT_PAGE_ROW_LEAF) return (0); - /* - * Eviction may be turned off (although that's rare), or we may be in - * the middle of a checkpoint. - */ + /* Eviction may be turned off. */ if (LF_ISSET(WT_READ_NO_EVICT) || - F_ISSET(btree, WT_BTREE_NO_EVICTION) || btree->checkpointing) + F_ISSET(btree, WT_BTREE_NO_EVICTION)) return (0); /* @@ -52,16 +49,13 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags) if (page->modify == NULL) return (0); - /* - * If the page was recently split in-memory, don't force it out: we - * hope eviction will find it first. - */ - if (!__wt_txn_visible_all(session, page->modify->first_dirty_txn)) - return (0); - /* Trigger eviction on the next page release. */ __wt_page_evict_soon(page); + /* If eviction cannot succeed, don't try. */ + if (!__wt_page_can_evict(session, page, 1)) + return (0); + return (1); } diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index c6b962f9f5d..062356af637 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -839,7 +839,7 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t flags) WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; WT_DECL_RET; - u_int max_entries, old_slot, retries, slot; + u_int max_entries, old_slot, retries, slot, spins; int incr, dhandle_locked; WT_DECL_SPINLOCK_ID(id); @@ -885,8 +885,16 @@ retry: while (slot < max_entries && ret == 0) { * reference count to keep it alive while we sweep. */ if (!dhandle_locked) { - if ((ret = __wt_spin_trylock( - session, &conn->dhandle_lock, &id)) != 0) + for (spins = 0; (ret = __wt_spin_trylock( + session, &conn->dhandle_lock, &id)) == EBUSY && + !F_ISSET(cache, WT_EVICT_CLEAR_WALKS); + spins++) { + if (spins < 1000) + __wt_yield(); + else + __wt_sleep(0, 1000); + } + if (ret != 0) break; dhandle_locked = 1; } @@ -1126,12 +1134,8 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags) continue; } -fast: /* - * If the file is being checkpointed, there's a period of time - * where we can't discard dirty pages because of possible races - * with the checkpointing thread. - */ - if (modified && btree->checkpointing) +fast: /* If the page can't be evicted, give up. */ + if (!__wt_page_can_evict(session, page, 0)) continue; /* diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index 0cff584f2ab..180e30a924d 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -320,13 +320,11 @@ static int __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive, int top, int *inmem_splitp, int *istreep) { - WT_BTREE *btree; WT_DECL_RET; WT_PAGE *page; WT_PAGE_MODIFY *mod; uint32_t flags; - btree = S2BT(session); flags = WT_EVICTING; /* @@ -369,48 +367,11 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, WT_RET(ret); } - /* - * If the tree was deepened, there's a requirement that newly created - * internal pages not be evicted until all threads are known to have - * exited the original page index array, because evicting an internal - * page discards its WT_REF array, and a thread traversing the original - * page index array might see an freed WT_REF. During the split we set - * a transaction value, once that's globally visible, we know we can - * evict the created page. - */ - if (!exclusive && mod != NULL && WT_PAGE_IS_INTERNAL(page) && - !__wt_txn_visible_all(session, mod->mod_split_txn)) + /* Check whether the page can be evicted. */ + if (!__wt_page_can_evict(session, page, 0)) return (EBUSY); /* - * If the file is being checkpointed, we can't evict dirty pages: - * if we write a page and free the previous version of the page, that - * previous version might be referenced by an internal page already - * been written in the checkpoint, leaving the checkpoint inconsistent. - * - * Don't rely on new updates being skipped by the transaction used - * for transaction reads: (1) there are paths that dirty pages for - * artificial reasons; (2) internal pages aren't transactional; and - * (3) if an update was skipped during the checkpoint (leaving the page - * dirty), then rolled back, we could still successfully overwrite a - * page and corrupt the checkpoint. - * - * Further, we can't race with the checkpoint's reconciliation of - * an internal page as we evict a clean child from the page's subtree. - * This works in the usual way: eviction locks the page and then checks - * for existing hazard pointers, the checkpoint thread reconciling an - * internal page acquires hazard pointers on child pages it reads, and - * is blocked by the exclusive lock. - */ - if (mod != NULL && btree->checkpointing && - (__wt_page_is_modified(page) || - F_ISSET(mod, WT_PM_REC_MULTIBLOCK))) { - WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint); - WT_STAT_FAST_DATA_INCR(session, cache_eviction_checkpoint); - return (EBUSY); - } - - /* * Check for an append-only workload needing an in-memory split. * * We can't do this earlier because in-memory splits require exclusive diff --git a/src/include/btree.i b/src/include/btree.i index 2896620e503..dde9c2717fd 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -941,6 +941,81 @@ __wt_ref_info(WT_SESSION_IMPL *session, } /* + * __wt_page_can_evict -- + * Check whether a page can be evicted. + */ +static inline int +__wt_page_can_evict( + WT_SESSION_IMPL *session, WT_PAGE *page, int check_splits) +{ + WT_BTREE *btree; + WT_PAGE_MODIFY *mod; + + btree = S2BT(session); + mod = page->modify; + + /* + * If the tree was deepened, there's a requirement that newly created + * internal pages not be evicted until all threads are known to have + * exited the original page index array, because evicting an internal + * page discards its WT_REF array, and a thread traversing the original + * page index array might see an freed WT_REF. During the split we set + * a transaction value, once that's globally visible, we know we can + * evict the created page. + */ + if (WT_PAGE_IS_INTERNAL(page) && mod != NULL && + !__wt_txn_visible_all(session, mod->mod_split_txn)) + return (0); + + /* + * If the file is being checkpointed, we can't evict dirty pages: + * if we write a page and free the previous version of the page, that + * previous version might be referenced by an internal page already + * been written in the checkpoint, leaving the checkpoint inconsistent. + * + * Don't rely on new updates being skipped by the transaction used + * for transaction reads: (1) there are paths that dirty pages for + * artificial reasons; (2) internal pages aren't transactional; and + * (3) if an update was skipped during the checkpoint (leaving the page + * dirty), then rolled back, we could still successfully overwrite a + * page and corrupt the checkpoint. + * + * Further, we can't race with the checkpoint's reconciliation of + * an internal page as we evict a clean child from the page's subtree. + * This works in the usual way: eviction locks the page and then checks + * for existing hazard pointers, the checkpoint thread reconciling an + * internal page acquires hazard pointers on child pages it reads, and + * is blocked by the exclusive lock. + */ + if (mod != NULL && btree->checkpointing && + (__wt_page_is_modified(page) || + F_ISSET(mod, WT_PM_REC_MULTIBLOCK))) { + WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint); + WT_STAT_FAST_DATA_INCR(session, cache_eviction_checkpoint); + return (0); + } + + /* + * If we aren't (potentially) doing eviction that can restore updates + * and the updates on this page are too recent, give up. + */ + if (page->read_gen != WT_READGEN_OLDEST && mod != NULL && + !__wt_txn_visible_all(session, __wt_page_is_modified(page) ? + mod->update_txn : mod->rec_max_txn)) + return (0); + + /* + * If the page was recently split in-memory, don't force it out: we + * hope eviction will find it first. + */ + if (check_splits && mod != NULL && + !__wt_txn_visible_all(session, mod->inmem_split_txn)) + return (0); + + return (1); +} + +/* * __wt_page_release_evict -- * Attempt to release and immediately evict a page. */ @@ -1010,10 +1085,9 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) /* * Attempt to evict pages with the special "oldest" read generation. - * * This is set for pages that grow larger than the configured - * memory_page_max setting, and when we are attempting to scan without - * trashing the cache. + * memory_page_max setting, when we see many deleted items, and when we + * are attempting to scan without trashing the cache. * * Skip this if eviction is disabled for this operation or this tree, * or if there is no chance of eviction succeeding for dirty pages due @@ -1021,12 +1095,10 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) * it contains an update that isn't stable. Also skip forced eviction * if we just did an in-memory split. */ - if (LF_ISSET(WT_READ_NO_EVICT) || - page->read_gen != WT_READGEN_OLDEST || + if (page->read_gen != WT_READGEN_OLDEST || + LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(btree, WT_BTREE_NO_EVICTION) || - (__wt_page_is_modified(page) && (btree->checkpointing || - !__wt_txn_visible_all(session, page->modify->first_dirty_txn) || - !__wt_txn_visible_all(session, page->modify->inmem_split_txn)))) + !__wt_page_can_evict(session, page, 1)) return (__wt_hazard_clear(session, page)); WT_RET_BUSY_OK(__wt_page_release_evict(session, ref)); |