summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Cahill <michael.cahill@wiredtiger.com>2015-02-17 21:48:44 +1100
committerMichael Cahill <michael.cahill@wiredtiger.com>2015-02-17 21:48:44 +1100
commit788265ed273c63183053e6325a9aa03c89c02860 (patch)
tree50c87aa3c025d5ea57fad396944fdf6dbd2b5347
parent748e7b0c58b358b14340bacae41f9c46f3c06f7e (diff)
downloadmongo-788265ed273c63183053e6325a9aa03c89c02860.tar.gz
Combine the various checks for whether a page can be evicted into one place.
-rw-r--r--src/btree/bt_page.c18
-rw-r--r--src/evict/evict_lru.c22
-rw-r--r--src/evict/evict_page.c43
-rw-r--r--src/include/btree.i88
4 files changed, 101 insertions, 70 deletions
diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c
index d1da615dafe..c64ad6bae8d 100644
--- a/src/btree/bt_page.c
+++ b/src/btree/bt_page.c
@@ -37,12 +37,9 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
page->type != WT_PAGE_ROW_LEAF)
return (0);
- /*
- * Eviction may be turned off (although that's rare), or we may be in
- * the middle of a checkpoint.
- */
+ /* Eviction may be turned off. */
if (LF_ISSET(WT_READ_NO_EVICT) ||
- F_ISSET(btree, WT_BTREE_NO_EVICTION) || btree->checkpointing)
+ F_ISSET(btree, WT_BTREE_NO_EVICTION))
return (0);
/*
@@ -52,16 +49,13 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
if (page->modify == NULL)
return (0);
- /*
- * If the page was recently split in-memory, don't force it out: we
- * hope eviction will find it first.
- */
- if (!__wt_txn_visible_all(session, page->modify->first_dirty_txn))
- return (0);
-
/* Trigger eviction on the next page release. */
__wt_page_evict_soon(page);
+ /* If eviction cannot succeed, don't try. */
+ if (!__wt_page_can_evict(session, page, 1))
+ return (0);
+
return (1);
}
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index c6b962f9f5d..062356af637 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -839,7 +839,7 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t flags)
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
- u_int max_entries, old_slot, retries, slot;
+ u_int max_entries, old_slot, retries, slot, spins;
int incr, dhandle_locked;
WT_DECL_SPINLOCK_ID(id);
@@ -885,8 +885,16 @@ retry: while (slot < max_entries && ret == 0) {
* reference count to keep it alive while we sweep.
*/
if (!dhandle_locked) {
- if ((ret = __wt_spin_trylock(
- session, &conn->dhandle_lock, &id)) != 0)
+ for (spins = 0; (ret = __wt_spin_trylock(
+ session, &conn->dhandle_lock, &id)) == EBUSY &&
+ !F_ISSET(cache, WT_EVICT_CLEAR_WALKS);
+ spins++) {
+ if (spins < 1000)
+ __wt_yield();
+ else
+ __wt_sleep(0, 1000);
+ }
+ if (ret != 0)
break;
dhandle_locked = 1;
}
@@ -1126,12 +1134,8 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
continue;
}
-fast: /*
- * If the file is being checkpointed, there's a period of time
- * where we can't discard dirty pages because of possible races
- * with the checkpointing thread.
- */
- if (modified && btree->checkpointing)
+fast: /* If the page can't be evicted, give up. */
+ if (!__wt_page_can_evict(session, page, 0))
continue;
/*
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index 0cff584f2ab..180e30a924d 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -320,13 +320,11 @@ static int
__evict_review(WT_SESSION_IMPL *session, WT_REF *ref,
int exclusive, int top, int *inmem_splitp, int *istreep)
{
- WT_BTREE *btree;
WT_DECL_RET;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
uint32_t flags;
- btree = S2BT(session);
flags = WT_EVICTING;
/*
@@ -369,48 +367,11 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref,
WT_RET(ret);
}
- /*
- * If the tree was deepened, there's a requirement that newly created
- * internal pages not be evicted until all threads are known to have
- * exited the original page index array, because evicting an internal
- * page discards its WT_REF array, and a thread traversing the original
- * page index array might see an freed WT_REF. During the split we set
- * a transaction value, once that's globally visible, we know we can
- * evict the created page.
- */
- if (!exclusive && mod != NULL && WT_PAGE_IS_INTERNAL(page) &&
- !__wt_txn_visible_all(session, mod->mod_split_txn))
+ /* Check whether the page can be evicted. */
+ if (!__wt_page_can_evict(session, page, 0))
return (EBUSY);
/*
- * If the file is being checkpointed, we can't evict dirty pages:
- * if we write a page and free the previous version of the page, that
- * previous version might be referenced by an internal page already
- * been written in the checkpoint, leaving the checkpoint inconsistent.
- *
- * Don't rely on new updates being skipped by the transaction used
- * for transaction reads: (1) there are paths that dirty pages for
- * artificial reasons; (2) internal pages aren't transactional; and
- * (3) if an update was skipped during the checkpoint (leaving the page
- * dirty), then rolled back, we could still successfully overwrite a
- * page and corrupt the checkpoint.
- *
- * Further, we can't race with the checkpoint's reconciliation of
- * an internal page as we evict a clean child from the page's subtree.
- * This works in the usual way: eviction locks the page and then checks
- * for existing hazard pointers, the checkpoint thread reconciling an
- * internal page acquires hazard pointers on child pages it reads, and
- * is blocked by the exclusive lock.
- */
- if (mod != NULL && btree->checkpointing &&
- (__wt_page_is_modified(page) ||
- F_ISSET(mod, WT_PM_REC_MULTIBLOCK))) {
- WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint);
- WT_STAT_FAST_DATA_INCR(session, cache_eviction_checkpoint);
- return (EBUSY);
- }
-
- /*
* Check for an append-only workload needing an in-memory split.
*
* We can't do this earlier because in-memory splits require exclusive
diff --git a/src/include/btree.i b/src/include/btree.i
index 2896620e503..dde9c2717fd 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -941,6 +941,81 @@ __wt_ref_info(WT_SESSION_IMPL *session,
}
/*
+ * __wt_page_can_evict --
+ * Check whether a page can be evicted.
+ *
+ * Returns 1 if none of the checks below rules out eviction, 0 otherwise.
+ * A page with no modify structure always passes. When check_splits is
+ * non-zero, a page whose in-memory split transaction is not yet visible
+ * to all is also refused.
+ *
+ * Not a pure predicate: refusing a page because the file is being
+ * checkpointed increments the cache_eviction_checkpoint statistics.
+ */
+static inline int
+__wt_page_can_evict(
+ WT_SESSION_IMPL *session, WT_PAGE *page, int check_splits)
+{
+ WT_BTREE *btree;
+ WT_PAGE_MODIFY *mod;
+
+ btree = S2BT(session);
+ mod = page->modify;
+
+ /*
+ * If the tree was deepened, there's a requirement that newly created
+ * internal pages not be evicted until all threads are known to have
+ * exited the original page index array, because evicting an internal
+ * page discards its WT_REF array, and a thread traversing the original
+ * page index array might see a freed WT_REF. During the split we set
+ * a transaction value; once that's globally visible, we know we can
+ * evict the created page.
+ */
+ if (WT_PAGE_IS_INTERNAL(page) && mod != NULL &&
+ !__wt_txn_visible_all(session, mod->mod_split_txn))
+ return (0);
+
+ /*
+ * If the file is being checkpointed, we can't evict dirty pages:
+ * if we write a page and free the previous version of the page, that
+ * previous version might be referenced by an internal page that has
+ * already been written in the checkpoint, leaving the checkpoint
+ * inconsistent.
+ *
+ * Don't rely on new updates being skipped by the transaction used
+ * for transaction reads: (1) there are paths that dirty pages for
+ * artificial reasons; (2) internal pages aren't transactional; and
+ * (3) if an update was skipped during the checkpoint (leaving the page
+ * dirty), then rolled back, we could still successfully overwrite a
+ * page and corrupt the checkpoint.
+ *
+ * Further, we can't race with the checkpoint's reconciliation of
+ * an internal page as we evict a clean child from the page's subtree.
+ * This works in the usual way: eviction locks the page and then checks
+ * for existing hazard pointers, the checkpoint thread reconciling an
+ * internal page acquires hazard pointers on child pages it reads, and
+ * is blocked by the exclusive lock.
+ *
+ * The statistics are bumped on every refusal taken through this path.
+ */
+ if (mod != NULL && btree->checkpointing &&
+ (__wt_page_is_modified(page) ||
+ F_ISSET(mod, WT_PM_REC_MULTIBLOCK))) {
+ WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint);
+ WT_STAT_FAST_DATA_INCR(session, cache_eviction_checkpoint);
+ return (0);
+ }
+
+ /*
+ * If we aren't (potentially) doing eviction that can restore updates
+ * and the updates on this page are too recent, give up.
+ *
+ * The check is skipped for pages whose read generation is
+ * WT_READGEN_OLDEST. A modified page is judged by mod->update_txn,
+ * an unmodified one by mod->rec_max_txn.
+ */
+ if (page->read_gen != WT_READGEN_OLDEST && mod != NULL &&
+ !__wt_txn_visible_all(session, __wt_page_is_modified(page) ?
+ mod->update_txn : mod->rec_max_txn))
+ return (0);
+
+ /*
+ * If the page was recently split in-memory, don't force it out: we
+ * hope eviction will find it first. Only applied when the caller
+ * passes a non-zero check_splits.
+ */
+ if (check_splits && mod != NULL &&
+ !__wt_txn_visible_all(session, mod->inmem_split_txn))
+ return (0);
+
+ return (1);
+}
+
+/*
* __wt_page_release_evict --
* Attempt to release and immediately evict a page.
*/
@@ -1010,10 +1085,9 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
/*
* Attempt to evict pages with the special "oldest" read generation.
- *
* This is set for pages that grow larger than the configured
- * memory_page_max setting, and when we are attempting to scan without
- * trashing the cache.
+ * memory_page_max setting, when we see many deleted items, and when we
+ * are attempting to scan without trashing the cache.
*
* Skip this if eviction is disabled for this operation or this tree,
* or if there is no chance of eviction succeeding for dirty pages due
@@ -1021,12 +1095,10 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
* it contains an update that isn't stable. Also skip forced eviction
* if we just did an in-memory split.
*/
- if (LF_ISSET(WT_READ_NO_EVICT) ||
- page->read_gen != WT_READGEN_OLDEST ||
+ if (page->read_gen != WT_READGEN_OLDEST ||
+ LF_ISSET(WT_READ_NO_EVICT) ||
F_ISSET(btree, WT_BTREE_NO_EVICTION) ||
- (__wt_page_is_modified(page) && (btree->checkpointing ||
- !__wt_txn_visible_all(session, page->modify->first_dirty_txn) ||
- !__wt_txn_visible_all(session, page->modify->inmem_split_txn))))
+ !__wt_page_can_evict(session, page, 1))
return (__wt_hazard_clear(session, page));
WT_RET_BUSY_OK(__wt_page_release_evict(session, ref));