summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Michael Cahill <michael.cahill@wiredtiger.com> 2015-02-17 21:50:53 +1100
committer Michael Cahill <michael.cahill@wiredtiger.com> 2015-02-17 21:50:53 +1100
commit 67527fc235406469e69dbaec3dcd571469e660c0 (patch)
tree a7459cc3783dd882b54dd496d464048418742838
parent 4d37b9ac5c8719e3e92482348d64c24a4e96ed22 (diff)
download mongo-67527fc235406469e69dbaec3dcd571469e660c0.tar.gz
Make the eviction walk incremental: don't spend too long in any one file, fix tracking of whether we are making progress.
-rw-r--r-- src/btree/bt_compact.c 2
-rw-r--r-- src/btree/bt_curnext.c 2
-rw-r--r-- src/btree/bt_curprev.c 2
-rw-r--r-- src/btree/bt_handle.c 2
-rw-r--r-- src/btree/bt_stat.c 4
-rw-r--r-- src/btree/bt_sync.c 10
-rw-r--r-- src/btree/bt_walk.c 6
-rw-r--r-- src/evict/evict_file.c 8
-rw-r--r-- src/evict/evict_lru.c 36
-rw-r--r-- src/include/cache.h 7
-rw-r--r-- src/include/extern.h 2
11 files changed, 46 insertions, 35 deletions
diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c
index 405410c6a1c..eeec041d57b 100644
--- a/src/btree/bt_compact.c
+++ b/src/btree/bt_compact.c
@@ -149,7 +149,7 @@ __wt_compact(WT_SESSION_IMPL *session, const char *cfg[])
* read, set its generation to a low value so it is evicted
* quickly.
*/
- WT_ERR(__wt_tree_walk(session, &ref,
+ WT_ERR(__wt_tree_walk(session, &ref, NULL,
WT_READ_COMPACT | WT_READ_NO_GEN | WT_READ_WONT_NEED));
if (ref == NULL)
break;
diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c
index 6140dca1fad..d80a5f4740d 100644
--- a/src/btree/bt_curnext.c
+++ b/src/btree/bt_curnext.c
@@ -487,7 +487,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating)
__wt_page_evict_soon(page);
cbt->page_deleted_count = 0;
- WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
+ WT_ERR(__wt_tree_walk(session, &cbt->ref, NULL, flags));
WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
}
diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c
index 880cb777954..f1ca81ee145 100644
--- a/src/btree/bt_curprev.c
+++ b/src/btree/bt_curprev.c
@@ -574,7 +574,7 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, int truncating)
__wt_page_evict_soon(page);
cbt->page_deleted_count = 0;
- WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
+ WT_ERR(__wt_tree_walk(session, &cbt->ref, NULL, flags));
WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
}
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index 6a2789c909b..299849ad365 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -566,7 +566,7 @@ __btree_get_last_recno(WT_SESSION_IMPL *session)
btree = S2BT(session);
next_walk = NULL;
- WT_RET(__wt_tree_walk(session, &next_walk, WT_READ_PREV));
+ WT_RET(__wt_tree_walk(session, &next_walk, NULL, WT_READ_PREV));
if (next_walk == NULL)
return (WT_NOTFOUND);
diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c
index b8d56fe9d92..2e34a925f84 100644
--- a/src/btree/bt_stat.c
+++ b/src/btree/bt_stat.c
@@ -56,8 +56,8 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
WT_STAT_SET(stats, btree_row_leaf, 0);
next_walk = NULL;
- while ((ret =
- __wt_tree_walk(session, &next_walk, 0)) == 0 && next_walk != NULL) {
+ while ((ret = __wt_tree_walk(session, &next_walk, NULL, 0)) == 0 &&
+ next_walk != NULL) {
WT_WITH_PAGE_INDEX(session,
ret = __stat_page(session, next_walk->page, stats));
WT_RET(ret);
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index 7c6d49c8ea0..3cde2fa28a9 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -56,7 +56,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
flags |= WT_READ_NO_WAIT | WT_READ_SKIP_INTL;
for (walk = NULL;;) {
- WT_ERR(__wt_tree_walk(session, &walk, flags));
+ WT_ERR(__wt_tree_walk(session, &walk, NULL, flags));
if (walk == NULL)
break;
@@ -107,7 +107,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
/* Write all dirty in-cache pages. */
flags |= WT_READ_NO_EVICT;
for (walk = NULL;;) {
- WT_ERR(__wt_tree_walk(session, &walk, flags));
+ WT_ERR(__wt_tree_walk(session, &walk, NULL, flags));
if (walk == NULL)
break;
@@ -175,6 +175,12 @@ err: /* On error, clear any left-over tree walk. */
WT_FULL_BARRIER();
/*
+ * If this tree was being skipped by the eviction server during
+ * the checkpoint, clear the wait.
+ */
+ btree->evict_walk_period = 0;
+
+ /*
* Wake the eviction server, in case application threads have
* stalled while the eviction server decided it couldn't make
* progress. Without this, application threads will be stalled
diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c
index 8b2ce6d8fc1..70a5bc0b1ae 100644
--- a/src/btree/bt_walk.c
+++ b/src/btree/bt_walk.c
@@ -13,7 +13,8 @@
* Move to the next/previous page in the tree.
*/
int
-__wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
+__wt_tree_walk(WT_SESSION_IMPL *session,
+ WT_REF **refp, uint64_t *refcntp, uint32_t flags)
{
WT_BTREE *btree;
WT_DECL_RET;
@@ -178,6 +179,9 @@ restart: /*
else
++slot;
+ if (refcntp != NULL)
+ ++*refcntp;
+
for (descending = 0;;) {
ref = pindex->index[slot];
diff --git a/src/evict/evict_file.c b/src/evict/evict_file.c
index 5aa85872a3b..910aef070ca 100644
--- a/src/evict/evict_file.c
+++ b/src/evict/evict_file.c
@@ -36,8 +36,8 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop)
/* Walk the tree, discarding pages. */
next_ref = NULL;
- WT_ERR(__wt_tree_walk(
- session, &next_ref, WT_READ_CACHE | WT_READ_NO_EVICT));
+ WT_ERR(__wt_tree_walk(session, &next_ref, NULL,
+ WT_READ_CACHE | WT_READ_NO_EVICT));
while ((ref = next_ref) != NULL) {
page = ref->page;
@@ -73,8 +73,8 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop)
* the reconciliation, the next walk call could miss a page in
* the tree.
*/
- WT_ERR(__wt_tree_walk(
- session, &next_ref, WT_READ_CACHE | WT_READ_NO_EVICT));
+ WT_ERR(__wt_tree_walk(session, &next_ref, NULL,
+ WT_READ_CACHE | WT_READ_NO_EVICT));
switch (syncop) {
case WT_SYNC_CLOSE:
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 062356af637..e6985d750bd 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -839,7 +839,7 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t flags)
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
- u_int max_entries, old_slot, retries, slot, spins;
+ u_int max_entries, prev_slot, retries, slot, start_slot, spins;
int incr, dhandle_locked;
WT_DECL_SPINLOCK_ID(id);
@@ -869,7 +869,7 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t flags)
* Set the starting slot in the queue and the maximum pages added
* per walk.
*/
- slot = cache->evict_entries;
+ start_slot = slot = cache->evict_entries;
max_entries = slot + WT_EVICT_WALK_INCR;
retry: while (slot < max_entries && ret == 0) {
@@ -934,10 +934,10 @@ retry: while (slot < max_entries && ret == 0) {
continue;
/*
- * Also skip files that are configured to stick in cache until
- * we get aggressive.
+ * Also skip files that are checkpointing or configured to
+ * stick in cache until we get aggressive.
*/
- if (btree->evict_priority != 0 &&
+ if ((btree->checkpointing || btree->evict_priority != 0) &&
!LF_ISSET(WT_EVICT_PASS_AGGRESSIVE))
continue;
@@ -950,7 +950,7 @@ retry: while (slot < max_entries && ret == 0) {
btree->evict_walk_skips++ < btree->evict_walk_period)
continue;
btree->evict_walk_skips = 0;
- old_slot = slot;
+ prev_slot = slot;
(void)WT_ATOMIC_ADD4(dhandle->session_inuse, 1);
incr = 1;
@@ -972,15 +972,14 @@ retry: while (slot < max_entries && ret == 0) {
__wt_spin_unlock(session, &cache->evict_walk_lock);
/*
- * If we didn't find enough candidates in the file, skip it
- * next time.
+ * If we didn't find any candidates in the file, skip it next
+ * time.
*/
- if (slot >= old_slot + WT_EVICT_WALK_PER_FILE ||
- slot >= max_entries)
- btree->evict_walk_period = 0;
- else
+ if (slot == prev_slot)
btree->evict_walk_period = WT_MIN(
- WT_MAX(1, 2 * btree->evict_walk_period), 1000);
+ WT_MAX(1, 2 * btree->evict_walk_period), 100);
+ else
+ btree->evict_walk_period = 0;
}
if (incr) {
@@ -1003,8 +1002,9 @@ retry: while (slot < max_entries && ret == 0) {
if (!F_ISSET(cache, WT_EVICT_CLEAR_WALKS) && ret == 0 &&
slot < max_entries && (retries < 2 ||
(!LF_ISSET(WT_EVICT_PASS_WOULD_BLOCK) &&
- retries < 10 && slot > 0))) {
+ retries < 10 && slot > start_slot))) {
cache->evict_file_next = NULL;
+ start_slot = slot;
++retries;
goto retry;
}
@@ -1072,9 +1072,9 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
* Get some more eviction candidate pages.
*/
for (evict = start, pages_walked = 0, internal_pages = restarts = 0;
- evict < end && (ret == 0 || ret == WT_NOTFOUND);
- ret = __wt_tree_walk(session, &btree->evict_ref, walk_flags),
- ++pages_walked) {
+ evict < end && pages_walked < WT_EVICT_MAX_PER_FILE &&
+ (ret == 0 || ret == WT_NOTFOUND);
+ ret = __wt_tree_walk(session, &btree->evict_ref, &pages_walked, walk_flags)) {
if (btree->evict_ref == NULL) {
/*
* Take care with terminating this loop.
@@ -1451,7 +1451,7 @@ __wt_cache_dump(WT_SESSION_IMPL *session)
next_walk = NULL;
session->dhandle = dhandle;
while (__wt_tree_walk(session,
- &next_walk, WT_READ_CACHE | WT_READ_NO_WAIT) == 0 &&
+ &next_walk, NULL, WT_READ_CACHE | WT_READ_NO_WAIT) == 0 &&
next_walk != NULL) {
page = next_walk->page;
if (page->type == WT_PAGE_COL_INT ||
diff --git a/src/include/cache.h b/src/include/cache.h
index de6faad608a..24ea14ab11d 100644
--- a/src/include/cache.h
+++ b/src/include/cache.h
@@ -13,9 +13,10 @@
#define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal
pages by this many increments of the
read generation. */
-#define WT_EVICT_WALK_PER_FILE 10 /* Pages to visit per file */
-#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */
-#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */
+#define WT_EVICT_WALK_PER_FILE 10 /* Pages to queue per file */
+#define WT_EVICT_MAX_PER_FILE 100 /* Max pages to visit per file */
+#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */
+#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */
#define WT_EVICT_PASS_AGGRESSIVE 0x01
#define WT_EVICT_PASS_ALL 0x02
diff --git a/src/include/extern.h b/src/include/extern.h
index 40493b4aab4..c5909f7af2d 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -157,7 +157,7 @@ extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *addr, const WT_PAGE_HEADER *dsk, size_t size, int empty_page_ok);
extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *addr, WT_ITEM *buf);
-extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags);
+extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *refcntp, uint32_t flags);
extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd, int is_remove);
extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt);
extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page);