diff options
author | Michael Cahill <michael.cahill@wiredtiger.com> | 2015-02-17 21:50:53 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@wiredtiger.com> | 2015-02-17 21:50:53 +1100 |
commit | 67527fc235406469e69dbaec3dcd571469e660c0 (patch) | |
tree | a7459cc3783dd882b54dd496d464048418742838 | |
parent | 4d37b9ac5c8719e3e92482348d64c24a4e96ed22 (diff) | |
download | mongo-67527fc235406469e69dbaec3dcd571469e660c0.tar.gz |
Make the eviction walk incremental: don't spend too long in any one file, and fix the tracking of whether we are making progress.
-rw-r--r-- | src/btree/bt_compact.c | 2 | ||||
-rw-r--r-- | src/btree/bt_curnext.c | 2 | ||||
-rw-r--r-- | src/btree/bt_curprev.c | 2 | ||||
-rw-r--r-- | src/btree/bt_handle.c | 2 | ||||
-rw-r--r-- | src/btree/bt_stat.c | 4 | ||||
-rw-r--r-- | src/btree/bt_sync.c | 10 | ||||
-rw-r--r-- | src/btree/bt_walk.c | 6 | ||||
-rw-r--r-- | src/evict/evict_file.c | 8 | ||||
-rw-r--r-- | src/evict/evict_lru.c | 36 | ||||
-rw-r--r-- | src/include/cache.h | 7 | ||||
-rw-r--r-- | src/include/extern.h | 2 |
11 files changed, 46 insertions, 35 deletions
diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c index 405410c6a1c..eeec041d57b 100644 --- a/src/btree/bt_compact.c +++ b/src/btree/bt_compact.c @@ -149,7 +149,7 @@ __wt_compact(WT_SESSION_IMPL *session, const char *cfg[]) * read, set its generation to a low value so it is evicted * quickly. */ - WT_ERR(__wt_tree_walk(session, &ref, + WT_ERR(__wt_tree_walk(session, &ref, NULL, WT_READ_COMPACT | WT_READ_NO_GEN | WT_READ_WONT_NEED)); if (ref == NULL) break; diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c index 6140dca1fad..d80a5f4740d 100644 --- a/src/btree/bt_curnext.c +++ b/src/btree/bt_curnext.c @@ -487,7 +487,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating) __wt_page_evict_soon(page); cbt->page_deleted_count = 0; - WT_ERR(__wt_tree_walk(session, &cbt->ref, flags)); + WT_ERR(__wt_tree_walk(session, &cbt->ref, NULL, flags)); WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND); } diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c index 880cb777954..f1ca81ee145 100644 --- a/src/btree/bt_curprev.c +++ b/src/btree/bt_curprev.c @@ -574,7 +574,7 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, int truncating) __wt_page_evict_soon(page); cbt->page_deleted_count = 0; - WT_ERR(__wt_tree_walk(session, &cbt->ref, flags)); + WT_ERR(__wt_tree_walk(session, &cbt->ref, NULL, flags)); WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND); } diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 6a2789c909b..299849ad365 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -566,7 +566,7 @@ __btree_get_last_recno(WT_SESSION_IMPL *session) btree = S2BT(session); next_walk = NULL; - WT_RET(__wt_tree_walk(session, &next_walk, WT_READ_PREV)); + WT_RET(__wt_tree_walk(session, &next_walk, NULL, WT_READ_PREV)); if (next_walk == NULL) return (WT_NOTFOUND); diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c index b8d56fe9d92..2e34a925f84 100644 --- a/src/btree/bt_stat.c +++ b/src/btree/bt_stat.c @@ -56,8 +56,8 @@ __wt_btree_stat_init(WT_SESSION_IMPL 
*session, WT_CURSOR_STAT *cst) WT_STAT_SET(stats, btree_row_leaf, 0); next_walk = NULL; - while ((ret = - __wt_tree_walk(session, &next_walk, 0)) == 0 && next_walk != NULL) { + while ((ret = __wt_tree_walk(session, &next_walk, NULL, 0)) == 0 && + next_walk != NULL) { WT_WITH_PAGE_INDEX(session, ret = __stat_page(session, next_walk->page, stats)); WT_RET(ret); diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index 7c6d49c8ea0..3cde2fa28a9 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -56,7 +56,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop) flags |= WT_READ_NO_WAIT | WT_READ_SKIP_INTL; for (walk = NULL;;) { - WT_ERR(__wt_tree_walk(session, &walk, flags)); + WT_ERR(__wt_tree_walk(session, &walk, NULL, flags)); if (walk == NULL) break; @@ -107,7 +107,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop) /* Write all dirty in-cache pages. */ flags |= WT_READ_NO_EVICT; for (walk = NULL;;) { - WT_ERR(__wt_tree_walk(session, &walk, flags)); + WT_ERR(__wt_tree_walk(session, &walk, NULL, flags)); if (walk == NULL) break; @@ -175,6 +175,12 @@ err: /* On error, clear any left-over tree walk. */ WT_FULL_BARRIER(); /* + * If this tree was being skipped by the eviction server during + * the checkpoint, clear the wait. + */ + btree->evict_walk_period = 0; + + /* * Wake the eviction server, in case application threads have * stalled while the eviction server decided it couldn't make * progress. Without this, application threads will be stalled diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c index 8b2ce6d8fc1..70a5bc0b1ae 100644 --- a/src/btree/bt_walk.c +++ b/src/btree/bt_walk.c @@ -13,7 +13,8 @@ * Move to the next/previous page in the tree. 
*/ int -__wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) +__wt_tree_walk(WT_SESSION_IMPL *session, + WT_REF **refp, uint64_t *refcntp, uint32_t flags) { WT_BTREE *btree; WT_DECL_RET; @@ -178,6 +179,9 @@ restart: /* else ++slot; + if (refcntp != NULL) + ++*refcntp; + for (descending = 0;;) { ref = pindex->index[slot]; diff --git a/src/evict/evict_file.c b/src/evict/evict_file.c index 5aa85872a3b..910aef070ca 100644 --- a/src/evict/evict_file.c +++ b/src/evict/evict_file.c @@ -36,8 +36,8 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop) /* Walk the tree, discarding pages. */ next_ref = NULL; - WT_ERR(__wt_tree_walk( - session, &next_ref, WT_READ_CACHE | WT_READ_NO_EVICT)); + WT_ERR(__wt_tree_walk(session, &next_ref, NULL, + WT_READ_CACHE | WT_READ_NO_EVICT)); while ((ref = next_ref) != NULL) { page = ref->page; @@ -73,8 +73,8 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop) * the reconciliation, the next walk call could miss a page in * the tree. */ - WT_ERR(__wt_tree_walk( - session, &next_ref, WT_READ_CACHE | WT_READ_NO_EVICT)); + WT_ERR(__wt_tree_walk(session, &next_ref, NULL, + WT_READ_CACHE | WT_READ_NO_EVICT)); switch (syncop) { case WT_SYNC_CLOSE: diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 062356af637..e6985d750bd 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -839,7 +839,7 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t flags) WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; WT_DECL_RET; - u_int max_entries, old_slot, retries, slot, spins; + u_int max_entries, prev_slot, retries, slot, start_slot, spins; int incr, dhandle_locked; WT_DECL_SPINLOCK_ID(id); @@ -869,7 +869,7 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t flags) * Set the starting slot in the queue and the maximum pages added * per walk. 
*/ - slot = cache->evict_entries; + start_slot = slot = cache->evict_entries; max_entries = slot + WT_EVICT_WALK_INCR; retry: while (slot < max_entries && ret == 0) { @@ -934,10 +934,10 @@ retry: while (slot < max_entries && ret == 0) { continue; /* - * Also skip files that are configured to stick in cache until - * we get aggressive. + * Also skip files that are checkpointing or configured to + * stick in cache until we get aggressive. */ - if (btree->evict_priority != 0 && + if ((btree->checkpointing || btree->evict_priority != 0) && !LF_ISSET(WT_EVICT_PASS_AGGRESSIVE)) continue; @@ -950,7 +950,7 @@ retry: while (slot < max_entries && ret == 0) { btree->evict_walk_skips++ < btree->evict_walk_period) continue; btree->evict_walk_skips = 0; - old_slot = slot; + prev_slot = slot; (void)WT_ATOMIC_ADD4(dhandle->session_inuse, 1); incr = 1; @@ -972,15 +972,14 @@ retry: while (slot < max_entries && ret == 0) { __wt_spin_unlock(session, &cache->evict_walk_lock); /* - * If we didn't find enough candidates in the file, skip it - * next time. + * If we didn't find any candidates in the file, skip it next + * time. */ - if (slot >= old_slot + WT_EVICT_WALK_PER_FILE || - slot >= max_entries) - btree->evict_walk_period = 0; - else + if (slot == prev_slot) btree->evict_walk_period = WT_MIN( - WT_MAX(1, 2 * btree->evict_walk_period), 1000); + WT_MAX(1, 2 * btree->evict_walk_period), 100); + else + btree->evict_walk_period = 0; } if (incr) { @@ -1003,8 +1002,9 @@ retry: while (slot < max_entries && ret == 0) { if (!F_ISSET(cache, WT_EVICT_CLEAR_WALKS) && ret == 0 && slot < max_entries && (retries < 2 || (!LF_ISSET(WT_EVICT_PASS_WOULD_BLOCK) && - retries < 10 && slot > 0))) { + retries < 10 && slot > start_slot))) { cache->evict_file_next = NULL; + start_slot = slot; ++retries; goto retry; } @@ -1072,9 +1072,9 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags) * Get some more eviction candidate pages. 
*/ for (evict = start, pages_walked = 0, internal_pages = restarts = 0; - evict < end && (ret == 0 || ret == WT_NOTFOUND); - ret = __wt_tree_walk(session, &btree->evict_ref, walk_flags), - ++pages_walked) { + evict < end && pages_walked < WT_EVICT_MAX_PER_FILE && + (ret == 0 || ret == WT_NOTFOUND); + ret = __wt_tree_walk(session, &btree->evict_ref, &pages_walked, walk_flags)) { if (btree->evict_ref == NULL) { /* * Take care with terminating this loop. @@ -1451,7 +1451,7 @@ __wt_cache_dump(WT_SESSION_IMPL *session) next_walk = NULL; session->dhandle = dhandle; while (__wt_tree_walk(session, - &next_walk, WT_READ_CACHE | WT_READ_NO_WAIT) == 0 && + &next_walk, NULL, WT_READ_CACHE | WT_READ_NO_WAIT) == 0 && next_walk != NULL) { page = next_walk->page; if (page->type == WT_PAGE_COL_INT || diff --git a/src/include/cache.h b/src/include/cache.h index de6faad608a..24ea14ab11d 100644 --- a/src/include/cache.h +++ b/src/include/cache.h @@ -13,9 +13,10 @@ #define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal pages by this many increments of the read generation. 
*/ -#define WT_EVICT_WALK_PER_FILE 10 /* Pages to visit per file */ -#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */ -#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */ +#define WT_EVICT_WALK_PER_FILE 10 /* Pages to queue per file */ +#define WT_EVICT_MAX_PER_FILE 100 /* Max pages to visit per file */ +#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */ +#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */ #define WT_EVICT_PASS_AGGRESSIVE 0x01 #define WT_EVICT_PASS_ALL 0x02 diff --git a/src/include/extern.h b/src/include/extern.h index 40493b4aab4..c5909f7af2d 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -157,7 +157,7 @@ extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *addr, const WT_PAGE_HEADER *dsk, size_t size, int empty_page_ok); extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *addr, WT_ITEM *buf); -extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags); +extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *refcntp, uint32_t flags); extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd, int is_remove); extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt); extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page); |