summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Gorrod <alexander.gorrod@mongodb.com>2017-02-20 11:04:56 +1100
committerMichael Cahill <michael.cahill@mongodb.com>2017-02-20 11:04:56 +1100
commitacceacbab536b64d52a1f9ef2e6cbdd54a1996ef (patch)
tree5ca615976dc0147e54ad8792e55e58bb4c9ef948
parentc23fa74a5fcefd751532ed0357ee0b237d487ab2 (diff)
downloadmongo-acceacbab536b64d52a1f9ef2e6cbdd54a1996ef.tar.gz
WT-3149 Use a range of eviction walk start points. (#3305)
Choosing a random point isn't very efficient in append only workloads.
-rw-r--r--src/evict/evict_lru.c51
-rw-r--r--src/include/btree.h6
2 files changed, 37 insertions, 20 deletions
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 42fe4d4608e..07cf8542c53 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -1654,31 +1654,36 @@ __evict_walk_file(WT_SESSION_IMPL *session,
!F_ISSET(cache, WT_CACHE_EVICT_CLEAN))
min_pages *= 10;
+ walk_flags =
+ WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT;
+
/*
* Choose a random point in the tree if looking for candidates in a
* tree with no starting point set. This is mostly aimed at ensuring
* eviction fairly visits all pages in trees with a lot of in-cache
* content.
*/
- if (btree->evict_ref == NULL) {
- /* Ensure internal pages indexes remain valid for our walk */
- WT_WITH_PAGE_INDEX(session, ret =
- __wt_random_descent(session, &btree->evict_ref, true));
- WT_RET_NOTFOUND_OK(ret);
-
- /*
- * Reverse the direction of the walk each time we start at a
- * random point so both ends of the tree are equally likely to
- * be visited.
- */
- btree->evict_walk_reverse = !btree->evict_walk_reverse;
- }
-
- walk_flags =
- WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT;
-
- if (btree->evict_walk_reverse)
+ switch (btree->evict_walk_state) {
+ case WT_EVICT_WALK_NEXT:
+ break;
+ case WT_EVICT_WALK_PREV:
FLD_SET(walk_flags, WT_READ_PREV);
+ break;
+ case WT_EVICT_WALK_RAND_PREV:
+ FLD_SET(walk_flags, WT_READ_PREV);
+ /* FALLTHROUGH */
+ case WT_EVICT_WALK_RAND_NEXT:
+ if (btree->evict_ref == NULL) {
+ /* Ensure internal pages indexes remain valid */
+ WT_WITH_PAGE_INDEX(session, ret = __wt_random_descent(
+ session, &btree->evict_ref, true));
+ WT_RET_NOTFOUND_OK(ret);
+ }
+ break;
+ default:
+ WT_RET_MSG(session, EINVAL,
+ "Invalid btree walk state encountered");
+ }
/*
* Get some more eviction candidate pages, starting at the last saved
@@ -1713,8 +1718,16 @@ __evict_walk_file(WT_SESSION_IMPL *session,
pages_seen > min_pages &&
(pages_queued == 0 || (pages_seen / pages_queued) >
(min_pages / target_pages));
- if (give_up)
+ if (give_up) {
+ /*
+ * Try a different walk start point next time if a
+ * walk gave up.
+ */
+ btree->evict_walk_state =
+ (btree->evict_walk_state + 1) %
+ WT_EVICT_WALK_MAX_LEGAL_VALUE;
break;
+ }
if (ref == NULL) {
if (++restarts == 2)
diff --git a/src/include/btree.h b/src/include/btree.h
index d742310bf8f..976c1d2110c 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -141,7 +141,11 @@ struct __wt_btree {
u_int evict_walk_skips; /* Number of walks skipped */
u_int evict_disabled; /* Eviction disabled count */
volatile uint32_t evict_busy; /* Count of threads in eviction */
- bool evict_walk_reverse; /* Walk direction */
+ enum {
+ WT_EVICT_WALK_NEXT, WT_EVICT_WALK_PREV,
+ WT_EVICT_WALK_RAND_NEXT, WT_EVICT_WALK_RAND_PREV
+ } evict_walk_state; /* Eviction walk state */
+#define WT_EVICT_WALK_MAX_LEGAL_VALUE (WT_EVICT_WALK_RAND_PREV + 1)
enum {
WT_CKPT_OFF, WT_CKPT_PREPARE, WT_CKPT_RUNNING