summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Cahill <michael.cahill@mongodb.com> 2017-04-15 06:01:02 +1000
committer: Alex Gorrod <alexander.gorrod@mongodb.com> 2017-04-14 16:01:02 -0400
commit: ba5cccea14a2afae8dc62765735be809908e0e39 (patch)
tree: e1a7ee13eddbf3ba62bca1128674d83dcc80d0c5
parent: a07222c50ef9b07a9265aaa22573b41a5bb7356b (diff)
download: mongo-ba5cccea14a2afae8dc62765735be809908e0e39.tar.gz
WT-3160 Queue empty internal pages for eviction. (#3357)
Also reduce the skew against internal pages by only queuing internal pages when we get aggressive or when a tree is idle.
-rw-r--r-- src/bloom/bloom.c 8
-rw-r--r-- src/evict/evict_lru.c 44
-rw-r--r-- src/include/cache.h 6
-rw-r--r-- src/meta/meta_table.c 3
4 files changed, 36 insertions, 25 deletions
diff --git a/src/bloom/bloom.c b/src/bloom/bloom.c
index 3a1e861fb5d..bfbfa34078f 100644
--- a/src/bloom/bloom.c
+++ b/src/bloom/bloom.c
@@ -133,8 +133,12 @@ __bloom_open_cursor(WT_BLOOM *bloom, WT_CURSOR *owner)
c = NULL;
WT_RET(__wt_open_cursor(session, bloom->uri, owner, cfg, &c));
- /* Bump the cache priority for Bloom filters. */
- __wt_evict_priority_set(session, WT_EVICT_INT_SKEW);
+ /*
+ * Bump the cache priority for Bloom filters: this makes eviction favor
+ * pages from other trees over Bloom filters.
+ */
+#define WT_EVICT_BLOOM_SKEW 1000
+ __wt_evict_priority_set(session, WT_EVICT_BLOOM_SKEW);
bloom->c = c;
return (0);
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 7ad9f377809..041e557ef78 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -95,12 +95,8 @@ __evict_entry_priority(WT_SESSION_IMPL *session, WT_REF *ref)
if (page->read_gen == WT_READGEN_OLDEST)
return (WT_READGEN_OLDEST);
- /*
- * Any leaf page from a dead tree is a great choice (not internal pages,
- * they may have children and are not yet evictable).
- */
- if (!WT_PAGE_IS_INTERNAL(page) &&
- F_ISSET(btree->dhandle, WT_DHANDLE_DEAD))
+ /* Any page from a dead tree is a great choice. */
+ if (F_ISSET(btree->dhandle, WT_DHANDLE_DEAD))
return (WT_READGEN_OLDEST);
/* Any empty page (leaf or internal), is a good choice. */
@@ -123,8 +119,10 @@ __evict_entry_priority(WT_SESSION_IMPL *session, WT_REF *ref)
read_gen = page->read_gen;
read_gen += btree->evict_priority;
+
+#define WT_EVICT_INTL_SKEW 1000
if (WT_PAGE_IS_INTERNAL(page))
- read_gen += WT_EVICT_INT_SKEW;
+ read_gen += WT_EVICT_INTL_SKEW;
return (read_gen);
}
@@ -1580,7 +1578,7 @@ __evict_walk_file(WT_SESSION_IMPL *session,
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_EVICT_ENTRY *end, *evict, *start;
- WT_PAGE *page;
+ WT_PAGE *last_parent, *page;
WT_PAGE_MODIFY *mod;
WT_REF *ref;
WT_TXN_GLOBAL *txn_global;
@@ -1588,14 +1586,15 @@ __evict_walk_file(WT_SESSION_IMPL *session,
uint64_t pages_seen, pages_queued, refs_walked;
uint32_t remaining_slots, total_slots, walk_flags;
uint32_t target_pages_clean, target_pages_dirty, target_pages;
- int internal_pages, restarts;
+ int restarts;
bool give_up, modified, urgent_queued;
conn = S2C(session);
btree = S2BT(session);
cache = conn->cache;
txn_global = &conn->txn_global;
- internal_pages = restarts = 0;
+ last_parent = NULL;
+ restarts = 0;
give_up = urgent_queued = false;
/*
@@ -1750,6 +1749,7 @@ __evict_walk_file(WT_SESSION_IMPL *session,
*/
for (evict = start, pages_queued = pages_seen = refs_walked = 0;
evict < end && (ret == 0 || ret == WT_NOTFOUND);
+ last_parent = ref == NULL ? NULL : ref->home,
ret = __wt_tree_walk_count(
session, &ref, &refs_walked, walk_flags)) {
/*
@@ -1830,10 +1830,23 @@ __evict_walk_file(WT_SESSION_IMPL *session,
if (modified && !F_ISSET(cache, WT_CACHE_EVICT_DIRTY))
continue;
- /* Limit internal pages to 50% of the total. */
- if (WT_PAGE_IS_INTERNAL(page) &&
- internal_pages > (int)(evict - start) / 2)
- continue;
+ /*
+ * Don't attempt eviction of internal pages with children in
+ * cache (indicated by seeing an internal page that is the
+ * parent of the last page we saw).
+ *
+ * Also skip internal page unless we get aggressive or the tree
+ * is idle (indicated by the tree being skipped for walks).
+ * The goal here is that if trees become completely idle, we
+ * eventually push them out of cache completely.
+ */
+ if (WT_PAGE_IS_INTERNAL(page)) {
+ if (page == last_parent)
+ continue;
+ if (btree->evict_walk_period == 0 &&
+ !__wt_cache_aggressive(session))
+ continue;
+ }
/* If eviction gets aggressive, anything else is fair game. */
if (__wt_cache_aggressive(session))
@@ -1862,9 +1875,6 @@ fast: /* If the page can't be evicted, give up. */
++evict;
++pages_queued;
- if (WT_PAGE_IS_INTERNAL(page))
- ++internal_pages;
-
__wt_verbose(session, WT_VERB_EVICTSERVER,
"select: %p, size %" WT_SIZET_FMT,
(void *)page, page->memory_footprint);
diff --git a/src/include/cache.h b/src/include/cache.h
index 6e79c2a5868..8f439599eca 100644
--- a/src/include/cache.h
+++ b/src/include/cache.h
@@ -10,14 +10,10 @@
* Tuning constants: I hesitate to call this tuning, but we want to review some
* number of pages from each file's in-memory tree for each page we evict.
*/
-#define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal
- pages by this many increments of the
- read generation. */
+#define WT_EVICT_MAX_TREES 1000 /* Maximum walk points */
#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */
#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */
-#define WT_EVICT_MAX_TREES 1000 /* Maximum walk points */
-
/* Ways to position when starting an eviction walk. */
typedef enum {
WT_EVICT_WALK_NEXT,
diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c
index 827a440073a..65835a16c8b 100644
--- a/src/meta/meta_table.c
+++ b/src/meta/meta_table.c
@@ -62,9 +62,10 @@ __wt_metadata_cursor_open(
* first update is safe because it's single-threaded from
* wiredtiger_open).
*/
+#define WT_EVICT_META_SKEW 10000
if (btree->evict_priority == 0)
WT_WITH_BTREE(session, btree,
- __wt_evict_priority_set(session, WT_EVICT_INT_SKEW));
+ __wt_evict_priority_set(session, WT_EVICT_META_SKEW));
if (F_ISSET(btree, WT_BTREE_NO_LOGGING))
F_CLR(btree, WT_BTREE_NO_LOGGING);