diff options
author | Michael Cahill <michael.cahill@mongodb.com> | 2017-04-15 06:01:02 +1000 |
---|---|---|
committer | Alex Gorrod <alexander.gorrod@mongodb.com> | 2017-04-14 16:01:02 -0400 |
commit | ba5cccea14a2afae8dc62765735be809908e0e39 (patch) | |
tree | e1a7ee13eddbf3ba62bca1128674d83dcc80d0c5 | |
parent | a07222c50ef9b07a9265aaa22573b41a5bb7356b (diff) | |
download | mongo-ba5cccea14a2afae8dc62765735be809908e0e39.tar.gz |
WT-3160 Queue empty internal pages for eviction. (#3357)
Also reduce the skew against internal pages by only queuing internal pages when we get aggressive or when a tree is idle.
-rw-r--r-- | src/bloom/bloom.c | 8 |
-rw-r--r-- | src/evict/evict_lru.c | 44 |
-rw-r--r-- | src/include/cache.h | 6 |
-rw-r--r-- | src/meta/meta_table.c | 3 |
4 files changed, 36 insertions, 25 deletions
diff --git a/src/bloom/bloom.c b/src/bloom/bloom.c index 3a1e861fb5d..bfbfa34078f 100644 --- a/src/bloom/bloom.c +++ b/src/bloom/bloom.c @@ -133,8 +133,12 @@ __bloom_open_cursor(WT_BLOOM *bloom, WT_CURSOR *owner) c = NULL; WT_RET(__wt_open_cursor(session, bloom->uri, owner, cfg, &c)); - /* Bump the cache priority for Bloom filters. */ - __wt_evict_priority_set(session, WT_EVICT_INT_SKEW); + /* + * Bump the cache priority for Bloom filters: this makes eviction favor + * pages from other trees over Bloom filters. + */ +#define WT_EVICT_BLOOM_SKEW 1000 + __wt_evict_priority_set(session, WT_EVICT_BLOOM_SKEW); bloom->c = c; return (0); diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 7ad9f377809..041e557ef78 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -95,12 +95,8 @@ __evict_entry_priority(WT_SESSION_IMPL *session, WT_REF *ref) if (page->read_gen == WT_READGEN_OLDEST) return (WT_READGEN_OLDEST); - /* - * Any leaf page from a dead tree is a great choice (not internal pages, - * they may have children and are not yet evictable). - */ - if (!WT_PAGE_IS_INTERNAL(page) && - F_ISSET(btree->dhandle, WT_DHANDLE_DEAD)) + /* Any page from a dead tree is a great choice. */ + if (F_ISSET(btree->dhandle, WT_DHANDLE_DEAD)) return (WT_READGEN_OLDEST); /* Any empty page (leaf or internal), is a good choice. 
*/ @@ -123,8 +119,10 @@ __evict_entry_priority(WT_SESSION_IMPL *session, WT_REF *ref) read_gen = page->read_gen; read_gen += btree->evict_priority; + +#define WT_EVICT_INTL_SKEW 1000 if (WT_PAGE_IS_INTERNAL(page)) - read_gen += WT_EVICT_INT_SKEW; + read_gen += WT_EVICT_INTL_SKEW; return (read_gen); } @@ -1580,7 +1578,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_EVICT_ENTRY *end, *evict, *start; - WT_PAGE *page; + WT_PAGE *last_parent, *page; WT_PAGE_MODIFY *mod; WT_REF *ref; WT_TXN_GLOBAL *txn_global; @@ -1588,14 +1586,15 @@ __evict_walk_file(WT_SESSION_IMPL *session, uint64_t pages_seen, pages_queued, refs_walked; uint32_t remaining_slots, total_slots, walk_flags; uint32_t target_pages_clean, target_pages_dirty, target_pages; - int internal_pages, restarts; + int restarts; bool give_up, modified, urgent_queued; conn = S2C(session); btree = S2BT(session); cache = conn->cache; txn_global = &conn->txn_global; - internal_pages = restarts = 0; + last_parent = NULL; + restarts = 0; give_up = urgent_queued = false; /* @@ -1750,6 +1749,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, */ for (evict = start, pages_queued = pages_seen = refs_walked = 0; evict < end && (ret == 0 || ret == WT_NOTFOUND); + last_parent = ref == NULL ? NULL : ref->home, ret = __wt_tree_walk_count( session, &ref, &refs_walked, walk_flags)) { /* @@ -1830,10 +1830,23 @@ __evict_walk_file(WT_SESSION_IMPL *session, if (modified && !F_ISSET(cache, WT_CACHE_EVICT_DIRTY)) continue; - /* Limit internal pages to 50% of the total. */ - if (WT_PAGE_IS_INTERNAL(page) && - internal_pages > (int)(evict - start) / 2) - continue; + /* + * Don't attempt eviction of internal pages with children in + * cache (indicated by seeing an internal page that is the + * parent of the last page we saw). + * + * Also skip internal page unless we get aggressive or the tree + * is idle (indicated by the tree being skipped for walks). 
+ * The goal here is that if trees become completely idle, we + * eventually push them out of cache completely. + */ + if (WT_PAGE_IS_INTERNAL(page)) { + if (page == last_parent) + continue; + if (btree->evict_walk_period == 0 && + !__wt_cache_aggressive(session)) + continue; + } /* If eviction gets aggressive, anything else is fair game. */ if (__wt_cache_aggressive(session)) @@ -1862,9 +1875,6 @@ fast: /* If the page can't be evicted, give up. */ ++evict; ++pages_queued; - if (WT_PAGE_IS_INTERNAL(page)) - ++internal_pages; - __wt_verbose(session, WT_VERB_EVICTSERVER, "select: %p, size %" WT_SIZET_FMT, (void *)page, page->memory_footprint); diff --git a/src/include/cache.h b/src/include/cache.h index 6e79c2a5868..8f439599eca 100644 --- a/src/include/cache.h +++ b/src/include/cache.h @@ -10,14 +10,10 @@ * Tuning constants: I hesitate to call this tuning, but we want to review some * number of pages from each file's in-memory tree for each page we evict. */ -#define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal - pages by this many increments of the - read generation. */ +#define WT_EVICT_MAX_TREES 1000 /* Maximum walk points */ #define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */ #define WT_EVICT_WALK_INCR 100 /* Pages added each walk */ -#define WT_EVICT_MAX_TREES 1000 /* Maximum walk points */ - /* Ways to position when starting an eviction walk. */ typedef enum { WT_EVICT_WALK_NEXT, diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c index 827a440073a..65835a16c8b 100644 --- a/src/meta/meta_table.c +++ b/src/meta/meta_table.c @@ -62,9 +62,10 @@ __wt_metadata_cursor_open( * first update is safe because it's single-threaded from * wiredtiger_open). 
*/ +#define WT_EVICT_META_SKEW 10000 if (btree->evict_priority == 0) WT_WITH_BTREE(session, btree, - __wt_evict_priority_set(session, WT_EVICT_INT_SKEW)); + __wt_evict_priority_set(session, WT_EVICT_META_SKEW)); if (F_ISSET(btree, WT_BTREE_NO_LOGGING)) F_CLR(btree, WT_BTREE_NO_LOGGING); |