diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/evict/evict_lru.c')
-rw-r--r-- | src/third_party/wiredtiger/src/evict/evict_lru.c | 242 |
1 files changed, 143 insertions, 99 deletions
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index 884c08a02df..50a00787f35 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -159,7 +159,7 @@ __wt_evict_server_wake(WT_SESSION_IMPL *session) bytes_max / WT_MEGABYTE)); } - return (__wt_cond_signal(session, cache->evict_cond)); + return (__wt_cond_auto_signal(session, cache->evict_cond)); } /* @@ -175,8 +175,8 @@ __evict_server(void *arg) WT_SESSION_IMPL *session; #ifdef HAVE_DIAGNOSTIC struct timespec now, stuck_ts; - uint64_t pages_evicted = 0; #endif + uint64_t pages_evicted = 0; u_int spins; session = arg; @@ -219,11 +219,11 @@ __evict_server(void *arg) /* Next time we wake up, reverse the sweep direction. */ cache->flags ^= WT_CACHE_WALK_REVERSE; -#ifdef HAVE_DIAGNOSTIC pages_evicted = 0; } else if (pages_evicted != cache->pages_evict) { - WT_ERR(__wt_epoch(session, &stuck_ts)); pages_evicted = cache->pages_evict; +#ifdef HAVE_DIAGNOSTIC + WT_ERR(__wt_epoch(session, &stuck_ts)); } else { /* After being stuck for 5 minutes, give up. */ WT_ERR(__wt_epoch(session, &now)); @@ -238,7 +238,8 @@ __evict_server(void *arg) WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "sleeping")); /* Don't rely on signals: check periodically. */ - WT_ERR(__wt_cond_wait(session, cache->evict_cond, 100000)); + WT_ERR(__wt_cond_auto_wait( + session, cache->evict_cond, pages_evicted != 0)); WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "waking")); } @@ -720,12 +721,32 @@ __evict_clear_walks(WT_SESSION_IMPL *session) } /* - * __evict_request_walk_clear -- + * __evict_clear_all_walks -- + * Clear the eviction walk points for all files a session is waiting on. + */ +static int +__evict_clear_all_walks(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_DATA_HANDLE *dhandle; + WT_DECL_RET; + + conn = S2C(session); + + TAILQ_FOREACH(dhandle, &conn->dhqh, q) + if (WT_PREFIX_MATCH(dhandle->name, "file:")) + WT_WITH_DHANDLE(session, + dhandle, WT_TRET(__evict_clear_walk(session))); + return (ret); +} + +/* + * __evict_request_clear_walk -- * Request that the eviction server clear the tree's current eviction * point. */ static int -__evict_request_walk_clear(WT_SESSION_IMPL *session) +__evict_request_clear_walk(WT_SESSION_IMPL *session) { WT_BTREE *btree; WT_CACHE *cache; @@ -753,32 +774,12 @@ __evict_request_walk_clear(WT_SESSION_IMPL *session) } /* - * __evict_clear_all_walks -- - * Clear the eviction walk points for all files a session is waiting on. - */ -static int -__evict_clear_all_walks(WT_SESSION_IMPL *session) -{ - WT_CONNECTION_IMPL *conn; - WT_DATA_HANDLE *dhandle; - WT_DECL_RET; - - conn = S2C(session); - - TAILQ_FOREACH(dhandle, &conn->dhqh, q) - if (WT_PREFIX_MATCH(dhandle->name, "file:")) - WT_WITH_DHANDLE(session, - dhandle, WT_TRET(__evict_clear_walk(session))); - return (ret); -} - -/* * __wt_evict_file_exclusive_on -- * Get exclusive eviction access to a file and discard any of the file's * blocks queued for eviction. */ int -__wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp) +__wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) { WT_BTREE *btree; WT_CACHE *cache; @@ -786,40 +787,39 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp) WT_EVICT_ENTRY *evict; u_int i, elem; - *evict_resetp = false; - btree = S2BT(session); cache = S2C(session)->cache; - /* If the file was never evictable, there's no work to do. */ - if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) - return (0); - /* - * Hold the walk lock to set the "no eviction" flag: no new pages from - * the file will be queued for eviction after this point. + * Hold the walk lock to set the no-eviction flag. + * + * The no-eviction flag can be set permanently, in which case we never + * increment the no-eviction count. */ __wt_spin_lock(session, &cache->evict_walk_lock); - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) { - F_SET(btree, WT_BTREE_NO_EVICTION); - *evict_resetp = true; + if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) { + if (btree->evict_disabled != 0) + ++btree->evict_disabled; + __wt_spin_unlock(session, &cache->evict_walk_lock); + return (0); } - __wt_spin_unlock(session, &cache->evict_walk_lock); + ++btree->evict_disabled; - /* If some other operation has disabled eviction, we're done. */ - if (!*evict_resetp) - return (0); + /* + * Ensure no new pages from the file will be queued for eviction after + * this point. + */ + F_SET(btree, WT_BTREE_NO_EVICTION); + WT_FULL_BARRIER(); /* Clear any existing LRU eviction walk for the file. */ - WT_ERR(__evict_request_walk_clear(session)); - - /* Hold the evict lock to remove any queued pages from this file. */ - __wt_spin_lock(session, &cache->evict_lock); + WT_ERR(__evict_request_clear_walk(session)); /* * The eviction candidate list might reference pages from the file, - * clear it. + * clear it. Hold the evict lock to remove queued pages from a file. */ + __wt_spin_lock(session, &cache->evict_lock); elem = cache->evict_max; for (i = 0, evict = cache->evict_queue; i < elem; i++, evict++) if (evict->btree == btree) @@ -833,10 +833,11 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp) while (btree->evict_busy > 0) __wt_yield(); - return (0); - -err: F_CLR(btree, WT_BTREE_NO_EVICTION); - *evict_resetp = false; + if (0) { +err: --btree->evict_disabled; + F_CLR(btree, WT_BTREE_NO_EVICTION); + } + __wt_spin_unlock(session, &cache->evict_walk_lock); return (ret); } @@ -848,8 +849,10 @@ void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) { WT_BTREE *btree; + WT_CACHE *cache; btree = S2BT(session); + cache = S2C(session)->cache; /* * We have seen subtle bugs with multiple threads racing to turn @@ -857,10 +860,17 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) */ WT_DIAGNOSTIC_YIELD; - WT_ASSERT(session, btree->evict_ref == NULL && - F_ISSET(btree, WT_BTREE_NO_EVICTION)); + WT_ASSERT(session, + btree->evict_ref == NULL && F_ISSET(btree, WT_BTREE_NO_EVICTION)); - F_CLR(btree, WT_BTREE_NO_EVICTION); + /* + * The no-eviction flag can be set permanently, in which case we never + * increment the no-eviction count. + */ + __wt_spin_lock(session, &cache->evict_walk_lock); + if (btree->evict_disabled > 0 && --btree->evict_disabled == 0) + F_CLR(btree, WT_BTREE_NO_EVICTION); + __wt_spin_unlock(session, &cache->evict_walk_lock); } /* @@ -890,7 +900,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session) { WT_CACHE *cache; WT_DECL_RET; - uint64_t cutoff; + uint64_t cutoff, read_gen_oldest; uint32_t candidates, entries; cache = S2C(session)->cache; @@ -931,34 +941,62 @@ __evict_lru_walk(WT_SESSION_IMPL *session) return (0); } - WT_ASSERT(session, cache->evict_queue[0].ref != NULL); - - /* Track the oldest read generation we have in the queue. */ - cache->read_gen_oldest = cache->evict_queue[0].ref->page->read_gen; - + /* Decide how many of the candidates we're going to try and evict. */ if (FLD_ISSET(cache->state, - WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK)) + WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK)) { /* * Take all candidates if we only gathered pages with an oldest * read generation set. */ cache->evict_candidates = entries; - else { - /* Find the bottom 25% of read generations. */ - cutoff = (3 * __evict_read_gen(&cache->evict_queue[0]) + - __evict_read_gen(&cache->evict_queue[entries - 1])) / 4; + } else { /* - * Don't take less than 10% or more than 50% of entries, - * regardless. That said, if there is only one entry, which is - * normal when populating an empty file, don't exclude it. + * Find the oldest read generation we have in the queue, used + * to set the initial value for pages read into the system. + * The queue is sorted, find the first "normal" generation. */ - for (candidates = 1 + entries / 10; - candidates < entries / 2; - candidates++) - if (__evict_read_gen( - &cache->evict_queue[candidates]) > cutoff) + read_gen_oldest = WT_READGEN_OLDEST; + for (candidates = 0; candidates < entries; ++candidates) { + read_gen_oldest = + __evict_read_gen(&cache->evict_queue[candidates]); + if (read_gen_oldest != WT_READGEN_OLDEST) break; - cache->evict_candidates = candidates; + } + + /* + * Take all candidates if we only gathered pages with an oldest + * read generation set. + * + * We normally never take more than 50% of the entries; if 50% + * of the entries were at the oldest read generation, take them. + */ + if (read_gen_oldest == WT_READGEN_OLDEST) + cache->evict_candidates = entries; + else if (candidates >= entries / 2) + cache->evict_candidates = candidates; + else { + /* Save the calculated oldest generation. */ + cache->read_gen_oldest = read_gen_oldest; + + /* Find the bottom 25% of read generations. */ + cutoff = + (3 * read_gen_oldest + __evict_read_gen( + &cache->evict_queue[entries - 1])) / 4; + + /* + * Don't take less than 10% or more than 50% of entries, + * regardless. That said, if there is only one entry, + * which is normal when populating an empty file, don't + * exclude it. + */ + for (candidates = 1 + entries / 10; + candidates < entries / 2; + candidates++) + if (__evict_read_gen( + &cache->evict_queue[candidates]) > cutoff) + break; + cache->evict_candidates = candidates; + } } cache->evict_current = cache->evict_queue; @@ -1127,23 +1165,27 @@ retry: while (slot < max_entries && ret == 0) { __wt_spin_unlock(session, &conn->dhandle_lock); dhandle_locked = false; - __wt_spin_lock(session, &cache->evict_walk_lock); - /* - * Re-check the "no eviction" flag -- it is used to enforce - * exclusive access when a handle is being closed. + * Re-check the "no eviction" flag, used to enforce exclusive + * access when a handle is being closed. If not set, remember + * the file to visit first, next loop. + * + * Only try to acquire the lock and simply continue if we fail; + * the lock is held while the thread turning off eviction clears + * the tree's current eviction point, and part of the process is + * waiting on this thread to acknowledge that action. */ - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) { - /* Remember the file to visit first, next loop. */ - cache->evict_file_next = dhandle; - - WT_WITH_DHANDLE(session, dhandle, - ret = __evict_walk_file(session, &slot)); - WT_ASSERT(session, session->split_gen == 0); + if (!F_ISSET(btree, WT_BTREE_NO_EVICTION) && + !__wt_spin_trylock(session, &cache->evict_walk_lock)) { + if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) { + cache->evict_file_next = dhandle; + WT_WITH_DHANDLE(session, dhandle, + ret = __evict_walk_file(session, &slot)); + WT_ASSERT(session, session->split_gen == 0); + } + __wt_spin_unlock(session, &cache->evict_walk_lock); } - __wt_spin_unlock(session, &cache->evict_walk_lock); - /* * If we didn't find any candidates in the file, skip it next * time. @@ -1286,6 +1328,18 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp) if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU)) continue; + /* + * It's possible (but unlikely) to visit a page without a read + * generation, if we race with the read instantiating the page. + * Ignore those pages, but set the page's read generation here + * to ensure a bug doesn't somehow leave a page without a read + * generation. + */ + if (page->read_gen == WT_READGEN_NOTSET) { + __wt_cache_read_gen_new(session, page); + continue; + } + /* Pages we no longer need (clean or dirty), are found money. */ if (__wt_page_is_empty(page) || F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || @@ -1311,13 +1365,6 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp) internal_pages >= (int)(evict - start) / 2) continue; - /* - * If this page has never been considered for eviction, set its - * read generation to somewhere in the middle of the LRU list. - */ - if (page->read_gen == WT_READGEN_NOTSET) - page->read_gen = __wt_cache_read_gen_new(session); - fast: /* If the page can't be evicted, give up. */ if (!__wt_page_can_evict(session, ref, NULL)) continue; @@ -1477,7 +1524,6 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server) { WT_BTREE *btree; WT_DECL_RET; - WT_PAGE *page; WT_REF *ref; WT_RET(__evict_get_ref(session, is_server, &btree, &ref)); @@ -1506,9 +1552,7 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server) * the page and some other thread may have evicted it by the time we * look at it. */ - page = ref->page; - if (page->read_gen != WT_READGEN_OLDEST) - page->read_gen = __wt_cache_read_gen_bump(session); + __wt_cache_read_gen_bump(session, ref->page); WT_WITH_BTREE(session, btree, ret = __wt_evict(session, ref, false)); |