diff options
Diffstat (limited to 'src/evict/evict_lru.c')
-rw-r--r-- | src/evict/evict_lru.c | 380 |
1 files changed, 195 insertions, 185 deletions
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index f5a6c33e50f..8ea487bbf83 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -9,15 +9,15 @@ #include "wt_internal.h" static int __evict_clear_all_walks(WT_SESSION_IMPL *); -static int __evict_clear_walks(WT_SESSION_IMPL *); +static int __evict_helper(WT_SESSION_IMPL *); static int WT_CDECL __evict_lru_cmp(const void *, const void *); static int __evict_lru_pages(WT_SESSION_IMPL *, bool); static int __evict_lru_walk(WT_SESSION_IMPL *); static int __evict_page(WT_SESSION_IMPL *, bool); static int __evict_pass(WT_SESSION_IMPL *); +static int __evict_server(WT_SESSION_IMPL *, bool *); static int __evict_walk(WT_SESSION_IMPL *, uint32_t); static int __evict_walk_file(WT_SESSION_IMPL *, uint32_t, u_int *); -static WT_THREAD_RET __evict_worker(void *); /* * __evict_read_gen -- @@ -152,6 +152,7 @@ __wt_evict_server_wake(WT_SESSION_IMPL *session) conn = S2C(session); cache = conn->cache; +#ifdef HAVE_VERBOSE if (WT_VERBOSE_ISSET(session, WT_VERB_EVICTSERVER)) { uint64_t bytes_inuse, bytes_max; @@ -165,107 +166,159 @@ __wt_evict_server_wake(WT_SESSION_IMPL *session) bytes_inuse <= bytes_max ? "<=" : ">", bytes_max / WT_MEGABYTE)); } +#endif return (__wt_cond_auto_signal(session, cache->evict_cond)); } /* - * __evict_server -- - * Thread to evict pages from the cache. + * __evict_thread_run -- + * General wrapper for any eviction thread. */ static WT_THREAD_RET -__evict_server(void *arg) +__evict_thread_run(void *arg) { WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION_IMPL *session; -#ifdef HAVE_DIAGNOSTIC - struct timespec now, stuck_ts; -#endif - uint64_t pages_evicted = 0; - u_int spins; + bool did_work; session = arg; conn = S2C(session); cache = conn->cache; #ifdef HAVE_DIAGNOSTIC - WT_ERR(__wt_epoch(session, &stuck_ts)); /* -Wuninitialized */ + if (session == conn->evict_session) + WT_ERR(__wt_epoch( + session, &cache->stuck_ts)); /* -Wuninitialized */ #endif while (F_ISSET(conn, WT_CONN_EVICTION_RUN)) { - /* Evict pages from the cache as needed. */ - WT_ERR(__evict_pass(session)); - - if (!F_ISSET(conn, WT_CONN_EVICTION_RUN)) - break; - - /* - * Clear the walks so we don't pin pages while asleep, - * otherwise we can block applications evicting large pages. - */ - if (!F_ISSET(cache, WT_CACHE_STUCK)) { - for (spins = 0; (ret = __wt_spin_trylock( - session, &conn->dhandle_lock)) == EBUSY && - !F_ISSET(cache, WT_CACHE_CLEAR_WALKS); - spins++) { - if (spins < WT_THOUSAND) - __wt_yield(); - else - __wt_sleep(0, WT_THOUSAND); - } + if (conn->evict_tid_set && + __wt_spin_trylock(session, &cache->evict_pass_lock) == 0) { /* - * If we gave up acquiring the lock, that indicates a - * session is waiting for us to clear walks. Do that - * as part of a normal pass (without the handle list - * lock) to avoid deadlock. + * Cannot use WT_WITH_PASS_LOCK because this is a try + * lock. Fix when that is supported. We set the flag + * on both sessions because we may call clear_walk when + * we are walking with the walk session, locked. */ - if (ret == EBUSY) - continue; - WT_ERR(ret); - ret = __evict_clear_all_walks(session); - __wt_spin_unlock(session, &conn->dhandle_lock); + F_SET(session, WT_SESSION_LOCKED_PASS); + F_SET(cache->walk_session, WT_SESSION_LOCKED_PASS); + ret = __evict_server(session, &did_work); + F_CLR(cache->walk_session, WT_SESSION_LOCKED_PASS); + F_CLR(session, WT_SESSION_LOCKED_PASS); + __wt_spin_unlock(session, &cache->evict_pass_lock); WT_ERR(ret); + WT_ERR(__wt_verbose( + session, WT_VERB_EVICTSERVER, "sleeping")); + /* Don't rely on signals: check periodically. */ + WT_ERR(__wt_cond_auto_wait( + session, cache->evict_cond, did_work)); + WT_ERR(__wt_verbose( + session, WT_VERB_EVICTSERVER, "waking")); + } else + WT_ERR(__evict_helper(session)); + } - /* Next time we wake up, reverse the sweep direction. */ - cache->flags ^= WT_CACHE_WALK_REVERSE; - pages_evicted = 0; - } else if (pages_evicted != cache->pages_evict) { - pages_evicted = cache->pages_evict; + if (session == conn->evict_session) { + /* + * The eviction server is shutting down: in case any trees are + * still open, clear all walks now so that they can be closed. + */ + WT_WITH_PASS_LOCK(session, ret, + ret = __evict_clear_all_walks(session)); + WT_ERR(ret); + } + WT_ERR(__wt_verbose( + session, WT_VERB_EVICTSERVER, "cache eviction thread exiting")); + + /* + * The only two cases when eviction workers are expected to stop are + * when recovery is finished or when the connection is closing. Check + * otherwise fewer eviction worker threads may be running than + * expected. + */ + WT_ASSERT(session, F_ISSET(conn, WT_CONN_CLOSING | WT_CONN_RECOVERING)); + if (0) { +err: WT_PANIC_MSG(session, ret, "cache eviction thread error"); + } + return (WT_THREAD_RET_VALUE); +} + +/* + * __evict_server -- + * Thread to evict pages from the cache. + */ +static int +__evict_server(WT_SESSION_IMPL *session, bool *did_work) +{ + WT_CACHE *cache; + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; #ifdef HAVE_DIAGNOSTIC - WT_ERR(__wt_epoch(session, &stuck_ts)); - } else { - /* After being stuck for 5 minutes, give up. */ - WT_ERR(__wt_epoch(session, &now)); - if (WT_TIMEDIFF_SEC(now, stuck_ts) > 300) { - __wt_err(session, ETIMEDOUT, - "Cache stuck for too long, giving up"); - (void)__wt_cache_dump(session, NULL); - WT_ERR(ETIMEDOUT); - } + struct timespec now; #endif - } + uint64_t orig_pages_evicted; + u_int spins; - WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "sleeping")); - /* Don't rely on signals: check periodically. */ - WT_ERR(__wt_cond_auto_wait( - session, cache->evict_cond, pages_evicted != 0)); - WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "waking")); - } + conn = S2C(session); + cache = conn->cache; + WT_ASSERT(session, did_work != NULL); + *did_work = false; + orig_pages_evicted = cache->pages_evicted; + + /* Evict pages from the cache as needed. */ + WT_RET(__evict_pass(session)); + + if (!F_ISSET(conn, WT_CONN_EVICTION_RUN)) + return (0); /* - * The eviction server is shutting down: in case any trees are still - * open, clear all walks now so that they can be closed. + * Clear the walks so we don't pin pages while asleep, + * otherwise we can block applications evicting large pages. */ - WT_ERR(__evict_clear_all_walks(session)); - - WT_ERR(__wt_verbose( - session, WT_VERB_EVICTSERVER, "cache eviction server exiting")); + if (!F_ISSET(cache, WT_CACHE_STUCK)) { + for (spins = 0; (ret = __wt_spin_trylock( + session, &conn->dhandle_lock)) == EBUSY && + cache->pass_intr == 0; spins++) { + if (spins < WT_THOUSAND) + __wt_yield(); + else + __wt_sleep(0, WT_THOUSAND); + } + /* + * If we gave up acquiring the lock, that indicates a + * session is waiting for us to clear walks. Do that + * as part of a normal pass (without the handle list + * lock) to avoid deadlock. + */ + if (ret == EBUSY) + return (0); + WT_RET(ret); + ret = __evict_clear_all_walks(session); + __wt_spin_unlock(session, &conn->dhandle_lock); + WT_RET(ret); - if (0) { -err: WT_PANIC_MSG(session, ret, "cache eviction server error"); + /* Next time we wake up, reverse the sweep direction. */ + cache->flags ^= WT_CACHE_WALK_REVERSE; + cache->pages_evicted = 0; + } else if (cache->pages_evicted != cache->pages_evict) { + cache->pages_evicted = cache->pages_evict; +#ifdef HAVE_DIAGNOSTIC + WT_RET(__wt_epoch(session, &cache->stuck_ts)); + } else { + /* After being stuck for 5 minutes, give up. */ + WT_RET(__wt_epoch(session, &now)); + if (WT_TIMEDIFF_SEC(now, cache->stuck_ts) > 300) { + __wt_err(session, ETIMEDOUT, + "Cache stuck for too long, giving up"); + (void)__wt_cache_dump(session, NULL); + WT_RET(ETIMEDOUT); + } +#endif } - return (WT_THREAD_RET_VALUE); + *did_work = cache->pages_evicted != orig_pages_evicted; + return (0); } /* @@ -284,6 +337,7 @@ __evict_workers_resize(WT_SESSION_IMPL *session) uint32_t i, session_flags; conn = S2C(session); + workers = NULL; /* -Wconditional-uninitialized */ if (conn->evict_workers_alloc < conn->evict_workers_max) { alloc = conn->evict_workers_alloc * sizeof(*workers); @@ -315,7 +369,8 @@ __evict_workers_resize(WT_SESSION_IMPL *session) ++conn->evict_workers; F_SET(&workers[i], WT_EVICT_WORKER_RUN); WT_ERR(__wt_thread_create(workers[i].session, - &workers[i].tid, __evict_worker, &workers[i])); + &workers[i].tid, __evict_thread_run, + workers[i].session)); } } @@ -369,7 +424,7 @@ __wt_evict_create(WT_SESSION_IMPL *session) * the worker's sessions are created. */ WT_RET(__wt_thread_create( - session, &conn->evict_tid, __evict_server, session)); + session, &conn->evict_tid, __evict_thread_run, session)); conn->evict_tid_set = true; return (0); @@ -439,39 +494,22 @@ __wt_evict_destroy(WT_SESSION_IMPL *session) } /* - * __evict_worker -- + * __evict_helper -- * Thread to help evict pages from the cache. */ -static WT_THREAD_RET -__evict_worker(void *arg) +static int +__evict_helper(WT_SESSION_IMPL *session) { WT_CACHE *cache; - WT_CONNECTION_IMPL *conn; WT_DECL_RET; - WT_EVICT_WORKER *worker; - WT_SESSION_IMPL *session; - worker = arg; - session = worker->session; - conn = S2C(session); - cache = conn->cache; - - while (F_ISSET(conn, WT_CONN_EVICTION_RUN) && - F_ISSET(worker, WT_EVICT_WORKER_RUN)) { - /* Don't spin in a busy loop if there is no work to do */ - if ((ret = __evict_lru_pages(session, false)) == WT_NOTFOUND) - WT_ERR(__wt_cond_wait( - session, cache->evict_waiter_cond, 10000)); - else - WT_ERR(ret); - } - WT_ERR(__wt_verbose( - session, WT_VERB_EVICTSERVER, "cache eviction worker exiting")); - - if (0) { -err: WT_PANIC_MSG(session, ret, "cache eviction worker error"); - } - return (WT_THREAD_RET_VALUE); + cache = S2C(session)->cache; + if ((ret = __evict_lru_pages(session, false)) == WT_NOTFOUND) + WT_RET(__wt_cond_wait( + session, cache->evict_waiter_cond, 10000)); + else + WT_RET(ret); + return (0); } /* @@ -579,12 +617,8 @@ __evict_pass(WT_SESSION_IMPL *session) * If there is a request to clear eviction walks, do that now, * before checking if the cache is full. */ - if (F_ISSET(cache, WT_CACHE_CLEAR_WALKS)) { - F_CLR(cache, WT_CACHE_CLEAR_WALKS); - WT_RET(__evict_clear_walks(session)); - WT_RET(__wt_cond_signal( - session, cache->evict_waiter_cond)); - } + if (cache->pass_intr != 0) + break; /* * Increment the shared read generation. Do this occasionally @@ -602,9 +636,10 @@ __evict_pass(WT_SESSION_IMPL *session) * * Do this every time the eviction server wakes up, regardless * of whether the cache is full, to prevent the oldest ID - * falling too far behind. + * falling too far behind. Don't wait to lock the table: with + * highly threaded workloads, that creates a bottleneck. */ - WT_RET(__wt_txn_update_oldest(session, loop > 0)); + WT_RET(__wt_txn_update_oldest(session, WT_TXN_OLDEST_STRICT)); if (!__evict_update_work(session)) break; @@ -630,7 +665,7 @@ __evict_pass(WT_SESSION_IMPL *session) worker = &conn->evict_workctx[conn->evict_workers++]; F_SET(worker, WT_EVICT_WORKER_RUN); WT_RET(__wt_thread_create(session, - &worker->tid, __evict_worker, worker)); + &worker->tid, __evict_thread_run, worker->session)); } WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER, @@ -648,7 +683,7 @@ __evict_pass(WT_SESSION_IMPL *session) */ if (pages_evicted == cache->pages_evict) { WT_STAT_FAST_CONN_INCR(session, - cache_eviction_server_slept); + cache_eviction_server_slept); /* * Back off if we aren't making progress: walks hold * the handle list lock, which blocks other operations @@ -689,11 +724,13 @@ __evict_clear_walk(WT_SESSION_IMPL *session) { WT_BTREE *btree; WT_CACHE *cache; + WT_DECL_RET; WT_REF *ref; btree = S2BT(session); cache = S2C(session)->cache; + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_PASS)); if (session->dhandle == cache->evict_file_next) cache->evict_file_next = NULL; @@ -705,30 +742,9 @@ __evict_clear_walk(WT_SESSION_IMPL *session) * assert we never try to evict the current eviction walk point). */ btree->evict_ref = NULL; - return (__wt_page_release(session, ref, WT_READ_NO_EVICT)); -} - -/* - * __evict_clear_walks -- - * Clear the eviction walk points for any file a session is waiting on. - */ -static int -__evict_clear_walks(WT_SESSION_IMPL *session) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_SESSION_IMPL *s; - u_int i, session_cnt; - - conn = S2C(session); - - WT_ORDERED_READ(session_cnt, conn->session_cnt); - for (s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) { - if (!s->active || !F_ISSET(s, WT_SESSION_CLEAR_EVICT_WALK)) - continue; - WT_WITH_DHANDLE( - session, s->dhandle, WT_TRET(__evict_clear_walk(session))); - } + WT_WITH_DHANDLE(cache->walk_session, session->dhandle, + (ret = __wt_page_release(cache->walk_session, + ref, WT_READ_NO_EVICT))); return (ret); } @@ -753,39 +769,6 @@ __evict_clear_all_walks(WT_SESSION_IMPL *session) } /* - * __evict_request_clear_walk -- - * Request that the eviction server clear the tree's current eviction - * point. - */ -static int -__evict_request_clear_walk(WT_SESSION_IMPL *session) -{ - WT_BTREE *btree; - WT_CACHE *cache; - WT_DECL_RET; - - btree = S2BT(session); - cache = S2C(session)->cache; - - F_SET(session, WT_SESSION_CLEAR_EVICT_WALK); - - while (ret == 0 && (btree->evict_ref != NULL || - cache->evict_file_next == session->dhandle)) { - F_SET(cache, WT_CACHE_CLEAR_WALKS); - ret = __wt_cond_wait( - session, cache->evict_waiter_cond, 100000); - } - - F_CLR(session, WT_SESSION_CLEAR_EVICT_WALK); - - /* An error is unexpected - flag the failure. */ - if (ret != 0) - __wt_err(session, ret, "Failed to clear eviction walk point"); - - return (ret); -} - -/* * __wt_evict_file_exclusive_on -- * Get exclusive eviction access to a file and discard any of the file's * blocks queued for eviction. @@ -822,10 +805,14 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) * this point. */ F_SET(btree, WT_BTREE_NO_EVICTION); + (void)__wt_atomic_add32(&cache->pass_intr, 1); WT_FULL_BARRIER(); /* Clear any existing LRU eviction walk for the file. */ - WT_ERR(__evict_request_clear_walk(session)); + WT_WITH_PASS_LOCK(session, ret, + ret = __evict_clear_walk(session)); + (void)__wt_atomic_sub32(&cache->pass_intr, 1); + WT_ERR(ret); /* * The eviction candidate list might reference pages from the file, @@ -949,7 +936,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session) queue_index = cache->evict_queue_fill++ % WT_EVICT_QUEUE_MAX; evict_queue = &cache->evict_queues[queue_index]; /* Get some more pages to consider for eviction. */ - if ((ret = __evict_walk(session, queue_index)) != 0) + if ((ret = __evict_walk(cache->walk_session, queue_index)) != 0) return (ret == EBUSY ? 0 : ret); /* Sort the list into LRU order and restart. */ @@ -1063,8 +1050,8 @@ __evict_lru_walk(WT_SESSION_IMPL *session) __wt_spin_unlock(session, &cache->evict_queue_lock); /* - * The eviction server thread doesn't do any actual eviction if there - * are multiple eviction workers running. + * Signal any application or helper threads that may be waiting + * to help with eviction. */ WT_RET(__wt_cond_signal(session, cache->evict_waiter_cond)); @@ -1108,7 +1095,7 @@ retry: while (slot < max_entries && ret == 0) { * If another thread is waiting on the eviction server to clear * the walk point in a tree, give up. */ - if (F_ISSET(cache, WT_CACHE_CLEAR_WALKS)) + if (cache->pass_intr != 0) break; /* @@ -1118,7 +1105,7 @@ retry: while (slot < max_entries && ret == 0) { if (!dhandle_locked) { for (spins = 0; (ret = __wt_spin_trylock( session, &conn->dhandle_lock)) == EBUSY && - !F_ISSET(cache, WT_CACHE_CLEAR_WALKS); + cache->pass_intr == 0; spins++) { if (spins < WT_THOUSAND) __wt_yield(); @@ -1244,7 +1231,7 @@ retry: while (slot < max_entries && ret == 0) { * Try two passes through all the files, give up when we have some * candidates and we aren't finding more. */ - if (!F_ISSET(cache, WT_CACHE_CLEAR_WALKS) && ret == 0 && + if (cache->pass_intr == 0 && ret == 0 && slot < max_entries && (retries < 2 || (retries < 10 && !FLD_ISSET(cache->state, WT_EVICT_PASS_WOULD_BLOCK) && @@ -1298,7 +1285,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp) WT_PAGE *page; WT_PAGE_MODIFY *mod; WT_REF *ref; - uint64_t pages_walked; + uint64_t pages_seen, refs_walked; uint32_t walk_flags; int internal_pages, restarts; bool enough, modified; @@ -1333,17 +1320,21 @@ __evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp) * Once we hit the page limit, do one more step through the walk in * case we are appending and only the last page in the file is live. */ - for (evict = start, pages_walked = 0; + for (evict = start, pages_seen = refs_walked = 0; evict < end && !enough && (ret == 0 || ret == WT_NOTFOUND); ret = __wt_tree_walk_count( - session, &btree->evict_ref, &pages_walked, walk_flags)) { - enough = pages_walked > cache->evict_max_refs_per_file; + session, &btree->evict_ref, &refs_walked, walk_flags)) { + enough = refs_walked > cache->evict_max_refs_per_file; if ((ref = btree->evict_ref) == NULL) { if (++restarts == 2 || enough) break; + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_walks_started); continue; } + ++pages_seen; + /* Ignore root pages entirely. */ if (__wt_ref_is_root(ref)) continue; @@ -1371,9 +1362,13 @@ __evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp) } /* Pages we no longer need (clean or dirty), are found money. */ + if (page->read_gen == WT_READGEN_OLDEST) { + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_pages_queued_oldest); + goto fast; + } if (__wt_page_is_empty(page) || - F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || - page->read_gen == WT_READGEN_OLDEST) + F_ISSET(session->dhandle, WT_DHANDLE_DEAD)) goto fast; /* Skip clean pages if appropriate. */ @@ -1439,24 +1434,31 @@ fast: /* If the page can't be evicted, give up. */ WT_RET_NOTFOUND_OK(ret); *slotp += (u_int)(evict - start); + WT_STAT_FAST_CONN_INCRV( + session, cache_eviction_pages_queued, (u_int)(evict - start)); /* * If we happen to end up on the root page, clear it. We have to track * hazard pointers, and the root page complicates that calculation. * + * Likewise if we found no new candidates during the walk: there is no + * point keeping a page pinned, since it may be the only candidate in an + * idle tree. + * * If we land on a page requiring forced eviction, move on to the next * page: we want this page evicted as quickly as possible. */ if ((ref = btree->evict_ref) != NULL) { - if (__wt_ref_is_root(ref)) + if (__wt_ref_is_root(ref) || evict == start) WT_RET(__evict_clear_walk(session)); else if (ref->page->read_gen == WT_READGEN_OLDEST) WT_RET_NOTFOUND_OK(__wt_tree_walk_count( session, &btree->evict_ref, - &pages_walked, walk_flags)); + &refs_walked, walk_flags)); } - WT_STAT_FAST_CONN_INCRV(session, cache_eviction_walk, pages_walked); + WT_STAT_FAST_CONN_INCRV(session, cache_eviction_walk, refs_walked); + WT_STAT_FAST_CONN_INCRV(session, cache_eviction_pages_seen, pages_seen); return (0); } @@ -1541,6 +1543,10 @@ __evict_get_ref( for (;;) { if (__wt_spin_trylock(session, &evict_queue->evict_lock) == 0) break; + if (!F_ISSET(session, WT_SESSION_INTERNAL)) { + __wt_spin_unlock(session, &cache->evict_queue_lock); + return (WT_NOTFOUND); + } __wt_yield(); } /* @@ -1638,6 +1644,9 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server) cache->worker_evicts++; } } else { + if (__wt_page_is_modified(ref->page)) + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_app_dirty); WT_STAT_FAST_CONN_INCR(session, cache_eviction_app); cache->app_evicts++; } @@ -1727,8 +1736,9 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) } /* See if eviction is still needed. */ - if (!__wt_eviction_needed(session, NULL) || - cache->pages_evict > init_evict_count + max_pages_evicted) + if (!__wt_eviction_needed(session, &pct_full) || + (pct_full < 100 && + cache->pages_evict > init_evict_count + max_pages_evicted)) return (0); /* Evict a page. */ |