diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/evict/evict_lru.c')
-rw-r--r-- | src/third_party/wiredtiger/src/evict/evict_lru.c | 169 |
1 files changed, 102 insertions, 67 deletions
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index f1949a7c320..3ce35c60f2e 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -824,31 +824,19 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) btree = S2BT(session); cache = S2C(session)->cache; - /* - * Hold the walk lock to set the no-eviction flag. - * - * The no-eviction flag can be set permanently, in which case we never - * increment the no-eviction count. - */ + /* Hold the walk lock to turn off eviction. */ __wt_spin_lock(session, &cache->evict_walk_lock); - if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) { - if (btree->evict_disabled != 0) - ++btree->evict_disabled; + if (++btree->evict_disabled > 1) { __wt_spin_unlock(session, &cache->evict_walk_lock); return (0); } - ++btree->evict_disabled; /* * Ensure no new pages from the file will be queued for eviction after - * this point. + * this point, then clear any existing LRU eviction walk for the file. */ - F_SET(btree, WT_BTREE_NO_EVICTION); (void)__wt_atomic_addv32(&cache->pass_intr, 1); - - /* Clear any existing LRU eviction walk for the file. */ - WT_WITH_PASS_LOCK(session, - ret = __evict_clear_walk(session)); + WT_WITH_PASS_LOCK(session, ret = __evict_clear_walk(session)); (void)__wt_atomic_subv32(&cache->pass_intr, 1); WT_ERR(ret); @@ -879,7 +867,6 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) if (0) { err: --btree->evict_disabled; - F_CLR(btree, WT_BTREE_NO_EVICTION); } __wt_spin_unlock(session, &cache->evict_walk_lock); return (ret); @@ -904,38 +891,41 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) */ WT_DIAGNOSTIC_YIELD; - WT_ASSERT(session, - btree->evict_ref == NULL && F_ISSET(btree, WT_BTREE_NO_EVICTION)); - - /* - * The no-eviction flag can be set permanently, in which case we never - * increment the no-eviction count. - */ + /* Hold the walk lock to turn on eviction. 
*/ __wt_spin_lock(session, &cache->evict_walk_lock); - if (btree->evict_disabled > 0 && --btree->evict_disabled == 0) - F_CLR(btree, WT_BTREE_NO_EVICTION); + WT_ASSERT(session, + btree->evict_ref == NULL && btree->evict_disabled > 0); + --btree->evict_disabled; __wt_spin_unlock(session, &cache->evict_walk_lock); } #define EVICT_TUNE_BATCH 1 /* Max workers to add each period */ -#define EVICT_TUNE_DATAPT_MIN 3 /* Data points needed before deciding - if we should keep adding workers or - settle on an earlier value. */ -#define EVICT_TUNE_PERIOD 2 /* Tune period in seconds */ +/* + * Data points needed before deciding if we should keep adding workers or settle + * on an earlier value. + */ +#define EVICT_TUNE_DATAPT_MIN 3 +#define EVICT_TUNE_PERIOD 1 /* Tune period in seconds */ + +/* + * We will do a fresh re-tune every that many seconds to adjust to + * significant phase changes. + */ +#define EVICT_FORCE_RETUNE 30 /* * __evict_tune_workers -- * Find the right number of eviction workers. Gradually ramp up the number of * workers increasing the number in batches indicated by the setting above. - * Store the number of workers that gave us the best throughput so far and - * the number of data points we have tried. + * Store the number of workers that gave us the best throughput so far and the + * number of data points we have tried. * - * Every once in a while when we have the minimum number of data points - * we check whether the eviction throughput achieved with the current number - * of workers is the best we have seen so far. If so, we will keep increasing - * the number of workers. If not, we are past the infliction point on the - * eviction throughput curve. In that case, we will set the number of workers - * to the best observed so far and settle into a stable state. + * Every once in a while when we have the minimum number of data points we check + * whether the eviction throughput achieved with the current number of workers + * is the best we have seen so far. 
If so, we will keep increasing the number of + * workers. If not, we are past the infliction point on the eviction throughput + * curve. In that case, we will set the number of workers to the best observed + * so far and settle into a stable state. */ static int __evict_tune_workers(WT_SESSION_IMPL *session) @@ -945,28 +935,60 @@ __evict_tune_workers(WT_SESSION_IMPL *session) WT_CONNECTION_IMPL *conn; WT_DECL_RET; uint64_t cur_threads, delta_msec, delta_pages, i, target_threads; - uint64_t pgs_evicted_cur, pgs_evicted_persec_cur; + uint64_t pgs_evicted_cur, pgs_evicted_persec_cur, time_diff; uint32_t thread_surplus; conn = S2C(session); cache = conn->cache; WT_ASSERT(session, conn->evict_threads.threads[0]->session == session); - pgs_evicted_persec_cur = 0; - - if (conn->evict_tune_stable) - return (0); + pgs_evicted_cur = pgs_evicted_persec_cur = 0; __wt_epoch(session, &current_time); + time_diff = WT_TIMEDIFF_SEC(current_time, conn->evict_tune_last_time); /* - * Every EVICT_TUNE_PERIOD seconds record the number of - * pages evicted per second observed in the previous period. + * If we have reached the stable state and have not run long enough to + * surpass the forced re-tuning threshold, return. */ - if (WT_TIMEDIFF_SEC( - current_time, conn->evict_tune_last_time) < EVICT_TUNE_PERIOD) - return (0); + if (conn->evict_tune_stable) { + if (time_diff < EVICT_FORCE_RETUNE) + return (0); + /* + * Stable state was reached a long time ago. Let's re-tune. + * Reset all the state.
+ */ + conn->evict_tune_stable = 0; + conn->evict_tune_last_action_time.tv_sec = 0; + conn->evict_tune_pgs_last = 0; + conn->evict_tune_num_points = 0; + conn->evict_tune_pg_sec_max = 0; + conn->evict_tune_workers_best = 0; + + /* Reduce the number of eviction workers to the minimum */ + thread_surplus = conn->evict_threads.current_threads - + conn->evict_threads_min; + for (i = 0; i < thread_surplus; i++) { + WT_ERR(__wt_thread_group_stop_one( + session, &conn->evict_threads, false)); + WT_STAT_CONN_INCR(session, + cache_eviction_worker_removed); + } + WT_STAT_CONN_INCR(session, cache_eviction_force_retune); + } else + if (time_diff < EVICT_TUNE_PERIOD) + /* + * If we have not reached stable state, don't do + * anything unless enough time has passed since the last + * time we have taken any action in this function. + */ + return (0); + + /* + * Measure the number of evicted pages so far. Eviction rate correlates + * to performance, so this is our metric of success. + */ pgs_evicted_cur = cache->pages_evict; /* @@ -984,7 +1006,8 @@ __evict_tune_workers(WT_SESSION_IMPL *session) pgs_evicted_persec_cur = (delta_pages * WT_THOUSAND) / delta_msec; conn->evict_tune_num_points++; - /* Keep track of the maximum eviction throughput seen and the number + /* + * Keep track of the maximum eviction throughput seen and the number * of workers corresponding to that throughput. */ if (pgs_evicted_persec_cur > conn->evict_tune_pg_sec_max) { @@ -1005,7 +1028,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) if (conn->evict_tune_num_points >= conn->evict_tune_datapts_needed) { if ((conn->evict_tune_workers_best == conn->evict_threads.current_threads) && - (conn->evict_threads.current_threads < + (conn->evict_threads.current_threads < conn->evict_threads_max)) { /* * Keep adding workers. 
We will check again @@ -1014,7 +1037,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_tune_datapts_needed += WT_MIN(EVICT_TUNE_DATAPT_MIN, (conn->evict_threads_max - - conn->evict_threads.current_threads)/ + - conn->evict_threads.current_threads) / EVICT_TUNE_BATCH); } else { /* @@ -1043,7 +1066,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_tune_stable = true; WT_STAT_CONN_SET(session, cache_eviction_active_workers, conn->evict_threads.current_threads); - return (0); + goto err; } } @@ -1372,7 +1395,7 @@ retry: while (slot < max_entries) { /* Skip files that don't allow eviction. */ btree = dhandle->handle; - if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) + if (btree->evict_disabled > 0) continue; /* @@ -1428,13 +1451,23 @@ retry: while (slot < max_entries) { * the tree's current eviction point, and part of the process is * waiting on this thread to acknowledge that action. */ - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION) && + if (btree->evict_disabled == 0 && !__wt_spin_trylock(session, &cache->evict_walk_lock)) { - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) { + if (btree->evict_disabled == 0) { + /* + * Assert the handle has a root page: eviction + * should have been locked out if the tree is + * being discarded or the root page is changing. + * As this has not always been the case, assert + * to debug that change. + */ + WT_ASSERT(session, btree->root.page != NULL); + cache->evict_file_next = dhandle; - WT_WITH_DHANDLE(session, dhandle, ret = - __evict_walk_file(session, queue, - max_entries, &slot)); + WT_WITH_DHANDLE(session, dhandle, + ret = __evict_walk_file( + session, queue, max_entries, &slot)); + WT_ASSERT(session, session->split_gen == 0); } __wt_spin_unlock(session, &cache->evict_walk_lock); @@ -1663,7 +1696,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, * eviction fairly visits all pages in trees with a lot of in-cache * content. 
*/ - switch (btree->evict_walk_state) { + switch ((WT_EVICT_WALK_START)btree->evict_start_type) { case WT_EVICT_WALK_NEXT: break; case WT_EVICT_WALK_PREV: @@ -1720,9 +1753,9 @@ __evict_walk_file(WT_SESSION_IMPL *session, * Try a different walk start point next time if a * walk gave up. */ - btree->evict_walk_state = - (btree->evict_walk_state + 1) % - WT_EVICT_WALK_MAX_LEGAL_VALUE; + btree->evict_start_type = + (btree->evict_start_type + 1) % + WT_EVICT_WALK_START_NUM; break; } @@ -2124,6 +2157,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) WT_TXN_GLOBAL *txn_global; WT_TXN_STATE *txn_state; uint64_t init_evict_count, max_pages_evicted; + bool timer; conn = S2C(session); cache = conn->cache; @@ -2144,7 +2178,9 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) __wt_evict_server_wake(session); /* Track how long application threads spend doing eviction. */ - if (WT_STAT_ENABLED(session) && !F_ISSET(session, WT_SESSION_INTERNAL)) + timer = + WT_STAT_ENABLED(session) && !F_ISSET(session, WT_SESSION_INTERNAL); + if (timer) __wt_epoch(session, &enter); for (init_evict_count = cache->pages_evict;; ret = 0) { @@ -2210,8 +2246,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) } } -err: if (WT_STAT_ENABLED(session) && - !F_ISSET(session, WT_SESSION_INTERNAL)) { +err: if (timer) { __wt_epoch(session, &leave); WT_STAT_CONN_INCRV(session, application_cache_time, WT_TIMEDIFF_US(leave, enter)); @@ -2239,7 +2274,7 @@ __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) page = ref->page; if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) || - F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION)) + S2BT(session)->evict_disabled > 0) return (false); /* Append to the urgent queue if we can. 
*/ @@ -2249,7 +2284,7 @@ __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) __wt_spin_lock(session, &cache->evict_queue_lock); if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) || - F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION)) + S2BT(session)->evict_disabled > 0) goto done; __wt_spin_lock(session, &urgent_queue->evict_lock); |