diff options
Diffstat (limited to 'src/evict/evict_lru.c')
-rw-r--r-- | src/evict/evict_lru.c | 87 |
1 files changed, 64 insertions, 23 deletions
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 84c9990832d..3ce35c60f2e 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -900,24 +900,32 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) } #define EVICT_TUNE_BATCH 1 /* Max workers to add each period */ -#define EVICT_TUNE_DATAPT_MIN 3 /* Data points needed before deciding - if we should keep adding workers or - settle on an earlier value. */ +/* + * Data points needed before deciding if we should keep adding workers or settle + * on an earlier value. + */ +#define EVICT_TUNE_DATAPT_MIN 3 #define EVICT_TUNE_PERIOD 1 /* Tune period in seconds */ /* + * We will do a fresh re-tune every that many seconds to adjust to + * significant phase changes. + */ +#define EVICT_FORCE_RETUNE 30 + +/* * __evict_tune_workers -- * Find the right number of eviction workers. Gradually ramp up the number of * workers increasing the number in batches indicated by the setting above. - * Store the number of workers that gave us the best throughput so far and - * the number of data points we have tried. + * Store the number of workers that gave us the best throughput so far and the + * number of data points we have tried. * - * Every once in a while when we have the minimum number of data points - * we check whether the eviction throughput achieved with the current number - * of workers is the best we have seen so far. If so, we will keep increasing - * the number of workers. If not, we are past the infliction point on the - * eviction throughput curve. In that case, we will set the number of workers - * to the best observed so far and settle into a stable state. + * Every once in a while when we have the minimum number of data points we check + * whether the eviction throughput achieved with the current number of workers + * is the best we have seen so far. If so, we will keep increasing the number of + * workers. If not, we are past the infliction point on the eviction throughput + * curve. In that case, we will set the number of workers to the best observed + * so far and settle into a stable state. */ static int __evict_tune_workers(WT_SESSION_IMPL *session) @@ -927,27 +935,60 @@ __evict_tune_workers(WT_SESSION_IMPL *session) WT_CONNECTION_IMPL *conn; WT_DECL_RET; uint64_t cur_threads, delta_msec, delta_pages, i, target_threads; - uint64_t pgs_evicted_cur, pgs_evicted_persec_cur; + uint64_t pgs_evicted_cur, pgs_evicted_persec_cur, time_diff; uint32_t thread_surplus; conn = S2C(session); cache = conn->cache; WT_ASSERT(session, conn->evict_threads.threads[0]->session == session); - - if (conn->evict_tune_stable) - return (0); + pgs_evicted_cur = pgs_evicted_persec_cur = 0; __wt_epoch(session, ¤t_time); + time_diff = WT_TIMEDIFF_SEC(current_time, conn->evict_tune_last_time); /* - * Every EVICT_TUNE_PERIOD seconds record the number of - * pages evicted per second observed in the previous period. + * If we have reached the stable state and have not run long enough to + * surpass the forced re-tuning threshold, return. */ - if (WT_TIMEDIFF_SEC( - current_time, conn->evict_tune_last_time) < EVICT_TUNE_PERIOD) - return (0); + if (conn->evict_tune_stable) { + if (time_diff < EVICT_FORCE_RETUNE) + return (0); + + /* + * Stable state was reached a long time ago. Let's re-tune. + * Reset all the state. + */ + conn->evict_tune_stable = 0; + conn->evict_tune_last_action_time.tv_sec = 0; + conn->evict_tune_pgs_last = 0; + conn->evict_tune_num_points = 0; + conn->evict_tune_pg_sec_max = 0; + conn->evict_tune_workers_best = 0; + + /* Reduce the number of eviction workers to the minimum */ + thread_surplus = conn->evict_threads.current_threads - + conn->evict_threads_min; + for (i = 0; i < thread_surplus; i++) { + WT_ERR(__wt_thread_group_stop_one( + session, &conn->evict_threads, false)); + WT_STAT_CONN_INCR(session, + cache_eviction_worker_removed); + } + WT_STAT_CONN_INCR(session, cache_eviction_force_retune); + } else + if (time_diff < EVICT_TUNE_PERIOD) + /* + * If we have not reached stable state, don't do + * anything unless enough time has passed since the last + * time we have taken any action in this function. + */ + return (0); + /* + * Measure the number of evicted pages so far. Eviction rate correlates + * to performance, so this is our metric of success. + */ pgs_evicted_cur = cache->pages_evict; /* @@ -987,7 +1028,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) if (conn->evict_tune_num_points >= conn->evict_tune_datapts_needed) { if ((conn->evict_tune_workers_best == conn->evict_threads.current_threads) && - (conn->evict_threads.current_threads < + (conn->evict_threads.current_threads < conn->evict_threads_max)) { /* * Keep adding workers. We will check again @@ -996,7 +1037,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_tune_datapts_needed += WT_MIN(EVICT_TUNE_DATAPT_MIN, (conn->evict_threads_max - - conn->evict_threads.current_threads)/ + - conn->evict_threads.current_threads) / EVICT_TUNE_BATCH); } else { /* @@ -1025,7 +1066,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_tune_stable = true; WT_STAT_CONN_SET(session, cache_eviction_active_workers, conn->evict_threads.current_threads); - return (0); + goto err; } } |