diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/evict/evict_lru.c')
-rw-r--r-- | src/third_party/wiredtiger/src/evict/evict_lru.c | 218 |
1 files changed, 95 insertions, 123 deletions
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index c224a3b7b11..2f9f3220106 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -277,10 +277,12 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) conn = S2C(session); cache = conn->cache; - /* - * The thread group code calls us repeatedly. So each call is one pass through eviction. - */ - WT_TRACK_TIME(session); +/* + * The thread group code calls us repeatedly. So each call is one pass through eviction. + */ +#ifdef HAVE_DIAGNOSTIC + __wt_seconds32(session, &session->op_5043_seconds); +#endif if (conn->evict_server_running && __wt_spin_trylock(session, &cache->evict_pass_lock) == 0) { /* * Cannot use WT_WITH_PASS_LOCK because this is a try lock. Fix when that is supported. We @@ -426,15 +428,14 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work) return (0); #endif /* - * If we're stuck for 5 minutes in diagnostic mode, or the verbose - * evict_stuck flag is configured, log the cache and transaction state. + * If we're stuck for 5 minutes in diagnostic mode, or the verbose evict_stuck flag is + * configured, log the cache and transaction state. * * If we're stuck for 5 minutes in diagnostic mode, give up. * - * We don't do this check for in-memory workloads because application - * threads are not blocked by the cache being full. If the cache becomes - * full of clean pages, we can be servicing reads while the cache - * appears stuck to eviction. + * We don't do this check for in-memory workloads because application threads are not blocked by + * the cache being full. If the cache becomes full of clean pages, we can be servicing reads + * while the cache appears stuck to eviction. */ if (F_ISSET(conn, WT_CONN_IN_MEMORY)) return (0); @@ -578,8 +579,7 @@ __evict_update_work(WT_SESSION_IMPL *session) /* * If we need space in the cache, try to find clean pages to evict. * - * Avoid division by zero if the cache size has not yet been set in a - * shared cache. + * Avoid division by zero if the cache size has not yet been set in a shared cache. */ bytes_max = conn->cache_size + 1; bytes_inuse = __wt_cache_bytes_inuse(cache); @@ -679,14 +679,12 @@ __evict_pass(WT_SESSION_IMPL *session) ++cache->evict_pass_gen; /* - * Update the oldest ID: we use it to decide whether pages are - * candidates for eviction. Without this, if all threads are - * blocked after a long-running transaction (such as a + * Update the oldest ID: we use it to decide whether pages are candidates for eviction. + * Without this, if all threads are blocked after a long-running transaction (such as a * checkpoint) completes, we may never start evicting again. * - * Do this every time the eviction server wakes up, regardless - * of whether the cache is full, to prevent the oldest ID - * falling too far behind. Don't wait to lock the table: with + * Do this every time the eviction server wakes up, regardless of whether the cache is full, + * to prevent the oldest ID falling too far behind. Don't wait to lock the table: with * highly threaded workloads, that creates a bottleneck. */ WT_RET(__wt_txn_update_oldest(session, WT_TXN_OLDEST_STRICT)); @@ -702,14 +700,12 @@ __evict_pass(WT_SESSION_IMPL *session) WT_RET(__evict_lru_walk(session)); /* - * If the queue has been empty recently, keep queuing more - * pages to evict. If the rate of queuing pages is high - * enough, this score will go to zero, in which case the - * eviction server might as well help out with eviction. + * If the queue has been empty recently, keep queuing more pages to evict. If the rate of + * queuing pages is high enough, this score will go to zero, in which case the eviction + * server might as well help out with eviction. * - * Also, if there is a single eviction server thread with no - * workers, it must service the urgent queue in case all - * application threads are busy. + * Also, if there is a single eviction server thread with no workers, it must service the + * urgent queue in case all application threads are busy. */ if (!WT_EVICT_HAS_WORKERS(session) && (cache->evict_empty_score < WT_EVICT_SCORE_CUTOFF || @@ -720,16 +716,13 @@ __evict_pass(WT_SESSION_IMPL *session) break; /* - * If we're making progress, keep going; if we're not making - * any progress at all, mark the cache "stuck" and go back to - * sleep, it's not something we can fix. + * If we're making progress, keep going; if we're not making any progress at all, mark the + * cache "stuck" and go back to sleep, it's not something we can fix. * - * We check for progress every 20ms, the idea being that the - * aggressive score will reach 10 after 200ms if we aren't - * making progress and eviction will start considering more - * pages. If there is still no progress after 2s, we will - * treat the cache as stuck and start rolling back - * transactions and writing updates to the lookaside table. + * We check for progress every 20ms, the idea being that the aggressive score will reach 10 + * after 200ms if we aren't making progress and eviction will start considering more pages. + * If there is still no progress after 2s, we will treat the cache as stuck and start + * rolling back transactions and writing updates to the lookaside table. */ if (eviction_progress == cache->eviction_progress) { if (WT_CLOCKDIFF_MS(time_now, time_prev) >= 20 && @@ -750,14 +743,11 @@ __evict_pass(WT_SESSION_IMPL *session) */ if (loop < 100 || cache->evict_aggressive_score < 100) { /* - * Back off if we aren't making progress: walks - * hold the handle list lock, blocking other - * operations that can free space in cache, - * such as LSM discarding handles. + * Back off if we aren't making progress: walks hold the handle list lock, blocking + * other operations that can free space in cache, such as LSM discarding handles. * - * Allow this wait to be interrupted (e.g. if a - * checkpoint completes): make sure we wait for - * a non-zero number of microseconds). + * Allow this wait to be interrupted (e.g. if a checkpoint completes): make sure we + * wait for a non-zero number of microseconds). */ WT_STAT_CONN_INCR(session, cache_eviction_server_slept); __wt_cond_wait(session, cache->evict_cond, WT_THOUSAND, NULL); @@ -1181,8 +1171,8 @@ __evict_lru_walk(WT_SESSION_IMPL *session) /* * Get some more pages to consider for eviction. * - * If the walk is interrupted, we still need to sort the queue: the - * next walk assumes there are no entries beyond WT_EVICT_WALK_BASE. + * If the walk is interrupted, we still need to sort the queue: the next walk assumes there are + * no entries beyond WT_EVICT_WALK_BASE. */ if ((ret = __evict_walk(cache->walk_session, queue)) == EBUSY) ret = 0; @@ -1264,15 +1254,12 @@ __evict_lru_walk(WT_SESSION_IMPL *session) queue->evict_candidates = candidates; else { /* - * Take all of the urgent pages plus a third of - * ordinary candidates (which could be expressed as - * WT_EVICT_WALK_INCR / WT_EVICT_WALK_BASE). In the - * steady state, we want to get as many candidates as - * the eviction walk adds to the queue. + * Take all of the urgent pages plus a third of ordinary candidates (which could be + * expressed as WT_EVICT_WALK_INCR / WT_EVICT_WALK_BASE). In the steady state, we want + * to get as many candidates as the eviction walk adds to the queue. * - * That said, if there is only one entry, which is - * normal when populating an empty file, don't exclude - * it. + * That said, if there is only one entry, which is normal when populating an empty file, + * don't exclude it. */ queue->evict_candidates = 1 + candidates + ((entries - candidates) - 1) / 3; cache->read_gen_oldest = read_gen_oldest; @@ -1468,11 +1455,9 @@ retry: /* * Skip files if we have too many active walks. * - * This used to be limited by the configured maximum number of - * hazard pointers per session. Even though that ceiling has - * been removed, we need to test eviction with huge numbers of - * active trees before allowing larger numbers of hazard - * pointers in the walk session. + * This used to be limited by the configured maximum number of hazard pointers per session. + * Even though that ceiling has been removed, we need to test eviction with huge numbers of + * active trees before allowing larger numbers of hazard pointers in the walk session. */ if (btree->evict_ref == NULL && session->nhazard > WT_EVICT_MAX_TREES) continue; @@ -1490,16 +1475,14 @@ retry: dhandle_locked = false; /* - * Re-check the "no eviction" flag, used to enforce exclusive - * access when a handle is being closed. + * Re-check the "no eviction" flag, used to enforce exclusive access when a handle is being + * closed. * - * Only try to acquire the lock and simply continue if we fail; - * the lock is held while the thread turning off eviction clears - * the tree's current eviction point, and part of the process is - * waiting on this thread to acknowledge that action. + * Only try to acquire the lock and simply continue if we fail; the lock is held while the + * thread turning off eviction clears the tree's current eviction point, and part of the + * process is waiting on this thread to acknowledge that action. * - * If a handle is being discarded, it will still be marked open, - * but won't have a root page. + * If a handle is being discarded, it will still be marked open, but won't have a root page. */ if (btree->evict_disabled == 0 && !__wt_spin_trylock(session, &cache->evict_walk_lock)) { if (btree->evict_disabled == 0 && btree->root.page != NULL) { @@ -1888,9 +1871,8 @@ __evict_walk_tree(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue, u_int max_ent /* * Pages that are empty or from dead trees are fast-tracked. * - * Also evict lookaside table pages without further filtering: - * the cache is under pressure by definition and we want to - * free space. + * Also evict lookaside table pages without further filtering: the cache is under pressure + * by definition and we want to free space. */ if (__wt_page_is_empty(page) || F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || F_ISSET(btree, WT_BTREE_LOOKASIDE)) @@ -1920,15 +1902,12 @@ __evict_walk_tree(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue, u_int max_ent continue; /* - * Don't attempt eviction of internal pages with children in - * cache (indicated by seeing an internal page that is the - * parent of the last page we saw). + * Don't attempt eviction of internal pages with children in cache (indicated by seeing an + * internal page that is the parent of the last page we saw). * - * Also skip internal page unless we get aggressive, the tree - * is idle (indicated by the tree being skipped for walks), - * or we are in eviction debug mode. - * The goal here is that if trees become completely idle, we - * eventually push them out of cache completely. + * Also skip internal page unless we get aggressive, the tree is idle (indicated by the tree + * being skipped for walks), or we are in eviction debug mode. The goal here is that if + * trees become completely idle, we eventually push them out of cache completely. */ if (!F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE) && WT_PAGE_IS_INTERNAL(page)) { if (page == last_parent) @@ -1987,18 +1966,15 @@ fast: /* * Give up the walk occasionally. * - * If we happen to end up on the root page or a page requiring urgent - * eviction, clear it. We have to track hazard pointers, and the root - * page complicates that calculation. + * If we happen to end up on the root page or a page requiring urgent eviction, clear it. We + * have to track hazard pointers, and the root page complicates that calculation. * - * Likewise if we found no new candidates during the walk: there is no - * point keeping a page pinned, since it may be the only candidate in - * an idle tree. + * Likewise if we found no new candidates during the walk: there is no point keeping a page + * pinned, since it may be the only candidate in an idle tree. * - * If we land on a page requiring forced eviction, or that isn't an - * ordinary in-memory page (e.g., WT_REF_LIMBO), move until we find an - * ordinary page: we should not prevent exclusive access to the page - * until the next walk. + * If we land on a page requiring forced eviction, or that isn't an ordinary in-memory page + * (e.g., WT_REF_LIMBO), move until we find an ordinary page: we should not prevent exclusive + * access to the page until the next walk. */ if (ref != NULL) { if (__wt_ref_is_root(ref) || evict == start || give_up || @@ -2064,13 +2040,12 @@ __evict_get_ref(WT_SESSION_IMPL *session, bool is_server, WT_BTREE **btreep, WT_ } /* - * The server repopulates whenever the other queue is not full, as long - * as at least one page has been evicted out of the current queue. + * The server repopulates whenever the other queue is not full, as long as at least one page has + * been evicted out of the current queue. * - * Note that there are pathological cases where there are only enough - * eviction candidates in the cache to fill one queue. In that case, - * we will continually evict one page and attempt to refill the queues. - * Such cases are extremely rare in real applications. + * Note that there are pathological cases where there are only enough eviction candidates in the + * cache to fill one queue. In that case, we will continually evict one page and attempt to + * refill the queues. Such cases are extremely rare in real applications. */ if (is_server && (!urgent_ok || __evict_queue_empty(urgent_queue, false)) && !__evict_queue_full(cache->evict_current_queue) && @@ -2088,9 +2063,8 @@ __evict_get_ref(WT_SESSION_IMPL *session, bool is_server, WT_BTREE **btreep, WT_ /* * Check if the current queue needs to change. * - * The server will only evict half of the pages before looking - * for more, but should only switch queues if there are no - * other eviction workers. + * The server will only evict half of the pages before looking for more, but should only + * switch queues if there are no other eviction workers. */ queue = cache->evict_current_queue; other_queue = cache->evict_other_queue; @@ -2136,14 +2110,13 @@ __evict_get_ref(WT_SESSION_IMPL *session, bool is_server, WT_BTREE **btreep, WT_ WT_ASSERT(session, evict->btree != NULL); /* - * Evicting a dirty page in the server thread could stall - * during a write and prevent eviction from finding new work. + * Evicting a dirty page in the server thread could stall during a write and prevent + * eviction from finding new work. * - * However, we can't skip entries in the urgent queue or they - * may never be found again. + * However, we can't skip entries in the urgent queue or they may never be found again. * - * Don't force application threads to evict dirty pages if they - * aren't stalled by the amount of dirty data in cache. + * Don't force application threads to evict dirty pages if they aren't stalled by the amount + * of dirty data in cache. */ if (!urgent_ok && (is_server || !F_ISSET(cache, WT_CACHE_EVICT_DIRTY_HARD)) && __wt_page_is_modified(evict->ref->page)) { @@ -2233,13 +2206,11 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server) } /* - * In case something goes wrong, don't pick the same set of pages every - * time. + * In case something goes wrong, don't pick the same set of pages every time. * - * We used to bump the page's read generation only if eviction failed, - * but that isn't safe: at that point, eviction has already unlocked - * the page and some other thread may have evicted it by the time we - * look at it. + * We used to bump the page's read generation only if eviction failed, but that isn't safe: at + * that point, eviction has already unlocked the page and some other thread may have evicted it + * by the time we look at it. */ __wt_cache_read_gen_bump(session, ref->page); @@ -2295,31 +2266,32 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, bool readonly, d if (timer) time_start = __wt_clock(session); - WT_TRACK_TIME(session); +#ifdef HAVE_DIAGNOSTIC + __wt_seconds32(session, &session->op_5043_seconds); +#endif for (initial_progress = cache->eviction_progress;; ret = 0) { /* - * A pathological case: if we're the oldest transaction in the - * system and the eviction server is stuck trying to find space - * (and we're not in recovery, because those transactions can't - * be rolled back), abort the transaction to give up all hazard - * pointers before trying again. + * If eviction is stuck, check if this thread is likely causing problems and should be + * rolled back. Ignore if in recovery, those transactions can't be rolled back. */ - if (__wt_cache_stuck(session) && __wt_txn_am_oldest(session) && - !F_ISSET(conn, WT_CONN_RECOVERING)) { - --cache->evict_aggressive_score; - WT_STAT_CONN_INCR(session, txn_fail_cache); - WT_ERR( - __wt_txn_rollback_required(session, "oldest transaction rolled back for eviction")); + if (!F_ISSET(conn, WT_CONN_RECOVERING) && __wt_cache_stuck(session)) { + ret = __wt_txn_is_blocking_old(session); + if (ret == 0) + ret = __wt_txn_is_blocking_pin(session); + if (ret == WT_ROLLBACK) { + --cache->evict_aggressive_score; + WT_STAT_CONN_INCR(session, txn_fail_cache); + } + WT_ERR(ret); } /* * Check if we have become busy. * - * If we're busy (because of the transaction check we just did - * or because our caller is waiting on a longer-than-usual event - * such as a page read), and the cache level drops below 100%, - * limit the work to 5 evictions and return. If that's not the - * case, we can do more. + * If we're busy (because of the transaction check we just did or because our caller is + * waiting on a longer-than-usual event such as a page read), and the cache level drops + * below 100%, limit the work to 5 evictions and return. If that's not the case, we can do + * more. */ if (!busy && txn_state->pinned_id != WT_TXN_NONE && txn_global->current != txn_global->oldest_id) |