summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/evict/evict_lru.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/evict/evict_lru.c')
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_lru.c | 218
1 files changed, 95 insertions, 123 deletions
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index c224a3b7b11..2f9f3220106 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -277,10 +277,12 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread)
conn = S2C(session);
cache = conn->cache;
- /*
- * The thread group code calls us repeatedly. So each call is one pass through eviction.
- */
- WT_TRACK_TIME(session);
+/*
+ * The thread group code calls us repeatedly. So each call is one pass through eviction.
+ */
+#ifdef HAVE_DIAGNOSTIC
+ __wt_seconds32(session, &session->op_5043_seconds);
+#endif
if (conn->evict_server_running && __wt_spin_trylock(session, &cache->evict_pass_lock) == 0) {
/*
* Cannot use WT_WITH_PASS_LOCK because this is a try lock. Fix when that is supported. We
@@ -426,15 +428,14 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work)
return (0);
#endif
/*
- * If we're stuck for 5 minutes in diagnostic mode, or the verbose
- * evict_stuck flag is configured, log the cache and transaction state.
+ * If we're stuck for 5 minutes in diagnostic mode, or the verbose evict_stuck flag is
+ * configured, log the cache and transaction state.
*
* If we're stuck for 5 minutes in diagnostic mode, give up.
*
- * We don't do this check for in-memory workloads because application
- * threads are not blocked by the cache being full. If the cache becomes
- * full of clean pages, we can be servicing reads while the cache
- * appears stuck to eviction.
+ * We don't do this check for in-memory workloads because application threads are not blocked by
+ * the cache being full. If the cache becomes full of clean pages, we can be servicing reads
+ * while the cache appears stuck to eviction.
*/
if (F_ISSET(conn, WT_CONN_IN_MEMORY))
return (0);
@@ -578,8 +579,7 @@ __evict_update_work(WT_SESSION_IMPL *session)
/*
* If we need space in the cache, try to find clean pages to evict.
*
- * Avoid division by zero if the cache size has not yet been set in a
- * shared cache.
+ * Avoid division by zero if the cache size has not yet been set in a shared cache.
*/
bytes_max = conn->cache_size + 1;
bytes_inuse = __wt_cache_bytes_inuse(cache);
@@ -679,14 +679,12 @@ __evict_pass(WT_SESSION_IMPL *session)
++cache->evict_pass_gen;
/*
- * Update the oldest ID: we use it to decide whether pages are
- * candidates for eviction. Without this, if all threads are
- * blocked after a long-running transaction (such as a
+ * Update the oldest ID: we use it to decide whether pages are candidates for eviction.
+ * Without this, if all threads are blocked after a long-running transaction (such as a
* checkpoint) completes, we may never start evicting again.
*
- * Do this every time the eviction server wakes up, regardless
- * of whether the cache is full, to prevent the oldest ID
- * falling too far behind. Don't wait to lock the table: with
+ * Do this every time the eviction server wakes up, regardless of whether the cache is full,
+ * to prevent the oldest ID falling too far behind. Don't wait to lock the table: with
* highly threaded workloads, that creates a bottleneck.
*/
WT_RET(__wt_txn_update_oldest(session, WT_TXN_OLDEST_STRICT));
@@ -702,14 +700,12 @@ __evict_pass(WT_SESSION_IMPL *session)
WT_RET(__evict_lru_walk(session));
/*
- * If the queue has been empty recently, keep queuing more
- * pages to evict. If the rate of queuing pages is high
- * enough, this score will go to zero, in which case the
- * eviction server might as well help out with eviction.
+ * If the queue has been empty recently, keep queuing more pages to evict. If the rate of
+ * queuing pages is high enough, this score will go to zero, in which case the eviction
+ * server might as well help out with eviction.
*
- * Also, if there is a single eviction server thread with no
- * workers, it must service the urgent queue in case all
- * application threads are busy.
+ * Also, if there is a single eviction server thread with no workers, it must service the
+ * urgent queue in case all application threads are busy.
*/
if (!WT_EVICT_HAS_WORKERS(session) &&
(cache->evict_empty_score < WT_EVICT_SCORE_CUTOFF ||
@@ -720,16 +716,13 @@ __evict_pass(WT_SESSION_IMPL *session)
break;
/*
- * If we're making progress, keep going; if we're not making
- * any progress at all, mark the cache "stuck" and go back to
- * sleep, it's not something we can fix.
+ * If we're making progress, keep going; if we're not making any progress at all, mark the
+ * cache "stuck" and go back to sleep: it's not something we can fix.
*
- * We check for progress every 20ms, the idea being that the
- * aggressive score will reach 10 after 200ms if we aren't
- * making progress and eviction will start considering more
- * pages. If there is still no progress after 2s, we will
- * treat the cache as stuck and start rolling back
- * transactions and writing updates to the lookaside table.
+ * We check for progress every 20ms, the idea being that the aggressive score will reach 10
+ * after 200ms if we aren't making progress and eviction will start considering more pages.
+ * If there is still no progress after 2s, we will treat the cache as stuck and start
+ * rolling back transactions and writing updates to the lookaside table.
*/
if (eviction_progress == cache->eviction_progress) {
if (WT_CLOCKDIFF_MS(time_now, time_prev) >= 20 &&
@@ -750,14 +743,11 @@ __evict_pass(WT_SESSION_IMPL *session)
*/
if (loop < 100 || cache->evict_aggressive_score < 100) {
/*
- * Back off if we aren't making progress: walks
- * hold the handle list lock, blocking other
- * operations that can free space in cache,
- * such as LSM discarding handles.
+ * Back off if we aren't making progress: walks hold the handle list lock, blocking
+ * other operations that can free space in cache, such as LSM discarding handles.
*
- * Allow this wait to be interrupted (e.g. if a
- * checkpoint completes): make sure we wait for
- * a non-zero number of microseconds).
+ * Allow this wait to be interrupted (e.g. if a checkpoint completes): make sure we
+ * wait for a non-zero number of microseconds.
*/
WT_STAT_CONN_INCR(session, cache_eviction_server_slept);
__wt_cond_wait(session, cache->evict_cond, WT_THOUSAND, NULL);
@@ -1181,8 +1171,8 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
/*
* Get some more pages to consider for eviction.
*
- * If the walk is interrupted, we still need to sort the queue: the
- * next walk assumes there are no entries beyond WT_EVICT_WALK_BASE.
+ * If the walk is interrupted, we still need to sort the queue: the next walk assumes there are
+ * no entries beyond WT_EVICT_WALK_BASE.
*/
if ((ret = __evict_walk(cache->walk_session, queue)) == EBUSY)
ret = 0;
@@ -1264,15 +1254,12 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
queue->evict_candidates = candidates;
else {
/*
- * Take all of the urgent pages plus a third of
- * ordinary candidates (which could be expressed as
- * WT_EVICT_WALK_INCR / WT_EVICT_WALK_BASE). In the
- * steady state, we want to get as many candidates as
- * the eviction walk adds to the queue.
+ * Take all of the urgent pages plus a third of ordinary candidates (which could be
+ * expressed as WT_EVICT_WALK_INCR / WT_EVICT_WALK_BASE). In the steady state, we want
+ * to get as many candidates as the eviction walk adds to the queue.
*
- * That said, if there is only one entry, which is
- * normal when populating an empty file, don't exclude
- * it.
+ * That said, if there is only one entry, which is normal when populating an empty file,
+ * don't exclude it.
*/
queue->evict_candidates = 1 + candidates + ((entries - candidates) - 1) / 3;
cache->read_gen_oldest = read_gen_oldest;
@@ -1468,11 +1455,9 @@ retry:
/*
* Skip files if we have too many active walks.
*
- * This used to be limited by the configured maximum number of
- * hazard pointers per session. Even though that ceiling has
- * been removed, we need to test eviction with huge numbers of
- * active trees before allowing larger numbers of hazard
- * pointers in the walk session.
+ * This used to be limited by the configured maximum number of hazard pointers per session.
+ * Even though that ceiling has been removed, we need to test eviction with huge numbers of
+ * active trees before allowing larger numbers of hazard pointers in the walk session.
*/
if (btree->evict_ref == NULL && session->nhazard > WT_EVICT_MAX_TREES)
continue;
@@ -1490,16 +1475,14 @@ retry:
dhandle_locked = false;
/*
- * Re-check the "no eviction" flag, used to enforce exclusive
- * access when a handle is being closed.
+ * Re-check the "no eviction" flag, used to enforce exclusive access when a handle is being
+ * closed.
*
- * Only try to acquire the lock and simply continue if we fail;
- * the lock is held while the thread turning off eviction clears
- * the tree's current eviction point, and part of the process is
- * waiting on this thread to acknowledge that action.
+ * Only try to acquire the lock and simply continue if we fail; the lock is held while the
+ * thread turning off eviction clears the tree's current eviction point, and part of the
+ * process is waiting on this thread to acknowledge that action.
*
- * If a handle is being discarded, it will still be marked open,
- * but won't have a root page.
+ * If a handle is being discarded, it will still be marked open, but won't have a root page.
*/
if (btree->evict_disabled == 0 && !__wt_spin_trylock(session, &cache->evict_walk_lock)) {
if (btree->evict_disabled == 0 && btree->root.page != NULL) {
@@ -1888,9 +1871,8 @@ __evict_walk_tree(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue, u_int max_ent
/*
* Pages that are empty or from dead trees are fast-tracked.
*
- * Also evict lookaside table pages without further filtering:
- * the cache is under pressure by definition and we want to
- * free space.
+ * Also evict lookaside table pages without further filtering: the cache is under pressure
+ * by definition and we want to free space.
*/
if (__wt_page_is_empty(page) || F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
F_ISSET(btree, WT_BTREE_LOOKASIDE))
@@ -1920,15 +1902,12 @@ __evict_walk_tree(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue, u_int max_ent
continue;
/*
- * Don't attempt eviction of internal pages with children in
- * cache (indicated by seeing an internal page that is the
- * parent of the last page we saw).
+ * Don't attempt eviction of internal pages with children in cache (indicated by seeing an
+ * internal page that is the parent of the last page we saw).
*
- * Also skip internal page unless we get aggressive, the tree
- * is idle (indicated by the tree being skipped for walks),
- * or we are in eviction debug mode.
- * The goal here is that if trees become completely idle, we
- * eventually push them out of cache completely.
+ * Also skip internal pages unless we get aggressive, the tree is idle (indicated by the
+ * tree being skipped for walks), or we are in eviction debug mode. The goal here is that
+ * if trees become completely idle, we eventually push them out of cache completely.
*/
if (!F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE) && WT_PAGE_IS_INTERNAL(page)) {
if (page == last_parent)
@@ -1987,18 +1966,15 @@ fast:
/*
* Give up the walk occasionally.
*
- * If we happen to end up on the root page or a page requiring urgent
- * eviction, clear it. We have to track hazard pointers, and the root
- * page complicates that calculation.
+ * If we happen to end up on the root page or a page requiring urgent eviction, clear it. We
+ * have to track hazard pointers, and the root page complicates that calculation.
*
- * Likewise if we found no new candidates during the walk: there is no
- * point keeping a page pinned, since it may be the only candidate in
- * an idle tree.
+ * Likewise if we found no new candidates during the walk: there is no point keeping a page
+ * pinned, since it may be the only candidate in an idle tree.
*
- * If we land on a page requiring forced eviction, or that isn't an
- * ordinary in-memory page (e.g., WT_REF_LIMBO), move until we find an
- * ordinary page: we should not prevent exclusive access to the page
- * until the next walk.
+ * If we land on a page requiring forced eviction, or that isn't an ordinary in-memory page
+ * (e.g., WT_REF_LIMBO), move until we find an ordinary page: we should not prevent exclusive
+ * access to the page until the next walk.
*/
if (ref != NULL) {
if (__wt_ref_is_root(ref) || evict == start || give_up ||
@@ -2064,13 +2040,12 @@ __evict_get_ref(WT_SESSION_IMPL *session, bool is_server, WT_BTREE **btreep, WT_
}
/*
- * The server repopulates whenever the other queue is not full, as long
- * as at least one page has been evicted out of the current queue.
+ * The server repopulates whenever the other queue is not full, as long as at least one page has
+ * been evicted out of the current queue.
*
- * Note that there are pathological cases where there are only enough
- * eviction candidates in the cache to fill one queue. In that case,
- * we will continually evict one page and attempt to refill the queues.
- * Such cases are extremely rare in real applications.
+ * Note that there are pathological cases where there are only enough eviction candidates in the
+ * cache to fill one queue. In that case, we will continually evict one page and attempt to
+ * refill the queues. Such cases are extremely rare in real applications.
*/
if (is_server && (!urgent_ok || __evict_queue_empty(urgent_queue, false)) &&
!__evict_queue_full(cache->evict_current_queue) &&
@@ -2088,9 +2063,8 @@ __evict_get_ref(WT_SESSION_IMPL *session, bool is_server, WT_BTREE **btreep, WT_
/*
* Check if the current queue needs to change.
*
- * The server will only evict half of the pages before looking
- * for more, but should only switch queues if there are no
- * other eviction workers.
+ * The server will only evict half of the pages before looking for more, but should only
+ * switch queues if there are no other eviction workers.
*/
queue = cache->evict_current_queue;
other_queue = cache->evict_other_queue;
@@ -2136,14 +2110,13 @@ __evict_get_ref(WT_SESSION_IMPL *session, bool is_server, WT_BTREE **btreep, WT_
WT_ASSERT(session, evict->btree != NULL);
/*
- * Evicting a dirty page in the server thread could stall
- * during a write and prevent eviction from finding new work.
+ * Evicting a dirty page in the server thread could stall during a write and prevent
+ * eviction from finding new work.
*
- * However, we can't skip entries in the urgent queue or they
- * may never be found again.
+ * However, we can't skip entries in the urgent queue or they may never be found again.
*
- * Don't force application threads to evict dirty pages if they
- * aren't stalled by the amount of dirty data in cache.
+ * Don't force application threads to evict dirty pages if they aren't stalled by the amount
+ * of dirty data in cache.
*/
if (!urgent_ok && (is_server || !F_ISSET(cache, WT_CACHE_EVICT_DIRTY_HARD)) &&
__wt_page_is_modified(evict->ref->page)) {
@@ -2233,13 +2206,11 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
}
/*
- * In case something goes wrong, don't pick the same set of pages every
- * time.
+ * In case something goes wrong, don't pick the same set of pages every time.
*
- * We used to bump the page's read generation only if eviction failed,
- * but that isn't safe: at that point, eviction has already unlocked
- * the page and some other thread may have evicted it by the time we
- * look at it.
+ * We used to bump the page's read generation only if eviction failed, but that isn't safe: at
+ * that point, eviction has already unlocked the page and some other thread may have evicted it
+ * by the time we look at it.
*/
__wt_cache_read_gen_bump(session, ref->page);
@@ -2295,31 +2266,32 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, bool readonly, d
if (timer)
time_start = __wt_clock(session);
- WT_TRACK_TIME(session);
+#ifdef HAVE_DIAGNOSTIC
+ __wt_seconds32(session, &session->op_5043_seconds);
+#endif
for (initial_progress = cache->eviction_progress;; ret = 0) {
/*
- * A pathological case: if we're the oldest transaction in the
- * system and the eviction server is stuck trying to find space
- * (and we're not in recovery, because those transactions can't
- * be rolled back), abort the transaction to give up all hazard
- * pointers before trying again.
+ * If eviction is stuck, check if this thread is likely causing problems and should be
+ * rolled back. Skip the check during recovery: those transactions can't be rolled back.
*/
- if (__wt_cache_stuck(session) && __wt_txn_am_oldest(session) &&
- !F_ISSET(conn, WT_CONN_RECOVERING)) {
- --cache->evict_aggressive_score;
- WT_STAT_CONN_INCR(session, txn_fail_cache);
- WT_ERR(
- __wt_txn_rollback_required(session, "oldest transaction rolled back for eviction"));
+ if (!F_ISSET(conn, WT_CONN_RECOVERING) && __wt_cache_stuck(session)) {
+ ret = __wt_txn_is_blocking_old(session);
+ if (ret == 0)
+ ret = __wt_txn_is_blocking_pin(session);
+ if (ret == WT_ROLLBACK) {
+ --cache->evict_aggressive_score;
+ WT_STAT_CONN_INCR(session, txn_fail_cache);
+ }
+ WT_ERR(ret);
}
/*
* Check if we have become busy.
*
- * If we're busy (because of the transaction check we just did
- * or because our caller is waiting on a longer-than-usual event
- * such as a page read), and the cache level drops below 100%,
- * limit the work to 5 evictions and return. If that's not the
- * case, we can do more.
+ * If we're busy (because of the transaction check we just did or because our caller is
+ * waiting on a longer-than-usual event such as a page read), and the cache level drops
+ * below 100%, limit the work to 5 evictions and return. If that's not the case, we can do
+ * more.
*/
if (!busy && txn_state->pinned_id != WT_TXN_NONE &&
txn_global->current != txn_global->oldest_id)