summary | refs | log | tree | commit | diff
path: root/src/third_party/wiredtiger/src/evict/evict_lru.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/evict/evict_lru.c')
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_lru.c | 242
1 files changed, 143 insertions, 99 deletions
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 884c08a02df..50a00787f35 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -159,7 +159,7 @@ __wt_evict_server_wake(WT_SESSION_IMPL *session)
bytes_max / WT_MEGABYTE));
}
- return (__wt_cond_signal(session, cache->evict_cond));
+ return (__wt_cond_auto_signal(session, cache->evict_cond));
}
/*
@@ -175,8 +175,8 @@ __evict_server(void *arg)
WT_SESSION_IMPL *session;
#ifdef HAVE_DIAGNOSTIC
struct timespec now, stuck_ts;
- uint64_t pages_evicted = 0;
#endif
+ uint64_t pages_evicted = 0;
u_int spins;
session = arg;
@@ -219,11 +219,11 @@ __evict_server(void *arg)
/* Next time we wake up, reverse the sweep direction. */
cache->flags ^= WT_CACHE_WALK_REVERSE;
-#ifdef HAVE_DIAGNOSTIC
pages_evicted = 0;
} else if (pages_evicted != cache->pages_evict) {
- WT_ERR(__wt_epoch(session, &stuck_ts));
pages_evicted = cache->pages_evict;
+#ifdef HAVE_DIAGNOSTIC
+ WT_ERR(__wt_epoch(session, &stuck_ts));
} else {
/* After being stuck for 5 minutes, give up. */
WT_ERR(__wt_epoch(session, &now));
@@ -238,7 +238,8 @@ __evict_server(void *arg)
WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "sleeping"));
/* Don't rely on signals: check periodically. */
- WT_ERR(__wt_cond_wait(session, cache->evict_cond, 100000));
+ WT_ERR(__wt_cond_auto_wait(
+ session, cache->evict_cond, pages_evicted != 0));
WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "waking"));
}
@@ -720,12 +721,32 @@ __evict_clear_walks(WT_SESSION_IMPL *session)
}
/*
- * __evict_request_walk_clear --
+ * __evict_clear_all_walks --
+ * Clear the eviction walk points for all files a session is waiting on.
+ */
+static int
+__evict_clear_all_walks(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+
+ TAILQ_FOREACH(dhandle, &conn->dhqh, q)
+ if (WT_PREFIX_MATCH(dhandle->name, "file:"))
+ WT_WITH_DHANDLE(session,
+ dhandle, WT_TRET(__evict_clear_walk(session)));
+ return (ret);
+}
+
+/*
+ * __evict_request_clear_walk --
* Request that the eviction server clear the tree's current eviction
* point.
*/
static int
-__evict_request_walk_clear(WT_SESSION_IMPL *session)
+__evict_request_clear_walk(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
WT_CACHE *cache;
@@ -753,32 +774,12 @@ __evict_request_walk_clear(WT_SESSION_IMPL *session)
}
/*
- * __evict_clear_all_walks --
- * Clear the eviction walk points for all files a session is waiting on.
- */
-static int
-__evict_clear_all_walks(WT_SESSION_IMPL *session)
-{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
-
- conn = S2C(session);
-
- TAILQ_FOREACH(dhandle, &conn->dhqh, q)
- if (WT_PREFIX_MATCH(dhandle->name, "file:"))
- WT_WITH_DHANDLE(session,
- dhandle, WT_TRET(__evict_clear_walk(session)));
- return (ret);
-}
-
-/*
* __wt_evict_file_exclusive_on --
* Get exclusive eviction access to a file and discard any of the file's
* blocks queued for eviction.
*/
int
-__wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp)
+__wt_evict_file_exclusive_on(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
WT_CACHE *cache;
@@ -786,40 +787,39 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp)
WT_EVICT_ENTRY *evict;
u_int i, elem;
- *evict_resetp = false;
-
btree = S2BT(session);
cache = S2C(session)->cache;
- /* If the file was never evictable, there's no work to do. */
- if (F_ISSET(btree, WT_BTREE_NO_EVICTION))
- return (0);
-
/*
- * Hold the walk lock to set the "no eviction" flag: no new pages from
- * the file will be queued for eviction after this point.
+ * Hold the walk lock to set the no-eviction flag.
+ *
+ * The no-eviction flag can be set permanently, in which case we never
+ * increment the no-eviction count.
*/
__wt_spin_lock(session, &cache->evict_walk_lock);
- if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) {
- F_SET(btree, WT_BTREE_NO_EVICTION);
- *evict_resetp = true;
+ if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) {
+ if (btree->evict_disabled != 0)
+ ++btree->evict_disabled;
+ __wt_spin_unlock(session, &cache->evict_walk_lock);
+ return (0);
}
- __wt_spin_unlock(session, &cache->evict_walk_lock);
+ ++btree->evict_disabled;
- /* If some other operation has disabled eviction, we're done. */
- if (!*evict_resetp)
- return (0);
+ /*
+ * Ensure no new pages from the file will be queued for eviction after
+ * this point.
+ */
+ F_SET(btree, WT_BTREE_NO_EVICTION);
+ WT_FULL_BARRIER();
/* Clear any existing LRU eviction walk for the file. */
- WT_ERR(__evict_request_walk_clear(session));
-
- /* Hold the evict lock to remove any queued pages from this file. */
- __wt_spin_lock(session, &cache->evict_lock);
+ WT_ERR(__evict_request_clear_walk(session));
/*
* The eviction candidate list might reference pages from the file,
- * clear it.
+ * clear it. Hold the evict lock to remove queued pages from a file.
*/
+ __wt_spin_lock(session, &cache->evict_lock);
elem = cache->evict_max;
for (i = 0, evict = cache->evict_queue; i < elem; i++, evict++)
if (evict->btree == btree)
@@ -833,10 +833,11 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp)
while (btree->evict_busy > 0)
__wt_yield();
- return (0);
-
-err: F_CLR(btree, WT_BTREE_NO_EVICTION);
- *evict_resetp = false;
+ if (0) {
+err: --btree->evict_disabled;
+ F_CLR(btree, WT_BTREE_NO_EVICTION);
+ }
+ __wt_spin_unlock(session, &cache->evict_walk_lock);
return (ret);
}
@@ -848,8 +849,10 @@ void
__wt_evict_file_exclusive_off(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
+ WT_CACHE *cache;
btree = S2BT(session);
+ cache = S2C(session)->cache;
/*
* We have seen subtle bugs with multiple threads racing to turn
@@ -857,10 +860,17 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session)
*/
WT_DIAGNOSTIC_YIELD;
- WT_ASSERT(session, btree->evict_ref == NULL &&
- F_ISSET(btree, WT_BTREE_NO_EVICTION));
+ WT_ASSERT(session,
+ btree->evict_ref == NULL && F_ISSET(btree, WT_BTREE_NO_EVICTION));
- F_CLR(btree, WT_BTREE_NO_EVICTION);
+ /*
+ * The no-eviction flag can be set permanently, in which case we never
+ * increment the no-eviction count.
+ */
+ __wt_spin_lock(session, &cache->evict_walk_lock);
+ if (btree->evict_disabled > 0 && --btree->evict_disabled == 0)
+ F_CLR(btree, WT_BTREE_NO_EVICTION);
+ __wt_spin_unlock(session, &cache->evict_walk_lock);
}
/*
@@ -890,7 +900,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
{
WT_CACHE *cache;
WT_DECL_RET;
- uint64_t cutoff;
+ uint64_t cutoff, read_gen_oldest;
uint32_t candidates, entries;
cache = S2C(session)->cache;
@@ -931,34 +941,62 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
return (0);
}
- WT_ASSERT(session, cache->evict_queue[0].ref != NULL);
-
- /* Track the oldest read generation we have in the queue. */
- cache->read_gen_oldest = cache->evict_queue[0].ref->page->read_gen;
-
+ /* Decide how many of the candidates we're going to try and evict. */
if (FLD_ISSET(cache->state,
- WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK))
+ WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK)) {
/*
* Take all candidates if we only gathered pages with an oldest
* read generation set.
*/
cache->evict_candidates = entries;
- else {
- /* Find the bottom 25% of read generations. */
- cutoff = (3 * __evict_read_gen(&cache->evict_queue[0]) +
- __evict_read_gen(&cache->evict_queue[entries - 1])) / 4;
+ } else {
/*
- * Don't take less than 10% or more than 50% of entries,
- * regardless. That said, if there is only one entry, which is
- * normal when populating an empty file, don't exclude it.
+ * Find the oldest read generation we have in the queue, used
+ * to set the initial value for pages read into the system.
+ * The queue is sorted, find the first "normal" generation.
*/
- for (candidates = 1 + entries / 10;
- candidates < entries / 2;
- candidates++)
- if (__evict_read_gen(
- &cache->evict_queue[candidates]) > cutoff)
+ read_gen_oldest = WT_READGEN_OLDEST;
+ for (candidates = 0; candidates < entries; ++candidates) {
+ read_gen_oldest =
+ __evict_read_gen(&cache->evict_queue[candidates]);
+ if (read_gen_oldest != WT_READGEN_OLDEST)
break;
- cache->evict_candidates = candidates;
+ }
+
+ /*
+ * Take all candidates if we only gathered pages with an oldest
+ * read generation set.
+ *
+ * We normally never take more than 50% of the entries; if 50%
+ * of the entries were at the oldest read generation, take them.
+ */
+ if (read_gen_oldest == WT_READGEN_OLDEST)
+ cache->evict_candidates = entries;
+ else if (candidates >= entries / 2)
+ cache->evict_candidates = candidates;
+ else {
+ /* Save the calculated oldest generation. */
+ cache->read_gen_oldest = read_gen_oldest;
+
+ /* Find the bottom 25% of read generations. */
+ cutoff =
+ (3 * read_gen_oldest + __evict_read_gen(
+ &cache->evict_queue[entries - 1])) / 4;
+
+ /*
+ * Don't take less than 10% or more than 50% of entries,
+ * regardless. That said, if there is only one entry,
+ * which is normal when populating an empty file, don't
+ * exclude it.
+ */
+ for (candidates = 1 + entries / 10;
+ candidates < entries / 2;
+ candidates++)
+ if (__evict_read_gen(
+ &cache->evict_queue[candidates]) > cutoff)
+ break;
+ cache->evict_candidates = candidates;
+ }
}
cache->evict_current = cache->evict_queue;
@@ -1127,23 +1165,27 @@ retry: while (slot < max_entries && ret == 0) {
__wt_spin_unlock(session, &conn->dhandle_lock);
dhandle_locked = false;
- __wt_spin_lock(session, &cache->evict_walk_lock);
-
/*
- * Re-check the "no eviction" flag -- it is used to enforce
- * exclusive access when a handle is being closed.
+ * Re-check the "no eviction" flag, used to enforce exclusive
+ * access when a handle is being closed. If not set, remember
+ * the file to visit first, next loop.
+ *
+ * Only try to acquire the lock and simply continue if we fail;
+ * the lock is held while the thread turning off eviction clears
+ * the tree's current eviction point, and part of the process is
+ * waiting on this thread to acknowledge that action.
*/
- if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) {
- /* Remember the file to visit first, next loop. */
- cache->evict_file_next = dhandle;
-
- WT_WITH_DHANDLE(session, dhandle,
- ret = __evict_walk_file(session, &slot));
- WT_ASSERT(session, session->split_gen == 0);
+ if (!F_ISSET(btree, WT_BTREE_NO_EVICTION) &&
+ !__wt_spin_trylock(session, &cache->evict_walk_lock)) {
+ if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) {
+ cache->evict_file_next = dhandle;
+ WT_WITH_DHANDLE(session, dhandle,
+ ret = __evict_walk_file(session, &slot));
+ WT_ASSERT(session, session->split_gen == 0);
+ }
+ __wt_spin_unlock(session, &cache->evict_walk_lock);
}
- __wt_spin_unlock(session, &cache->evict_walk_lock);
-
/*
* If we didn't find any candidates in the file, skip it next
* time.
@@ -1286,6 +1328,18 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU))
continue;
+ /*
+ * It's possible (but unlikely) to visit a page without a read
+ * generation, if we race with the read instantiating the page.
+ * Ignore those pages, but set the page's read generation here
+ * to ensure a bug doesn't somehow leave a page without a read
+ * generation.
+ */
+ if (page->read_gen == WT_READGEN_NOTSET) {
+ __wt_cache_read_gen_new(session, page);
+ continue;
+ }
+
/* Pages we no longer need (clean or dirty), are found money. */
if (__wt_page_is_empty(page) ||
F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
@@ -1311,13 +1365,6 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
internal_pages >= (int)(evict - start) / 2)
continue;
- /*
- * If this page has never been considered for eviction, set its
- * read generation to somewhere in the middle of the LRU list.
- */
- if (page->read_gen == WT_READGEN_NOTSET)
- page->read_gen = __wt_cache_read_gen_new(session);
-
fast: /* If the page can't be evicted, give up. */
if (!__wt_page_can_evict(session, ref, NULL))
continue;
@@ -1477,7 +1524,6 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
{
WT_BTREE *btree;
WT_DECL_RET;
- WT_PAGE *page;
WT_REF *ref;
WT_RET(__evict_get_ref(session, is_server, &btree, &ref));
@@ -1506,9 +1552,7 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
* the page and some other thread may have evicted it by the time we
* look at it.
*/
- page = ref->page;
- if (page->read_gen != WT_READGEN_OLDEST)
- page->read_gen = __wt_cache_read_gen_bump(session);
+ __wt_cache_read_gen_bump(session, ref->page);
WT_WITH_BTREE(session, btree, ret = __wt_evict(session, ref, false));