summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: agorrod <alexg@wiredtiger.com> 2012-10-09 22:22:42 -0700
committer: agorrod <alexg@wiredtiger.com> 2012-10-09 22:22:42 -0700
commit: 5ae1ccb205aa3d012dce5757b4bba80391b4fd97 (patch)
tree: db15e9c20de9429cb576392df11bb128ba12740a
parent: 16f187e9fed776a917e5ea96608d98fbca58b63b (diff)
parent: 858bcf33633d7595412570cf01615b6291d91e7c (diff)
download: mongo-5ae1ccb205aa3d012dce5757b4bba80391b4fd97.tar.gz
Merge pull request #356 from wiredtiger/eviction-simplify
Simplify eviction
-rw-r--r-- src/btree/bt_evict.c 191
-rw-r--r-- src/btree/bt_page.c 41
-rw-r--r-- src/btree/row_modify.c 18
-rw-r--r-- src/include/api.h 2
-rw-r--r-- src/include/cache.h 1
-rw-r--r-- src/include/cache.i 54
-rw-r--r-- src/include/extern.h 1
-rw-r--r-- src/support/hazard.c 17
8 files changed, 41 insertions, 284 deletions
diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c
index 064e1b096f6..a8c019f52f4 100644
--- a/src/btree/bt_evict.c
+++ b/src/btree/bt_evict.c
@@ -12,7 +12,6 @@ static int __evict_file_request(WT_SESSION_IMPL *, int);
static int __evict_file_request_walk(WT_SESSION_IMPL *);
static int __evict_lru(WT_SESSION_IMPL *);
static int __evict_lru_cmp(const void *, const void *);
-static int __evict_page_request_walk(WT_SESSION_IMPL *);
static int __evict_walk(WT_SESSION_IMPL *);
static int __evict_walk_file(WT_SESSION_IMPL *, u_int *);
static int __evict_worker(WT_SESSION_IMPL *);
@@ -104,36 +103,6 @@ __wt_evict_list_clr_page(WT_SESSION_IMPL *session, WT_PAGE *page)
}
/*
- * __evict_req_set --
- * Set an entry in the forced page eviction request list.
- */
-static inline void
-__evict_req_set(WT_EVICT_ENTRY *r, WT_BTREE *btree, WT_PAGE *page)
-{
- r->btree = btree;
- /*
- * Publish: there must be a barrier to ensure the structure fields are
- * set before the eviction thread can see the request.
- */
- WT_PUBLISH(r->page, page);
-}
-
-/*
- * __evict_req_clr --
- * Clear an entry in the forced page eviction request list.
- */
-static inline void
-__evict_req_clr(WT_EVICT_ENTRY *r)
-{
- r->btree = NULL;
- r->page = NULL;
- /*
- * No publication necessary, all we care about is the page value and
- * whenever it's cleared is fine.
- */
-}
-
-/*
* __wt_evict_server_wake --
* Wake the eviction server thread.
*/
@@ -186,74 +155,6 @@ __wt_sync_file_serial_func(WT_SESSION_IMPL *session, void *args)
}
/*
- * __wt_evict_page_request --
- * Schedule a page for forced eviction due to a high volume of inserts or
- * updates.
- */
-void
-__wt_evict_page_request(WT_SESSION_IMPL *session, WT_PAGE *page)
-{
- WT_CACHE *cache;
- WT_EVICT_ENTRY *er, *er_end;
- int set;
-
- cache = S2C(session)->cache;
-
- /* Do a cheap test before acquiring the lock. */
- if (page->ref->state != WT_REF_MEM)
- return;
-
- __wt_spin_lock(session, &cache->evict_lock);
-
- /*
- * Application threads request forced eviction of pages when they
- * become too big. The application thread must hold a hazard reference
- * when this function is called, which protects it from being freed.
- *
- * However, it is possible (but unlikely) that the page is already part
- * way through the process of being evicted: a thread may have selected
- * it from the LRU list but not yet checked its hazard references.
- *
- * To avoid that race, we try to atomically switch the page state to
- * WT_REF_EVICT_FORCE. Since only one thread can do that successfully,
- * this prevents a page from being evicted twice. Threads looking for
- * a page to evict on the ordinary LRU eviction queue will ignore this
- * page and it will be evicted by the main eviction thread.
- *
- * If the state is not WT_REF_MEM, some other thread is already
- * evicting this page, which is fine, and in that case we don't want to
- * put it on the request queue because the memory may be freed by the
- * time the eviction thread sees it.
- */
- if (!WT_ATOMIC_CAS(page->ref->state, WT_REF_MEM, WT_REF_EVICT_FORCE)) {
- __wt_spin_unlock(session, &cache->evict_lock);
- return;
- }
-
- set = 0;
-
- /* Find an empty slot and enter the eviction request. */
- WT_EVICT_REQ_FOREACH(er, er_end, cache)
- if (er->page == NULL) {
- __evict_req_set(er, session->btree, page);
- set = 1;
- break;
- }
-
- if (!set) {
- /*
- * The request table is full, that's okay for page requests:
- * another thread will see this later.
- */
- WT_VERBOSE_VOID(session, evictserver,
- "page eviction request table is full");
- page->ref->state = WT_REF_MEM;
- }
-
- __wt_spin_unlock(session, &cache->evict_lock);
-}
-
-/*
* __wt_cache_evict_server --
* Thread to evict pages from the cache.
*/
@@ -334,13 +235,6 @@ __evict_worker(WT_SESSION_IMPL *session)
*/
__wt_spin_lock(session, &cache->evict_lock);
- /*
- * Walk the eviction-request queue. It is important to do this
- * before closing files, in case a page schedule for eviction
- * is freed by closing a file.
- */
- WT_RET(__evict_page_request_walk(session));
-
/* If there is a file sync request, satisfy it. */
while (cache->sync_complete != cache->sync_request)
WT_RET(__evict_file_request_walk(session));
@@ -606,79 +500,6 @@ err: if (next_page != NULL)
}
/*
- * __evict_page_request_walk --
- * Walk the forced page eviction request queue.
- */
-static int
-__evict_page_request_walk(WT_SESSION_IMPL *session)
-{
- WT_CACHE *cache;
- WT_EVICT_ENTRY *er, *er_end;
- WT_PAGE *page;
- WT_REF *ref;
-
- cache = S2C(session)->cache;
-
- /*
- * Walk the forced page eviction request queue: if we find a request,
- * perform it and clear the request slot.
- */
- WT_EVICT_REQ_FOREACH(er, er_end, cache) {
- if ((page = er->page) == NULL)
- continue;
-
- /* Reference the correct WT_BTREE handle. */
- WT_SET_BTREE_IN_SESSION(session, er->btree);
-
- WT_VERBOSE_RET(session, evictserver,
- "forcing eviction of page %p", page);
-
- /*
- * The eviction candidate list might reference pages we are
- * about to discard; clear it.
- */
- __evict_list_clr_all(session, 0);
-
- /*
- * The eviction candidate might be part of the current tree's
- * walk; clear it.
- */
- __evict_clear_tree_walk(session, NULL);
-
- /*
- * Wait for LRU eviction activity to drain. It is much easier
- * to reason about sync or forced eviction if we know there are
- * no other threads evicting in the tree.
- */
- while (session->btree->lru_count > 0) {
- __wt_spin_unlock(session, &cache->evict_lock);
- __wt_yield();
- __wt_spin_lock(session, &cache->evict_lock);
- }
-
- ref = page->ref;
- WT_ASSERT(session, ref->page == page);
- WT_ASSERT(session, ref->state == WT_REF_EVICT_FORCE);
- ref->state = WT_REF_LOCKED;
-
- /*
- * If eviction fails, it will free up the page: hope it works
- * next time. Application threads may be holding a reference
- * while trying to get another (e.g., if they have two cursors
- * open), so blocking indefinitely leads to deadlock.
- */
- (void)__evict_page(session, page);
-
- /* Clear the reference to the btree handle. */
- WT_CLEAR_BTREE_IN_SESSION(session);
-
- /* Clear the request slot. */
- __evict_req_clr(er);
- }
- return (0);
-}
-
-/*
* __evict_lru --
* Evict pages from the cache based on their read generation.
*/
@@ -719,7 +540,7 @@ __evict_walk(WT_SESSION_IMPL *session)
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
- u_int elem, i;
+ u_int elem, file_count, i;
conn = S2C(session);
cache = S2C(session)->cache;
@@ -729,8 +550,7 @@ __evict_walk(WT_SESSION_IMPL *session)
* get some pages from each underlying file. In practice, a realloc
* is rarely needed, so it is worth avoiding the LRU lock.
*/
- elem = WT_EVICT_WALK_BASE +
- (conn->open_btree_count * WT_EVICT_WALK_PER_TABLE);
+ elem = WT_EVICT_WALK_BASE + 2 * WT_EVICT_GROUP;
if (elem > cache->evict_entries) {
__wt_spin_lock(session, &cache->evict_lock);
/* Save the offset of the eviction point. */
@@ -749,7 +569,11 @@ __evict_walk(WT_SESSION_IMPL *session)
* servicing eviction requests.
*/
i = WT_EVICT_WALK_BASE;
+ file_count = 0;
TAILQ_FOREACH(btree, &conn->btqh, q) {
+ if (file_count++ < cache->evict_file_next)
+ continue;
+
/*
* Skip files that aren't open or don't have a root page.
*
@@ -770,9 +594,10 @@ __evict_walk(WT_SESSION_IMPL *session)
ret = __evict_walk_file(session, &i);
WT_CLEAR_BTREE_IN_SESSION(session);
- if (ret != 0)
+ if (ret != 0 || i == cache->evict_entries)
break;
}
+ cache->evict_file_next = (btree == NULL) ? 0 : file_count;
if (0) {
err: __wt_spin_unlock(session, &cache->evict_lock);
diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c
index ad0fc237191..ecdf9d47a16 100644
--- a/src/btree/bt_page.c
+++ b/src/btree/bt_page.c
@@ -28,36 +28,18 @@ __wt_page_in_func(
{
WT_DECL_RET;
WT_PAGE *page;
- int busy, read_lockout, wake;
-
- /*
- * Only wake the eviction server the first time through here (if the
- * cache is too full), or after we fail to evict a page. Otherwise, we
- * are just wasting effort and making a busy mutex busier.
- */
- wake = 1;
+ int busy;
for (;;) {
switch (ref->state) {
case WT_REF_DISK:
case WT_REF_DELETED:
- /* The page isn't in memory, attempt to read it. */
-
- /* Check if there is space in the cache. */
- __wt_eviction_check(session, &read_lockout, wake);
- wake = 0;
-
/*
- * If the cache is full, give up, but only if we are
- * not holding the schema lock. The schema lock can
- * block checkpoints, and thus eviction, so it is not
- * safe to wait for eviction if we are holding it.
+ * The page isn't in memory, attempt to read it.
+ *
+ * First make sure there is space in the cache.
*/
- if (read_lockout &&
- !F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) &&
- !F_ISSET(session->btree, WT_BTREE_NO_CACHE))
- break;
-
+ WT_RET(__wt_cache_full_check(session));
WT_RET(__wt_cache_read(session, parent, ref));
continue;
case WT_REF_EVICT_FORCE:
@@ -92,12 +74,12 @@ __wt_page_in_func(
* Ensure the page doesn't have ancient updates on it.
* If it did, reading the page could ignore committed
* updates. This should be extremely unlikely in real
- * applications, force eviction of the page to avoid
+ * applications, wait for eviction of the page to avoid
* the issue.
*/
if (page->modify != NULL &&
__wt_txn_ancient(session, page->modify->first_id)) {
- __wt_evict_page_request(session, page);
+ page->read_gen = 0;
__wt_hazard_clear(session, page);
__wt_evict_server_wake(session);
break;
@@ -114,13 +96,8 @@ __wt_page_in_func(
WT_ILLEGAL_VALUE(session);
}
- /* Find a page to evict -- if the page is busy, keep trying. */
- if ((ret = __wt_evict_lru_page(session, 1)) == EBUSY)
- __wt_yield();
- else if (ret == WT_NOTFOUND)
- wake = 1;
- else
- WT_RET(ret);
+ /* We failed to get the page -- yield before retrying. */
+ __wt_yield();
}
}
diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c
index 40ba30c6c5d..7d42cf4727e 100644
--- a/src/btree/row_modify.c
+++ b/src/btree/row_modify.c
@@ -268,9 +268,7 @@ __wt_insert_serial_func(WT_SESSION_IMPL *session, void *args)
int
__wt_update_check(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *next)
{
- WT_DECL_RET;
WT_TXN *txn;
- int lockout, wake = 1;
/* Discard obsolete WT_UPDATE structures. */
if (next != NULL)
@@ -280,22 +278,6 @@ __wt_update_check(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *next)
WT_RET(__wt_txn_update_check(session, next));
/*
- * Pause if the cache is full.
- * This matches the logic in __wt_page_in_func.
- */
- for (;;) {
- __wt_eviction_check(session, &lockout, wake);
- wake = 0;
- if (!lockout ||
- F_ISSET(session, WT_SESSION_SCHEMA_LOCKED))
- break;
- if ((ret = __wt_evict_lru_page(session, 1)) == EBUSY)
- __wt_yield();
- else
- WT_RET_NOTFOUND_OK(ret);
- }
-
- /*
* Record the transaction ID for the first update to a page.
* We don't care if this races: there is a buffer built into the
* check for ancient updates.
diff --git a/src/include/api.h b/src/include/api.h
index f5aa7bd9050..bf78121bf5a 100644
--- a/src/include/api.h
+++ b/src/include/api.h
@@ -296,6 +296,7 @@ struct __wt_connection_impl {
#define TXN_API_CALL(s, h, n, cur, bt, cfg, cfgvar) do { \
int __autotxn = 0; \
API_CALL(s, h, n, bt, cur, cfg, cfgvar); \
+ WT_ERR(__wt_cache_full_check(s)); \
__autotxn = F_ISSET(S2C(s), WT_CONN_TRANSACTIONAL) && \
!F_ISSET(&(s)->txn, TXN_RUNNING); \
if (__autotxn) \
@@ -305,6 +306,7 @@ struct __wt_connection_impl {
#define TXN_API_CALL_NOCONF(s, h, n, cur, bt) do { \
int __autotxn = 0; \
API_CALL_NOCONF(s, h, n, cur, bt); \
+ WT_ERR(__wt_cache_full_check(s)); \
__autotxn = F_ISSET(S2C(s), WT_CONN_TRANSACTIONAL) && \
!F_ISSET(&(s)->txn, TXN_AUTOCOMMIT | TXN_RUNNING); \
if (__autotxn) \
diff --git a/src/include/cache.h b/src/include/cache.h
index 2fc7e0fedd3..73dd5ab1c31 100644
--- a/src/include/cache.h
+++ b/src/include/cache.h
@@ -52,6 +52,7 @@ struct __wt_cache {
WT_EVICT_ENTRY *evict_current; /* LRU current page to be evicted */
size_t evict_allocated; /* LRU list bytes allocated */
uint32_t evict_entries; /* LRU list eviction slots */
+ u_int evict_file_next; /* LRU: next file to search */
/*
* Forced-page eviction request information.
diff --git a/src/include/cache.i b/src/include/cache.i
index 75050eadde7..99aa35166d3 100644
--- a/src/include/cache.i
+++ b/src/include/cache.i
@@ -36,44 +36,30 @@ __wt_eviction_check(WT_SESSION_IMPL *session, int *read_lockoutp, int wake)
}
/*
- * __wt_eviction_page_check --
- * Return if a page should be forcibly evicted.
+ * __wt_cache_full_check --
+ * Wait for there to be space in the cache before a read or update.
*/
static inline int
-__wt_eviction_page_check(WT_SESSION_IMPL *session, WT_PAGE *page)
+__wt_cache_full_check(WT_SESSION_IMPL *session)
{
- WT_CONNECTION_IMPL *conn;
- WT_PAGE_MODIFY *mod;
-
- conn = S2C(session);
- mod = page->modify;
-
- /*
- * Root pages and clean pages are never forcibly evicted.
- * Nor are pages from files that are purely cache resident.
- */
- if (WT_PAGE_IS_ROOT(page) ||
- !__wt_page_is_modified(page) ||
- F_ISSET(session->btree, WT_BTREE_NO_EVICTION))
- return (0);
+ WT_DECL_RET;
+ int lockout, wake;
/*
- * Check the page's memory footprint - evict pages that take up more
- * than their fair share of the cache. We define a fair share as
- * approximately half the cache size per open writable btree handle.
+ * Only wake the eviction server the first time through here (if the
+ * cache is too full), or after we fail to evict a page. Otherwise, we
+ * are just wasting effort and making a busy mutex busier.
*/
- if ((int64_t)page->memory_footprint >
- conn->cache_size / (2 * (conn->open_btree_count + 1)))
- return (1);
-
- /*
- * If the page's write-generation has wrapped and caught up with the
- * page's disk generation (wildly unlikely as it requires 4B updates
- * between page reconciliations, but is technically possible), forcibly
- * evict the page.
- */
- if (mod != NULL && mod->write_gen + 1 == mod->disk_gen)
- return (1);
-
- return (0);
+ for (wake = 1;; wake = 0) {
+ __wt_eviction_check(session, &lockout, wake);
+ if (!lockout ||
+ F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) ||
+ (session->btree != NULL &&
+ F_ISSET(session->btree, WT_BTREE_NO_CACHE)))
+ return (0);
+ if ((ret = __wt_evict_lru_page(session, 1)) == EBUSY)
+ __wt_yield();
+ else
+ WT_RET_NOTFOUND_OK(ret);
+ }
}
diff --git a/src/include/extern.h b/src/include/extern.h
index fefe52d15f0..b783ff4400f 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -292,7 +292,6 @@ extern void __wt_page_out(WT_SESSION_IMPL *session,
extern void __wt_evict_list_clr_page(WT_SESSION_IMPL *session, WT_PAGE *page);
extern void __wt_evict_server_wake(WT_SESSION_IMPL *session);
extern int __wt_sync_file_serial_func(WT_SESSION_IMPL *session, void *args);
-extern void __wt_evict_page_request(WT_SESSION_IMPL *session, WT_PAGE *page);
extern void *__wt_cache_evict_server(void *arg);
extern int __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_app);
extern int __wt_btree_create(WT_SESSION_IMPL *session, const char *filename);
diff --git a/src/support/hazard.c b/src/support/hazard.c
index 670175ed124..df362278d25 100644
--- a/src/support/hazard.c
+++ b/src/support/hazard.c
@@ -139,16 +139,6 @@ __wt_hazard_clear(WT_SESSION_IMPL *session, WT_PAGE *page)
hp < session->hazard + session->hazard_size; ++hp)
if (hp->page == page) {
/*
- * Check to see if the page has grown too big and force
- * eviction. We have to request eviction while holding
- * a hazard reference (else the page might disappear out
- * from under us), but we can't wake the eviction server
- * until we've released our hazard reference because our
- * hazard reference blocks the page eviction. A little
- * dance: check the page, schedule the forced eviction,
- * clear/publish the hazard reference, wake the eviction
- * server.
- *
* We don't publish the hazard reference clear in the
* general case. It's not required for correctness;
* it gives the page server thread faster access to the
@@ -156,12 +146,7 @@ __wt_hazard_clear(WT_SESSION_IMPL *session, WT_PAGE *page)
* generation number was just set, so it's unlikely the
* page will be selected for eviction.
*/
- if (__wt_eviction_page_check(session, page)) {
- __wt_evict_page_request(session, page);
- WT_PUBLISH(hp->page, NULL);
- __wt_evict_server_wake(session);
- } else
- hp->page = NULL;
+ hp->page = NULL;
/*
* If this was the last hazard reference in the session,