summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Michael Cahill <michael.cahill@wiredtiger.com> 2012-02-29 14:50:41 +1100
committer: Michael Cahill <michael.cahill@wiredtiger.com> 2012-02-29 14:50:41 +1100
commit: 63960ed5463713bace9e283092d6ae894e9f6464 (patch)
tree: 048d5cc75b06f210bd8cce0f2f72e86889efb923
parent: 2c41bbf5fd56b5362decfa7575d34fc08c657e8c (diff)
download: mongo-63960ed5463713bace9e283092d6ae894e9f6464.tar.gz
If forced page eviction fails, just give up and hope it works next time.
This allows rec_evict.c to simply set the WT_REF state to WT_REF_MEM after all failures, and fixes a bug where pages on the forced eviction queue would end up with state WT_REF_MEM, meaning they could be chosen for eviction multiple times.

The tradeoff is that, when building a tree from scratch that is larger than the cache size, the eviction thread may never get exclusive access to the single leaf page in order to evict it. I've added a yield to the eviction server path to give application threads a chance to leave the page, but that won't help in cases of genuine contention (though we've never had a complete answer for that case).

Closes #175.
-rw-r--r-- src/btree/bt_evict.c 25
-rw-r--r-- src/btree/rec_evict.c 31
2 files changed, 33 insertions, 23 deletions
diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c
index aa8704b56f3..118f17882f0 100644
--- a/src/btree/bt_evict.c
+++ b/src/btree/bt_evict.c
@@ -364,7 +364,24 @@ __evict_request_walk(WT_SESSION_IMPL *session)
WT_VERBOSE(session, evictserver,
"forcing eviction of page %p", er->page);
- ret = __wt_rec_evict(session, er->page, 0);
+
+ /*
+ * At this point, the page is marked with
+ * WT_REF_EVICTING, which stalls new readers. Take a
+ * brief pause before attempting to evict it to give
+ * existing readers a chance to drop their references.
+ */
+ __wt_yield();
+
+ /*
+ * If eviction fails, free up the page and hope it
+ * works next time. Application threads may be holding
+ * a reference while trying to get another (e.g., if
+ * they have two cursors open), so blocking
+ * indefinitely leads to deadlock.
+ */
+ if ((ret = __wt_rec_evict(session, er->page, 0)) != 0)
+ er->page->ref->state = WT_REF_MEM;
} else {
/*
* If we're about to do a walk of the file tree (and
@@ -391,11 +408,7 @@ __evict_request_walk(WT_SESSION_IMPL *session)
if (!F_ISSET(er, WT_EVICT_REQ_PAGE))
__wt_session_serialize_wrapup(
request_session, NULL, ret);
- else if (ret == EBUSY) {
- /* Don't rest until this request is handled. */
- __wt_cond_signal(session, cache->evict_cond);
- continue;
- }
+
__evict_req_clr(session, er);
}
return (0);
diff --git a/src/btree/rec_evict.c b/src/btree/rec_evict.c
index 45fc4fe9629..95ca42c2a1b 100644
--- a/src/btree/rec_evict.c
+++ b/src/btree/rec_evict.c
@@ -479,7 +479,15 @@ __hazard_exclusive(WT_SESSION_IMPL *session, WT_REF *ref)
{
WT_CONNECTION_IMPL *conn;
uint32_t elem, i;
- int ret, was_evicting;
+
+ /*
+ * Make sure there is space to track exclusive access so we can unlock
+ * to clean up.
+ */
+ if (session->excl_next * sizeof(WT_REF *) == session->excl_allocated)
+ WT_RET(__wt_realloc(session, &session->excl_allocated,
+ (session->excl_next + 50) * sizeof(WT_REF *),
+ &session->excl));
/*
* Hazard references are acquired down the tree, which means we can't
@@ -496,12 +504,12 @@ __hazard_exclusive(WT_SESSION_IMPL *session, WT_REF *ref)
* to force out. Without this, application threads can starve eviction
* and heap usage grows without bounds.
*/
- was_evicting = 0;
- if (WT_ATOMIC_CAS(ref->state, WT_REF_EVICTING, WT_REF_LOCKED))
- was_evicting = 1;
- else if (!WT_ATOMIC_CAS(ref->state, WT_REF_MEM, WT_REF_LOCKED))
+ if (!WT_ATOMIC_CAS(ref->state, WT_REF_MEM, WT_REF_LOCKED) &&
+ !WT_ATOMIC_CAS(ref->state, WT_REF_EVICTING, WT_REF_LOCKED))
return (EBUSY); /* We couldn't change the state. */
+ session->excl[session->excl_next++] = ref;
+
/* Walk the list of hazard references to search for a match. */
conn = S2C(session);
elem = conn->session_size * conn->hazard_size;
@@ -512,19 +520,8 @@ __hazard_exclusive(WT_SESSION_IMPL *session, WT_REF *ref)
WT_VERBOSE(session,
evict, "page %p hazard request failed", ref->page);
- WT_ERR(EBUSY);
+ return (EBUSY);
}
- /* We have exclusive access, track that so we can unlock to clean up. */
- if (session->excl_next * sizeof(WT_REF *) == session->excl_allocated)
- WT_ERR(__wt_realloc(session, &session->excl_allocated,
- (session->excl_next + 50) * sizeof(WT_REF *),
- &session->excl));
- session->excl[session->excl_next++] = ref;
-
return (0);
-
- /* Restore to the original state on error. */
-err: ref->state = (was_evicting ? WT_REF_EVICTING : WT_REF_MEM);
- return (ret);
}