diff options
author | Michael Cahill <michael.cahill@wiredtiger.com> | 2012-01-23 22:32:28 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@wiredtiger.com> | 2012-01-23 22:32:28 +1100 |
commit | 1b06bf34bbeb8a5f27e6b320c0929a7f33dc4b36 (patch) | |
tree | fcc8aca611883d11cf6872c6d770e2f953fde39e /src/btree | |
parent | 80deca72baafe78dabfff58c7ecd8dc089219460 (diff) | |
download | mongo-1b06bf34bbeb8a5f27e6b320c0929a7f33dc4b36.tar.gz |
Some cleanup of the hazard code. In particular:
* error handling: use of WT_RET in functions that are only expected to return a boolean (e.g., WT_REF(__wt_realloc(...)) in __hazard_exclusive.
Switch to zero for success and EBUSY if the page is already locked. This shouldn't make it out to applications but at least is from the same namespace as ENOMEM or other system errors.
* two threads calling __hazard_exclusive concurrently were sharing memory in cache->hazard, with unpredictable results.
* the WT_REF_LOCKED state was being used for multiple things. Add a WT_REF_EVICTING state to prevent multiple threads choosing the same page from the eviction queue.
Diffstat (limited to 'src/btree')
-rw-r--r-- | src/btree/bt_evict.c | 22 | ||||
-rw-r--r-- | src/btree/bt_page.c | 3 | ||||
-rw-r--r-- | src/btree/rec_evict.c | 87 |
3 files changed, 53 insertions, 59 deletions
diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c index 28c1b89bd7e..7b08d747bb3 100644 --- a/src/btree/bt_evict.c +++ b/src/btree/bt_evict.c @@ -190,13 +190,6 @@ __wt_cache_evict_server(void *arg) session = &conn->default_session; WT_ERR(__wt_open_session(conn, 1, NULL, NULL, &session)); - /* - * Allocate memory for a copy of the hazard references (it's a fixed - * size, so doesn't need run-time adjustments). - */ - WT_ERR(__wt_calloc_def(session, - conn->session_size * conn->hazard_size, &cache->hazard)); - while (F_ISSET(conn, WT_SERVER_RUN)) { WT_VERBOSE(session, evictserver, "sleeping"); __wt_cond_wait(session, cache->evict_cond); @@ -222,7 +215,6 @@ err: __wt_err(session, ret, "eviction server error"); WT_VERBOSE(session, evictserver, "exiting"); __wt_free(session, cache->evict); - __wt_free(session, cache->hazard); if (session != &conn->default_session) (void)session->iface.close(&session->iface, NULL); @@ -334,7 +326,7 @@ __evict_request_walk(WT_SESSION_IMPL *session) if (F_ISSET(er, WT_EVICT_REQ_PAGE)) { WT_VERBOSE(session, evictserver, "forcing eviction of page %p", er->page); - ret = __wt_rec_evict(session, er->page, WT_REC_WAIT); + WT_RETRY_YIELD(__wt_rec_evict(session, er->page, 0)); } else ret = __evict_file(session, er); @@ -652,7 +644,7 @@ __evict_get_page( * prevent multiple attempts to evict it. */ ref = evict->page->ref; - if (!WT_ATOMIC_CAS(ref->state, WT_REF_MEM, WT_REF_LOCKED)) + if (!WT_ATOMIC_CAS(ref->state, WT_REF_MEM, WT_REF_EVICTING)) goto done; *pagep = evict->page; @@ -698,9 +690,17 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_app) * out of disk space, or the page had an in-memory subtree already being * evicted). Regardless, don't pick the same page every time. */ - if (__wt_rec_evict(session, page, 0) != 0) + if (__wt_rec_evict(session, page, 0) != 0) { page->read_gen = __wt_cache_read_gen(session); + /* + * If the evicting state of the page was not cleared, clear it + * now to make the page available again. + */ + if (page->ref->state == WT_REF_EVICTING) + page->ref->state = WT_REF_MEM; + } + WT_CLEAR_BTREE_IN_SESSION(session); session->btree = saved_btree; diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c index 51e80adfac0..7fb14714ec9 100644 --- a/src/btree/bt_page.c +++ b/src/btree/bt_page.c @@ -42,6 +42,7 @@ __wt_page_in_func( WT_RET(__wt_cache_read(session, parent, ref)); continue; + case WT_REF_EVICTING: case WT_REF_LOCKED: case WT_REF_READING: /* @@ -61,7 +62,7 @@ __wt_page_in_func( #ifdef HAVE_DIAGNOSTIC , file, line #endif - )) { + ) == 0) { ref->page->read_gen = __wt_cache_read_gen(session); return (0); diff --git a/src/btree/rec_evict.c b/src/btree/rec_evict.c index e5b32db2a62..4f5bf805c8f 100644 --- a/src/btree/rec_evict.c +++ b/src/btree/rec_evict.c @@ -8,8 +8,8 @@ #include "wt_internal.h" static int __hazard_bsearch_cmp(const void *, const void *); -static void __hazard_copy(WT_SESSION_IMPL *); -static int __hazard_exclusive(WT_SESSION_IMPL *, WT_REF *, int); +static int __hazard_copy(WT_SESSION_IMPL *); +static int __hazard_exclusive(WT_SESSION_IMPL *, WT_REF *); static int __hazard_qsort_cmp(const void *, const void *); static int __rec_discard(WT_SESSION_IMPL *, WT_PAGE *); static int __rec_discard_page(WT_SESSION_IMPL *, WT_PAGE *); @@ -306,8 +306,7 @@ __rec_review(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags) * locked down. */ if (!LF_ISSET(WT_REC_SINGLE)) - WT_RET(__hazard_exclusive( - session, page->ref, LF_ISSET(WT_REC_WAIT) ? 1 : 0)); + WT_RET(__hazard_exclusive(session, page->ref)); /* Walk the page's subtree and make sure we can evict this page. */ switch (page->type) { @@ -339,6 +338,7 @@ __rec_excl(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t flags) switch (ref->state) { case WT_REF_DISK: /* On-disk */ continue; + case WT_REF_EVICTING: /* Being evaluated */ case WT_REF_MEM: /* In-memory */ break; case WT_REF_LOCKED: /* Being evicted */ @@ -405,7 +405,7 @@ __rec_excl_page(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) */ if (!F_ISSET(page, WT_PAGE_REC_EMPTY | WT_PAGE_REC_SPLIT | WT_PAGE_REC_SPLIT_MERGE)) - return (1); + return (EBUSY); /* * If the page is dirty, try and write it. This is because once a page @@ -421,15 +421,14 @@ __rec_excl_page(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) /* Repeat the cheap test: an empty page might no longer be "empty". */ if (!F_ISSET(page, WT_PAGE_REC_EMPTY | WT_PAGE_REC_SPLIT | WT_PAGE_REC_SPLIT_MERGE)) - return (1); + return (EBUSY); /* * Next, if our caller doesn't have the tree locked down, get exclusive * access to the page. */ if (!LF_ISSET(WT_REC_SINGLE)) - WT_RET(__hazard_exclusive( - session, ref, LF_ISSET(WT_REC_WAIT) ? 1 : 0)); + WT_RET(__hazard_exclusive(session, ref)); /* * Finally, a more careful test: merge-split pages are OK, no matter if @@ -438,12 +437,12 @@ __rec_excl_page(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) * are not OK, they must be written first so we know what they're going * to look like to the parent. */ - if (F_ISSET(page, WT_PAGE_REC_SPLIT_MERGE)) + if (F_ISSET(page, WT_PAGE_REC_SPLIT_MERGE) || + (F_ISSET(page, WT_PAGE_REC_SPLIT | WT_PAGE_REC_EMPTY) && + !__wt_page_is_modified(page))) return (0); - if (F_ISSET(page, WT_PAGE_REC_SPLIT | WT_PAGE_REC_EMPTY)) - if (!__wt_page_is_modified(page)) - return (0); - return (1); + + return (EBUSY); } /* @@ -493,55 +492,40 @@ __rec_discard_page(WT_SESSION_IMPL *session, WT_PAGE *page) * Request exclusive access to a page. */ static int -__hazard_exclusive(WT_SESSION_IMPL *session, WT_REF *ref, int force) +__hazard_exclusive(WT_SESSION_IMPL *session, WT_REF *ref) { - WT_CACHE *cache; - - cache = S2C(session)->cache; - - /* The page must be in memory, and we may already have it locked. */ - WT_ASSERT(session, - ref->state == WT_REF_MEM || ref->state == WT_REF_LOCKED); - /* * Hazard references are acquired down the tree, which means we can't * deadlock. * - * Request exclusive access to the page; no memory flush needed, the - * state field is declared volatile. If another thread already has - * this page and we are not forcing the issue, give up. + * Request exclusive access to the page. It may be either in the + * evicting state (if this is the top-level page for this eviction + * operation), or a child page in memory. If another thread already + * has this page, give up. */ + if (!WT_ATOMIC_CAS(ref->state, WT_REF_EVICTING, WT_REF_LOCKED) && + !WT_ATOMIC_CAS(ref->state, WT_REF_MEM, WT_REF_LOCKED)) + return (EBUSY); + if (session->excl_next * sizeof(WT_REF *) == session->excl_allocated) WT_RET(__wt_realloc(session, &session->excl_allocated, (session->excl_next + 50) * sizeof(WT_REF *), &session->excl)); session->excl[session->excl_next++] = ref; - ref->state = WT_REF_LOCKED; /* Get a fresh copy of the hazard reference array. */ -retry: __hazard_copy(session); + WT_RET(__hazard_copy(session)); /* If we find a matching hazard reference, the page is still in use. */ - if (bsearch(ref->page, cache->hazard, cache->hazard_elem, + if (bsearch(ref->page, session->hazard_copy, session->hazard_copy_elem, sizeof(WT_HAZARD), __hazard_bsearch_cmp) == NULL) return (0); WT_BSTAT_INCR(session, rec_hazard); - - /* - * If we have to get this hazard reference, spin and wait for it to - * become available. - */ - if (force) { - __wt_yield(); - goto retry; - } - WT_CSTAT_INCR(session, cache_evict_hazard); WT_VERBOSE(session, evict, "page %p hazard request failed", ref->page); - - return (1); + return (EBUSY); } /* @@ -563,30 +547,39 @@ __hazard_qsort_cmp(const void *a, const void *b) * __hazard_copy -- * Copy the hazard array and prepare it for searching. */ -static void +static int __hazard_copy(WT_SESSION_IMPL *session) { - WT_CACHE *cache; WT_CONNECTION_IMPL *conn; uint32_t elem, i, j; conn = S2C(session); - cache = conn->cache; + + /* + * Allocate memory for a copy of the hazard references (it's a fixed + * size, so doesn't need run-time adjustments). + */ + if (session->hazard_copy == NULL) + WT_RET(__wt_calloc_def(session, + conn->session_size * conn->hazard_size, + &session->hazard_copy)); /* Copy the list of hazard references, compacting it as we go. */ elem = conn->session_size * conn->hazard_size; for (i = j = 0; j < elem; ++j) { if (conn->hazard[j].page == NULL) continue; - cache->hazard[i] = conn->hazard[j]; + session->hazard_copy[i] = conn->hazard[j]; ++i; } elem = i; /* Sort the list by page address. */ - qsort( - cache->hazard, (size_t)elem, sizeof(WT_HAZARD), __hazard_qsort_cmp); - cache->hazard_elem = elem; + qsort(session->hazard_copy, (size_t)elem, sizeof(WT_HAZARD), + __hazard_qsort_cmp); + session->hazard_copy_elem = elem; + + return (0); } /* |