diff options
author | Michael Cahill <mjc@wiredtiger.com> | 2012-05-22 01:09:42 -0700 |
---|---|---|
committer | Michael Cahill <mjc@wiredtiger.com> | 2012-05-22 01:09:42 -0700 |
commit | 4bf90b1f59f3bc6a9053c8acb55c1aaaf63a552c (patch) | |
tree | b180f0cb5e2669f1c0549aa3530e609636a6e306 | |
parent | 3009ce4ac2289f1effa78cec4a377a6cfda70b4b (diff) | |
parent | 4cd7ac1a90538d84c6e4c3a0b50861812eb2aafc (diff) | |
download | mongo-4bf90b1f59f3bc6a9053c8acb55c1aaaf63a552c.tar.gz |
Merge pull request #212 from wiredtiger/max-session
closes #186
-rw-r--r-- | src/btree/bt_discard.c | 8 | ||||
-rw-r--r-- | src/btree/bt_evict.c | 28 | ||||
-rw-r--r-- | src/btree/rec_evict.c | 26 | ||||
-rw-r--r-- | src/conn/conn_api.c | 29 | ||||
-rw-r--r-- | src/conn/conn_handle.c | 2 | ||||
-rw-r--r-- | src/conn/conn_open.c | 22 | ||||
-rw-r--r-- | src/include/api.h | 27 | ||||
-rw-r--r-- | src/include/btree.i | 37 | ||||
-rw-r--r-- | src/include/extern.h | 1 | ||||
-rw-r--r-- | src/session/session_api.c | 79 | ||||
-rw-r--r-- | src/support/hazard.c | 22 | ||||
-rw-r--r-- | src/support/sess_dump.c | 10 |
12 files changed, 162 insertions, 129 deletions
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index 7c4cfe5016c..86300a99c70 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -41,7 +41,13 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep, uint32_t flags) WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU)); #ifdef HAVE_DIAGNOSTIC - __wt_hazard_validate(session, page); + { + WT_HAZARD *hp; + if ((hp = __wt_page_hazard_check(session, page)) != NULL) + __wt_errx(session, + "discarded page has hazard reference: (%p: %s, line %d)", + hp->page, hp->file, hp->line); + } #endif /* * Pages without a memory footprint aren't associated with the cache diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c index de9509fabc7..4a0990cea0d 100644 --- a/src/btree/bt_evict.c +++ b/src/btree/bt_evict.c @@ -305,8 +305,10 @@ err: __wt_err(session, ret, "eviction server error"); __wt_free(session, cache->evict); - if (session != conn->default_session) + if (session != conn->default_session) { (void)session->iface.close(&session->iface, NULL); + __wt_free(conn->default_session, session->hazard); + } return (NULL); } @@ -399,7 +401,7 @@ __evict_file_request_walk(WT_SESSION_IMPL *session) WT_CONNECTION_IMPL *conn; WT_SESSION_IMPL *request_session; WT_DECL_RET; - uint32_t i; + uint32_t i, session_cnt; int syncop; conn = S2C(session); @@ -408,15 +410,19 @@ __evict_file_request_walk(WT_SESSION_IMPL *session) /* Make progress, regardless of success or failure. */ ++cache->sync_complete; - /* The session array requires no lock, it's fixed in size. */ - request_session = NULL; - for (i = 0; i < conn->session_cnt; ++i) - if ((request_session = conn->sessions[i]) != NULL && - request_session->syncop != 0) + /* + * No lock is required because the session array is fixed size, but it + * it may contain inactive entries. + * + * If we don't find a request, something went wrong; complain, but don't + * return an error code, the eviction thread doesn't need to exit. + */ + WT_ORDERED_READ(session_cnt, conn->session_cnt); + for (request_session = conn->sessions, + i = 0; i < session_cnt; ++request_session, ++i) + if (request_session->active && request_session->syncop != 0) break; - - /* If we don't find an entry, something broke, complain. */ - if (request_session == NULL) { + if (i == session_cnt) { __wt_errx(session, "failed to find handle's sync operation request"); return (0); @@ -431,8 +437,6 @@ __evict_file_request_walk(WT_SESSION_IMPL *session) syncop = request_session->syncop; request_session->syncop = 0; - WT_ASSERT(session, syncop != 0); - WT_VERBOSE_RET(session, evictserver, "file request: %s", (request_session->syncop == WT_SYNC ? "sync" : diff --git a/src/btree/rec_evict.c b/src/btree/rec_evict.c index d56ab93dea0..97df42acfdd 100644 --- a/src/btree/rec_evict.c +++ b/src/btree/rec_evict.c @@ -369,10 +369,6 @@ __rec_excl_clear(WT_SESSION_IMPL *session) static int __hazard_exclusive(WT_SESSION_IMPL *session, WT_REF *ref, int top) { - WT_CONNECTION_IMPL *conn; - WT_HAZARD *hp; - uint32_t elem, i; - /* * Make sure there is space to track exclusive access so we can unlock * to clean up. @@ -396,18 +392,14 @@ __hazard_exclusive(WT_SESSION_IMPL *session, WT_REF *ref, int top) session->excl[session->excl_next++] = ref; - /* Walk the list of hazard references to search for a match. */ - conn = S2C(session); - elem = conn->session_size * conn->hazard_size; - for (i = 0, hp = conn->hazard; i < elem; ++i, ++hp) - if (hp->page == ref->page) { - WT_BSTAT_INCR(session, rec_hazard); - WT_CSTAT_INCR(session, cache_evict_hazard); - - WT_VERBOSE_RET(session, - evict, "page %p hazard request failed", ref->page); - return (EBUSY); - } + /* Check for a matching hazard reference. */ + if (__wt_page_hazard_check(session, ref->page) == NULL) + return (0); - return (0); + WT_BSTAT_INCR(session, rec_hazard); + WT_CSTAT_INCR(session, cache_evict_hazard); + + WT_VERBOSE_RET( + session, evict, "page %p hazard request failed", ref->page); + return (EBUSY); } diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 456765149ed..877e27645ae 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -292,27 +292,30 @@ __conn_close(WT_CONNECTION *wt_conn, const char *config) WT_NAMED_COMPRESSOR *ncomp; WT_NAMED_DATA_SOURCE *ndsrc; WT_SESSION *wt_session; - WT_SESSION_IMPL *s, *session, **tp; + WT_SESSION_IMPL *s, *session; + uint32_t i; conn = (WT_CONNECTION_IMPL *)wt_conn; CONNECTION_API_CALL(conn, session, close, config, cfg); WT_UNUSED(cfg); - /* Close open sessions. */ - for (tp = conn->sessions; (s = *tp) != NULL;) { - if (!F_ISSET(s, WT_SESSION_INTERNAL)) { + /* + * Close open, external sessions. + * Additionally, the session's hazard reference memory isn't discarded + * during normal session close because access to it isn't serialized. + * Discard it now. Note the loop for the hazard reference memory, it's + * the entire session array, not only the active session count, as the + * active session count may be less than the maximum session count. + */ + for (s = conn->sessions, i = 0; i < conn->session_cnt; ++s, ++i) + if (s->active && !F_ISSET(s, WT_SESSION_INTERNAL)) { wt_session = &s->iface; WT_TRET(wt_session->close(wt_session, config)); - - /* - * We closed a session, which has shuffled pointers - * around. Restart the search. - */ - tp = conn->sessions; - } else - ++tp; - } + } + for (s = conn->sessions, i = 0; i < conn->session_size; ++s, ++i) + if (!F_ISSET(s, WT_SESSION_INTERNAL)) + __wt_free(session, s->hazard); /* Close open btree handles. */ WT_TRET(__wt_conn_btree_discard(conn)); diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index d02ef1ec7e3..9d29c881ff8 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -78,8 +78,6 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) /* Free allocated memory. */ __wt_free(session, conn->home); __wt_free(session, conn->sessions); - __wt_free(session, conn->session_array); - __wt_free(session, conn->hazard); __wt_free(session, conn->stats); __wt_free(NULL, conn); diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c index 17e1a41ccf2..a320ca83442 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -21,15 +21,9 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) session = conn->default_session; session->iface.connection = &conn->iface; - /* WT_SESSION_IMPL and hazard arrays. */ + /* WT_SESSION_IMPL array. */ WT_ERR(__wt_calloc(session, - conn->session_size, sizeof(WT_SESSION_IMPL *), &conn->sessions)); - WT_ERR(__wt_calloc(session, - conn->session_size, sizeof(WT_SESSION_IMPL), - &conn->session_array)); - WT_ERR(__wt_calloc(session, - conn->session_size * conn->hazard_size, sizeof(WT_HAZARD), - &conn->hazard)); + conn->session_size, sizeof(WT_SESSION_IMPL), &conn->sessions)); /* Create the cache. */ WT_ERR(__wt_cache_create(conn, cfg)); @@ -96,12 +90,18 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) } /* - * Close the default session and switch back to the dummy session in - * case of any error messages from the remaining operations while - * destroying the connection handle. + * Close the internal (default) session, and switch back to the dummy + * session in case of any error messages from the remaining operations + * while destroying the connection handle. + * + * Additionally, the session's hazard reference memory isn't discarded + * during normal session close because access to it isn't serialized. + * Discard it now. */ if (session != &conn->dummy_session) { WT_TRET(session->iface.close(&session->iface, NULL)); + __wt_free(&conn->dummy_session, session->hazard); + conn->default_session = &conn->dummy_session; } diff --git a/src/include/api.h b/src/include/api.h index 22b7c93df4d..4dd35c3ce90 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -58,6 +58,8 @@ typedef enum { struct __wt_session_impl { WT_SESSION iface; + u_int active; /* Non-zero if the session is in-use */ + WT_CONDVAR *cond; /* Condition variable */ const char *name; /* Name */ @@ -91,8 +93,6 @@ struct __wt_session_impl { int wq_sleeping; /* Thread is blocked */ int wq_ret; /* Return value */ - WT_HAZARD *hazard; /* Hazard reference array */ - void *reconcile; /* Reconciliation information */ WT_REF **excl; /* Eviction exclusive list */ @@ -105,6 +105,16 @@ struct __wt_session_impl { int syncop; /* File operation */ uint32_t flags; + + /* + * The hazard reference must be placed at the end of the structure: the + * structure is cleared when closed, all except the hazard reference. + * Putting the hazard reference at the end of the structure allows us to + * easily call a function to clear memory up to, but not including, the + * hazard reference. + */ +#define WT_SESSION_CLEAR(s) memset(s, 0, WT_PTRDIFF(&(s)->hazard, s)) + WT_HAZARD *hazard; /* Hazard reference array */ }; /******************************************* @@ -194,22 +204,17 @@ struct __wt_connection_impl { * the server thread code to avoid walking the entire array when only a * few threads are running. */ - WT_SESSION_IMPL **sessions; /* Session reference */ - void *session_array; /* Session array */ - uint32_t session_cnt; /* Session count */ + WT_SESSION_IMPL *sessions; /* Session reference */ + uint32_t session_size; /* Session array size */ + uint32_t session_cnt; /* Session count */ /* * WiredTiger allocates space for 15 hazard references in each thread of * control, by default. There's no code path that requires more than 15 * pages at a time (and if we find one, the right change is to increase * the default). - * - * The hazard array is separate from the WT_SESSION_IMPL array because - * we need to easily copy and search it when evicting pages from memory. */ - WT_HAZARD *hazard; /* Hazard references array */ - uint32_t hazard_size; - uint32_t session_size; + uint32_t hazard_size; /* Hazard array size */ WT_CACHE *cache; /* Page cache */ uint64_t cache_size; diff --git a/src/include/btree.i b/src/include/btree.i index de8cc6fb973..e076375e5ad 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -225,17 +225,50 @@ __wt_get_addr( /* * __wt_page_release -- - * Release a reference to a page, unless it's pinned into memory, in which - * case we never acquired a hazard reference. + * Release a reference to a page. */ static inline void __wt_page_release(WT_SESSION_IMPL *session, WT_PAGE *page) { + /* We never acquired a hazard reference on the root page. */ if (page != NULL && !WT_PAGE_IS_ROOT(page)) __wt_hazard_clear(session, page); } /* + * __wt_page_hazard_check -- + * Return if there's a hazard reference to the page in the system. + */ +static inline WT_HAZARD * +__wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page) +{ + WT_CONNECTION_IMPL *conn; + WT_HAZARD *hp; + WT_SESSION_IMPL *s; + uint32_t i, session_cnt; + + conn = S2C(session); + + /* + * No lock is required because the session array is fixed size, but it + * it may contain inactive entries. We must review any active session + * that might contain a hazard reference, so insert a barrier before + * reading the active session count. That way, no matter what sessions + * come or go, we'll check the slots for all of the sessions that could + * have been active when we started our check. + */ + WT_ORDERED_READ(session_cnt, conn->session_cnt); + for (s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) { + if (!s->active) + continue; + for (hp = s->hazard; hp < s->hazard + conn->hazard_size; ++hp) + if (hp->page == page) + return (hp); + } + return (NULL); +} + +/* * __wt_skip_choose_depth -- * Randomly choose a depth for a skiplist insert. */ diff --git a/src/include/extern.h b/src/include/extern.h index fae90c07120..3ca8ef5cff3 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -938,7 +938,6 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, int *busyp ); extern void __wt_hazard_clear(WT_SESSION_IMPL *session, WT_PAGE *page); extern void __wt_hazard_empty(WT_SESSION_IMPL *session); -extern void __wt_hazard_validate(WT_SESSION_IMPL *session, WT_PAGE *page); extern int __wt_raw_to_hex( WT_SESSION_IMPL *session, const uint8_t *from, uint32_t size, diff --git a/src/session/session_api.c b/src/session/session_api.c index c844e0b2784..3b9e3a8ea44 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -18,7 +18,7 @@ __session_close(WT_SESSION *wt_session, const char *config) WT_CONNECTION_IMPL *conn; WT_CURSOR *cursor; WT_DECL_RET; - WT_SESSION_IMPL *session, **tp; + WT_SESSION_IMPL *session; conn = (WT_CONNECTION_IMPL *)wt_session->connection; session = (WT_SESSION_IMPL *)wt_session; @@ -56,26 +56,26 @@ __session_close(WT_SESSION *wt_session, const char *config) (void)__wt_cond_destroy(session, session->cond); /* - * Replace the session reference we're closing with the last entry in - * the table, then clear the last entry. As far as the walk of the - * server threads is concerned, it's OK if the session appears twice, - * or if it doesn't appear at all, so these lines can race all they - * want. + * Sessions are re-used, clear the structure: this code sets the active + * field to 0, which will exclude the hazard array from review by the + * eviction thread. Note: there's no serialization support around the + * review of the hazard array, which means threads checking for hazard + * references first check the active field (which may be 0) and then use + * the hazard pointer (which cannot be NULL). For this reason, clear + * the session structure carefully. */ - for (tp = conn->sessions; *tp != session; ++tp) - ; - --conn->session_cnt; - *tp = conn->sessions[conn->session_cnt]; - conn->sessions[conn->session_cnt] = NULL; + WT_SESSION_CLEAR(session); + session = conn->default_session; /* - * Publish, making the session array entry available for re-use. There - * must be a barrier here to ensure the cleanup above completes before - * the entry is re-used. + * Decrement the count of active sessions if that's possible: a session + * being closed may or may not be at the end of the array, step toward + * the beginning of the array until we reach an active session. */ - WT_PUBLISH(session->iface.connection, NULL); + while (conn->sessions[conn->session_cnt - 1].active == 0) + if (--conn->session_cnt == 0) + break; - session = conn->default_session; __wt_spin_unlock(session, &conn->spinlock); err: API_END_NOTFOUND_MAP(session, ret); @@ -469,7 +469,7 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, int internal, }; WT_DECL_RET; WT_SESSION_IMPL *session, *session_ret; - uint32_t slot; + uint32_t i; WT_UNUSED(config); @@ -478,35 +478,45 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, int internal, __wt_spin_lock(session, &conn->spinlock); - /* Check to see if there's an available session slot. */ - if (conn->session_cnt == conn->session_size - 1) + /* Find the first inactive session slot. */ + for (session_ret = conn->sessions, + i = 0; i < conn->session_size; ++session_ret, ++i) + if (!session_ret->active) + break; + if (i == conn->session_size) WT_ERR_MSG(session, WT_ERROR, - "WiredTiger only configured to support %d thread contexts", + "only configured to support %d thread contexts", conn->session_size); /* - * The session reference list is compact, the session array is not. - * Find the first empty session slot. + * If the active session count is increasing, update it. We don't worry + * about correcting the session count on error, as long as we don't mark + * this session as active, we'll clean it up on close. */ - for (slot = 0, session_ret = conn->session_array; - session_ret->iface.connection != NULL; - ++session_ret, ++slot) - ; + if (i >= conn->session_cnt) /* Defend against off-by-one errors. */ + conn->session_cnt = i + 1; - /* Session entries are re-used, clear the old contents. */ - WT_CLEAR(*session_ret); - - WT_ERR(__wt_cond_alloc(session, "session", 1, &session_ret->cond)); session_ret->iface = stds; session_ret->iface.connection = &conn->iface; + + WT_ERR(__wt_cond_alloc(session, "session", 1, &session_ret->cond)); + __wt_event_handler_set(session_ret, (event_handler != NULL) ? event_handler : session_ret->event_handler); - session_ret->hazard = conn->hazard + slot * conn->hazard_size; TAILQ_INIT(&session_ret->cursors); TAILQ_INIT(&session_ret->btrees); /* + * The session's hazard reference memory isn't discarded during normal + * session close because access to it isn't serialized. Allocate the + * first time we open this session. + */ + if (session_ret->hazard == NULL) + WT_ERR(__wt_calloc(session, conn->hazard_size, + sizeof(WT_HAZARD), &session_ret->hazard)); + + /* * Public sessions are automatically closed during WT_CONNECTION->close. * If the session handles for internal threads were to go on the public * list, there would be complex ordering issues during close. Set a @@ -517,10 +527,11 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, int internal, /* * Publish: make the entry visible to server threads. There must be a - * barrier to ensure the structure fields are set before any other - * thread can see the session. + * barrier for two reasons, to ensure structure fields are set before + * any other thread will consider the session, and to push the session + * count to ensure the eviction thread can't review too few slots. */ - WT_PUBLISH(conn->sessions[conn->session_cnt++], session_ret); + WT_PUBLISH(session_ret->active, 1); STATIC_ASSERT(offsetof(WT_CONNECTION_IMPL, iface) == 0); *sessionp = session_ret; diff --git a/src/support/hazard.c b/src/support/hazard.c index b7557013c9f..3846889dce2 100644 --- a/src/support/hazard.c +++ b/src/support/hazard.c @@ -207,26 +207,4 @@ __hazard_dump(WT_SESSION_IMPL *session) if (fail) __wt_errx(session, "unexpected hazard reference"); } - -/* - * __wt_hazard_validate -- - * Confirm that a page isn't on the hazard list. - */ -void -__wt_hazard_validate(WT_SESSION_IMPL *session, WT_PAGE *page) -{ - WT_CONNECTION_IMPL *conn; - WT_HAZARD *hp; - uint32_t elem, i; - - conn = S2C(session); - - elem = conn->session_size * conn->hazard_size; - for (i = 0, hp = conn->hazard; i < elem; ++i, ++hp) - if (hp->page == page) - __wt_errx(session, - "discarded page has hazard reference: " - "(%p: %s, line %d)", - hp->page, hp->file, hp->line); -} #endif diff --git a/src/support/sess_dump.c b/src/support/sess_dump.c index da6fd07c411..5fa5deb2b74 100644 --- a/src/support/sess_dump.c +++ b/src/support/sess_dump.c @@ -15,13 +15,17 @@ void __wt_session_dump_all(WT_SESSION_IMPL *session) { - WT_SESSION_IMPL **tp; + WT_CONNECTION_IMPL *conn; + WT_SESSION_IMPL *s; + uint32_t i; if (session == NULL) return; - for (tp = S2C(session)->sessions; *tp != NULL; ++tp) - __wt_session_dump(*tp); + conn = S2C(session); + for (s = conn->sessions, i = 0; i < conn->session_size; ++s, ++i) + if (s->active) + __wt_session_dump(s); } /* |