summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michael Cahill <mjc@wiredtiger.com> 2012-05-22 01:09:42 -0700
committer Michael Cahill <mjc@wiredtiger.com> 2012-05-22 01:09:42 -0700
commit 4bf90b1f59f3bc6a9053c8acb55c1aaaf63a552c (patch)
tree b180f0cb5e2669f1c0549aa3530e609636a6e306
parent 3009ce4ac2289f1effa78cec4a377a6cfda70b4b (diff)
parent 4cd7ac1a90538d84c6e4c3a0b50861812eb2aafc (diff)
download mongo-4bf90b1f59f3bc6a9053c8acb55c1aaaf63a552c.tar.gz
Merge pull request #212 from wiredtiger/max-session
closes #186
-rw-r--r-- src/btree/bt_discard.c 8
-rw-r--r-- src/btree/bt_evict.c 28
-rw-r--r-- src/btree/rec_evict.c 26
-rw-r--r-- src/conn/conn_api.c 29
-rw-r--r-- src/conn/conn_handle.c 2
-rw-r--r-- src/conn/conn_open.c 22
-rw-r--r-- src/include/api.h 27
-rw-r--r-- src/include/btree.i 37
-rw-r--r-- src/include/extern.h 1
-rw-r--r-- src/session/session_api.c 79
-rw-r--r-- src/support/hazard.c 22
-rw-r--r-- src/support/sess_dump.c 10
12 files changed, 162 insertions, 129 deletions
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c
index 7c4cfe5016c..86300a99c70 100644
--- a/src/btree/bt_discard.c
+++ b/src/btree/bt_discard.c
@@ -41,7 +41,13 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep, uint32_t flags)
WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU));
#ifdef HAVE_DIAGNOSTIC
- __wt_hazard_validate(session, page);
+ {
+ WT_HAZARD *hp;
+ if ((hp = __wt_page_hazard_check(session, page)) != NULL)
+ __wt_errx(session,
+ "discarded page has hazard reference: (%p: %s, line %d)",
+ hp->page, hp->file, hp->line);
+ }
#endif
/*
* Pages without a memory footprint aren't associated with the cache
diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c
index de9509fabc7..4a0990cea0d 100644
--- a/src/btree/bt_evict.c
+++ b/src/btree/bt_evict.c
@@ -305,8 +305,10 @@ err: __wt_err(session, ret, "eviction server error");
__wt_free(session, cache->evict);
- if (session != conn->default_session)
+ if (session != conn->default_session) {
(void)session->iface.close(&session->iface, NULL);
+ __wt_free(conn->default_session, session->hazard);
+ }
return (NULL);
}
@@ -399,7 +401,7 @@ __evict_file_request_walk(WT_SESSION_IMPL *session)
WT_CONNECTION_IMPL *conn;
WT_SESSION_IMPL *request_session;
WT_DECL_RET;
- uint32_t i;
+ uint32_t i, session_cnt;
int syncop;
conn = S2C(session);
@@ -408,15 +410,19 @@ __evict_file_request_walk(WT_SESSION_IMPL *session)
/* Make progress, regardless of success or failure. */
++cache->sync_complete;
- /* The session array requires no lock, it's fixed in size. */
- request_session = NULL;
- for (i = 0; i < conn->session_cnt; ++i)
- if ((request_session = conn->sessions[i]) != NULL &&
- request_session->syncop != 0)
+ /*
+ * No lock is required because the session array is fixed size, but it
+ * may contain inactive entries.
+ *
+ * If we don't find a request, something went wrong; complain, but don't
+ * return an error code, the eviction thread doesn't need to exit.
+ */
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+ for (request_session = conn->sessions,
+ i = 0; i < session_cnt; ++request_session, ++i)
+ if (request_session->active && request_session->syncop != 0)
break;
-
- /* If we don't find an entry, something broke, complain. */
- if (request_session == NULL) {
+ if (i == session_cnt) {
__wt_errx(session,
"failed to find handle's sync operation request");
return (0);
@@ -431,8 +437,6 @@ __evict_file_request_walk(WT_SESSION_IMPL *session)
syncop = request_session->syncop;
request_session->syncop = 0;
- WT_ASSERT(session, syncop != 0);
-
WT_VERBOSE_RET(session, evictserver,
"file request: %s",
(request_session->syncop == WT_SYNC ? "sync" :
diff --git a/src/btree/rec_evict.c b/src/btree/rec_evict.c
index d56ab93dea0..97df42acfdd 100644
--- a/src/btree/rec_evict.c
+++ b/src/btree/rec_evict.c
@@ -369,10 +369,6 @@ __rec_excl_clear(WT_SESSION_IMPL *session)
static int
__hazard_exclusive(WT_SESSION_IMPL *session, WT_REF *ref, int top)
{
- WT_CONNECTION_IMPL *conn;
- WT_HAZARD *hp;
- uint32_t elem, i;
-
/*
* Make sure there is space to track exclusive access so we can unlock
* to clean up.
@@ -396,18 +392,14 @@ __hazard_exclusive(WT_SESSION_IMPL *session, WT_REF *ref, int top)
session->excl[session->excl_next++] = ref;
- /* Walk the list of hazard references to search for a match. */
- conn = S2C(session);
- elem = conn->session_size * conn->hazard_size;
- for (i = 0, hp = conn->hazard; i < elem; ++i, ++hp)
- if (hp->page == ref->page) {
- WT_BSTAT_INCR(session, rec_hazard);
- WT_CSTAT_INCR(session, cache_evict_hazard);
-
- WT_VERBOSE_RET(session,
- evict, "page %p hazard request failed", ref->page);
- return (EBUSY);
- }
+ /* Check for a matching hazard reference. */
+ if (__wt_page_hazard_check(session, ref->page) == NULL)
+ return (0);
- return (0);
+ WT_BSTAT_INCR(session, rec_hazard);
+ WT_CSTAT_INCR(session, cache_evict_hazard);
+
+ WT_VERBOSE_RET(
+ session, evict, "page %p hazard request failed", ref->page);
+ return (EBUSY);
}
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 456765149ed..877e27645ae 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -292,27 +292,30 @@ __conn_close(WT_CONNECTION *wt_conn, const char *config)
WT_NAMED_COMPRESSOR *ncomp;
WT_NAMED_DATA_SOURCE *ndsrc;
WT_SESSION *wt_session;
- WT_SESSION_IMPL *s, *session, **tp;
+ WT_SESSION_IMPL *s, *session;
+ uint32_t i;
conn = (WT_CONNECTION_IMPL *)wt_conn;
CONNECTION_API_CALL(conn, session, close, config, cfg);
WT_UNUSED(cfg);
- /* Close open sessions. */
- for (tp = conn->sessions; (s = *tp) != NULL;) {
- if (!F_ISSET(s, WT_SESSION_INTERNAL)) {
+ /*
+ * Close open, external sessions.
+ * Additionally, the session's hazard reference memory isn't discarded
+ * during normal session close because access to it isn't serialized.
+ * Discard it now. Note the loop for the hazard reference memory, it's
+ * the entire session array, not only the active session count, as the
+ * active session count may be less than the maximum session count.
+ */
+ for (s = conn->sessions, i = 0; i < conn->session_cnt; ++s, ++i)
+ if (s->active && !F_ISSET(s, WT_SESSION_INTERNAL)) {
wt_session = &s->iface;
WT_TRET(wt_session->close(wt_session, config));
-
- /*
- * We closed a session, which has shuffled pointers
- * around. Restart the search.
- */
- tp = conn->sessions;
- } else
- ++tp;
- }
+ }
+ for (s = conn->sessions, i = 0; i < conn->session_size; ++s, ++i)
+ if (!F_ISSET(s, WT_SESSION_INTERNAL))
+ __wt_free(session, s->hazard);
/* Close open btree handles. */
WT_TRET(__wt_conn_btree_discard(conn));
diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c
index d02ef1ec7e3..9d29c881ff8 100644
--- a/src/conn/conn_handle.c
+++ b/src/conn/conn_handle.c
@@ -78,8 +78,6 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
/* Free allocated memory. */
__wt_free(session, conn->home);
__wt_free(session, conn->sessions);
- __wt_free(session, conn->session_array);
- __wt_free(session, conn->hazard);
__wt_free(session, conn->stats);
__wt_free(NULL, conn);
diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c
index 17e1a41ccf2..a320ca83442 100644
--- a/src/conn/conn_open.c
+++ b/src/conn/conn_open.c
@@ -21,15 +21,9 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[])
session = conn->default_session;
session->iface.connection = &conn->iface;
- /* WT_SESSION_IMPL and hazard arrays. */
+ /* WT_SESSION_IMPL array. */
WT_ERR(__wt_calloc(session,
- conn->session_size, sizeof(WT_SESSION_IMPL *), &conn->sessions));
- WT_ERR(__wt_calloc(session,
- conn->session_size, sizeof(WT_SESSION_IMPL),
- &conn->session_array));
- WT_ERR(__wt_calloc(session,
- conn->session_size * conn->hazard_size, sizeof(WT_HAZARD),
- &conn->hazard));
+ conn->session_size, sizeof(WT_SESSION_IMPL), &conn->sessions));
/* Create the cache. */
WT_ERR(__wt_cache_create(conn, cfg));
@@ -96,12 +90,18 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
}
/*
- * Close the default session and switch back to the dummy session in
- * case of any error messages from the remaining operations while
- * destroying the connection handle.
+ * Close the internal (default) session, and switch back to the dummy
+ * session in case of any error messages from the remaining operations
+ * while destroying the connection handle.
+ *
+ * Additionally, the session's hazard reference memory isn't discarded
+ * during normal session close because access to it isn't serialized.
+ * Discard it now.
*/
if (session != &conn->dummy_session) {
WT_TRET(session->iface.close(&session->iface, NULL));
+ __wt_free(&conn->dummy_session, session->hazard);
+
conn->default_session = &conn->dummy_session;
}
diff --git a/src/include/api.h b/src/include/api.h
index 22b7c93df4d..4dd35c3ce90 100644
--- a/src/include/api.h
+++ b/src/include/api.h
@@ -58,6 +58,8 @@ typedef enum {
struct __wt_session_impl {
WT_SESSION iface;
+ u_int active; /* Non-zero if the session is in-use */
+
WT_CONDVAR *cond; /* Condition variable */
const char *name; /* Name */
@@ -91,8 +93,6 @@ struct __wt_session_impl {
int wq_sleeping; /* Thread is blocked */
int wq_ret; /* Return value */
- WT_HAZARD *hazard; /* Hazard reference array */
-
void *reconcile; /* Reconciliation information */
WT_REF **excl; /* Eviction exclusive list */
@@ -105,6 +105,16 @@ struct __wt_session_impl {
int syncop; /* File operation */
uint32_t flags;
+
+ /*
+ * The hazard reference must be placed at the end of the structure: the
+ * structure is cleared when closed, all except the hazard reference.
+ * Putting the hazard reference at the end of the structure allows us to
+ * easily call a function to clear memory up to, but not including, the
+ * hazard reference.
+ */
+#define WT_SESSION_CLEAR(s) memset(s, 0, WT_PTRDIFF(&(s)->hazard, s))
+ WT_HAZARD *hazard; /* Hazard reference array */
};
/*******************************************
@@ -194,22 +204,17 @@ struct __wt_connection_impl {
* the server thread code to avoid walking the entire array when only a
* few threads are running.
*/
- WT_SESSION_IMPL **sessions; /* Session reference */
- void *session_array; /* Session array */
- uint32_t session_cnt; /* Session count */
+ WT_SESSION_IMPL *sessions; /* Session reference */
+ uint32_t session_size; /* Session array size */
+ uint32_t session_cnt; /* Session count */
/*
* WiredTiger allocates space for 15 hazard references in each thread of
* control, by default. There's no code path that requires more than 15
* pages at a time (and if we find one, the right change is to increase
* the default).
- *
- * The hazard array is separate from the WT_SESSION_IMPL array because
- * we need to easily copy and search it when evicting pages from memory.
*/
- WT_HAZARD *hazard; /* Hazard references array */
- uint32_t hazard_size;
- uint32_t session_size;
+ uint32_t hazard_size; /* Hazard array size */
WT_CACHE *cache; /* Page cache */
uint64_t cache_size;
diff --git a/src/include/btree.i b/src/include/btree.i
index de8cc6fb973..e076375e5ad 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -225,17 +225,50 @@ __wt_get_addr(
/*
* __wt_page_release --
- * Release a reference to a page, unless it's pinned into memory, in which
- * case we never acquired a hazard reference.
+ * Release a reference to a page.
*/
static inline void
__wt_page_release(WT_SESSION_IMPL *session, WT_PAGE *page)
{
+ /* We never acquired a hazard reference on the root page. */
if (page != NULL && !WT_PAGE_IS_ROOT(page))
__wt_hazard_clear(session, page);
}
/*
+ * __wt_page_hazard_check --
+ * Return any hazard reference to the page held by an active session, or NULL.
+ */
+static inline WT_HAZARD *
+__wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_HAZARD *hp;
+ WT_SESSION_IMPL *s;
+ uint32_t i, session_cnt;
+
+ conn = S2C(session);
+
+ /*
+ * No lock is required because the session array is fixed size, but it
+ * may contain inactive entries. We must review any active session
+ * that might contain a hazard reference, so insert a barrier before
+ * reading the active session count. That way, no matter what sessions
+ * come or go, we'll check the slots for all of the sessions that could
+ * have been active when we started our check.
+ */
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+ for (s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) {
+ if (!s->active)
+ continue;
+ for (hp = s->hazard; hp < s->hazard + conn->hazard_size; ++hp)
+ if (hp->page == page)
+ return (hp);
+ }
+ return (NULL);
+}
+
+/*
* __wt_skip_choose_depth --
* Randomly choose a depth for a skiplist insert.
*/
diff --git a/src/include/extern.h b/src/include/extern.h
index fae90c07120..3ca8ef5cff3 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -938,7 +938,6 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, int *busyp
);
extern void __wt_hazard_clear(WT_SESSION_IMPL *session, WT_PAGE *page);
extern void __wt_hazard_empty(WT_SESSION_IMPL *session);
-extern void __wt_hazard_validate(WT_SESSION_IMPL *session, WT_PAGE *page);
extern int __wt_raw_to_hex( WT_SESSION_IMPL *session,
const uint8_t *from,
uint32_t size,
diff --git a/src/session/session_api.c b/src/session/session_api.c
index c844e0b2784..3b9e3a8ea44 100644
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -18,7 +18,7 @@ __session_close(WT_SESSION *wt_session, const char *config)
WT_CONNECTION_IMPL *conn;
WT_CURSOR *cursor;
WT_DECL_RET;
- WT_SESSION_IMPL *session, **tp;
+ WT_SESSION_IMPL *session;
conn = (WT_CONNECTION_IMPL *)wt_session->connection;
session = (WT_SESSION_IMPL *)wt_session;
@@ -56,26 +56,26 @@ __session_close(WT_SESSION *wt_session, const char *config)
(void)__wt_cond_destroy(session, session->cond);
/*
- * Replace the session reference we're closing with the last entry in
- * the table, then clear the last entry. As far as the walk of the
- * server threads is concerned, it's OK if the session appears twice,
- * or if it doesn't appear at all, so these lines can race all they
- * want.
+ * Sessions are re-used, clear the structure: this code sets the active
+ * field to 0, which will exclude the hazard array from review by the
+ * eviction thread. Note: there's no serialization support around the
+ * review of the hazard array, which means threads checking for hazard
+ * references first check the active field (which may be 0) and then use
+ * the hazard pointer (which cannot be NULL). For this reason, clear
+ * the session structure carefully.
*/
- for (tp = conn->sessions; *tp != session; ++tp)
- ;
- --conn->session_cnt;
- *tp = conn->sessions[conn->session_cnt];
- conn->sessions[conn->session_cnt] = NULL;
+ WT_SESSION_CLEAR(session);
+ session = conn->default_session;
/*
- * Publish, making the session array entry available for re-use. There
- * must be a barrier here to ensure the cleanup above completes before
- * the entry is re-used.
+ * Decrement the count of active sessions if that's possible: a session
+ * being closed may or may not be at the end of the array, step toward
+ * the beginning of the array until we reach an active session.
*/
- WT_PUBLISH(session->iface.connection, NULL);
+ while (conn->sessions[conn->session_cnt - 1].active == 0)
+ if (--conn->session_cnt == 0)
+ break;
- session = conn->default_session;
__wt_spin_unlock(session, &conn->spinlock);
err: API_END_NOTFOUND_MAP(session, ret);
@@ -469,7 +469,7 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, int internal,
};
WT_DECL_RET;
WT_SESSION_IMPL *session, *session_ret;
- uint32_t slot;
+ uint32_t i;
WT_UNUSED(config);
@@ -478,35 +478,45 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, int internal,
__wt_spin_lock(session, &conn->spinlock);
- /* Check to see if there's an available session slot. */
- if (conn->session_cnt == conn->session_size - 1)
+ /* Find the first inactive session slot. */
+ for (session_ret = conn->sessions,
+ i = 0; i < conn->session_size; ++session_ret, ++i)
+ if (!session_ret->active)
+ break;
+ if (i == conn->session_size)
WT_ERR_MSG(session, WT_ERROR,
- "WiredTiger only configured to support %d thread contexts",
+ "only configured to support %d thread contexts",
conn->session_size);
/*
- * The session reference list is compact, the session array is not.
- * Find the first empty session slot.
+ * If the active session count is increasing, update it. We don't worry
+ * about correcting the session count on error, as long as we don't mark
+ * this session as active, we'll clean it up on close.
*/
- for (slot = 0, session_ret = conn->session_array;
- session_ret->iface.connection != NULL;
- ++session_ret, ++slot)
- ;
+ if (i >= conn->session_cnt) /* Defend against off-by-one errors. */
+ conn->session_cnt = i + 1;
- /* Session entries are re-used, clear the old contents. */
- WT_CLEAR(*session_ret);
-
- WT_ERR(__wt_cond_alloc(session, "session", 1, &session_ret->cond));
session_ret->iface = stds;
session_ret->iface.connection = &conn->iface;
+
+ WT_ERR(__wt_cond_alloc(session, "session", 1, &session_ret->cond));
+
__wt_event_handler_set(session_ret, (event_handler != NULL) ?
event_handler : session_ret->event_handler);
- session_ret->hazard = conn->hazard + slot * conn->hazard_size;
TAILQ_INIT(&session_ret->cursors);
TAILQ_INIT(&session_ret->btrees);
/*
+ * The session's hazard reference memory isn't discarded during normal
+ * session close because access to it isn't serialized. Allocate the
+ * first time we open this session.
+ */
+ if (session_ret->hazard == NULL)
+ WT_ERR(__wt_calloc(session, conn->hazard_size,
+ sizeof(WT_HAZARD), &session_ret->hazard));
+
+ /*
* Public sessions are automatically closed during WT_CONNECTION->close.
* If the session handles for internal threads were to go on the public
* list, there would be complex ordering issues during close. Set a
@@ -517,10 +527,11 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, int internal,
/*
* Publish: make the entry visible to server threads. There must be a
- * barrier to ensure the structure fields are set before any other
- * thread can see the session.
+ * barrier for two reasons, to ensure structure fields are set before
+ * any other thread will consider the session, and to push the session
+ * count to ensure the eviction thread can't review too few slots.
*/
- WT_PUBLISH(conn->sessions[conn->session_cnt++], session_ret);
+ WT_PUBLISH(session_ret->active, 1);
STATIC_ASSERT(offsetof(WT_CONNECTION_IMPL, iface) == 0);
*sessionp = session_ret;
diff --git a/src/support/hazard.c b/src/support/hazard.c
index b7557013c9f..3846889dce2 100644
--- a/src/support/hazard.c
+++ b/src/support/hazard.c
@@ -207,26 +207,4 @@ __hazard_dump(WT_SESSION_IMPL *session)
if (fail)
__wt_errx(session, "unexpected hazard reference");
}
-
-/*
- * __wt_hazard_validate --
- * Confirm that a page isn't on the hazard list.
- */
-void
-__wt_hazard_validate(WT_SESSION_IMPL *session, WT_PAGE *page)
-{
- WT_CONNECTION_IMPL *conn;
- WT_HAZARD *hp;
- uint32_t elem, i;
-
- conn = S2C(session);
-
- elem = conn->session_size * conn->hazard_size;
- for (i = 0, hp = conn->hazard; i < elem; ++i, ++hp)
- if (hp->page == page)
- __wt_errx(session,
- "discarded page has hazard reference: "
- "(%p: %s, line %d)",
- hp->page, hp->file, hp->line);
-}
#endif
diff --git a/src/support/sess_dump.c b/src/support/sess_dump.c
index da6fd07c411..5fa5deb2b74 100644
--- a/src/support/sess_dump.c
+++ b/src/support/sess_dump.c
@@ -15,13 +15,17 @@
void
__wt_session_dump_all(WT_SESSION_IMPL *session)
{
- WT_SESSION_IMPL **tp;
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *s;
+ uint32_t i;
if (session == NULL)
return;
- for (tp = S2C(session)->sessions; *tp != NULL; ++tp)
- __wt_session_dump(*tp);
+ conn = S2C(session);
+ for (s = conn->sessions, i = 0; i < conn->session_size; ++s, ++i)
+ if (s->active)
+ __wt_session_dump(s);
}
/*