summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alex Gorrod <alexander.gorrod@mongodb.com> 2015-10-02 14:46:04 +1000
committer Alex Gorrod <alexander.gorrod@mongodb.com> 2015-10-02 14:46:04 +1000
commit ecfbbb0845e291c8cd7a8b317fb8c822efe9833f (patch)
tree 34f71c9049644b54e3f48ada01a707c18effeec1
parent 21b83306b129118bc57def309a39b1c9c1e9e493 (diff)
parent 0390b293fa9c40c6dde4ce9b5234359d99d63bcb (diff)
download mongo-ecfbbb0845e291c8cd7a8b317fb8c822efe9833f.tar.gz
Merge pull request #2234 from wiredtiger/wt-2149
WT-2149: Deadlock opening lookaside table cursor
-rw-r--r-- src/async/async_api.c 17
-rw-r--r-- src/cache/cache_las.c 125
-rw-r--r-- src/conn/conn_api.c 3
-rw-r--r-- src/conn/conn_cache_pool.c 4
-rw-r--r-- src/conn/conn_ckpt.c 12
-rw-r--r-- src/conn/conn_dhandle.c 14
-rw-r--r-- src/conn/conn_log.c 14
-rw-r--r-- src/conn/conn_open.c 29
-rw-r--r-- src/conn/conn_stat.c 3
-rw-r--r-- src/conn/conn_sweep.c 12
-rw-r--r-- src/evict/evict_lru.c 54
-rw-r--r-- src/include/connection.h 1
-rw-r--r-- src/include/extern.h 5
-rw-r--r-- src/lsm/lsm_manager.c 8
-rw-r--r-- src/session/session_api.c 119
-rw-r--r-- src/txn/txn_recover.c 21
16 files changed, 251 insertions, 190 deletions
diff --git a/src/async/async_api.c b/src/async/async_api.c
index 1d819474728..dc26f2d11c3 100644
--- a/src/async/async_api.c
+++ b/src/async/async_api.c
@@ -53,7 +53,7 @@ __async_get_format(WT_CONNECTION_IMPL *conn, const char *uri,
* for the cursor.
*/
WT_RET(__wt_open_internal_session(
- conn, "async-cursor", true, true, &session));
+ conn, "async-cursor", true, 0, &session));
__wt_spin_lock(session, &async->ops_lock);
WT_ERR(__wt_calloc_one(session, &af));
WT_ERR(__wt_strdup(session, uri, &af->uri));
@@ -229,7 +229,7 @@ __async_start(WT_SESSION_IMPL *session)
{
WT_ASYNC *async;
WT_CONNECTION_IMPL *conn;
- uint32_t i;
+ uint32_t i, session_flags;
conn = S2C(session);
conn->async_cfg = 1;
@@ -256,9 +256,9 @@ __async_start(WT_SESSION_IMPL *session)
* workers and we may want to selectively stop some workers
* while leaving the rest running.
*/
- WT_RET(__wt_open_internal_session(conn,
- "async-worker", true, true, &async->worker_sessions[i]));
- F_SET(async->worker_sessions[i], WT_SESSION_SERVER_ASYNC);
+ session_flags = WT_SESSION_SERVER_ASYNC;
+ WT_RET(__wt_open_internal_session(conn, "async-worker",
+ true, session_flags, &async->worker_sessions[i]));
}
for (i = 0; i < conn->async_workers; i++) {
/*
@@ -305,7 +305,7 @@ __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[])
WT_DECL_RET;
WT_SESSION *wt_session;
bool run;
- uint32_t i;
+ uint32_t i, session_flags;
conn = S2C(session);
async = conn->async;
@@ -371,10 +371,9 @@ __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[])
/*
* Each worker has its own session.
*/
+ session_flags = WT_SESSION_SERVER_ASYNC;
WT_RET(__wt_open_internal_session(conn, "async-worker",
- true, true, &async->worker_sessions[i]));
- F_SET(async->worker_sessions[i],
- WT_SESSION_SERVER_ASYNC);
+ true, session_flags, &async->worker_sessions[i]));
}
for (i = conn->async_workers; i < tmp_conn.async_workers; i++) {
/*
diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c
index a964ac39874..714963b18d4 100644
--- a/src/cache/cache_las.c
+++ b/src/cache/cache_las.c
@@ -27,7 +27,7 @@ __wt_las_stats_update(WT_SESSION_IMPL *session)
* remain 0. In the current system, there's always a lookaside table,
* but there's no reason not to be cautious.
*/
- if (conn->las_cursor == NULL)
+ if (conn->las_session->las_cursor == NULL)
return;
/*
@@ -35,7 +35,8 @@ __wt_las_stats_update(WT_SESSION_IMPL *session)
* to it by way of the underlying btree handle, but it's a little ugly.
*/
cstats = conn->stats;
- dstats = ((WT_CURSOR_BTREE *)conn->las_cursor)->btree->dhandle->stats;
+ dstats = ((WT_CURSOR_BTREE *)
+ conn->las_session->las_cursor)->btree->dhandle->stats;
WT_STAT_SET(session, cstats,
cache_lookaside_insert, WT_STAT_READ(dstats, cursor_insert));
@@ -44,40 +45,6 @@ __wt_las_stats_update(WT_SESSION_IMPL *session)
}
/*
- * __las_cursor_create --
- * Open a new lookaside table cursor.
- */
-static int
-__las_cursor_create(WT_SESSION_IMPL *session, WT_CURSOR **cursorp)
-{
- WT_BTREE *btree;
- const char *open_cursor_cfg[] = {
- WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL };
-
- WT_RET(__wt_open_cursor(
- session, WT_LAS_URI, NULL, open_cursor_cfg, cursorp));
-
- /*
- * Set special flags for the lookaside table: the lookaside flag (used,
- * for example, to avoid writing records during reconciliation), also
- * turn off checkpoints and logging.
- *
- * Test flags before setting them so updates can't race in subsequent
- * opens (the first update is safe because it's single-threaded from
- * wiredtiger_open).
- */
- btree = S2BT(session);
- if (!F_ISSET(btree, WT_BTREE_LOOKASIDE))
- F_SET(btree, WT_BTREE_LOOKASIDE);
- if (!F_ISSET(btree, WT_BTREE_NO_CHECKPOINT))
- F_SET(btree, WT_BTREE_NO_CHECKPOINT);
- if (!F_ISSET(btree, WT_BTREE_NO_LOGGING))
- F_SET(btree, WT_BTREE_NO_LOGGING);
-
- return (0);
-}
-
-/*
* __wt_las_create --
* Initialize the database's lookaside store.
*/
@@ -85,7 +52,7 @@ int
__wt_las_create(WT_SESSION_IMPL *session)
{
WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
+ uint32_t session_flags;
const char *drop_cfg[] = {
WT_CONFIG_BASE(session, WT_SESSION_drop), "force=true", NULL };
@@ -96,27 +63,22 @@ __wt_las_create(WT_SESSION_IMPL *session)
* schema lock to create and drop the file, and it may not always be
* available.
*
- * Open an internal session, used for the shared lookaside cursor.
- *
- * Sessions associated with a lookaside cursor should never be tapped
- * for eviction.
+ * Discard any previous incarnation of the file.
*/
- WT_RET(__wt_open_internal_session(
- conn, "lookaside table", true, true, &conn->las_session));
- session = conn->las_session;
- F_SET(session, WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION);
-
- /* Discard any previous incarnation of the file. */
WT_RET(__wt_session_drop(session, WT_LAS_URI, drop_cfg));
/* Re-create the file. */
WT_RET(__wt_session_create(session, WT_LAS_URI, WT_LAS_FORMAT));
- /* Open the shared cursor. */
- WT_WITHOUT_DHANDLE(session,
- ret = __las_cursor_create(session, &conn->las_cursor));
-
- return (ret);
+ /*
+ * Open an internal session, used for the shared lookaside cursor.
+ *
+ * Sessions associated with a lookaside cursor should never be tapped
+ * for eviction.
+ */
+ session_flags = WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION;
+ return (__wt_open_internal_session(
+ conn, "lookaside table", true, session_flags, &conn->las_session));
}
/*
@@ -138,7 +100,6 @@ __wt_las_destroy(WT_SESSION_IMPL *session)
wt_session = &conn->las_session->iface;
ret = wt_session->close(wt_session, NULL);
- conn->las_cursor = NULL;
conn->las_session = NULL;
return (ret);
@@ -176,6 +137,40 @@ __wt_las_is_written(WT_SESSION_IMPL *session)
}
/*
+ * __wt_las_cursor_create --
+ * Open a new lookaside table cursor.
+ */
+int
+__wt_las_cursor_create(WT_SESSION_IMPL *session, WT_CURSOR **cursorp)
+{
+ WT_BTREE *btree;
+ const char *open_cursor_cfg[] = {
+ WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL };
+
+ WT_RET(__wt_open_cursor(
+ session, WT_LAS_URI, NULL, open_cursor_cfg, cursorp));
+
+ /*
+ * Set special flags for the lookaside table: the lookaside flag (used,
+ * for example, to avoid writing records during reconciliation), also
+ * turn off checkpoints and logging.
+ *
+ * Test flags before setting them so updates can't race in subsequent
+ * opens (the first update is safe because it's single-threaded from
+ * wiredtiger_open).
+ */
+ btree = S2BT(session);
+ if (!F_ISSET(btree, WT_BTREE_LOOKASIDE))
+ F_SET(btree, WT_BTREE_LOOKASIDE);
+ if (!F_ISSET(btree, WT_BTREE_NO_CHECKPOINT))
+ F_SET(btree, WT_BTREE_NO_CHECKPOINT);
+ if (!F_ISSET(btree, WT_BTREE_NO_LOGGING))
+ F_SET(btree, WT_BTREE_NO_LOGGING);
+
+ return (0);
+}
+
+/*
* __wt_las_cursor --
* Return a lookaside cursor.
*/
@@ -184,7 +179,6 @@ __wt_las_cursor(
WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags)
{
WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
*cursorp = NULL;
@@ -202,20 +196,15 @@ __wt_las_cursor(
conn = S2C(session);
- /* Eviction and sweep threads have their own lookaside table cursors. */
- if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR)) {
- if (session->las_cursor == NULL) {
- WT_WITHOUT_DHANDLE(session, ret =
- __las_cursor_create(session, &session->las_cursor));
- WT_RET(ret);
- }
-
+ /*
+ * Some threads have their own lookaside table cursors, else lock the
+ * shared lookaside cursor.
+ */
+ if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR))
*cursorp = session->las_cursor;
- } else {
- /* Lock the shared lookaside cursor. */
+ else {
__wt_spin_lock(session, &conn->las_lock);
-
- *cursorp = conn->las_cursor;
+ *cursorp = conn->las_session->las_cursor;
}
/* Turn caching and eviction off. */
@@ -253,8 +242,8 @@ __wt_las_cursor_close(
F_SET(session, session_flags);
/*
- * Eviction and sweep threads have their own lookaside table cursors;
- * else, unlock the shared lookaside cursor.
+ * Some threads have their own lookaside table cursors, else unlock the
+ * shared lookaside cursor.
*/
if (!F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR))
__wt_spin_unlock(session, &conn->las_lock);
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 7ce84b624a3..b50ad750158 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -2038,9 +2038,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
/* Start the worker threads and run recovery. */
WT_ERR(__wt_connection_workers(session, cfg));
- /* Create the lookaside table. */
- WT_ERR(__wt_las_create(session));
-
WT_STATIC_ASSERT(offsetof(WT_CONNECTION_IMPL, iface) == 0);
*wt_connp = &conn->iface;
diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c
index 6294e3b01a7..22af88c66b1 100644
--- a/src/conn/conn_cache_pool.c
+++ b/src/conn/conn_cache_pool.c
@@ -243,6 +243,7 @@ __wt_conn_cache_pool_open(WT_SESSION_IMPL *session)
WT_CACHE_POOL *cp;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ uint32_t session_flags;
conn = S2C(session);
cache = conn->cache;
@@ -252,8 +253,9 @@ __wt_conn_cache_pool_open(WT_SESSION_IMPL *session)
* Create a session that can be used by the cache pool thread, do
* it in the main thread to avoid shutdown races
*/
+ session_flags = WT_SESSION_NO_DATA_HANDLES;
if ((ret = __wt_open_internal_session(
- conn, "cache-pool", false, false, &cache->cp_session)) != 0)
+ conn, "cache-pool", false, session_flags, &cache->cp_session)) != 0)
WT_RET_MSG(NULL, ret,
"Failed to create session for cache pool");
diff --git a/src/conn/conn_ckpt.c b/src/conn/conn_ckpt.c
index 7fc790d5efa..caf0c3b68f0 100644
--- a/src/conn/conn_ckpt.c
+++ b/src/conn/conn_ckpt.c
@@ -123,22 +123,24 @@ static int
__ckpt_server_start(WT_CONNECTION_IMPL *conn)
{
WT_SESSION_IMPL *session;
+ uint32_t session_flags;
/* Nothing to do if the server is already running. */
if (conn->ckpt_session != NULL)
return (0);
F_SET(conn, WT_CONN_SERVER_CHECKPOINT);
- /* The checkpoint server gets its own session. */
- WT_RET(__wt_open_internal_session(
- conn, "checkpoint-server", true, true, &conn->ckpt_session));
- session = conn->ckpt_session;
/*
+ * The checkpoint server gets its own session.
+ *
* Checkpoint does enough I/O it may be called upon to perform slow
* operations for the block manager.
*/
- F_SET(session, WT_SESSION_CAN_WAIT);
+ session_flags = WT_SESSION_CAN_WAIT;
+ WT_RET(__wt_open_internal_session(conn,
+ "checkpoint-server", true, session_flags, &conn->ckpt_session));
+ session = conn->ckpt_session;
WT_RET(__wt_cond_alloc(
session, "checkpoint server", false, &conn->ckpt_cond));
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index 77e7693042b..0b364b5fd4b 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -678,11 +678,15 @@ __wt_conn_dhandle_discard(WT_SESSION_IMPL *session)
conn = S2C(session);
/*
- * Close open data handles: first, everything but the metadata file
- * (as closing a normal file may open and write the metadata file),
- * then the metadata file. This function isn't called often, and I
- * don't want to "know" anything about the metadata file's position on
- * the list, so we do it the hard way.
+ * Empty the session cache: any data handles created in a connection
+ * method may be cached here, and we're about to close them.
+ */
+ __wt_session_close_cache(session);
+
+ /*
+ * Close open data handles: first, everything but the metadata file (as
+ * closing a normal file may open and write the metadata file), then
+ * the metadata file.
*/
restart:
TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index bf2447fb646..a2555ba536f 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -821,6 +821,7 @@ int
__wt_logmgr_open(WT_SESSION_IMPL *session)
{
WT_CONNECTION_IMPL *conn;
+ uint32_t session_flags;
conn = S2C(session);
@@ -832,8 +833,9 @@ __wt_logmgr_open(WT_SESSION_IMPL *session)
* Start the log close thread. It is not configurable.
* If logging is enabled, this thread runs.
*/
- WT_RET(__wt_open_internal_session(
- conn, "log-close-server", false, false, &conn->log_file_session));
+ session_flags = WT_SESSION_NO_DATA_HANDLES;
+ WT_RET(__wt_open_internal_session(conn,
+ "log-close-server", false, session_flags, &conn->log_file_session));
WT_RET(__wt_cond_alloc(conn->log_file_session,
"log close server", false, &conn->log_file_cond));
@@ -848,8 +850,8 @@ __wt_logmgr_open(WT_SESSION_IMPL *session)
* Start the log write LSN thread. It is not configurable.
* If logging is enabled, this thread runs.
*/
- WT_RET(__wt_open_internal_session(
- conn, "log-wrlsn-server", false, false, &conn->log_wrlsn_session));
+ WT_RET(__wt_open_internal_session(conn, "log-wrlsn-server",
+ false, session_flags, &conn->log_wrlsn_session));
WT_RET(__wt_cond_alloc(conn->log_wrlsn_session,
"log write lsn server", false, &conn->log_wrlsn_cond));
WT_RET(__wt_thread_create(conn->log_wrlsn_session,
@@ -868,8 +870,8 @@ __wt_logmgr_open(WT_SESSION_IMPL *session)
WT_RET(__wt_cond_signal(session, conn->log_cond));
} else {
/* The log server gets its own session. */
- WT_RET(__wt_open_internal_session(
- conn, "log-server", false, false, &conn->log_session));
+ WT_RET(__wt_open_internal_session(conn,
+ "log-server", false, session_flags, &conn->log_session));
WT_RET(__wt_cond_alloc(conn->log_session,
"log server", false, &conn->log_cond));
diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c
index 199cf213e0a..6db0c4bb10c 100644
--- a/src/conn/conn_open.c
+++ b/src/conn/conn_open.c
@@ -38,7 +38,7 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[])
* need to get cleaned up on close.
*/
WT_RET(__wt_open_internal_session(
- conn, "connection", true, false, &session));
+ conn, "connection", false, 0, &session));
/*
* The connection's default session is originally a static structure,
@@ -228,11 +228,6 @@ int
__wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[])
{
/*
- * Start the eviction thread.
- */
- WT_RET(__wt_evict_create(session));
-
- /*
* Start the optional statistics thread. Start statistics first so that
* other optional threads can know if statistics are enabled or not.
*/
@@ -242,19 +237,27 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[])
/* Run recovery. */
WT_RET(__wt_txn_recover(session));
+ /*
+ * Start the optional logging/archive threads.
+ * NOTE: The log manager must be started before checkpoints so that the
+ * checkpoint server knows if logging is enabled. It must also be
+ * started before any operation that can commit, or the commit can
+ * block.
+ */
+ WT_RET(__wt_logmgr_open(session));
+
+ /* Create the lookaside table. */
+ WT_RET(__wt_las_create(session));
+
+ /* Start eviction threads. */
+ WT_RET(__wt_evict_create(session, true));
+
/* Start the handle sweep thread. */
WT_RET(__wt_sweep_create(session));
/* Start the optional async threads. */
WT_RET(__wt_async_create(session, cfg));
- /*
- * Start the optional logging/archive thread.
- * NOTE: The log manager must be started before checkpoints so that the
- * checkpoint server knows if logging is enabled.
- */
- WT_RET(__wt_logmgr_open(session));
-
/* Start the optional checkpoint thread. */
WT_RET(__wt_checkpoint_server_create(session, cfg));
diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c
index d8c7227ae61..ec3a630581a 100644
--- a/src/conn/conn_stat.c
+++ b/src/conn/conn_stat.c
@@ -447,9 +447,10 @@ __statlog_start(WT_CONNECTION_IMPL *conn)
return (0);
F_SET(conn, WT_CONN_SERVER_STATISTICS);
+
/* The statistics log server gets its own session. */
WT_RET(__wt_open_internal_session(
- conn, "statlog-server", true, true, &conn->stat_session));
+ conn, "statlog-server", true, 0, &conn->stat_session));
session = conn->stat_session;
WT_RET(__wt_cond_alloc(
diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c
index 2de0cc12069..23846f978fe 100644
--- a/src/conn/conn_sweep.c
+++ b/src/conn/conn_sweep.c
@@ -353,16 +353,13 @@ int
__wt_sweep_create(WT_SESSION_IMPL *session)
{
WT_CONNECTION_IMPL *conn;
+ uint32_t session_flags;
conn = S2C(session);
/* Set first, the thread might run before we finish up. */
F_SET(conn, WT_CONN_SERVER_SWEEP);
- WT_RET(__wt_open_internal_session(
- conn, "sweep-server", true, true, &conn->sweep_session));
- session = conn->sweep_session;
-
/*
* Handle sweep does enough I/O it may be called upon to perform slow
* operations for the block manager.
@@ -372,8 +369,11 @@ __wt_sweep_create(WT_SESSION_IMPL *session)
*
* Don't tap the sweep thread for eviction.
*/
- F_SET(session, WT_SESSION_CAN_WAIT |
- WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION);
+ session_flags = WT_SESSION_CAN_WAIT |
+ WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION;
+ WT_RET(__wt_open_internal_session(
+ conn, "sweep-server", true, session_flags, &conn->sweep_session));
+ session = conn->sweep_session;
WT_RET(__wt_cond_alloc(
session, "handle sweep server", false, &conn->sweep_cond));
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index d0cc60b583d..a597079ff47 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -236,27 +236,30 @@ __evict_workers_resize(WT_SESSION_IMPL *session)
WT_DECL_RET;
WT_EVICT_WORKER *workers;
size_t alloc;
- uint32_t i;
+ uint32_t i, session_flags;
conn = S2C(session);
- alloc = conn->evict_workers_alloc * sizeof(*workers);
- WT_RET(__wt_realloc(session, &alloc,
- conn->evict_workers_max * sizeof(*workers), &conn->evict_workctx));
- workers = conn->evict_workctx;
+ if (conn->evict_workers_alloc < conn->evict_workers_max) {
+ alloc = conn->evict_workers_alloc * sizeof(*workers);
+ WT_RET(__wt_realloc(session, &alloc,
+ conn->evict_workers_max * sizeof(*workers),
+ &conn->evict_workctx));
+ workers = conn->evict_workctx;
+ }
for (i = conn->evict_workers_alloc; i < conn->evict_workers_max; i++) {
- WT_ERR(__wt_open_internal_session(conn,
- "eviction-worker", true, false, &workers[i].session));
- workers[i].id = i;
-
/*
+ * Eviction worker threads get their own session.
* Eviction worker threads get their own lookaside table cursor.
* Eviction worker threads may be called upon to perform slow
* operations for the block manager.
*/
- F_SET(workers[i].session,
- WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_CAN_WAIT);
+ session_flags =
+ WT_SESSION_CAN_WAIT | WT_SESSION_LOOKASIDE_CURSOR;
+ WT_ERR(__wt_open_internal_session(conn, "eviction-worker",
+ false, session_flags, &workers[i].session));
+ workers[i].id = i;
if (i < conn->evict_workers_min) {
++conn->evict_workers;
@@ -275,36 +278,39 @@ err: conn->evict_workers_alloc = conn->evict_workers_max;
* Start the eviction server thread.
*/
int
-__wt_evict_create(WT_SESSION_IMPL *session)
+__wt_evict_create(WT_SESSION_IMPL *session, bool with_las)
{
WT_CONNECTION_IMPL *conn;
+ uint32_t session_flags;
conn = S2C(session);
/* Set first, the thread might run before we finish up. */
F_SET(conn, WT_CONN_EVICTION_RUN);
- /* We need a session handle because we're reading/writing pages. */
- WT_RET(__wt_open_internal_session(
- conn, "eviction-server", true, false, &conn->evict_session));
+ /*
+ * We need a session handle because we're reading/writing pages.
+ *
+ * The eviction server gets its own lookaside table cursor.
+ *
+ * If there's only a single eviction thread, it may be called upon to
+ * perform slow operations for the block manager. (The flag is not
+ * reset if reconfigured later, but I doubt that's a problem.)
+ */
+ session_flags = with_las ? WT_SESSION_LOOKASIDE_CURSOR : 0;
+ if (conn->evict_workers_max == 0)
+ FLD_SET(session_flags, WT_SESSION_CAN_WAIT);
+ WT_RET(__wt_open_internal_session(conn,
+ "eviction-server", false, session_flags, &conn->evict_session));
session = conn->evict_session;
/*
* If eviction workers were configured, allocate sessions for them now.
* This is done to reduce the chance that we will open new eviction
* sessions after WT_CONNECTION::close is called.
- *
- * If there's only a single eviction thread, it may be called upon to
- * perform slow operations for the block manager. (The flag is not
- * reset if reconfigured later, but I doubt that's a problem.)
*/
if (conn->evict_workers_max > 0)
WT_RET(__evict_workers_resize(session));
- else
- F_SET(session, WT_SESSION_CAN_WAIT);
-
- /* The eviction server gets its own lookaside table cursor. */
- F_SET(session, WT_SESSION_LOOKASIDE_CURSOR);
/*
* Start the primary eviction server thread after the worker threads
diff --git a/src/include/connection.h b/src/include/connection.h
index 61ef9e2391c..2dfb24a83da 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -377,7 +377,6 @@ struct __wt_connection_impl {
*/
WT_SPINLOCK las_lock; /* Lookaside table spinlock */
WT_SESSION_IMPL *las_session; /* Lookaside table session */
- WT_CURSOR *las_cursor; /* Lookaside table cursor */
bool las_written; /* Lookaside table has been written */
WT_ITEM las_sweep_key; /* Sweep server's saved key */
diff --git a/src/include/extern.h b/src/include/extern.h
index ec25b96aa22..c196d550c7e 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -186,6 +186,7 @@ extern int __wt_las_create(WT_SESSION_IMPL *session);
extern int __wt_las_destroy(WT_SESSION_IMPL *session);
extern void __wt_las_set_written(WT_SESSION_IMPL *session);
extern bool __wt_las_is_written(WT_SESSION_IMPL *session);
+extern int __wt_las_cursor_create(WT_SESSION_IMPL *session, WT_CURSOR **cursorp);
extern int __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags);
extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags);
extern int __wt_las_sweep(WT_SESSION_IMPL *session);
@@ -318,7 +319,7 @@ extern int __wt_curtable_open(WT_SESSION_IMPL *session, const char *uri, const c
extern int __wt_evict_file(WT_SESSION_IMPL *session, int syncop);
extern void __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref);
extern int __wt_evict_server_wake(WT_SESSION_IMPL *session);
-extern int __wt_evict_create(WT_SESSION_IMPL *session);
+extern int __wt_evict_create(WT_SESSION_IMPL *session, bool with_las);
extern int __wt_evict_destroy(WT_SESSION_IMPL *session);
extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp);
extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session);
@@ -593,8 +594,8 @@ extern int __wt_session_copy_values(WT_SESSION_IMPL *session);
extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
extern int __wt_session_create( WT_SESSION_IMPL *session, const char *uri, const char *config);
extern int __wt_session_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]);
-extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool uses_dhandles, bool open_metadata, WT_SESSION_IMPL **sessionp);
extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp);
+extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp);
extern int __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, bool *skipp);
extern int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config);
extern int __wt_session_lock_dhandle( WT_SESSION_IMPL *session, uint32_t flags, bool *is_deadp);
diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c
index bd3adb3a528..1c5124c32af 100644
--- a/src/lsm/lsm_manager.c
+++ b/src/lsm/lsm_manager.c
@@ -203,12 +203,14 @@ __wt_lsm_manager_reconfig(WT_SESSION_IMPL *session, const char **cfg)
int
__wt_lsm_manager_start(WT_SESSION_IMPL *session)
{
+ WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_LSM_MANAGER *manager;
WT_SESSION_IMPL *worker_session;
uint32_t i;
- manager = &S2C(session)->lsm_manager;
+ conn = S2C(session);
+ manager = &conn->lsm_manager;
/*
* We need at least a manager, a switch thread and a generic
@@ -225,7 +227,7 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session)
*/
for (i = 0; i < WT_LSM_MAX_WORKERS; i++) {
WT_ERR(__wt_open_internal_session(
- S2C(session), "lsm-worker", true, false, &worker_session));
+ conn, "lsm-worker", false, 0, &worker_session));
worker_session->isolation = WT_ISO_READ_UNCOMMITTED;
manager->lsm_worker_cookies[i].session = worker_session;
}
@@ -234,7 +236,7 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session)
WT_ERR(__wt_thread_create(session, &manager->lsm_worker_cookies[0].tid,
__lsm_worker_manager, &manager->lsm_worker_cookies[0]));
- F_SET(S2C(session), WT_CONN_SERVER_LSM);
+ F_SET(conn, WT_CONN_SERVER_LSM);
if (0) {
err: for (i = 0;
diff --git a/src/session/session_api.c b/src/session/session_api.c
index 1bb519e80e0..a766829afad 100644
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -1144,47 +1144,12 @@ __session_strerror(WT_SESSION *wt_session, int error)
}
/*
- * __wt_open_internal_session --
- * Allocate a session for WiredTiger's use.
- */
-int
-__wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name,
- bool uses_dhandles, bool open_metadata, WT_SESSION_IMPL **sessionp)
-{
- WT_SESSION_IMPL *session;
-
- *sessionp = NULL;
-
- WT_RET(__wt_open_session(conn, NULL, NULL, open_metadata, &session));
- session->name = name;
-
- /*
- * Public sessions are automatically closed during WT_CONNECTION->close.
- * If the session handles for internal threads were to go on the public
- * list, there would be complex ordering issues during close. Set a
- * flag to avoid this: internal sessions are not closed automatically.
- */
- F_SET(session, WT_SESSION_INTERNAL);
-
- /*
- * Some internal threads must keep running after we close all data
- * handles. Make sure these threads don't open their own handles.
- */
- if (!uses_dhandles)
- F_SET(session, WT_SESSION_NO_DATA_HANDLES);
-
- *sessionp = session;
- return (0);
-}
-
-/*
- * __wt_open_session --
- * Allocate a session handle. The internal parameter is used for sessions
- * opened by WiredTiger for its own use.
+ * __open_session --
+ * Allocate a session handle.
*/
-int
-__wt_open_session(WT_CONNECTION_IMPL *conn,
- WT_EVENT_HANDLER *event_handler, const char *config, bool open_metadata,
+static int
+__open_session(WT_CONNECTION_IMPL *conn,
+ WT_EVENT_HANDLER *event_handler, const char *config,
WT_SESSION_IMPL **sessionp)
{
static const WT_SESSION stds = {
@@ -1324,7 +1289,26 @@ __wt_open_session(WT_CONNECTION_IMPL *conn,
WT_STAT_FAST_CONN_INCR(session, session_open);
err: __wt_spin_unlock(session, &conn->api_lock);
- WT_RET(ret);
+ return (ret);
+}
+
+/*
+ * __wt_open_session --
+ * Allocate a session handle.
+ */
+int
+__wt_open_session(WT_CONNECTION_IMPL *conn,
+ WT_EVENT_HANDLER *event_handler, const char *config,
+ bool open_metadata, WT_SESSION_IMPL **sessionp)
+{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ WT_SESSION *wt_session;
+
+ *sessionp = NULL;
+
+ /* Acquire a session. */
+ WT_RET(__open_session(conn, event_handler, config, &session));
/*
* Acquiring the metadata handle requires the schema lock; we've seen
@@ -1336,8 +1320,59 @@ err: __wt_spin_unlock(session, &conn->api_lock);
*/
if (open_metadata) {
WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
- WT_RET(__wt_metadata_open(session_ret));
+ if ((ret = __wt_metadata_open(session)) != 0) {
+ wt_session = &session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ return (ret);
+ }
}
+ *sessionp = session;
+ return (0);
+}
+
+/*
+ * __wt_open_internal_session --
+ * Allocate a session for WiredTiger's use.
+ */
+int
+__wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name,
+ bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp)
+{
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+ WT_SESSION_IMPL *session;
+
+ *sessionp = NULL;
+
+ /* Acquire a session. */
+ WT_RET(__wt_open_session(conn, NULL, NULL, open_metadata, &session));
+ session->name = name;
+
+ /*
+ * Public sessions are automatically closed during WT_CONNECTION->close.
+ * If the session handles for internal threads were to go on the public
+ * list, there would be complex ordering issues during close. Set a
+ * flag to avoid this: internal sessions are not closed automatically.
+ */
+ F_SET(session, session_flags | WT_SESSION_INTERNAL);
+
+ /*
+ * Acquiring the lookaside table cursor requires various locks; we've
+ * seen problems in the past where deadlocks happened because sessions
+ * deadlocked getting the cursor late in the process. Be defensive,
+ * get it now.
+ */
+ if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR)) {
+ WT_WITHOUT_DHANDLE(session, ret =
+ __wt_las_cursor_create(session, &session->las_cursor));
+ if (ret != 0) {
+ wt_session = &session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ return (ret);
+ }
+ }
+
+ *sessionp = session;
return (0);
}
diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c
index f2b181711d1..9d8a19cbff3 100644
--- a/src/txn/txn_recover.c
+++ b/src/txn/txn_recover.c
@@ -412,11 +412,12 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
WT_RECOVERY r;
struct WT_RECOVERY_FILE *metafile;
char *config;
- bool needs_rec, was_backup;
+ bool eviction_started, needs_rec, was_backup;
conn = S2C(session);
WT_CLEAR(r);
WT_INIT_LSN(&r.ckpt_lsn);
+ eviction_started = false;
was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP);
/* We need a real session for recovery. */
@@ -494,6 +495,15 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
*/
if (needs_rec && FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR))
WT_ERR(WT_RUN_RECOVERY);
+
+ /*
+ * Recovery can touch more data than fits in cache, so it relies on
+ * regular eviction to manage paging. Start eviction threads for
+ * recovery without LAS cursors.
+ */
+ WT_ERR(__wt_evict_create(session, false));
+ eviction_started = true;
+
/*
* Always run recovery even if it was a clean shutdown.
* We can consider skipping it in the future.
@@ -522,6 +532,15 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
done: FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE);
err: WT_TRET(__recovery_free(&r));
__wt_free(session, config);
+
+ /*
+ * Destroy the eviction threads that were started in support of
+ * recovery. They will be restarted once the lookaside table is
+ * created.
+ */
+ if (eviction_started)
+ WT_TRET(__wt_evict_destroy(session));
+
WT_TRET(session->iface.close(&session->iface, NULL));
return (ret);