diff options
author | Alex Gorrod <alexander.gorrod@mongodb.com> | 2015-10-02 14:46:04 +1000 |
---|---|---|
committer | Alex Gorrod <alexander.gorrod@mongodb.com> | 2015-10-02 14:46:04 +1000 |
commit | ecfbbb0845e291c8cd7a8b317fb8c822efe9833f (patch) | |
tree | 34f71c9049644b54e3f48ada01a707c18effeec1 | |
parent | 21b83306b129118bc57def309a39b1c9c1e9e493 (diff) | |
parent | 0390b293fa9c40c6dde4ce9b5234359d99d63bcb (diff) | |
download | mongo-ecfbbb0845e291c8cd7a8b317fb8c822efe9833f.tar.gz |
Merge pull request #2234 from wiredtiger/wt-2149
WT-2149: Deadlock opening lookaside table cursor
-rw-r--r-- | src/async/async_api.c | 17 | ||||
-rw-r--r-- | src/cache/cache_las.c | 125 | ||||
-rw-r--r-- | src/conn/conn_api.c | 3 | ||||
-rw-r--r-- | src/conn/conn_cache_pool.c | 4 | ||||
-rw-r--r-- | src/conn/conn_ckpt.c | 12 | ||||
-rw-r--r-- | src/conn/conn_dhandle.c | 14 | ||||
-rw-r--r-- | src/conn/conn_log.c | 14 | ||||
-rw-r--r-- | src/conn/conn_open.c | 29 | ||||
-rw-r--r-- | src/conn/conn_stat.c | 3 | ||||
-rw-r--r-- | src/conn/conn_sweep.c | 12 | ||||
-rw-r--r-- | src/evict/evict_lru.c | 54 | ||||
-rw-r--r-- | src/include/connection.h | 1 | ||||
-rw-r--r-- | src/include/extern.h | 5 | ||||
-rw-r--r-- | src/lsm/lsm_manager.c | 8 | ||||
-rw-r--r-- | src/session/session_api.c | 119 | ||||
-rw-r--r-- | src/txn/txn_recover.c | 21 |
16 files changed, 251 insertions, 190 deletions
diff --git a/src/async/async_api.c b/src/async/async_api.c index 1d819474728..dc26f2d11c3 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -53,7 +53,7 @@ __async_get_format(WT_CONNECTION_IMPL *conn, const char *uri, * for the cursor. */ WT_RET(__wt_open_internal_session( - conn, "async-cursor", true, true, &session)); + conn, "async-cursor", true, 0, &session)); __wt_spin_lock(session, &async->ops_lock); WT_ERR(__wt_calloc_one(session, &af)); WT_ERR(__wt_strdup(session, uri, &af->uri)); @@ -229,7 +229,7 @@ __async_start(WT_SESSION_IMPL *session) { WT_ASYNC *async; WT_CONNECTION_IMPL *conn; - uint32_t i; + uint32_t i, session_flags; conn = S2C(session); conn->async_cfg = 1; @@ -256,9 +256,9 @@ __async_start(WT_SESSION_IMPL *session) * workers and we may want to selectively stop some workers * while leaving the rest running. */ - WT_RET(__wt_open_internal_session(conn, - "async-worker", true, true, &async->worker_sessions[i])); - F_SET(async->worker_sessions[i], WT_SESSION_SERVER_ASYNC); + session_flags = WT_SESSION_SERVER_ASYNC; + WT_RET(__wt_open_internal_session(conn, "async-worker", + true, session_flags, &async->worker_sessions[i])); } for (i = 0; i < conn->async_workers; i++) { /* @@ -305,7 +305,7 @@ __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) WT_DECL_RET; WT_SESSION *wt_session; bool run; - uint32_t i; + uint32_t i, session_flags; conn = S2C(session); async = conn->async; @@ -371,10 +371,9 @@ __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) /* * Each worker has its own session. 
*/ + session_flags = WT_SESSION_SERVER_ASYNC; WT_RET(__wt_open_internal_session(conn, "async-worker", - true, true, &async->worker_sessions[i])); - F_SET(async->worker_sessions[i], - WT_SESSION_SERVER_ASYNC); + true, session_flags, &async->worker_sessions[i])); } for (i = conn->async_workers; i < tmp_conn.async_workers; i++) { /* diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c index a964ac39874..714963b18d4 100644 --- a/src/cache/cache_las.c +++ b/src/cache/cache_las.c @@ -27,7 +27,7 @@ __wt_las_stats_update(WT_SESSION_IMPL *session) * remain 0. In the current system, there's always a lookaside table, * but there's no reason not to be cautious. */ - if (conn->las_cursor == NULL) + if (conn->las_session->las_cursor == NULL) return; /* @@ -35,7 +35,8 @@ __wt_las_stats_update(WT_SESSION_IMPL *session) * to it by way of the underlying btree handle, but it's a little ugly. */ cstats = conn->stats; - dstats = ((WT_CURSOR_BTREE *)conn->las_cursor)->btree->dhandle->stats; + dstats = ((WT_CURSOR_BTREE *) + conn->las_session->las_cursor)->btree->dhandle->stats; WT_STAT_SET(session, cstats, cache_lookaside_insert, WT_STAT_READ(dstats, cursor_insert)); @@ -44,40 +45,6 @@ __wt_las_stats_update(WT_SESSION_IMPL *session) } /* - * __las_cursor_create -- - * Open a new lookaside table cursor. - */ -static int -__las_cursor_create(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) -{ - WT_BTREE *btree; - const char *open_cursor_cfg[] = { - WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL }; - - WT_RET(__wt_open_cursor( - session, WT_LAS_URI, NULL, open_cursor_cfg, cursorp)); - - /* - * Set special flags for the lookaside table: the lookaside flag (used, - * for example, to avoid writing records during reconciliation), also - * turn off checkpoints and logging. - * - * Test flags before setting them so updates can't race in subsequent - * opens (the first update is safe because it's single-threaded from - * wiredtiger_open). 
- */ - btree = S2BT(session); - if (!F_ISSET(btree, WT_BTREE_LOOKASIDE)) - F_SET(btree, WT_BTREE_LOOKASIDE); - if (!F_ISSET(btree, WT_BTREE_NO_CHECKPOINT)) - F_SET(btree, WT_BTREE_NO_CHECKPOINT); - if (!F_ISSET(btree, WT_BTREE_NO_LOGGING)) - F_SET(btree, WT_BTREE_NO_LOGGING); - - return (0); -} - -/* * __wt_las_create -- * Initialize the database's lookaside store. */ @@ -85,7 +52,7 @@ int __wt_las_create(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; - WT_DECL_RET; + uint32_t session_flags; const char *drop_cfg[] = { WT_CONFIG_BASE(session, WT_SESSION_drop), "force=true", NULL }; @@ -96,27 +63,22 @@ __wt_las_create(WT_SESSION_IMPL *session) * schema lock to create and drop the file, and it may not always be * available. * - * Open an internal session, used for the shared lookaside cursor. - * - * Sessions associated with a lookaside cursor should never be tapped - * for eviction. + * Discard any previous incarnation of the file. */ - WT_RET(__wt_open_internal_session( - conn, "lookaside table", true, true, &conn->las_session)); - session = conn->las_session; - F_SET(session, WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION); - - /* Discard any previous incarnation of the file. */ WT_RET(__wt_session_drop(session, WT_LAS_URI, drop_cfg)); /* Re-create the file. */ WT_RET(__wt_session_create(session, WT_LAS_URI, WT_LAS_FORMAT)); - /* Open the shared cursor. */ - WT_WITHOUT_DHANDLE(session, - ret = __las_cursor_create(session, &conn->las_cursor)); - - return (ret); + /* + * Open an internal session, used for the shared lookaside cursor. + * + * Sessions associated with a lookaside cursor should never be tapped + * for eviction. 
+ */ + session_flags = WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION; + return (__wt_open_internal_session( + conn, "lookaside table", true, session_flags, &conn->las_session)); } /* @@ -138,7 +100,6 @@ __wt_las_destroy(WT_SESSION_IMPL *session) wt_session = &conn->las_session->iface; ret = wt_session->close(wt_session, NULL); - conn->las_cursor = NULL; conn->las_session = NULL; return (ret); @@ -176,6 +137,40 @@ __wt_las_is_written(WT_SESSION_IMPL *session) } /* + * __wt_las_cursor_create -- + * Open a new lookaside table cursor. + */ +int +__wt_las_cursor_create(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) +{ + WT_BTREE *btree; + const char *open_cursor_cfg[] = { + WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL }; + + WT_RET(__wt_open_cursor( + session, WT_LAS_URI, NULL, open_cursor_cfg, cursorp)); + + /* + * Set special flags for the lookaside table: the lookaside flag (used, + * for example, to avoid writing records during reconciliation), also + * turn off checkpoints and logging. + * + * Test flags before setting them so updates can't race in subsequent + * opens (the first update is safe because it's single-threaded from + * wiredtiger_open). + */ + btree = S2BT(session); + if (!F_ISSET(btree, WT_BTREE_LOOKASIDE)) + F_SET(btree, WT_BTREE_LOOKASIDE); + if (!F_ISSET(btree, WT_BTREE_NO_CHECKPOINT)) + F_SET(btree, WT_BTREE_NO_CHECKPOINT); + if (!F_ISSET(btree, WT_BTREE_NO_LOGGING)) + F_SET(btree, WT_BTREE_NO_LOGGING); + + return (0); +} + +/* * __wt_las_cursor -- * Return a lookaside cursor. */ @@ -184,7 +179,6 @@ __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags) { WT_CONNECTION_IMPL *conn; - WT_DECL_RET; *cursorp = NULL; @@ -202,20 +196,15 @@ __wt_las_cursor( conn = S2C(session); - /* Eviction and sweep threads have their own lookaside table cursors. 
*/ - if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR)) { - if (session->las_cursor == NULL) { - WT_WITHOUT_DHANDLE(session, ret = - __las_cursor_create(session, &session->las_cursor)); - WT_RET(ret); - } - + /* + * Some threads have their own lookaside table cursors, else lock the + * shared lookaside cursor. + */ + if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR)) *cursorp = session->las_cursor; - } else { - /* Lock the shared lookaside cursor. */ + else { __wt_spin_lock(session, &conn->las_lock); - - *cursorp = conn->las_cursor; + *cursorp = conn->las_session->las_cursor; } /* Turn caching and eviction off. */ @@ -253,8 +242,8 @@ __wt_las_cursor_close( F_SET(session, session_flags); /* - * Eviction and sweep threads have their own lookaside table cursors; - * else, unlock the shared lookaside cursor. + * Some threads have their own lookaside table cursors, else unlock the + * shared lookaside cursor. */ if (!F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR)) __wt_spin_unlock(session, &conn->las_lock); diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 7ce84b624a3..b50ad750158 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -2038,9 +2038,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, /* Start the worker threads and run recovery. */ WT_ERR(__wt_connection_workers(session, cfg)); - /* Create the lookaside table. 
*/ - WT_ERR(__wt_las_create(session)); - WT_STATIC_ASSERT(offsetof(WT_CONNECTION_IMPL, iface) == 0); *wt_connp = &conn->iface; diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c index 6294e3b01a7..22af88c66b1 100644 --- a/src/conn/conn_cache_pool.c +++ b/src/conn/conn_cache_pool.c @@ -243,6 +243,7 @@ __wt_conn_cache_pool_open(WT_SESSION_IMPL *session) WT_CACHE_POOL *cp; WT_CONNECTION_IMPL *conn; WT_DECL_RET; + uint32_t session_flags; conn = S2C(session); cache = conn->cache; @@ -252,8 +253,9 @@ __wt_conn_cache_pool_open(WT_SESSION_IMPL *session) * Create a session that can be used by the cache pool thread, do * it in the main thread to avoid shutdown races */ + session_flags = WT_SESSION_NO_DATA_HANDLES; if ((ret = __wt_open_internal_session( - conn, "cache-pool", false, false, &cache->cp_session)) != 0) + conn, "cache-pool", false, session_flags, &cache->cp_session)) != 0) WT_RET_MSG(NULL, ret, "Failed to create session for cache pool"); diff --git a/src/conn/conn_ckpt.c b/src/conn/conn_ckpt.c index 7fc790d5efa..caf0c3b68f0 100644 --- a/src/conn/conn_ckpt.c +++ b/src/conn/conn_ckpt.c @@ -123,22 +123,24 @@ static int __ckpt_server_start(WT_CONNECTION_IMPL *conn) { WT_SESSION_IMPL *session; + uint32_t session_flags; /* Nothing to do if the server is already running. */ if (conn->ckpt_session != NULL) return (0); F_SET(conn, WT_CONN_SERVER_CHECKPOINT); - /* The checkpoint server gets its own session. */ - WT_RET(__wt_open_internal_session( - conn, "checkpoint-server", true, true, &conn->ckpt_session)); - session = conn->ckpt_session; /* + * The checkpoint server gets its own session. + * * Checkpoint does enough I/O it may be called upon to perform slow * operations for the block manager. 
*/ - F_SET(session, WT_SESSION_CAN_WAIT); + session_flags = WT_SESSION_CAN_WAIT; + WT_RET(__wt_open_internal_session(conn, + "checkpoint-server", true, session_flags, &conn->ckpt_session)); + session = conn->ckpt_session; WT_RET(__wt_cond_alloc( session, "checkpoint server", false, &conn->ckpt_cond)); diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 77e7693042b..0b364b5fd4b 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -678,11 +678,15 @@ __wt_conn_dhandle_discard(WT_SESSION_IMPL *session) conn = S2C(session); /* - * Close open data handles: first, everything but the metadata file - * (as closing a normal file may open and write the metadata file), - * then the metadata file. This function isn't called often, and I - * don't want to "know" anything about the metadata file's position on - * the list, so we do it the hard way. + * Empty the session cache: any data handles created in a connection + * method may be cached here, and we're about to close them. + */ + __wt_session_close_cache(session); + + /* + * Close open data handles: first, everything but the metadata file (as + * closing a normal file may open and write the metadata file), then + * the metadata file. */ restart: TAILQ_FOREACH(dhandle, &conn->dhqh, q) { diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index bf2447fb646..a2555ba536f 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -821,6 +821,7 @@ int __wt_logmgr_open(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; + uint32_t session_flags; conn = S2C(session); @@ -832,8 +833,9 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) * Start the log close thread. It is not configurable. * If logging is enabled, this thread runs. 
*/ - WT_RET(__wt_open_internal_session( - conn, "log-close-server", false, false, &conn->log_file_session)); + session_flags = WT_SESSION_NO_DATA_HANDLES; + WT_RET(__wt_open_internal_session(conn, + "log-close-server", false, session_flags, &conn->log_file_session)); WT_RET(__wt_cond_alloc(conn->log_file_session, "log close server", false, &conn->log_file_cond)); @@ -848,8 +850,8 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) * Start the log write LSN thread. It is not configurable. * If logging is enabled, this thread runs. */ - WT_RET(__wt_open_internal_session( - conn, "log-wrlsn-server", false, false, &conn->log_wrlsn_session)); + WT_RET(__wt_open_internal_session(conn, "log-wrlsn-server", + false, session_flags, &conn->log_wrlsn_session)); WT_RET(__wt_cond_alloc(conn->log_wrlsn_session, "log write lsn server", false, &conn->log_wrlsn_cond)); WT_RET(__wt_thread_create(conn->log_wrlsn_session, @@ -868,8 +870,8 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) WT_RET(__wt_cond_signal(session, conn->log_cond)); } else { /* The log server gets its own session. */ - WT_RET(__wt_open_internal_session( - conn, "log-server", false, false, &conn->log_session)); + WT_RET(__wt_open_internal_session(conn, + "log-server", false, session_flags, &conn->log_session)); WT_RET(__wt_cond_alloc(conn->log_session, "log server", false, &conn->log_cond)); diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c index 199cf213e0a..6db0c4bb10c 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -38,7 +38,7 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) * need to get cleaned up on close. */ WT_RET(__wt_open_internal_session( - conn, "connection", true, false, &session)); + conn, "connection", false, 0, &session)); /* * The connection's default session is originally a static structure, @@ -228,11 +228,6 @@ int __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) { /* - * Start the eviction thread. 
- */ - WT_RET(__wt_evict_create(session)); - - /* * Start the optional statistics thread. Start statistics first so that * other optional threads can know if statistics are enabled or not. */ @@ -242,19 +237,27 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) /* Run recovery. */ WT_RET(__wt_txn_recover(session)); + /* + * Start the optional logging/archive threads. + * NOTE: The log manager must be started before checkpoints so that the + * checkpoint server knows if logging is enabled. It must also be + * started before any operation that can commit, or the commit can + * block. + */ + WT_RET(__wt_logmgr_open(session)); + + /* Create the lookaside table. */ + WT_RET(__wt_las_create(session)); + + /* Start eviction threads. */ + WT_RET(__wt_evict_create(session, true)); + /* Start the handle sweep thread. */ WT_RET(__wt_sweep_create(session)); /* Start the optional async threads. */ WT_RET(__wt_async_create(session, cfg)); - /* - * Start the optional logging/archive thread. - * NOTE: The log manager must be started before checkpoints so that the - * checkpoint server knows if logging is enabled. - */ - WT_RET(__wt_logmgr_open(session)); - /* Start the optional checkpoint thread. */ WT_RET(__wt_checkpoint_server_create(session, cfg)); diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c index d8c7227ae61..ec3a630581a 100644 --- a/src/conn/conn_stat.c +++ b/src/conn/conn_stat.c @@ -447,9 +447,10 @@ __statlog_start(WT_CONNECTION_IMPL *conn) return (0); F_SET(conn, WT_CONN_SERVER_STATISTICS); + /* The statistics log server gets its own session. 
*/ WT_RET(__wt_open_internal_session( - conn, "statlog-server", true, true, &conn->stat_session)); + conn, "statlog-server", true, 0, &conn->stat_session)); session = conn->stat_session; WT_RET(__wt_cond_alloc( diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index 2de0cc12069..23846f978fe 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -353,16 +353,13 @@ int __wt_sweep_create(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; + uint32_t session_flags; conn = S2C(session); /* Set first, the thread might run before we finish up. */ F_SET(conn, WT_CONN_SERVER_SWEEP); - WT_RET(__wt_open_internal_session( - conn, "sweep-server", true, true, &conn->sweep_session)); - session = conn->sweep_session; - /* * Handle sweep does enough I/O it may be called upon to perform slow * operations for the block manager. @@ -372,8 +369,11 @@ __wt_sweep_create(WT_SESSION_IMPL *session) * * Don't tap the sweep thread for eviction. */ - F_SET(session, WT_SESSION_CAN_WAIT | - WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION); + session_flags = WT_SESSION_CAN_WAIT | + WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION; + WT_RET(__wt_open_internal_session( + conn, "sweep-server", true, session_flags, &conn->sweep_session)); + session = conn->sweep_session; WT_RET(__wt_cond_alloc( session, "handle sweep server", false, &conn->sweep_cond)); diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index d0cc60b583d..a597079ff47 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -236,27 +236,30 @@ __evict_workers_resize(WT_SESSION_IMPL *session) WT_DECL_RET; WT_EVICT_WORKER *workers; size_t alloc; - uint32_t i; + uint32_t i, session_flags; conn = S2C(session); - alloc = conn->evict_workers_alloc * sizeof(*workers); - WT_RET(__wt_realloc(session, &alloc, - conn->evict_workers_max * sizeof(*workers), &conn->evict_workctx)); - workers = conn->evict_workctx; + if (conn->evict_workers_alloc < conn->evict_workers_max) { + alloc = 
conn->evict_workers_alloc * sizeof(*workers); + WT_RET(__wt_realloc(session, &alloc, + conn->evict_workers_max * sizeof(*workers), + &conn->evict_workctx)); + workers = conn->evict_workctx; + } for (i = conn->evict_workers_alloc; i < conn->evict_workers_max; i++) { - WT_ERR(__wt_open_internal_session(conn, - "eviction-worker", true, false, &workers[i].session)); - workers[i].id = i; - /* + * Eviction worker threads get their own session. * Eviction worker threads get their own lookaside table cursor. * Eviction worker threads may be called upon to perform slow * operations for the block manager. */ - F_SET(workers[i].session, - WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_CAN_WAIT); + session_flags = + WT_SESSION_CAN_WAIT | WT_SESSION_LOOKASIDE_CURSOR; + WT_ERR(__wt_open_internal_session(conn, "eviction-worker", + false, session_flags, &workers[i].session)); + workers[i].id = i; if (i < conn->evict_workers_min) { ++conn->evict_workers; @@ -275,36 +278,39 @@ err: conn->evict_workers_alloc = conn->evict_workers_max; * Start the eviction server thread. */ int -__wt_evict_create(WT_SESSION_IMPL *session) +__wt_evict_create(WT_SESSION_IMPL *session, bool with_las) { WT_CONNECTION_IMPL *conn; + uint32_t session_flags; conn = S2C(session); /* Set first, the thread might run before we finish up. */ F_SET(conn, WT_CONN_EVICTION_RUN); - /* We need a session handle because we're reading/writing pages. */ - WT_RET(__wt_open_internal_session( - conn, "eviction-server", true, false, &conn->evict_session)); + /* + * We need a session handle because we're reading/writing pages. + * + * The eviction server gets its own lookaside table cursor. + * + * If there's only a single eviction thread, it may be called upon to + * perform slow operations for the block manager. (The flag is not + * reset if reconfigured later, but I doubt that's a problem.) + */ + session_flags = with_las ? 
WT_SESSION_LOOKASIDE_CURSOR : 0; + if (conn->evict_workers_max == 0) + FLD_SET(session_flags, WT_SESSION_CAN_WAIT); + WT_RET(__wt_open_internal_session(conn, + "eviction-server", false, session_flags, &conn->evict_session)); session = conn->evict_session; /* * If eviction workers were configured, allocate sessions for them now. * This is done to reduce the chance that we will open new eviction * sessions after WT_CONNECTION::close is called. - * - * If there's only a single eviction thread, it may be called upon to - * perform slow operations for the block manager. (The flag is not - * reset if reconfigured later, but I doubt that's a problem.) */ if (conn->evict_workers_max > 0) WT_RET(__evict_workers_resize(session)); - else - F_SET(session, WT_SESSION_CAN_WAIT); - - /* The eviction server gets its own lookaside table cursor. */ - F_SET(session, WT_SESSION_LOOKASIDE_CURSOR); /* * Start the primary eviction server thread after the worker threads diff --git a/src/include/connection.h b/src/include/connection.h index 61ef9e2391c..2dfb24a83da 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -377,7 +377,6 @@ struct __wt_connection_impl { */ WT_SPINLOCK las_lock; /* Lookaside table spinlock */ WT_SESSION_IMPL *las_session; /* Lookaside table session */ - WT_CURSOR *las_cursor; /* Lookaside table cursor */ bool las_written; /* Lookaside table has been written */ WT_ITEM las_sweep_key; /* Sweep server's saved key */ diff --git a/src/include/extern.h b/src/include/extern.h index ec25b96aa22..c196d550c7e 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -186,6 +186,7 @@ extern int __wt_las_create(WT_SESSION_IMPL *session); extern int __wt_las_destroy(WT_SESSION_IMPL *session); extern void __wt_las_set_written(WT_SESSION_IMPL *session); extern bool __wt_las_is_written(WT_SESSION_IMPL *session); +extern int __wt_las_cursor_create(WT_SESSION_IMPL *session, WT_CURSOR **cursorp); extern int __wt_las_cursor( WT_SESSION_IMPL *session, 
WT_CURSOR **cursorp, uint32_t *session_flags); extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags); extern int __wt_las_sweep(WT_SESSION_IMPL *session); @@ -318,7 +319,7 @@ extern int __wt_curtable_open(WT_SESSION_IMPL *session, const char *uri, const c extern int __wt_evict_file(WT_SESSION_IMPL *session, int syncop); extern void __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref); extern int __wt_evict_server_wake(WT_SESSION_IMPL *session); -extern int __wt_evict_create(WT_SESSION_IMPL *session); +extern int __wt_evict_create(WT_SESSION_IMPL *session, bool with_las); extern int __wt_evict_destroy(WT_SESSION_IMPL *session); extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp); extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session); @@ -593,8 +594,8 @@ extern int __wt_session_copy_values(WT_SESSION_IMPL *session); extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); extern int __wt_session_create( WT_SESSION_IMPL *session, const char *uri, const char *config); extern int __wt_session_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]); -extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool uses_dhandles, bool open_metadata, WT_SESSION_IMPL **sessionp); extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp); +extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp); extern int __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, bool *skipp); extern int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config); extern int __wt_session_lock_dhandle( WT_SESSION_IMPL *session, uint32_t flags, bool 
*is_deadp); diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index bd3adb3a528..1c5124c32af 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -203,12 +203,14 @@ __wt_lsm_manager_reconfig(WT_SESSION_IMPL *session, const char **cfg) int __wt_lsm_manager_start(WT_SESSION_IMPL *session) { + WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_LSM_MANAGER *manager; WT_SESSION_IMPL *worker_session; uint32_t i; - manager = &S2C(session)->lsm_manager; + conn = S2C(session); + manager = &conn->lsm_manager; /* * We need at least a manager, a switch thread and a generic @@ -225,7 +227,7 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session) */ for (i = 0; i < WT_LSM_MAX_WORKERS; i++) { WT_ERR(__wt_open_internal_session( - S2C(session), "lsm-worker", true, false, &worker_session)); + conn, "lsm-worker", false, 0, &worker_session)); worker_session->isolation = WT_ISO_READ_UNCOMMITTED; manager->lsm_worker_cookies[i].session = worker_session; } @@ -234,7 +236,7 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session) WT_ERR(__wt_thread_create(session, &manager->lsm_worker_cookies[0].tid, __lsm_worker_manager, &manager->lsm_worker_cookies[0])); - F_SET(S2C(session), WT_CONN_SERVER_LSM); + F_SET(conn, WT_CONN_SERVER_LSM); if (0) { err: for (i = 0; diff --git a/src/session/session_api.c b/src/session/session_api.c index 1bb519e80e0..a766829afad 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -1144,47 +1144,12 @@ __session_strerror(WT_SESSION *wt_session, int error) } /* - * __wt_open_internal_session -- - * Allocate a session for WiredTiger's use. - */ -int -__wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, - bool uses_dhandles, bool open_metadata, WT_SESSION_IMPL **sessionp) -{ - WT_SESSION_IMPL *session; - - *sessionp = NULL; - - WT_RET(__wt_open_session(conn, NULL, NULL, open_metadata, &session)); - session->name = name; - - /* - * Public sessions are automatically closed during WT_CONNECTION->close. 
- * If the session handles for internal threads were to go on the public - * list, there would be complex ordering issues during close. Set a - * flag to avoid this: internal sessions are not closed automatically. - */ - F_SET(session, WT_SESSION_INTERNAL); - - /* - * Some internal threads must keep running after we close all data - * handles. Make sure these threads don't open their own handles. - */ - if (!uses_dhandles) - F_SET(session, WT_SESSION_NO_DATA_HANDLES); - - *sessionp = session; - return (0); -} - -/* - * __wt_open_session -- - * Allocate a session handle. The internal parameter is used for sessions - * opened by WiredTiger for its own use. + * __open_session -- + * Allocate a session handle. */ -int -__wt_open_session(WT_CONNECTION_IMPL *conn, - WT_EVENT_HANDLER *event_handler, const char *config, bool open_metadata, +static int +__open_session(WT_CONNECTION_IMPL *conn, + WT_EVENT_HANDLER *event_handler, const char *config, WT_SESSION_IMPL **sessionp) { static const WT_SESSION stds = { @@ -1324,7 +1289,26 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, WT_STAT_FAST_CONN_INCR(session, session_open); err: __wt_spin_unlock(session, &conn->api_lock); - WT_RET(ret); + return (ret); +} + +/* + * __wt_open_session -- + * Allocate a session handle. + */ +int +__wt_open_session(WT_CONNECTION_IMPL *conn, + WT_EVENT_HANDLER *event_handler, const char *config, + bool open_metadata, WT_SESSION_IMPL **sessionp) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_SESSION *wt_session; + + *sessionp = NULL; + + /* Acquire a session.
*/ + WT_RET(__open_session(conn, event_handler, config, &session)); /* * Acquiring the metadata handle requires the schema lock; we've seen @@ -1336,8 +1320,59 @@ err: __wt_spin_unlock(session, &conn->api_lock); */ if (open_metadata) { WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)); - WT_RET(__wt_metadata_open(session_ret)); + if ((ret = __wt_metadata_open(session)) != 0) { + wt_session = &session->iface; + WT_TRET(wt_session->close(wt_session, NULL)); + return (ret); + } } + *sessionp = session; + return (0); +} + +/* + * __wt_open_internal_session -- + * Allocate a session for WiredTiger's use. + */ +int +__wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, + bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp) +{ + WT_DECL_RET; + WT_SESSION *wt_session; + WT_SESSION_IMPL *session; + + *sessionp = NULL; + + /* Acquire a session. */ + WT_RET(__wt_open_session(conn, NULL, NULL, open_metadata, &session)); + session->name = name; + + /* + * Public sessions are automatically closed during WT_CONNECTION->close. + * If the session handles for internal threads were to go on the public + * list, there would be complex ordering issues during close. Set a + * flag to avoid this: internal sessions are not closed automatically. + */ + F_SET(session, session_flags | WT_SESSION_INTERNAL); + + /* + * Acquiring the lookaside table cursor requires various locks; we've + * seen problems in the past where deadlocks happened because sessions + * deadlocked getting the cursor late in the process. Be defensive, + * get it now. 
+ */ + if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR)) { + WT_WITHOUT_DHANDLE(session, ret = + __wt_las_cursor_create(session, &session->las_cursor)); + if (ret != 0) { + wt_session = &session->iface; + WT_TRET(wt_session->close(wt_session, NULL)); + return (ret); + } + } + + *sessionp = session; return (0); } diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c index f2b181711d1..9d8a19cbff3 100644 --- a/src/txn/txn_recover.c +++ b/src/txn/txn_recover.c @@ -412,11 +412,12 @@ __wt_txn_recover(WT_SESSION_IMPL *session) WT_RECOVERY r; struct WT_RECOVERY_FILE *metafile; char *config; - bool needs_rec, was_backup; + bool eviction_started, needs_rec, was_backup; conn = S2C(session); WT_CLEAR(r); WT_INIT_LSN(&r.ckpt_lsn); + eviction_started = false; was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP); /* We need a real session for recovery. */ @@ -494,6 +495,15 @@ __wt_txn_recover(WT_SESSION_IMPL *session) */ if (needs_rec && FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR)) WT_ERR(WT_RUN_RECOVERY); + + /* + * Recovery can touch more data than fits in cache, so it relies on + * regular eviction to manage paging. Start eviction threads for + * recovery without LAS cursors. + */ + WT_ERR(__wt_evict_create(session, false)); + eviction_started = true; + /* * Always run recovery even if it was a clean shutdown. * We can consider skipping it in the future. @@ -522,6 +532,15 @@ __wt_txn_recover(WT_SESSION_IMPL *session) done: FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE); err: WT_TRET(__recovery_free(&r)); __wt_free(session, config); + + /* + * Destroy the eviction threads that were started in support of + * recovery. They will be restarted once the lookaside table is + * created. + */ + if (eviction_started) + WT_TRET(__wt_evict_destroy(session)); + WT_TRET(session->iface.close(&session->iface, NULL)); return (ret); |