summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Keith Bostic <keith.bostic@mongodb.com> 2017-08-14 07:25:48 -0400
committer: GitHub <noreply@github.com> 2017-08-14 07:25:48 -0400
commit: 6e66393bbc3ceba364fa8e42612f0f8caa0dc4ea (patch)
tree: b59296d19f20e03ce71e49d93d846b209b96ea97
parent: 71bb828bdca4f0efe1cbbcf717d83791817b0efa (diff)
download: mongo-6e66393bbc3ceba364fa8e42612f0f8caa0dc4ea.tar.gz
WT-3358 LSM will hang if the manager fails to start (#3582)
We increment WT_LSM_MANAGER.lsm_workers to 1 before starting the manager and don't reset it to 0 on failure, causing __wt_lsm_manager_destroy() to hang, waiting on the manager thread to set WT_LSM_MANAGER_SHUTDOWN. Move the increment of WT_LSM_MANAGER.lsm_workers into __wt_lsm_manager_start() to clarify what's happening, and reset that value to 0 if we fail to start the manager. Also set WT_LSM_MANAGER_SHUTDOWN on error, so that even if we somehow get the test wrong, __wt_lsm_manager_destroy() will proceed. Finally, test WT_LSM_MANAGER_SHUTDOWN in __wt_lsm_manager_start() so that once we fail to start the manager, subsequent LSM tree open calls won't attempt to start it again.
-rw-r--r-- src/lsm/lsm_manager.c 27
-rw-r--r-- src/lsm/lsm_tree.c 3
2 files changed, 22 insertions, 8 deletions
diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c
index 24a0429a184..3949d88cec4 100644
--- a/src/lsm/lsm_manager.c
+++ b/src/lsm/lsm_manager.c
@@ -208,14 +208,20 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session)
conn = S2C(session);
manager = &conn->lsm_manager;
- if (F_ISSET(conn, WT_CONN_READONLY)) {
- manager->lsm_workers = 0;
- return (0);
- }
/*
- * We need at least a manager, a switch thread and a generic
- * worker.
+ * If readonly or the manager is running, or we've already failed,
+ * there's no work to do.
*/
+ if (F_ISSET(conn, WT_CONN_READONLY) ||
+ manager->lsm_workers != 0 ||
+ F_ISSET(manager, WT_LSM_MANAGER_SHUTDOWN))
+ return (0);
+
+ /* It's possible to race, see if we're the winner. */
+ if (!__wt_atomic_cas32(&manager->lsm_workers, 0, 1))
+ return (0);
+
+ /* We need at least a manager, a switch thread and a generic worker. */
WT_ASSERT(session, manager->lsm_workers_max > 2);
/*
@@ -245,6 +251,15 @@ err: for (i = 0;
i++)
WT_TRET((&worker_session->iface)->close(
&worker_session->iface, NULL));
+
+ /* Make the failure permanent, we won't try again. */
+ F_SET(manager, WT_LSM_MANAGER_SHUTDOWN);
+
+ /*
+ * Reset the workers count (otherwise, LSM destroy will hang
+ * waiting for threads to exit).
+ */
+ WT_PUBLISH(manager->lsm_workers, 0);
}
return (ret);
}
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index 18e1f6d3115..33d9e472df6 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -474,8 +474,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session,
F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
/* Start the LSM manager thread if it isn't running. */
- if (__wt_atomic_cas32(&conn->lsm_manager.lsm_workers, 0, 1))
- WT_RET(__wt_lsm_manager_start(session));
+ WT_RET(__wt_lsm_manager_start(session));
/* Make sure no one beat us to it. */
if ((ret = __lsm_tree_find(