author     Samy Lanka <samy.lanka@mongodb.com>   2018-12-13 16:38:27 -0500
committer  Samy Lanka <samy.lanka@mongodb.com>   2019-01-17 17:38:32 -0500
commit     5918fda8a354db2e3ecc95ac0c384b412bfe0684 (patch)
tree       10a698246928b82ceffca68c086f352a59fb13e4 /src/mongo/db
parent     95ff8eff9c4641240c6158d1b449f1fbabea6a8e (diff)
download   mongo-5918fda8a354db2e3ecc95ac0c384b412bfe0684.tar.gz
SERVER-38162 Acquire RSTL on shutdown in mode X
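
The db.cpp hunk below reorders shutdownTask() so that the RSTL request is enqueued in mode X before operations are killed and stashed transactions are aborted, and only then waited on. A condensed sketch of that ordering, using only names that appear in the diff (surrounding shutdown steps and error handling omitted):

    // 1. Enqueue the RSTL request in mode X without blocking yet.
    repl::ReplicationStateTransitionLockGuard rstl(
        opCtx, repl::ReplicationStateTransitionLockGuard::EnqueueOnly());

    // 2. Mark all operations (including this opCtx) as killed.
    serviceContext->setKillAllOperations();

    {
        // 3. Abort stashed transactions so they release the locks that could
        //    otherwise block the enqueued RSTL request; the uninterruptible
        //    guard lets this complete even though opCtx has been killed.
        UninterruptibleLockGuard noInterrupt(opCtx->lockState());
        killSessionsLocalShutdownAllTransactions(opCtx);

        // 4. Only now block until the RSTL is granted in mode X.
        rstl.waitForLockUntil(Date_t::max());
    }
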
Diffstat (limited to 'src/mongo/db')
-rw-r--r--  src/mongo/db/concurrency/replication_state_transition_lock_guard.cpp |  1
-rw-r--r--  src/mongo/db/db.cpp                                                   | 30
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp                    |  4
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp          |  4
4 files changed, 28 insertions, 11 deletions
diff --git a/src/mongo/db/concurrency/replication_state_transition_lock_guard.cpp b/src/mongo/db/concurrency/replication_state_transition_lock_guard.cpp
index 54b6e3696e8..0260275ba24 100644
--- a/src/mongo/db/concurrency/replication_state_transition_lock_guard.cpp
+++ b/src/mongo/db/concurrency/replication_state_transition_lock_guard.cpp
@@ -99,6 +99,5 @@ void ReplicationStateTransitionLockGuard::_unlock() {
_result = LOCK_INVALID;
}
-
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp
index 8d07347ca5b..eefd845af57 100644
--- a/src/mongo/db/db.cpp
+++ b/src/mongo/db/db.cpp
@@ -65,6 +65,7 @@
#include "mongo/db/commands/feature_compatibility_version_gen.h"
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/concurrency/lock_state.h"
+#include "mongo/db/concurrency/replication_state_transition_lock_guard.h"
#include "mongo/db/concurrency/write_conflict_exception.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/dbdirectclient.h"
@@ -910,14 +911,33 @@ void shutdownTask() {
opCtx = uniqueOpCtx.get();
}
- // This can wait a long time while we drain the secondary's apply queue, especially if it
- // is building an index.
+ // This can wait a long time while we drain the secondary's apply queue, especially if
+ // it is building an index.
repl::ReplicationCoordinator::get(serviceContext)->shutdown(opCtx);
ShardingInitializationMongoD::get(serviceContext)->shutDown(opCtx);
- // Destroy all stashed transaction resources, in order to release locks.
- killSessionsLocalShutdownAllTransactions(opCtx);
+ // Acquire the RSTL in mode X. First we enqueue the lock request, then kill all operations,
+ // destroy all stashed transaction resources in order to release locks, and finally wait
+ // until the lock request is granted.
+ repl::ReplicationStateTransitionLockGuard rstl(
+ opCtx, repl::ReplicationStateTransitionLockGuard::EnqueueOnly());
+
+ // Kill all operations. After this point, the opCtx will have been marked as killed and will
+ // not be usable other than to kill all transactions directly below.
+ serviceContext->setKillAllOperations();
+
+ {
+ // Make this scope uninterruptible so that we can still abort all transactions even
+ // though the opCtx has been killed. While we don't currently check for an interrupt
+ // before checking out a session, we want to make sure that this completes.
+ UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
+ // Destroy all stashed transaction resources, in order to release locks.
+ killSessionsLocalShutdownAllTransactions(opCtx);
+
+ rstl.waitForLockUntil(Date_t::max());
+ }
// Interrupts all index builds, leaving the state intact to be recovered when the server
// restarts. This should be done after replication oplog application finishes, so foreground
@@ -925,8 +945,6 @@ void shutdownTask() {
IndexBuildsCoordinator::get(serviceContext)->shutdown();
}
- serviceContext->setKillAllOperations();
-
ReplicaSetMonitor::shutdown();
if (auto sr = Grid::get(serviceContext)->shardRegistry()) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index c739b696577..b7949947cdd 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1763,8 +1763,8 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
ReplicationStateTransitionLockGuard rstlLock(
opCtx, ReplicationStateTransitionLockGuard::EnqueueOnly());
- // Kill all user operations to help us get the global lock faster, as well as to ensure that
- // operations that are no longer safe to run (like writes) get killed.
+ // Since we are in stepdown, after enqueueing the RSTL we need to kill all user operations to
+ // ensure that operations that are no longer safe to run (like writes) get killed.
_killOperationsOnStepDown(opCtx);
// Using 'force' sets the default for the wait time to zero, which means the stepdown will
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index dafe36af09c..fa53cec63ba 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -384,8 +384,8 @@ void ReplicationCoordinatorImpl::_stepDownFinish(
ReplicationStateTransitionLockGuard rstlLock(
opCtx.get(), ReplicationStateTransitionLockGuard::EnqueueOnly());
- // Kill all user operations to help us get the global lock faster, as well as to ensure that
- // operations that are no longer safe to run (like writes) get killed.
+ // Since we are in stepdown, after enqueueing the RSTL we need to kill all user operations to
+ // ensure that operations that are no longer safe to run (like writes) get killed.
_killOperationsOnStepDown(opCtx.get());
rstlLock.waitForLockUntil(Date_t::max());
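
For reference, the stepdown paths touched above (stepDown() and _stepDownFinish()) already follow the same enqueue, kill, then wait shape that shutdown now adopts. A minimal sketch of that pattern, using only names from the hunks above (timeouts and error handling omitted):

    // Enqueue the RSTL request first so the state transition is staged.
    ReplicationStateTransitionLockGuard rstlLock(
        opCtx, ReplicationStateTransitionLockGuard::EnqueueOnly());

    // Kill user operations that are no longer safe to run (like writes); this
    // can also help the enqueued RSTL request get granted sooner.
    _killOperationsOnStepDown(opCtx);

    // Block until the RSTL is actually granted.
    rstlLock.waitForLockUntil(Date_t::max());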