diff options
author | Samy Lanka <samy.lanka@mongodb.com> | 2018-12-13 16:38:27 -0500 |
---|---|---|
committer | Samy Lanka <samy.lanka@mongodb.com> | 2019-01-17 17:38:32 -0500 |
commit | 5918fda8a354db2e3ecc95ac0c384b412bfe0684 (patch) | |
tree | 10a698246928b82ceffca68c086f352a59fb13e4 /src/mongo/db | |
parent | 95ff8eff9c4641240c6158d1b449f1fbabea6a8e (diff) | |
download | mongo-5918fda8a354db2e3ecc95ac0c384b412bfe0684.tar.gz |
SERVER-38162 Acquire RSTL on shutdown in mode X
Diffstat (limited to 'src/mongo/db')
4 files changed, 28 insertions, 11 deletions
```diff
diff --git a/src/mongo/db/concurrency/replication_state_transition_lock_guard.cpp b/src/mongo/db/concurrency/replication_state_transition_lock_guard.cpp
index 54b6e3696e8..0260275ba24 100644
--- a/src/mongo/db/concurrency/replication_state_transition_lock_guard.cpp
+++ b/src/mongo/db/concurrency/replication_state_transition_lock_guard.cpp
@@ -99,6 +99,5 @@ void ReplicationStateTransitionLockGuard::_unlock() {
     _result = LOCK_INVALID;
 }
 
-
 } // namespace repl
 } // namespace mongo
diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp
index 8d07347ca5b..eefd845af57 100644
--- a/src/mongo/db/db.cpp
+++ b/src/mongo/db/db.cpp
@@ -65,6 +65,7 @@
 #include "mongo/db/commands/feature_compatibility_version_gen.h"
 #include "mongo/db/concurrency/d_concurrency.h"
 #include "mongo/db/concurrency/lock_state.h"
+#include "mongo/db/concurrency/replication_state_transition_lock_guard.h"
 #include "mongo/db/concurrency/write_conflict_exception.h"
 #include "mongo/db/db_raii.h"
 #include "mongo/db/dbdirectclient.h"
@@ -910,14 +911,33 @@ void shutdownTask() {
             opCtx = uniqueOpCtx.get();
         }
 
-        // This can wait a long time while we drain the secondary's apply queue, especially if it
-        // is building an index.
+        // This can wait a long time while we drain the secondary's apply queue, especially if
+        // it is building an index.
         repl::ReplicationCoordinator::get(serviceContext)->shutdown(opCtx);
 
         ShardingInitializationMongoD::get(serviceContext)->shutDown(opCtx);
 
-        // Destroy all stashed transaction resources, in order to release locks.
-        killSessionsLocalShutdownAllTransactions(opCtx);
+        // Acquire the RSTL in mode X. First we enqueue the lock request, then kill all operations,
+        // destroy all stashed transaction resources in order to release locks, and finally wait
+        // until the lock request is granted.
+        repl::ReplicationStateTransitionLockGuard rstl(
+            opCtx, repl::ReplicationStateTransitionLockGuard::EnqueueOnly());
+
+        // Kill all operations. After this point, the opCtx will have been marked as killed and will
+        // not be usable other than to kill all transactions directly below.
+        serviceContext->setKillAllOperations();
+
+        {
+            // Make this scope uninterruptible so that we can still abort all transactions even
+            // though the opCtx has been killed. While we don't currently check for an interrupt
+            // before checking out a session, we want to make sure that this completes.
+            UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
+            // Destroy all stashed transaction resources, in order to release locks.
+            killSessionsLocalShutdownAllTransactions(opCtx);
+
+            rstl.waitForLockUntil(Date_t::max());
+        }
 
         // Interrupts all index builds, leaving the state intact to be recovered when the server
         // restarts. This should be done after replication oplog application finishes, so foreground
@@ -925,8 +945,6 @@ void shutdownTask() {
         IndexBuildsCoordinator::get(serviceContext)->shutdown();
     }
 
-    serviceContext->setKillAllOperations();
-
     ReplicaSetMonitor::shutdown();
 
     if (auto sr = Grid::get(serviceContext)->shardRegistry()) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index c739b696577..b7949947cdd 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1763,8 +1763,8 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
     ReplicationStateTransitionLockGuard rstlLock(
         opCtx, ReplicationStateTransitionLockGuard::EnqueueOnly());
 
-    // Kill all user operations to help us get the global lock faster, as well as to ensure that
-    // operations that are no longer safe to run (like writes) get killed.
+    // Since we are in stepdown, after enqueueing the RSTL we need to kill all user operations to
+    // ensure that operations that are no longer safe to run (like writes) get killed.
     _killOperationsOnStepDown(opCtx);
 
     // Using 'force' sets the default for the wait time to zero, which means the stepdown will
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index dafe36af09c..fa53cec63ba 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -384,8 +384,8 @@ void ReplicationCoordinatorImpl::_stepDownFinish(
     ReplicationStateTransitionLockGuard rstlLock(
         opCtx.get(), ReplicationStateTransitionLockGuard::EnqueueOnly());
 
-    // Kill all user operations to help us get the global lock faster, as well as to ensure that
-    // operations that are no longer safe to run (like writes) get killed.
+    // Since we are in stepdown, after enqueueing the RSTL we need to kill all user operations to
+    // ensure that operations that are no longer safe to run (like writes) get killed.
     _killOperationsOnStepDown(opCtx.get());
 
     rstlLock.waitForLockUntil(Date_t::max());
```