diff options
author | Samy Lanka <samy.lanka@mongodb.com> | 2018-11-07 15:10:08 -0500 |
---|---|---|
committer | Samy Lanka <samy.lanka@mongodb.com> | 2018-11-08 17:28:44 -0500 |
commit | 09f0e3262c5bc5255135338d982acdc5723d5e15 (patch) | |
tree | 587696e37b5eb3fdb43e89d40d5486cea954765f /src/mongo | |
parent | 75b0e69c471d53675572f32f94f2f67d214d9842 (diff) | |
download | mongo-09f0e3262c5bc5255135338d982acdc5723d5e15.tar.gz |
SERVER-37945 Replace the Global X lock with a ReplicationStateTransitionLockGuard wherever we acquire it for a state transition
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/repl/SConscript | 14 | ||||
-rw-r--r-- | src/mongo/db/repl/bgsync.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 22 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_test.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_state_transition_lock_guard.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_state_transition_lock_guard.h | 8 | ||||
-rw-r--r-- | src/mongo/db/repl/rollback_impl.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/repl/rs_rollback.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/sync_tail.cpp | 5 |
10 files changed, 55 insertions, 28 deletions
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index 910d84bebfd..0fc06ce813c 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -609,6 +609,7 @@ env.Library( '$BUILD_DIR/mongo/util/net/network', 'optime', 'repl_coordinator_interface', + 'repl_state_transition_lock_guard', 'roll_back_local_operations', ], LIBDEPS_PRIVATE=[ @@ -686,6 +687,7 @@ env.Library( 'oplogreader', 'repl_coordinator_interface', 'repl_settings', + 'repl_state_transition_lock_guard', 'storage_interface', ], LIBDEPS_PRIVATE=[ @@ -818,6 +820,16 @@ env.CppUnitTest( ) env.Library( + target='repl_state_transition_lock_guard', + source=[ + 'replication_state_transition_lock_guard.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/db/kill_sessions_local', + ] +) + +env.Library( target='repl_coordinator_impl', source=[ 'check_quorum_for_config_change.cpp', @@ -825,7 +837,6 @@ env.Library( 'replication_coordinator_impl.cpp', 'replication_coordinator_impl_elect_v1.cpp', 'replication_coordinator_impl_heartbeat.cpp', - 'replication_state_transition_lock_guard.cpp', 'vote_requester.cpp', ], LIBDEPS=[ @@ -845,6 +856,7 @@ env.Library( 'data_replicator_external_state_initial_sync', 'repl_coordinator_interface', 'repl_settings', + 'repl_state_transition_lock_guard', 'replica_set_messages', 'replication_process', 'reporter', diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp index 6c09d2c816b..305d4b54079 100644 --- a/src/mongo/db/repl/bgsync.cpp +++ b/src/mongo/db/repl/bgsync.cpp @@ -50,6 +50,7 @@ #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/repl/replication_coordinator_impl.h" #include "mongo/db/repl/replication_process.h" +#include "mongo/db/repl/replication_state_transition_lock_guard.h" #include "mongo/db/repl/rollback_source_impl.h" #include "mongo/db/repl/rs_rollback.h" #include "mongo/db/repl/storage_interface.h" @@ -339,9 +340,9 @@ void BackgroundSync::_produce() { // Mark yourself as too stale. _tooStale = true; - // Need to take global X lock to transition out of SECONDARY. + // Need to take the RSTL in mode X to transition out of SECONDARY. auto opCtx = cc().makeOperationContext(); - Lock::GlobalWrite globalWriteLock(opCtx.get()); + ReplicationStateTransitionLockGuard transitionGuard(opCtx.get()); error() << "too stale to catch up -- entering maintenance mode"; log() << "Our newest OpTime : " << lastOpTimeFetched; diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 488ee1a8996..9d2bfa42570 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -963,25 +963,25 @@ ReplicationCoordinator::ApplierState ReplicationCoordinatorImpl::getApplierState void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx, long long termWhenBufferIsEmpty) { - // This logic is a little complicated in order to avoid acquiring the global exclusive lock + // This logic is a little complicated in order to avoid acquiring the RSTL in mode X // unnecessarily. This is important because the applier may call signalDrainComplete() // whenever it wants, not only when the ReplicationCoordinator is expecting it. // // The steps are: // 1.) Check to see if we're waiting for this signal. If not, return early. - // 2.) Otherwise, release the mutex while acquiring the global exclusive lock, - // since that might take a while (NB there's a deadlock cycle otherwise, too). + // 2.) Otherwise, release the mutex while acquiring the RSTL in mode X, since that might take a + // while (NB there's a deadlock cycle otherwise, too). // 3.) Re-check to see if we've somehow left drain mode. If we have not, clear // producer and applier's states, set the flag allowing non-local database writes and - // drop the mutex. At this point, no writes can occur from other threads, due to the - // global exclusive lock. + // drop the mutex. At this point, no writes can occur from other threads, due to the RSTL + // in mode X. // 4.) Drop all temp collections, and log the drops to the oplog. // 5.) Log transition to primary in the oplog and set that OpTime as the floor for what we will // consider to be committed. - // 6.) Drop the global exclusive lock. + // 6.) Drop the RSTL. // // Because replicatable writes are forbidden while in drain mode, and we don't exit drain - // mode until we have the global exclusive lock, which forbids all other threads from making + // mode until we have the RSTL in mode X, which forbids all other threads from making // writes, we know that from the time that _canAcceptNonLocalWrites is set until // this method returns, no external writes will be processed. This is important so that a new // temp collection isn't introduced on the new primary before we drop all the temp collections. @@ -1012,7 +1012,7 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx, } } - Lock::GlobalWrite globalWriteLock(opCtx); + ReplicationStateTransitionLockGuard transitionGuard(opCtx); lk.lock(); // Exit drain mode only if we're actually in draining mode, the apply buffer is empty in the @@ -2341,12 +2341,12 @@ void ReplicationCoordinatorImpl::_finishReplSetReconfig( return; } auto opCtx = cc().makeOperationContext(); - boost::optional<Lock::GlobalWrite> globalExclusiveLock; + boost::optional<ReplicationStateTransitionLockGuard> transitionGuard; if (isForceReconfig) { // Since it's a force reconfig, the primary node may not be electable after the // configuration change. In case we are that primary node, finish the reconfig under the - // global lock, so that the step down occurs safely. - globalExclusiveLock.emplace(opCtx.get()); + // RSTL, so that the step down occurs safely. + transitionGuard.emplace(opCtx.get()); } stdx::unique_lock<stdx::mutex> lk(_mutex); diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp index 5f2c64b81ce..69e1ec75baa 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp @@ -558,14 +558,14 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigFinish( } auto opCtx = cc().makeOperationContext(); - boost::optional<Lock::GlobalWrite> globalExclusiveLock; + boost::optional<ReplicationStateTransitionLockGuard> transitionGuard; stdx::unique_lock<stdx::mutex> lk{_mutex}; if (_memberState.primary()) { - // If we are primary, we need the global lock in MODE_X to step down. If we somehow - // transition out of primary while waiting for the global lock, there's no harm in holding + // If we are primary, we need the RSTL in mode X to step down. If we somehow + // transition out of primary while waiting for the RSTL, there's no harm in holding // it. lk.unlock(); - globalExclusiveLock.emplace(opCtx.get()); + transitionGuard.emplace(opCtx.get()); lk.lock(); } diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index 0da2179b76e..d68848f812f 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -54,6 +54,7 @@ #include "mongo/db/repl/replication_coordinator_external_state_mock.h" #include "mongo/db/repl/replication_coordinator_impl.h" #include "mongo/db/repl/replication_coordinator_test_fixture.h" +#include "mongo/db/repl/replication_state_transition_lock_guard.h" #include "mongo/db/repl/storage_interface_mock.h" #include "mongo/db/repl/topology_coordinator.h" #include "mongo/db/repl/update_position_args.h" @@ -1549,7 +1550,7 @@ TEST_F(ReplCoordTest, ConcurrentStepDownShouldNotSignalTheSameFinishEventMoreTha // Prevent _stepDownFinish() from running and becoming secondary by blocking in this // exclusive task. const auto opCtx = makeOperationContext(); - boost::optional<Lock::GlobalWrite> globalExclusiveLock(opCtx.get()); + boost::optional<ReplicationStateTransitionLockGuard> transitionGuard(opCtx.get()); TopologyCoordinator::UpdateTermResult termUpdated2; auto updateTermEvh2 = getReplCoord()->updateTerm_forTest(2, &termUpdated2); @@ -1565,7 +1566,7 @@ TEST_F(ReplCoordTest, ConcurrentStepDownShouldNotSignalTheSameFinishEventMoreTha ASSERT(!updateTermEvh3.isValid()); // Unblock the tasks for updateTerm and _stepDownFinish. - globalExclusiveLock.reset(); + transitionGuard.reset(); // Wait stepdown to finish and term 3 to be installed. replExec->waitForEvent(updateTermEvh2); @@ -1882,9 +1883,9 @@ TEST_F(StepDownTest, const auto opCtx = makeOperationContext(); - // Make sure stepDown cannot grab the global exclusive lock. We need to use a different + // Make sure stepDown cannot grab the RSTL in mode X. We need to use a different // locker to test this, or otherwise stepDown will be granted the lock automatically. - Lock::GlobalWrite lk(opCtx.get()); + ReplicationStateTransitionLockGuard transitionGuard(opCtx.get()); ASSERT_TRUE(opCtx->lockState()->isW()); auto locker = opCtx.get()->swapLockState(stdx::make_unique<LockerImpl>()); diff --git a/src/mongo/db/repl/replication_state_transition_lock_guard.cpp b/src/mongo/db/repl/replication_state_transition_lock_guard.cpp index 5e557b81857..61ee8d68e72 100644 --- a/src/mongo/db/repl/replication_state_transition_lock_guard.cpp +++ b/src/mongo/db/repl/replication_state_transition_lock_guard.cpp @@ -39,6 +39,9 @@ namespace mongo { namespace repl { +ReplicationStateTransitionLockGuard::ReplicationStateTransitionLockGuard(OperationContext* opCtx) + : ReplicationStateTransitionLockGuard(opCtx, Args()) {} + ReplicationStateTransitionLockGuard::ReplicationStateTransitionLockGuard(OperationContext* opCtx, const Args& args) : _opCtx(opCtx), _args(args) { diff --git a/src/mongo/db/repl/replication_state_transition_lock_guard.h b/src/mongo/db/repl/replication_state_transition_lock_guard.h index a4a30e7381d..7f42ce00e27 100644 --- a/src/mongo/db/repl/replication_state_transition_lock_guard.h +++ b/src/mongo/db/repl/replication_state_transition_lock_guard.h @@ -58,10 +58,16 @@ public: }; /** - * Acquires the global X lock and performs any other required actions accoriding to the Args + * Acquires the global X lock. + */ + ReplicationStateTransitionLockGuard(OperationContext* opCtx); + + /** + * Acquires the global X lock and performs any other required actions according to the Args * provided. */ ReplicationStateTransitionLockGuard(OperationContext* opCtx, const Args& args); + ~ReplicationStateTransitionLockGuard(); /** diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp index b8214ee26a6..99240bb16e1 100644 --- a/src/mongo/db/repl/rollback_impl.cpp +++ b/src/mongo/db/repl/rollback_impl.cpp @@ -48,6 +48,7 @@ #include "mongo/db/repl/drop_pending_collection_reaper.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/repl/replication_process.h" +#include "mongo/db/repl/replication_state_transition_lock_guard.h" #include "mongo/db/repl/roll_back_local_operations.h" #include "mongo/db/repl/storage_interface.h" #include "mongo/db/s/shard_identity_rollback_notifier.h" @@ -297,7 +298,7 @@ Status RollbackImpl::_transitionToRollback(OperationContext* opCtx) { log() << "transition to ROLLBACK"; { - Lock::GlobalWrite globalWrite(opCtx); + ReplicationStateTransitionLockGuard transitionGuard(opCtx); auto status = _replicationCoordinator->setFollowerMode(MemberState::RS_ROLLBACK); if (!status.isOK()) { @@ -878,7 +879,7 @@ void RollbackImpl::_transitionFromRollbackToSecondary(OperationContext* opCtx) { log() << "transition to SECONDARY"; - Lock::GlobalWrite globalWrite(opCtx); + ReplicationStateTransitionLockGuard transitionGuard(opCtx); auto status = _replicationCoordinator->setFollowerMode(MemberState::RS_SECONDARY); if (!status.isOK()) { diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp index 9c76bbf5b18..1927ffd20ea 100644 --- a/src/mongo/db/repl/rs_rollback.cpp +++ b/src/mongo/db/repl/rs_rollback.cpp @@ -67,6 +67,7 @@ #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/repl/replication_coordinator_impl.h" #include "mongo/db/repl/replication_process.h" +#include "mongo/db/repl/replication_state_transition_lock_guard.h" #include "mongo/db/repl/roll_back_local_operations.h" #include "mongo/db/repl/rollback_source.h" #include "mongo/db/repl/rslog.h" @@ -1492,7 +1493,8 @@ void rollback(OperationContext* opCtx, // then. { - Lock::GlobalWrite globalWrite(opCtx); + ReplicationStateTransitionLockGuard transitionGuard(opCtx); + auto status = replCoord->setFollowerMode(MemberState::RS_ROLLBACK); if (!status.isOK()) { log() << "Cannot transition from " << replCoord->getMemberState().toString() << " to " diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp index 140f76c7a7d..1ae75be248b 100644 --- a/src/mongo/db/repl/sync_tail.cpp +++ b/src/mongo/db/repl/sync_tail.cpp @@ -68,6 +68,7 @@ #include "mongo/db/repl/repl_client_info.h" #include "mongo/db/repl/repl_set_config.h" #include "mongo/db/repl/replication_coordinator.h" +#include "mongo/db/repl/replication_state_transition_lock_guard.h" #include "mongo/db/repl/session_update_tracker.h" #include "mongo/db/service_context.h" #include "mongo/db/session.h" @@ -643,8 +644,8 @@ void tryToGoLiveAsASecondary(OperationContext* opCtx, // This needs to happen after the attempt so readers can be sure we've already tried. ON_BLOCK_EXIT([] { attemptsToBecomeSecondary.increment(); }); - // Need global X lock to transition to SECONDARY - Lock::GlobalWrite writeLock(opCtx); + // Need the RSTL in mode X to transition to SECONDARY + ReplicationStateTransitionLockGuard transitionGuard(opCtx); // Maintenance mode will force us to remain in RECOVERING state, no matter what. if (replCoord->getMaintenanceMode()) { |