summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorSamy Lanka <samy.lanka@mongodb.com>2018-11-07 15:10:08 -0500
committerSamy Lanka <samy.lanka@mongodb.com>2018-11-08 17:28:44 -0500
commit09f0e3262c5bc5255135338d982acdc5723d5e15 (patch)
tree587696e37b5eb3fdb43e89d40d5486cea954765f /src/mongo
parent75b0e69c471d53675572f32f94f2f67d214d9842 (diff)
downloadmongo-09f0e3262c5bc5255135338d982acdc5723d5e15.tar.gz
SERVER-37945 Replace the Global X lock with a ReplicationStateTransitionLockGuard wherever we acquire it for a state transition
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/db/repl/SConscript14
-rw-r--r--src/mongo/db/repl/bgsync.cpp5
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp22
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp8
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_test.cpp9
-rw-r--r--src/mongo/db/repl/replication_state_transition_lock_guard.cpp3
-rw-r--r--src/mongo/db/repl/replication_state_transition_lock_guard.h8
-rw-r--r--src/mongo/db/repl/rollback_impl.cpp5
-rw-r--r--src/mongo/db/repl/rs_rollback.cpp4
-rw-r--r--src/mongo/db/repl/sync_tail.cpp5
10 files changed, 55 insertions, 28 deletions
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index 910d84bebfd..0fc06ce813c 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -609,6 +609,7 @@ env.Library(
'$BUILD_DIR/mongo/util/net/network',
'optime',
'repl_coordinator_interface',
+ 'repl_state_transition_lock_guard',
'roll_back_local_operations',
],
LIBDEPS_PRIVATE=[
@@ -686,6 +687,7 @@ env.Library(
'oplogreader',
'repl_coordinator_interface',
'repl_settings',
+ 'repl_state_transition_lock_guard',
'storage_interface',
],
LIBDEPS_PRIVATE=[
@@ -818,6 +820,16 @@ env.CppUnitTest(
)
env.Library(
+ target='repl_state_transition_lock_guard',
+ source=[
+ 'replication_state_transition_lock_guard.cpp',
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/db/kill_sessions_local',
+ ]
+)
+
+env.Library(
target='repl_coordinator_impl',
source=[
'check_quorum_for_config_change.cpp',
@@ -825,7 +837,6 @@ env.Library(
'replication_coordinator_impl.cpp',
'replication_coordinator_impl_elect_v1.cpp',
'replication_coordinator_impl_heartbeat.cpp',
- 'replication_state_transition_lock_guard.cpp',
'vote_requester.cpp',
],
LIBDEPS=[
@@ -845,6 +856,7 @@ env.Library(
'data_replicator_external_state_initial_sync',
'repl_coordinator_interface',
'repl_settings',
+ 'repl_state_transition_lock_guard',
'replica_set_messages',
'replication_process',
'reporter',
diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp
index 6c09d2c816b..305d4b54079 100644
--- a/src/mongo/db/repl/bgsync.cpp
+++ b/src/mongo/db/repl/bgsync.cpp
@@ -50,6 +50,7 @@
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/replication_coordinator_impl.h"
#include "mongo/db/repl/replication_process.h"
+#include "mongo/db/repl/replication_state_transition_lock_guard.h"
#include "mongo/db/repl/rollback_source_impl.h"
#include "mongo/db/repl/rs_rollback.h"
#include "mongo/db/repl/storage_interface.h"
@@ -339,9 +340,9 @@ void BackgroundSync::_produce() {
// Mark yourself as too stale.
_tooStale = true;
- // Need to take global X lock to transition out of SECONDARY.
+ // Need to take the RSTL in mode X to transition out of SECONDARY.
auto opCtx = cc().makeOperationContext();
- Lock::GlobalWrite globalWriteLock(opCtx.get());
+ ReplicationStateTransitionLockGuard transitionGuard(opCtx.get());
error() << "too stale to catch up -- entering maintenance mode";
log() << "Our newest OpTime : " << lastOpTimeFetched;
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 488ee1a8996..9d2bfa42570 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -963,25 +963,25 @@ ReplicationCoordinator::ApplierState ReplicationCoordinatorImpl::getApplierState
void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx,
long long termWhenBufferIsEmpty) {
- // This logic is a little complicated in order to avoid acquiring the global exclusive lock
+ // This logic is a little complicated in order to avoid acquiring the RSTL in mode X
// unnecessarily. This is important because the applier may call signalDrainComplete()
// whenever it wants, not only when the ReplicationCoordinator is expecting it.
//
// The steps are:
// 1.) Check to see if we're waiting for this signal. If not, return early.
- // 2.) Otherwise, release the mutex while acquiring the global exclusive lock,
- // since that might take a while (NB there's a deadlock cycle otherwise, too).
+ // 2.) Otherwise, release the mutex while acquiring the RSTL in mode X, since that might take a
+ // while (NB there's a deadlock cycle otherwise, too).
// 3.) Re-check to see if we've somehow left drain mode. If we have not, clear
// producer and applier's states, set the flag allowing non-local database writes and
- // drop the mutex. At this point, no writes can occur from other threads, due to the
- // global exclusive lock.
+ // drop the mutex. At this point, no writes can occur from other threads, due to the RSTL
+ // in mode X.
// 4.) Drop all temp collections, and log the drops to the oplog.
// 5.) Log transition to primary in the oplog and set that OpTime as the floor for what we will
// consider to be committed.
- // 6.) Drop the global exclusive lock.
+ // 6.) Drop the RSTL.
//
// Because replicatable writes are forbidden while in drain mode, and we don't exit drain
- // mode until we have the global exclusive lock, which forbids all other threads from making
+ // mode until we have the RSTL in mode X, which forbids all other threads from making
// writes, we know that from the time that _canAcceptNonLocalWrites is set until
// this method returns, no external writes will be processed. This is important so that a new
// temp collection isn't introduced on the new primary before we drop all the temp collections.
@@ -1012,7 +1012,7 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx,
}
}
- Lock::GlobalWrite globalWriteLock(opCtx);
+ ReplicationStateTransitionLockGuard transitionGuard(opCtx);
lk.lock();
// Exit drain mode only if we're actually in draining mode, the apply buffer is empty in the
@@ -2341,12 +2341,12 @@ void ReplicationCoordinatorImpl::_finishReplSetReconfig(
return;
}
auto opCtx = cc().makeOperationContext();
- boost::optional<Lock::GlobalWrite> globalExclusiveLock;
+ boost::optional<ReplicationStateTransitionLockGuard> transitionGuard;
if (isForceReconfig) {
// Since it's a force reconfig, the primary node may not be electable after the
// configuration change. In case we are that primary node, finish the reconfig under the
- // global lock, so that the step down occurs safely.
- globalExclusiveLock.emplace(opCtx.get());
+ // RSTL, so that the step down occurs safely.
+ transitionGuard.emplace(opCtx.get());
}
stdx::unique_lock<stdx::mutex> lk(_mutex);
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index 5f2c64b81ce..69e1ec75baa 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -558,14 +558,14 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
}
auto opCtx = cc().makeOperationContext();
- boost::optional<Lock::GlobalWrite> globalExclusiveLock;
+ boost::optional<ReplicationStateTransitionLockGuard> transitionGuard;
stdx::unique_lock<stdx::mutex> lk{_mutex};
if (_memberState.primary()) {
- // If we are primary, we need the global lock in MODE_X to step down. If we somehow
- // transition out of primary while waiting for the global lock, there's no harm in holding
+ // If we are primary, we need the RSTL in mode X to step down. If we somehow
+ // transition out of primary while waiting for the RSTL, there's no harm in holding
// it.
lk.unlock();
- globalExclusiveLock.emplace(opCtx.get());
+ transitionGuard.emplace(opCtx.get());
lk.lock();
}
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index 0da2179b76e..d68848f812f 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -54,6 +54,7 @@
#include "mongo/db/repl/replication_coordinator_external_state_mock.h"
#include "mongo/db/repl/replication_coordinator_impl.h"
#include "mongo/db/repl/replication_coordinator_test_fixture.h"
+#include "mongo/db/repl/replication_state_transition_lock_guard.h"
#include "mongo/db/repl/storage_interface_mock.h"
#include "mongo/db/repl/topology_coordinator.h"
#include "mongo/db/repl/update_position_args.h"
@@ -1549,7 +1550,7 @@ TEST_F(ReplCoordTest, ConcurrentStepDownShouldNotSignalTheSameFinishEventMoreTha
// Prevent _stepDownFinish() from running and becoming secondary by blocking in this
// exclusive task.
const auto opCtx = makeOperationContext();
- boost::optional<Lock::GlobalWrite> globalExclusiveLock(opCtx.get());
+ boost::optional<ReplicationStateTransitionLockGuard> transitionGuard(opCtx.get());
TopologyCoordinator::UpdateTermResult termUpdated2;
auto updateTermEvh2 = getReplCoord()->updateTerm_forTest(2, &termUpdated2);
@@ -1565,7 +1566,7 @@ TEST_F(ReplCoordTest, ConcurrentStepDownShouldNotSignalTheSameFinishEventMoreTha
ASSERT(!updateTermEvh3.isValid());
// Unblock the tasks for updateTerm and _stepDownFinish.
- globalExclusiveLock.reset();
+ transitionGuard.reset();
// Wait stepdown to finish and term 3 to be installed.
replExec->waitForEvent(updateTermEvh2);
@@ -1882,9 +1883,9 @@ TEST_F(StepDownTest,
const auto opCtx = makeOperationContext();
- // Make sure stepDown cannot grab the global exclusive lock. We need to use a different
+ // Make sure stepDown cannot grab the RSTL in mode X. We need to use a different
// locker to test this, or otherwise stepDown will be granted the lock automatically.
- Lock::GlobalWrite lk(opCtx.get());
+ ReplicationStateTransitionLockGuard transitionGuard(opCtx.get());
ASSERT_TRUE(opCtx->lockState()->isW());
auto locker = opCtx.get()->swapLockState(stdx::make_unique<LockerImpl>());
diff --git a/src/mongo/db/repl/replication_state_transition_lock_guard.cpp b/src/mongo/db/repl/replication_state_transition_lock_guard.cpp
index 5e557b81857..61ee8d68e72 100644
--- a/src/mongo/db/repl/replication_state_transition_lock_guard.cpp
+++ b/src/mongo/db/repl/replication_state_transition_lock_guard.cpp
@@ -39,6 +39,9 @@
namespace mongo {
namespace repl {
+ReplicationStateTransitionLockGuard::ReplicationStateTransitionLockGuard(OperationContext* opCtx)
+ : ReplicationStateTransitionLockGuard(opCtx, Args()) {}
+
ReplicationStateTransitionLockGuard::ReplicationStateTransitionLockGuard(OperationContext* opCtx,
const Args& args)
: _opCtx(opCtx), _args(args) {
diff --git a/src/mongo/db/repl/replication_state_transition_lock_guard.h b/src/mongo/db/repl/replication_state_transition_lock_guard.h
index a4a30e7381d..7f42ce00e27 100644
--- a/src/mongo/db/repl/replication_state_transition_lock_guard.h
+++ b/src/mongo/db/repl/replication_state_transition_lock_guard.h
@@ -58,10 +58,16 @@ public:
};
/**
- * Acquires the global X lock and performs any other required actions accoriding to the Args
+ * Acquires the global X lock.
+ */
+ ReplicationStateTransitionLockGuard(OperationContext* opCtx);
+
+ /**
+ * Acquires the global X lock and performs any other required actions according to the Args
* provided.
*/
ReplicationStateTransitionLockGuard(OperationContext* opCtx, const Args& args);
+
~ReplicationStateTransitionLockGuard();
/**
diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp
index b8214ee26a6..99240bb16e1 100644
--- a/src/mongo/db/repl/rollback_impl.cpp
+++ b/src/mongo/db/repl/rollback_impl.cpp
@@ -48,6 +48,7 @@
#include "mongo/db/repl/drop_pending_collection_reaper.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/replication_process.h"
+#include "mongo/db/repl/replication_state_transition_lock_guard.h"
#include "mongo/db/repl/roll_back_local_operations.h"
#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/s/shard_identity_rollback_notifier.h"
@@ -297,7 +298,7 @@ Status RollbackImpl::_transitionToRollback(OperationContext* opCtx) {
log() << "transition to ROLLBACK";
{
- Lock::GlobalWrite globalWrite(opCtx);
+ ReplicationStateTransitionLockGuard transitionGuard(opCtx);
auto status = _replicationCoordinator->setFollowerMode(MemberState::RS_ROLLBACK);
if (!status.isOK()) {
@@ -878,7 +879,7 @@ void RollbackImpl::_transitionFromRollbackToSecondary(OperationContext* opCtx) {
log() << "transition to SECONDARY";
- Lock::GlobalWrite globalWrite(opCtx);
+ ReplicationStateTransitionLockGuard transitionGuard(opCtx);
auto status = _replicationCoordinator->setFollowerMode(MemberState::RS_SECONDARY);
if (!status.isOK()) {
diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp
index 9c76bbf5b18..1927ffd20ea 100644
--- a/src/mongo/db/repl/rs_rollback.cpp
+++ b/src/mongo/db/repl/rs_rollback.cpp
@@ -67,6 +67,7 @@
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/replication_coordinator_impl.h"
#include "mongo/db/repl/replication_process.h"
+#include "mongo/db/repl/replication_state_transition_lock_guard.h"
#include "mongo/db/repl/roll_back_local_operations.h"
#include "mongo/db/repl/rollback_source.h"
#include "mongo/db/repl/rslog.h"
@@ -1492,7 +1493,8 @@ void rollback(OperationContext* opCtx,
// then.
{
- Lock::GlobalWrite globalWrite(opCtx);
+ ReplicationStateTransitionLockGuard transitionGuard(opCtx);
+
auto status = replCoord->setFollowerMode(MemberState::RS_ROLLBACK);
if (!status.isOK()) {
log() << "Cannot transition from " << replCoord->getMemberState().toString() << " to "
diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp
index 140f76c7a7d..1ae75be248b 100644
--- a/src/mongo/db/repl/sync_tail.cpp
+++ b/src/mongo/db/repl/sync_tail.cpp
@@ -68,6 +68,7 @@
#include "mongo/db/repl/repl_client_info.h"
#include "mongo/db/repl/repl_set_config.h"
#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/repl/replication_state_transition_lock_guard.h"
#include "mongo/db/repl/session_update_tracker.h"
#include "mongo/db/service_context.h"
#include "mongo/db/session.h"
@@ -643,8 +644,8 @@ void tryToGoLiveAsASecondary(OperationContext* opCtx,
// This needs to happen after the attempt so readers can be sure we've already tried.
ON_BLOCK_EXIT([] { attemptsToBecomeSecondary.increment(); });
- // Need global X lock to transition to SECONDARY
- Lock::GlobalWrite writeLock(opCtx);
+ // Need the RSTL in mode X to transition to SECONDARY
+ ReplicationStateTransitionLockGuard transitionGuard(opCtx);
// Maintenance mode will force us to remain in RECOVERING state, no matter what.
if (replCoord->getMaintenanceMode()) {