diff options
author | Suganthi Mani <suganthi.mani@mongodb.com> | 2018-05-15 10:17:27 -0400 |
---|---|---|
committer | Suganthi Mani <suganthi.mani@mongodb.com> | 2018-05-24 18:03:07 -0400 |
commit | 6fed52516bdc33c9e19c9daa40b01bc12e6519bc (patch) | |
tree | 7b18f21091612ab658458a1453da1eced8672687 /src/mongo/db | |
parent | 49d89c0f4378787aa47d30ab253f431897b457f0 (diff) | |
download | mongo-6fed52516bdc33c9e19c9daa40b01bc12e6519bc.tar.gz |
SERVER-34102 Fix to prevent race between _handleTimePassing (stepdown timeout) and _startElectSelfIfEligibleV1 (election timeout) for pv1 single node replica set case.
(cherry picked from commit 678947e0836ccf6ebb0e9397e56ada985541bf14)
Diffstat (limited to 'src/mongo/db')
6 files changed, 71 insertions, 2 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp index 7bcfec46b9d..bccf4d27c2c 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp @@ -226,6 +226,10 @@ void ReplicationCoordinatorExternalStateMock::setAreSnapshotsEnabled(bool val) { _areSnapshotsEnabled = val; } +void ReplicationCoordinatorExternalStateMock::setElectionTimeoutOffsetLimitFraction(double val) { + _electionTimeoutOffsetLimitFraction = val; +} + void ReplicationCoordinatorExternalStateMock::notifyOplogMetadataWaiters( const OpTime& committedOpTime) {} @@ -235,7 +239,7 @@ boost::optional<OpTime> ReplicationCoordinatorExternalStateMock::getEarliestDrop } double ReplicationCoordinatorExternalStateMock::getElectionTimeoutOffsetLimitFraction() const { - return 0.15; + return _electionTimeoutOffsetLimitFraction; } bool ReplicationCoordinatorExternalStateMock::isReadCommittedSupportedByStorageEngine( diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.h b/src/mongo/db/repl/replication_coordinator_external_state_mock.h index fa57e07c6d6..30086dae707 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_mock.h +++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.h @@ -159,6 +159,11 @@ public: void setAreSnapshotsEnabled(bool val); /** + * Sets the election timeout offset limit. Default is 0.15. 
+ */ + void setElectionTimeoutOffsetLimitFraction(double val); + + /** * Noop */ virtual void setupNoopWriter(Seconds waitTime); @@ -191,6 +196,7 @@ private: bool _isReadCommittedSupported = true; bool _areSnapshotsEnabled = true; OpTime _firstOpTimeOfMyTerm; + double _electionTimeoutOffsetLimitFraction = 0.15; }; } // namespace repl diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index d16dfcaa613..a51399cca8c 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -1768,6 +1768,14 @@ void ReplicationCoordinatorImpl::_handleTimePassing( return; } + // For election protocol v1, call _startElectSelfIfEligibleV1 to avoid race + // against other elections caused by events like election timeout, replSetStepUp etc. + if (isV1ElectionProtocol()) { + _startElectSelfIfEligibleV1( + TopologyCoordinator::StartElectionReason::kSingleNodeStepDownTimeout); + return; + } + bool wonSingleNodeElection = [this]() { stdx::lock_guard<stdx::mutex> lk(_mutex); return _topCoord->becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(_replExecutor->now()); diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp index db261f273ac..6f19a1f944e 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp @@ -832,6 +832,11 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1( return; } stdx::lock_guard<stdx::mutex> lock(_mutex); + // If it is not a single node replica set, no need to start an election after stepdown timeout. + if (reason == TopologyCoordinator::StartElectionReason::kSingleNodeStepDownTimeout && + _rsConfig.getNumMembers() != 1) { + return; + } // We should always reschedule this callback even if we do not make it to the election // process. 
@@ -864,6 +869,10 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1( log() << "Not starting an election for a catchup takeover, " << "since we are not electable due to: " << status.reason(); break; + case TopologyCoordinator::StartElectionReason::kSingleNodeStepDownTimeout: + log() << "Not starting an election for a single node replica set stepdown timeout, " + << "since we are not electable due to: " << status.reason(); + break; } return; } @@ -882,6 +891,9 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1( case TopologyCoordinator::StartElectionReason::kCatchupTakeover: log() << "Starting an election for a catchup takeover"; break; + case TopologyCoordinator::StartElectionReason::kSingleNodeStepDownTimeout: + log() << "Starting an election due to single node replica set stepdown timeout"; + break; } _startElectSelfV1_inlock(reason); diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index 2835a550339..669a8e0e6bc 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -1836,6 +1836,44 @@ TEST_F( ASSERT_TRUE(getReplCoord()->getMemberState().secondary()); } +// This test checks if a primary is chosen even if there are two simultaneous elections +// happening because of election timeout and step-down timeout in a single node replica set. 
+TEST_F(ReplCoordTest, SingleNodeReplSetStepDownTimeoutAndElectionTimeoutExpiresAtTheSameTime) { + init(); + + assertStartSuccess(BSON("_id" + << "mySet" + << "version" + << 1 + << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "test1:1234")) + << "protocolVersion" + << 1 + << "settings" + << BSON("electionTimeoutMillis" << 1000)), + HostAndPort("test1", 1234)); + auto opCtx = makeOperationContext(); + getExternalState()->setElectionTimeoutOffsetLimitFraction(0); + runSingleNodeElection(opCtx.get()); + + // Stepdown command with "force=true" resets the election timer to election timeout (1 second + // later) and allows the node to resume primary after stepdown timeout (also 1 second). + ASSERT_OK(getReplCoord()->stepDown(opCtx.get(), true, Milliseconds(0), Milliseconds(1000))); + getNet()->enterNetwork(); + ASSERT_TRUE(getTopoCoord().getMemberState().secondary()); + ASSERT_TRUE(getReplCoord()->getMemberState().secondary()); + + // Now run time forward and make sure that the node becomes primary again when stepdown timeout + // and election timeout occur at the same time. + Date_t stepdownUntil = getNet()->now() + Seconds(1); + getNet()->runUntil(stepdownUntil); + ASSERT_EQUALS(stepdownUntil, getNet()->now()); + ASSERT_TRUE(getTopoCoord().getMemberState().primary()); + getNet()->exitNetwork(); + ASSERT_TRUE(getReplCoord()->getMemberState().primary()); +} + TEST_F(ReplCoordTest, NodeBecomesPrimaryAgainWhenStepDownTimeoutExpiresInASingleNodeSet) { init("mySet"); diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h index fd630827579..60ac2f83179 100644 --- a/src/mongo/db/repl/topology_coordinator.h +++ b/src/mongo/db/repl/topology_coordinator.h @@ -706,7 +706,8 @@ public: kElectionTimeout, kPriorityTakeover, kStepUpRequest, - kCatchupTakeover + kCatchupTakeover, + kSingleNodeStepDownTimeout }; /** |