Diffstat (limited to 'src/mongo/db')
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_mock.cpp  |  6
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_mock.h    |  6
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp                 |  8
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp       | 12
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_test.cpp            | 38
-rw-r--r--  src/mongo/db/repl/topology_coordinator.h                           |  3
6 files changed, 71 insertions, 2 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
index 7bcfec46b9d..bccf4d27c2c 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
@@ -226,6 +226,10 @@ void ReplicationCoordinatorExternalStateMock::setAreSnapshotsEnabled(bool val) {
_areSnapshotsEnabled = val;
}
+void ReplicationCoordinatorExternalStateMock::setElectionTimeoutOffsetLimitFraction(double val) {
+ _electionTimeoutOffsetLimitFraction = val;
+}
+
void ReplicationCoordinatorExternalStateMock::notifyOplogMetadataWaiters(
const OpTime& committedOpTime) {}
@@ -235,7 +239,7 @@ boost::optional<OpTime> ReplicationCoordinatorExternalStateMock::getEarliestDrop
}
double ReplicationCoordinatorExternalStateMock::getElectionTimeoutOffsetLimitFraction() const {
- return 0.15;
+ return _electionTimeoutOffsetLimitFraction;
}
bool ReplicationCoordinatorExternalStateMock::isReadCommittedSupportedByStorageEngine(
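
An aside on why the hard-coded 0.15 in the mock became configurable: the coordinator adds a random offset to the configured election timeout, and this fraction bounds that offset. The following is a minimal sketch of that relationship only, not the actual implementation (the helper name and the use of std::rand are illustrative); it just shows why forcing the fraction to 0, as the new test below does, makes the election timer deterministic.

    #include <cstdlib>

    // Minimal sketch, not MongoDB's implementation: the random offset added to the
    // election timeout is bounded by electionTimeoutMillis * fraction, so forcing the
    // fraction to 0 removes the jitter and the timer fires at exactly electionTimeoutMillis.
    long long randomizedElectionTimeoutMillis(long long electionTimeoutMillis, double fraction) {
        const long long upperBound = static_cast<long long>(electionTimeoutMillis * fraction);
        const long long offset = (upperBound > 0) ? (std::rand() % (upperBound + 1)) : 0;
        return electionTimeoutMillis + offset;
    }
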
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.h b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
index fa57e07c6d6..30086dae707 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
@@ -159,6 +159,11 @@ public:
void setAreSnapshotsEnabled(bool val);
/**
+ * Sets the election timeout offset limit fraction. Default is 0.15.
+ */
+ void setElectionTimeoutOffsetLimitFraction(double val);
+
+ /**
* Noop
*/
virtual void setupNoopWriter(Seconds waitTime);
@@ -191,6 +196,7 @@ private:
bool _isReadCommittedSupported = true;
bool _areSnapshotsEnabled = true;
OpTime _firstOpTimeOfMyTerm;
+ double _electionTimeoutOffsetLimitFraction = 0.15;
};
} // namespace repl
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index d16dfcaa613..a51399cca8c 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1768,6 +1768,14 @@ void ReplicationCoordinatorImpl::_handleTimePassing(
return;
}
+ // For election protocol v1, call _startElectSelfIfEligibleV1 to avoid racing
+ // against other elections triggered by events such as election timeout or replSetStepUp.
+ if (isV1ElectionProtocol()) {
+ _startElectSelfIfEligibleV1(
+ TopologyCoordinator::StartElectionReason::kSingleNodeStepDownTimeout);
+ return;
+ }
+
bool wonSingleNodeElection = [this]() {
stdx::lock_guard<stdx::mutex> lk(_mutex);
return _topCoord->becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(_replExecutor->now());
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index db261f273ac..6f19a1f944e 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -832,6 +832,11 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(
return;
}
stdx::lock_guard<stdx::mutex> lock(_mutex);
+ // If this is not a single-node replica set, there is no need to start an election after the stepdown timeout.
+ if (reason == TopologyCoordinator::StartElectionReason::kSingleNodeStepDownTimeout &&
+ _rsConfig.getNumMembers() != 1) {
+ return;
+ }
// We should always reschedule this callback even if we do not make it to the election
// process.
@@ -864,6 +869,10 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(
log() << "Not starting an election for a catchup takeover, "
<< "since we are not electable due to: " << status.reason();
break;
+ case TopologyCoordinator::StartElectionReason::kSingleNodeStepDownTimeout:
+ log() << "Not starting an election for a single node replica set stepdown timeout, "
+ << "since we are not electable due to: " << status.reason();
+ break;
}
return;
}
@@ -882,6 +891,9 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(
case TopologyCoordinator::StartElectionReason::kCatchupTakeover:
log() << "Starting an election for a catchup takeover";
break;
+ case TopologyCoordinator::StartElectionReason::kSingleNodeStepDownTimeout:
+ log() << "Starting an election due to single node replica set stepdown timeout";
+ break;
}
_startElectSelfV1_inlock(reason);
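
Taken together, the heartbeat changes make kSingleNodeStepDownTimeout a regular election trigger: it enters through the same locked _startElectSelfIfEligibleV1 entry point as the other reasons, is ignored unless the config has exactly one member, and gets its own log lines. A condensed, self-contained sketch of that dispatch shape follows; the names and structure are simplified stand-ins, not the real coordinator.

    #include <iostream>
    #include <mutex>

    // Condensed sketch of the dispatch shape this patch creates (illustrative only).
    enum class StartElectionReason {
        kElectionTimeout,
        kPriorityTakeover,
        kStepUpRequest,
        kCatchupTakeover,
        kSingleNodeStepDownTimeout
    };

    struct MiniCoordinator {
        std::mutex mtx;
        int numMembers = 1;
        bool electable = true;

        void startElectSelfIfEligibleV1(StartElectionReason reason) {
            std::lock_guard<std::mutex> lock(mtx);  // one lock serializes every election trigger
            // The new reason is only meaningful for a one-node set; otherwise ignore it.
            if (reason == StartElectionReason::kSingleNodeStepDownTimeout && numMembers != 1) {
                return;
            }
            if (!electable) {
                std::cout << "Not starting an election\n";  // the real code logs a per-reason message
                return;
            }
            std::cout << "Starting an election\n";  // likewise logged per reason before electing
            // ... the real coordinator then calls _startElectSelfV1_inlock(reason) ...
        }
    };
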
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index 2835a550339..669a8e0e6bc 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -1836,6 +1836,44 @@ TEST_F(
ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
}
+// This test checks that a primary is still elected when the election timeout and the stepdown
+// timeout trigger two simultaneous elections in a single-node replica set.
+TEST_F(ReplCoordTest, SingleNodeReplSetStepDownTimeoutAndElectionTimeoutExpiresAtTheSameTime) {
+ init();
+
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version"
+ << 1
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234"))
+ << "protocolVersion"
+ << 1
+ << "settings"
+ << BSON("electionTimeoutMillis" << 1000)),
+ HostAndPort("test1", 1234));
+ auto opCtx = makeOperationContext();
+ getExternalState()->setElectionTimeoutOffsetLimitFraction(0);
+ runSingleNodeElection(opCtx.get());
+
+ // A stepdown command with "force=true" resets the election timer to fire one election timeout
+ // later (1 second here) and allows the node to resume primary after the stepdown period (also
+ // 1 second).
+ ASSERT_OK(getReplCoord()->stepDown(opCtx.get(), true, Milliseconds(0), Milliseconds(1000)));
+ getNet()->enterNetwork();
+ ASSERT_TRUE(getTopoCoord().getMemberState().secondary());
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+
+ // Now run the clock forward and make sure that the node becomes primary again when the stepdown
+ // timeout and the election timeout expire at the same time.
+ Date_t stepdownUntil = getNet()->now() + Seconds(1);
+ getNet()->runUntil(stepdownUntil);
+ ASSERT_EQUALS(stepdownUntil, getNet()->now());
+ ASSERT_TRUE(getTopoCoord().getMemberState().primary());
+ getNet()->exitNetwork();
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+}
+
TEST_F(ReplCoordTest, NodeBecomesPrimaryAgainWhenStepDownTimeoutExpiresInASingleNodeSet) {
init("mySet");
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index fd630827579..60ac2f83179 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -706,7 +706,8 @@ public:
kElectionTimeout,
kPriorityTakeover,
kStepUpRequest,
- kCatchupTakeover
+ kCatchupTakeover,
+ kSingleNodeStepDownTimeout
};
/**