summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVesselina Ratcheva <vesselina.ratcheva@10gen.com>2018-06-25 14:38:41 -0400
committerVesselina Ratcheva <vesselina.ratcheva@10gen.com>2018-08-07 12:54:26 -0400
commite38baa8d1416a6ab424856bb9a821ea67dc92b13 (patch)
tree5421cc237a71eee7a4f4ff29fd4bd24cfbcda3cb
parent8c8148e4ed16436c2a41ab9df53ecf39be7fc8a5 (diff)
downloadmongo-e38baa8d1416a6ab424856bb9a821ea67dc92b13.tar.gz
SERVER-35623 Add function for choosing node to step up for election handoff
(cherry picked from commit f5405e05491659a1bf7b975fd60124e44e71cdf6)
-rw-r--r--src/mongo/db/repl/topology_coordinator.cpp47
-rw-r--r--src/mongo/db/repl/topology_coordinator.h12
-rw-r--r--src/mongo/db/repl/topology_coordinator_v1_test.cpp217
3 files changed, 272 insertions, 4 deletions
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index f6127283273..aebc7254465 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -2797,6 +2797,14 @@ bool TopologyCoordinator::_canCompleteStepDownAttempt(Date_t now, Date_t waitUnt
return isSafeToStepDown();
}
+bool TopologyCoordinator::_isCaughtUpAndElectable(int memberIndex, OpTime lastApplied) {
+    // A member with any unelectable reason set is rejected outright.
+    const bool electable = !_getUnelectableReason(memberIndex);
+
+    // The short-circuit keeps the member-data lookup from running for unelectable members.
+    return electable && _memberData.at(memberIndex).getHeartbeatAppliedOpTime() >= lastApplied;
+}
+
bool TopologyCoordinator::isSafeToStepDown() {
if (!_rsConfig.isInitialized() || _selfIndex < 0) {
return false;
@@ -2813,15 +2821,12 @@ bool TopologyCoordinator::isSafeToStepDown() {
}
// Now check that we also have at least one caught up node that is electable.
- const OpTime lastOpApplied = getMyLastAppliedOpTime();
for (int memberIndex = 0; memberIndex < _rsConfig.getNumMembers(); memberIndex++) {
// ignore your self
if (memberIndex == _selfIndex) {
continue;
}
- UnelectableReasonMask reason = _getUnelectableReason(memberIndex);
- if (!reason && _memberData.at(memberIndex).getHeartbeatAppliedOpTime() >= lastOpApplied) {
- // Found a caught up and electable node, succeed with step down.
+ if (_isCaughtUpAndElectable(memberIndex, lastApplied)) {
return true;
}
}
@@ -2829,6 +2834,40 @@ bool TopologyCoordinator::isSafeToStepDown() {
return false;
}
+int TopologyCoordinator::chooseElectionHandoffCandidate() {
+    // Picks the member this node should hand an election off to: among the other members that
+    // are electable and caught up to our last applied opTime, the one with the highest priority,
+    // preferring the lowest member index on a tie. Returns that member's index, or -1 if no
+    // member qualifies.
+
+    OpTime lastApplied = getMyLastAppliedOpTime();
+
+    int bestCandidateIndex = -1;
+
+    // Track the best priority as a double. Storing a fractional priority in an int truncates it,
+    // after which a later member with a lower priority could still compare as strictly greater
+    // and displace the better candidate.
+    // NOTE(review): assumes getPriority() yields a floating-point value - confirm against
+    // MemberConfig. The change is harmless if it returns an integer.
+    double highestPriority = -1;
+
+    for (int memberIndex = 0; memberIndex < _rsConfig.getNumMembers(); memberIndex++) {
+
+        // Skip your own member index.
+        if (memberIndex == _selfIndex) {
+            continue;
+        }
+
+        // Skip this node if it is not eligible to become primary. This includes nodes with
+        // priority 0.
+        if (!_isCaughtUpAndElectable(memberIndex, lastApplied)) {
+            continue;
+        }
+
+        // Only update best if priority is strictly greater. This guarantees that
+        // we will pick the member with the lowest index in case of a tie. Note that
+        // member priority is always a non-negative number.
+        const double memberPriority = _rsConfig.getMemberAt(memberIndex).getPriority();
+        if (memberPriority > highestPriority) {
+            bestCandidateIndex = memberIndex;
+            highestPriority = memberPriority;
+        }
+    }
+
+    // This is the most suitable node, or -1 if none was found.
+    return bestCandidateIndex;
+}
+
void TopologyCoordinator::setFollowerMode(MemberState::MS newMode) {
invariant(_role == Role::kFollower);
switch (newMode) {
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index f598fad9194..049b50ffb80 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -650,6 +650,13 @@ public:
void finishUnconditionalStepDown();
/**
+ * Returns the index of the most suitable candidate for an election handoff, or -1 if no
+ * member qualifies. The node must be caught up and electable. Ties are resolved first by
+ * highest priority, then by lowest member index.
+ */
+ int chooseElectionHandoffCandidate();
+
+ /**
* Considers whether or not this node should stand for election, and returns true
* if the node has transitioned to candidate role as a result of the call.
*/
@@ -879,6 +886,11 @@ private:
*/
bool _canCompleteStepDownAttempt(Date_t now, Date_t waitUntil, bool force);
+ /**
+ * Returns true if a node is both caught up to our last applied opTime and electable.
+ */
+ bool _isCaughtUpAndElectable(int memberIndex, OpTime lastApplied);
+
void _stepDownSelfAndReplaceWith(int newPrimary);
/**
diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
index ee283790a80..26abcd9b9a3 100644
--- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
@@ -4769,6 +4769,223 @@ TEST_F(PrepareFreezeResponseTest, NodeDoesNotFreezeWhenToldToFreezeForSeveralSec
ASSERT_EQUALS(0LL, getTopoCoord().getStepDownTime().asInt64());
}
+TEST_F(TopoCoordTest, NoElectionHandoffCandidateInSingleNodeReplicaSet) {
+    // Config with a single member.
+    const auto soloConfig = BSON("_id"
+                                 << "rs0"
+                                 << "version"
+                                 << 2
+                                 << "members"
+                                 << BSON_ARRAY(BSON("_id" << 0 << "host"
+                                                          << "host0:27017")));
+    updateConfig(soloConfig, 0);
+
+    makeSelfPrimary();
+    const OpTime myOpTime(Timestamp(100, 0), getTopoCoord().getTerm());
+    setMyOpTime(myOpTime);
+
+    // With no other members in the set there is nobody to hand off to.
+    const int chosenIndex = getTopoCoord().chooseElectionHandoffCandidate();
+    ASSERT_EQUALS(-1, chosenIndex);
+}
+
+TEST_F(TopoCoordTest, NoElectionHandoffCandidateWithOneLaggedNode) {
+    // Two-member config, both members with default settings.
+    const auto twoNodeConfig = BSON("_id"
+                                    << "rs0"
+                                    << "version"
+                                    << 2
+                                    << "members"
+                                    << BSON_ARRAY(BSON("_id" << 0 << "host"
+                                                             << "host0:27017")
+                                                  << BSON("_id" << 1 << "host"
+                                                                << "host1:27017")));
+    updateConfig(twoNodeConfig, 0);
+
+    const auto currentTerm = getTopoCoord().getTerm();
+    makeSelfPrimary();
+
+    const OpTime ourOpTime(Timestamp(200, 0), currentTerm);
+    const OpTime staleOpTime(Timestamp(100, 0), currentTerm);
+    setMyOpTime(ourOpTime);
+
+    // Node1 could be elected, but its reported opTime trails ours.
+    heartbeatFromMember(HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, staleOpTime);
+
+    const int chosenIndex = getTopoCoord().chooseElectionHandoffCandidate();
+    ASSERT_EQUALS(-1, chosenIndex);
+}
+
+TEST_F(TopoCoordTest, NoElectionHandoffCandidateWithOneUnelectableNode) {
+    // Two-member config in which the second member has priority 0.
+    const auto configWithPriorityZeroPeer = BSON("_id"
+                                                 << "rs0"
+                                                 << "version"
+                                                 << 2
+                                                 << "members"
+                                                 << BSON_ARRAY(BSON("_id" << 0 << "host"
+                                                                          << "host0:27017")
+                                                               << BSON("_id" << 1 << "host"
+                                                                             << "host1:27017"
+                                                                             << "priority"
+                                                                             << 0)));
+    updateConfig(configWithPriorityZeroPeer, 0);
+
+    const auto currentTerm = getTopoCoord().getTerm();
+    makeSelfPrimary();
+
+    const OpTime sharedOpTime(Timestamp(100, 0), currentTerm);
+    setMyOpTime(sharedOpTime);
+
+    // Node1 reports the same opTime as ours, yet cannot be elected.
+    heartbeatFromMember(HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, sharedOpTime);
+
+    const int chosenIndex = getTopoCoord().chooseElectionHandoffCandidate();
+    ASSERT_EQUALS(-1, chosenIndex);
+}
+
+TEST_F(TopoCoordTest, NoElectionHandoffCandidateWithOneLaggedAndOneUnelectableNode) {
+    // Three-member config; the last member has priority 0.
+    const auto threeNodeConfig = BSON("_id"
+                                      << "rs0"
+                                      << "version"
+                                      << 2
+                                      << "members"
+                                      << BSON_ARRAY(BSON("_id" << 0 << "host"
+                                                               << "host0:27017")
+                                                    << BSON("_id" << 1 << "host"
+                                                                  << "host1:27017")
+                                                    << BSON("_id" << 2 << "host"
+                                                                  << "host2:27017"
+                                                                  << "priority"
+                                                                  << 0)));
+    updateConfig(threeNodeConfig, 0);
+
+    const auto currentTerm = getTopoCoord().getTerm();
+    makeSelfPrimary();
+
+    const OpTime ourOpTime(Timestamp(200, 0), currentTerm);
+    const OpTime staleOpTime(Timestamp(100, 0), currentTerm);
+    setMyOpTime(ourOpTime);
+
+    // Node1 could be elected, but its reported opTime trails ours.
+    heartbeatFromMember(HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, staleOpTime);
+    // Node2 matches our opTime, yet cannot be elected.
+    heartbeatFromMember(HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
+
+    // Neither member satisfies both requirements.
+    const int chosenIndex = getTopoCoord().chooseElectionHandoffCandidate();
+    ASSERT_EQUALS(-1, chosenIndex);
+}
+
+TEST_F(TopoCoordTest, ExactlyOneNodeEligibleForElectionHandoffOutOfOneSecondary) {
+    // Two-member config, both members with default settings.
+    const auto twoNodeConfig = BSON("_id"
+                                    << "rs0"
+                                    << "version"
+                                    << 2
+                                    << "members"
+                                    << BSON_ARRAY(BSON("_id" << 0 << "host"
+                                                             << "host0:27017")
+                                                  << BSON("_id" << 1 << "host"
+                                                                << "host1:27017")));
+    updateConfig(twoNodeConfig, 0);
+
+    const auto currentTerm = getTopoCoord().getTerm();
+    makeSelfPrimary();
+
+    const OpTime sharedOpTime(Timestamp(100, 0), currentTerm);
+    setMyOpTime(sharedOpTime);
+
+    // Node1 matches our opTime and can be elected.
+    heartbeatFromMember(HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, sharedOpTime);
+
+    const int chosenIndex = getTopoCoord().chooseElectionHandoffCandidate();
+    ASSERT_EQUALS(1, chosenIndex);
+}
+
+TEST_F(TopoCoordTest, ExactlyOneNodeEligibleForElectionHandoffOutOfThreeSecondaries) {
+    // Four-member config; the member at index 1 has priority 0.
+    const auto fourNodeConfig = BSON("_id"
+                                     << "rs0"
+                                     << "version"
+                                     << 2
+                                     << "members"
+                                     << BSON_ARRAY(BSON("_id" << 0 << "host"
+                                                              << "host0:27017")
+                                                   << BSON("_id" << 1 << "host"
+                                                                 << "host1:27017"
+                                                                 << "priority"
+                                                                 << 0)
+                                                   << BSON("_id" << 2 << "host"
+                                                                 << "host2:27017")
+                                                   << BSON("_id" << 3 << "host"
+                                                                 << "host3:27017")));
+    updateConfig(fourNodeConfig, 0);
+
+    const auto currentTerm = getTopoCoord().getTerm();
+    makeSelfPrimary();
+
+    const OpTime ourOpTime(Timestamp(200, 0), currentTerm);
+    const OpTime staleOpTime(Timestamp(100, 0), currentTerm);
+    setMyOpTime(ourOpTime);
+
+    // Node1 matches our opTime, yet cannot be elected.
+    heartbeatFromMember(HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
+
+    // Node2 could be elected, but its reported opTime trails ours.
+    heartbeatFromMember(HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, staleOpTime);
+
+    // Node3 matches our opTime and can be elected.
+    heartbeatFromMember(HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
+
+    // Only node3 satisfies both requirements.
+    const int chosenIndex = getTopoCoord().chooseElectionHandoffCandidate();
+    ASSERT_EQUALS(3, chosenIndex);
+}
+
+TEST_F(TopoCoordTest, TwoNodesEligibleForElectionHandoffResolveByPriority) {
+    // Three-member config; the last member is given priority 5.
+    const auto threeNodeConfig = BSON("_id"
+                                      << "rs0"
+                                      << "version"
+                                      << 2
+                                      << "members"
+                                      << BSON_ARRAY(BSON("_id" << 0 << "host"
+                                                               << "host0:27017")
+                                                    << BSON("_id" << 1 << "host"
+                                                                  << "host1:27017")
+                                                    << BSON("_id" << 2 << "host"
+                                                                  << "host2:27017"
+                                                                  << "priority"
+                                                                  << 5)));
+    updateConfig(threeNodeConfig, 0);
+
+    const auto currentTerm = getTopoCoord().getTerm();
+    makeSelfPrimary();
+
+    const OpTime sharedOpTime(Timestamp(100, 0), currentTerm);
+    setMyOpTime(sharedOpTime);
+
+    // Node1 matches our opTime and has default priority (1).
+    heartbeatFromMember(HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, sharedOpTime);
+
+    // Node2 matches our opTime and has priority 5.
+    heartbeatFromMember(HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, sharedOpTime);
+
+    // Both candidates report the same opTime, so the higher priority decides.
+    const int chosenIndex = getTopoCoord().chooseElectionHandoffCandidate();
+    ASSERT_EQUALS(2, chosenIndex);
+}
+
+TEST_F(TopoCoordTest, TwoNodesEligibleForElectionHandoffEqualPriorityResolveByMemberId) {
+    // Three-member config, all members with default settings.
+    const auto threeNodeConfig = BSON("_id"
+                                      << "rs0"
+                                      << "version"
+                                      << 2
+                                      << "members"
+                                      << BSON_ARRAY(BSON("_id" << 0 << "host"
+                                                               << "host0:27017")
+                                                    << BSON("_id" << 1 << "host"
+                                                                  << "host1:27017")
+                                                    << BSON("_id" << 2 << "host"
+                                                                  << "host2:27017")));
+    updateConfig(threeNodeConfig, 0);
+
+    const auto currentTerm = getTopoCoord().getTerm();
+    makeSelfPrimary();
+
+    const OpTime sharedOpTime(Timestamp(100, 0), currentTerm);
+    setMyOpTime(sharedOpTime);
+
+    // Node1 matches our opTime and has default priority (1).
+    heartbeatFromMember(HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, sharedOpTime);
+
+    // Node2 matches our opTime and has default priority (1).
+    heartbeatFromMember(HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, sharedOpTime);
+
+    // Same opTime and same priority: the member with the lowest index is chosen.
+    const int chosenIndex = getTopoCoord().chooseElectionHandoffCandidate();
+    ASSERT_EQUALS(1, chosenIndex);
+}
+
TEST_F(HeartbeatResponseTestV1,
ScheduleACatchupTakeoverWhenElectableAndReceiveHeartbeatFromPrimaryInCatchup) {
updateConfig(BSON("_id"