diff options
author | Vesselina Ratcheva <vesselina.ratcheva@10gen.com> | 2018-06-25 14:38:41 -0400 |
---|---|---|
committer | Vesselina Ratcheva <vesselina.ratcheva@10gen.com> | 2018-08-07 12:54:26 -0400 |
commit | e38baa8d1416a6ab424856bb9a821ea67dc92b13 (patch) | |
tree | 5421cc237a71eee7a4f4ff29fd4bd24cfbcda3cb | |
parent | 8c8148e4ed16436c2a41ab9df53ecf39be7fc8a5 (diff) | |
download | mongo-e38baa8d1416a6ab424856bb9a821ea67dc92b13.tar.gz |
SERVER-35623 Add function for choosing node to step up for election handoff
(cherry picked from commit f5405e05491659a1bf7b975fd60124e44e71cdf6)
-rw-r--r-- | src/mongo/db/repl/topology_coordinator.cpp | 47 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator.h | 12 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator_v1_test.cpp | 217 |
3 files changed, 272 insertions, 4 deletions
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp index f6127283273..aebc7254465 100644 --- a/src/mongo/db/repl/topology_coordinator.cpp +++ b/src/mongo/db/repl/topology_coordinator.cpp @@ -2797,6 +2797,14 @@ bool TopologyCoordinator::_canCompleteStepDownAttempt(Date_t now, Date_t waitUnt return isSafeToStepDown(); } +bool TopologyCoordinator::_isCaughtUpAndElectable(int memberIndex, OpTime lastApplied) { + if (_getUnelectableReason(memberIndex)) { + return false; + } + + return (_memberData.at(memberIndex).getHeartbeatAppliedOpTime() >= lastApplied); +} + bool TopologyCoordinator::isSafeToStepDown() { if (!_rsConfig.isInitialized() || _selfIndex < 0) { return false; @@ -2813,15 +2821,12 @@ bool TopologyCoordinator::isSafeToStepDown() { } // Now check that we also have at least one caught up node that is electable. - const OpTime lastOpApplied = getMyLastAppliedOpTime(); for (int memberIndex = 0; memberIndex < _rsConfig.getNumMembers(); memberIndex++) { // ignore your self if (memberIndex == _selfIndex) { continue; } - UnelectableReasonMask reason = _getUnelectableReason(memberIndex); - if (!reason && _memberData.at(memberIndex).getHeartbeatAppliedOpTime() >= lastOpApplied) { - // Found a caught up and electable node, succeed with step down. + if (_isCaughtUpAndElectable(memberIndex, lastApplied)) { return true; } } @@ -2829,6 +2834,40 @@ bool TopologyCoordinator::isSafeToStepDown() { return false; } +int TopologyCoordinator::chooseElectionHandoffCandidate() { + + OpTime lastApplied = getMyLastAppliedOpTime(); + + int bestCandidateIndex = -1; + int highestPriority = -1; + + for (int memberIndex = 0; memberIndex < _rsConfig.getNumMembers(); memberIndex++) { + + // Skip your own member index. + if (memberIndex == _selfIndex) { + continue; + } + + // Skip this node if it is not eligible to become primary. This includes nodes with + // priority 0. 
+ if (!_isCaughtUpAndElectable(memberIndex, lastApplied)) { + continue; + } + + // Only update best if priority is strictly greater. This guarantees that + // we will pick the member with the lowest index in case of a tie. Note that + // member priority is always a non-negative number. + auto memberPriority = _rsConfig.getMemberAt(memberIndex).getPriority(); + if (memberPriority > highestPriority) { + bestCandidateIndex = memberIndex; + highestPriority = memberPriority; + } + } + + // This is the most suitable node. + return bestCandidateIndex; +} + void TopologyCoordinator::setFollowerMode(MemberState::MS newMode) { invariant(_role == Role::kFollower); switch (newMode) { diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h index f598fad9194..049b50ffb80 100644 --- a/src/mongo/db/repl/topology_coordinator.h +++ b/src/mongo/db/repl/topology_coordinator.h @@ -650,6 +650,13 @@ public: void finishUnconditionalStepDown(); /** + * Returns the index of the most suitable candidate for an election handoff. The node must be + * caught up and electable. Ties are resolved first by highest priority, then by lowest member + * index. + */ + int chooseElectionHandoffCandidate(); + + /** * Considers whether or not this node should stand for election, and returns true * if the node has transitioned to candidate role as a result of the call. */ @@ -879,6 +886,11 @@ private: */ bool _canCompleteStepDownAttempt(Date_t now, Date_t waitUntil, bool force); + /** + * Returns true if a node is both caught up to our last applied opTime and electable. 
+ */ + bool _isCaughtUpAndElectable(int memberIndex, OpTime lastApplied); + void _stepDownSelfAndReplaceWith(int newPrimary); /** diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp index ee283790a80..26abcd9b9a3 100644 --- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp @@ -4769,6 +4769,223 @@ TEST_F(PrepareFreezeResponseTest, NodeDoesNotFreezeWhenToldToFreezeForSeveralSec ASSERT_EQUALS(0LL, getTopoCoord().getStepDownTime().asInt64()); } +TEST_F(TopoCoordTest, NoElectionHandoffCandidateInSingleNodeReplicaSet) { + updateConfig(BSON("_id" + << "rs0" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "host0:27017"))), + 0); + + makeSelfPrimary(); + setMyOpTime(OpTime(Timestamp(100, 0), getTopoCoord().getTerm())); + + // There are no other nodes in the set. + ASSERT_EQUALS(-1, getTopoCoord().chooseElectionHandoffCandidate()); +} + +TEST_F(TopoCoordTest, NoElectionHandoffCandidateWithOneLaggedNode) { + updateConfig(BSON("_id" + << "rs0" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "host0:27017") + << BSON("_id" << 1 << "host" + << "host1:27017"))), + 0); + + const auto term = getTopoCoord().getTerm(); + makeSelfPrimary(); + setMyOpTime(OpTime(Timestamp(200, 0), term)); + + // Node1 is electable, but not caught up. 
+ heartbeatFromMember( + HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(100, 0), term)); + + ASSERT_EQUALS(-1, getTopoCoord().chooseElectionHandoffCandidate()); +} + +TEST_F(TopoCoordTest, NoElectionHandoffCandidateWithOneUnelectableNode) { + updateConfig(BSON("_id" + << "rs0" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "host0:27017") + << BSON("_id" << 1 << "host" + << "host1:27017" + << "priority" + << 0))), + 0); + + const auto term = getTopoCoord().getTerm(); + makeSelfPrimary(); + setMyOpTime(OpTime(Timestamp(100, 0), term)); + + // Node1 is caught up, but not electable. + heartbeatFromMember( + HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(100, 0), term)); + + ASSERT_EQUALS(-1, getTopoCoord().chooseElectionHandoffCandidate()); +} + +TEST_F(TopoCoordTest, NoElectionHandoffCandidateWithOneLaggedAndOneUnelectableNode) { + updateConfig(BSON("_id" + << "rs0" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "host0:27017") + << BSON("_id" << 1 << "host" + << "host1:27017") + << BSON("_id" << 2 << "host" + << "host2:27017" + << "priority" + << 0))), + 0); + + const auto term = getTopoCoord().getTerm(); + makeSelfPrimary(); + setMyOpTime(OpTime(Timestamp(200, 0), term)); + + // Node1 is electable, but not caught up. + heartbeatFromMember( + HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(100, 0), term)); + // Node2 is caught up, but not electable. 
+ heartbeatFromMember( + HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(200, 0), term)); + + ASSERT_EQUALS(-1, getTopoCoord().chooseElectionHandoffCandidate()); +} + +TEST_F(TopoCoordTest, ExactlyOneNodeEligibleForElectionHandoffOutOfOneSecondary) { + updateConfig(BSON("_id" + << "rs0" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "host0:27017") + << BSON("_id" << 1 << "host" + << "host1:27017"))), + 0); + + const auto term = getTopoCoord().getTerm(); + makeSelfPrimary(); + setMyOpTime(OpTime(Timestamp(100, 0), term)); + + // Node1 is caught up and electable. + heartbeatFromMember( + HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(100, 0), term)); + + ASSERT_EQUALS(1, getTopoCoord().chooseElectionHandoffCandidate()); +} + +TEST_F(TopoCoordTest, ExactlyOneNodeEligibleForElectionHandoffOutOfThreeSecondaries) { + updateConfig(BSON("_id" + << "rs0" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "host0:27017") + << BSON("_id" << 1 << "host" + << "host1:27017" + << "priority" + << 0) + << BSON("_id" << 2 << "host" + << "host2:27017") + << BSON("_id" << 3 << "host" + << "host3:27017"))), + 0); + + const auto term = getTopoCoord().getTerm(); + makeSelfPrimary(); + setMyOpTime(OpTime(Timestamp(200, 0), term)); + + // Node1 is caught up, but not electable. + heartbeatFromMember( + HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(200, 0), term)); + + // Node2 is electable, but not caught up. + heartbeatFromMember( + HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(100, 0), term)); + + // Node3 is caught up and electable. 
+ heartbeatFromMember( + HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(200, 0), term)); + + ASSERT_EQUALS(3, getTopoCoord().chooseElectionHandoffCandidate()); +} + +TEST_F(TopoCoordTest, TwoNodesEligibleForElectionHandoffResolveByPriority) { + updateConfig(BSON("_id" + << "rs0" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "host0:27017") + << BSON("_id" << 1 << "host" + << "host1:27017") + << BSON("_id" << 2 << "host" + << "host2:27017" + << "priority" + << 5))), + 0); + + const auto term = getTopoCoord().getTerm(); + makeSelfPrimary(); + setMyOpTime(OpTime(Timestamp(100, 0), term)); + + // Node1 is caught up and has default priority (1). + heartbeatFromMember( + HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(100, 0), term)); + + // Node2 is caught up and has priority 5. + heartbeatFromMember( + HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(100, 0), term)); + + // Candidates tied in opTime. Choose node with highest priority. + ASSERT_EQUALS(2, getTopoCoord().chooseElectionHandoffCandidate()); +} + +TEST_F(TopoCoordTest, TwoNodesEligibleForElectionHandoffEqualPriorityResolveByMemberId) { + updateConfig(BSON("_id" + << "rs0" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "host0:27017") + << BSON("_id" << 1 << "host" + << "host1:27017") + << BSON("_id" << 2 << "host" + << "host2:27017"))), + 0); + + const auto term = getTopoCoord().getTerm(); + makeSelfPrimary(); + setMyOpTime(OpTime(Timestamp(100, 0), term)); + + // Node1 is caught up and has default priority (1). + heartbeatFromMember( + HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(100, 0), term)); + + // Node2 is caught up and has default priority (1). + heartbeatFromMember( + HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(100, 0), term)); + + // Candidates tied in opTime and priority. 
Choose node with lowest member index. + ASSERT_EQUALS(1, getTopoCoord().chooseElectionHandoffCandidate()); +} + TEST_F(HeartbeatResponseTestV1, ScheduleACatchupTakeoverWhenElectableAndReceiveHeartbeatFromPrimaryInCatchup) { updateConfig(BSON("_id" |