diff options
author | Pavithra Vetriselvan <pavithra.vetriselvan@mongodb.com> | 2017-11-29 16:39:54 -0500 |
---|---|---|
committer | William Schultz <william.schultz@mongodb.com> | 2017-12-11 11:21:00 -0500 |
commit | 1307027473db00bf0da76113113046e0f98193a4 (patch) | |
tree | db4f80b2eb0a53cb3d614569e6464ab8bda6a63b | |
parent | fbd86ada63c3356f70a4b59b5f925e4fdee16679 (diff) | |
download | mongo-1307027473db00bf0da76113113046e0f98193a4.tar.gz |
SERVER-30457 cancel catchup takeover if primary is caught up
(cherry picked from commit 4174a84257760cae2ea9fdb26e8d3e65feadf253)
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp | 44 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp | 10 |
2 files changed, 43 insertions, 11 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp index 5b2ef14fc62..d22fc9f6a13 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp @@ -1003,6 +1003,10 @@ TEST_F(TakeoverTest, SchedulesCatchupTakeoverIfNodeIsFresherThanCurrentPrimary) OperationContextNoop opCtx; OpTime currentOptime(Timestamp(200, 1), 0); + // Update the current term to simulate a scenario where an election has occured + // and some other node became the new primary. Once you hear about a primary election + // in term 1, your term will be increased. + replCoord->updateTerm_forTest(1, nullptr); replCoord->setMyLastAppliedOpTime(currentOptime); replCoord->setMyLastDurableOpTime(currentOptime); OpTime behindOptime(Timestamp(100, 1), 0); @@ -1048,6 +1052,10 @@ TEST_F(TakeoverTest, SchedulesCatchupTakeoverIfBothTakeoversAnOption) { OperationContextNoop opCtx; OpTime currentOptime(Timestamp(200, 1), 0); + // Update the current term to simulate a scenario where an election has occured + // and some other node became the new primary. Once you hear about a primary election + // in term 1, your term will be increased. + replCoord->updateTerm_forTest(1, nullptr); replCoord->setMyLastAppliedOpTime(currentOptime); replCoord->setMyLastDurableOpTime(currentOptime); OpTime behindOptime(Timestamp(100, 1), 0); @@ -1092,6 +1100,10 @@ TEST_F(TakeoverTest, CatchupTakeoverNotScheduledTwice) { OperationContextNoop opCtx; OpTime currentOptime(Timestamp(200, 1), 0); + // Update the current term to simulate a scenario where an election has occured + // and some other node became the new primary. Once you hear about a primary election + // in term 1, your term will be increased. + replCoord->updateTerm_forTest(1, nullptr); replCoord->setMyLastAppliedOpTime(currentOptime); replCoord->setMyLastDurableOpTime(currentOptime); OpTime behindOptime(Timestamp(100, 1), 0); @@ -1147,6 +1159,10 @@ TEST_F(TakeoverTest, CatchupAndPriorityTakeoverNotScheduledAtSameTime) { OperationContextNoop opCtx; OpTime currentOptime(Timestamp(200, 1), 0); + // Update the current term to simulate a scenario where an election has occured + // and some other node became the new primary. Once you hear about a primary election + // in term 1, your term will be increased. + replCoord->updateTerm_forTest(1, nullptr); replCoord->setMyLastAppliedOpTime(currentOptime); replCoord->setMyLastDurableOpTime(currentOptime); OpTime behindOptime(Timestamp(100, 1), 0); @@ -1167,18 +1183,16 @@ TEST_F(TakeoverTest, CatchupAndPriorityTakeoverNotScheduledAtSameTime) { Milliseconds catchupTakeoverDelay = catchupTakeoverTime - now; ASSERT_EQUALS(config.getCatchUpTakeoverDelay(), catchupTakeoverDelay); - // Mock another heartbeat where the primary is now up to date + // Create a new OpTime so that the primary's last applied OpTime will be in the current term. + OpTime caughtupOptime(Timestamp(300, 1), 1); + // Mock another heartbeat where the primary is now up to date. now = respondToHeartbeatsUntil( - config, now + catchupTakeoverDelay / 2, HostAndPort("node2", 12345), currentOptime); + config, now + catchupTakeoverDelay / 2, HostAndPort("node2", 12345), caughtupOptime); - // Since we are no longer ahead of the primary, we can't schedule a catchup - // takeover anymore. But we are still higher priority than the primary, so - // after the heartbeat we will try to schedule a priority takeover. - // Because we can't schedule two takeovers at the same time and the - // catchup takeover hasn't fired yet, make sure that we don't schedule a - // priority takeover. - ASSERT(replCoord->getCatchupTakeover_forTest()); - ASSERT_FALSE(replCoord->getPriorityTakeover_forTest()); + // Since the primary has caught up, we cancel the scheduled catchup takeover. + // But we are still higher priority than the primary, so after the heartbeat + // we will schedule a priority takeover. + ASSERT(replCoord->getPriorityTakeover_forTest()); } TEST_F(TakeoverTest, CatchupTakeoverCallbackCanceledIfElectionTimeoutRuns) { @@ -1203,6 +1217,10 @@ TEST_F(TakeoverTest, CatchupTakeoverCallbackCanceledIfElectionTimeoutRuns) { OperationContextNoop opCtx; OpTime currentOptime(Timestamp(200, 1), 0); + // Update the current term to simulate a scenario where an election has occured + // and some other node became the new primary. Once you hear about a primary election + // in term 1, your term will be increased. + replCoord->updateTerm_forTest(1, nullptr); replCoord->setMyLastAppliedOpTime(currentOptime); replCoord->setMyLastDurableOpTime(currentOptime); OpTime behindOptime(Timestamp(100, 1), 0); @@ -1273,6 +1291,10 @@ TEST_F(TakeoverTest, CatchupTakeoverCanceledIfTransitionToRollback) { OperationContextNoop opCtx; OpTime currentOptime(Timestamp(200, 1), 0); + // Update the current term to simulate a scenario where an election has occured + // and some other node became the new primary. Once you hear about a primary election + // in term 1, your term will be increased. + replCoord->updateTerm_forTest(1, nullptr); replCoord->setMyLastAppliedOpTime(currentOptime); replCoord->setMyLastDurableOpTime(currentOptime); OpTime behindOptime(Timestamp(100, 1), 0); @@ -2483,4 +2505,4 @@ TEST_F(PrimaryCatchUpTest, ZeroTimeout) { } // namespace } // namespace repl -} // namespace mongo +} // namespace mongo
\ No newline at end of file diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp index 8a72ffb7f2d..b542f445f90 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp @@ -232,6 +232,16 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse( _catchupState->signalHeartbeatUpdate_inlock(); } + // Cancel catchup takeover if the last applied write by any node in the replica set was made + // in the current term, which implies that the primary has caught up. + bool catchupTakeoverScheduled = _catchupTakeoverCbh.isValid(); + if (responseStatus.isOK() && catchupTakeoverScheduled && hbResponse.hasAppliedOpTime()) { + const auto& hbLastAppliedOpTime = hbResponse.getAppliedOpTime(); + if (hbLastAppliedOpTime.getTerm() == _topCoord->getTerm()) { + _cancelCatchupTakeover_inlock(); + } + } + _scheduleHeartbeatToTarget_inlock( target, targetIndex, std::max(now, action.getNextHeartbeatStartDate())); |