summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Pavithra Vetriselvan <pavithra.vetriselvan@mongodb.com> 2017-11-29 16:39:54 -0500
committer William Schultz <william.schultz@mongodb.com> 2017-12-11 11:21:00 -0500
commit 1307027473db00bf0da76113113046e0f98193a4 (patch)
tree db4f80b2eb0a53cb3d614569e6464ab8bda6a63b
parent fbd86ada63c3356f70a4b59b5f925e4fdee16679 (diff)
download mongo-1307027473db00bf0da76113113046e0f98193a4.tar.gz
SERVER-30457 cancel catchup takeover if primary is caught up
(cherry picked from commit 4174a84257760cae2ea9fdb26e8d3e65feadf253)
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp 44
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp 10
2 files changed, 43 insertions, 11 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
index 5b2ef14fc62..d22fc9f6a13 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
@@ -1003,6 +1003,10 @@ TEST_F(TakeoverTest, SchedulesCatchupTakeoverIfNodeIsFresherThanCurrentPrimary)
OperationContextNoop opCtx;
OpTime currentOptime(Timestamp(200, 1), 0);
+ // Update the current term to simulate a scenario where an election has occurred
+ // and some other node became the new primary. Once you hear about a primary election
+ // in term 1, your term will be increased.
+ replCoord->updateTerm_forTest(1, nullptr);
replCoord->setMyLastAppliedOpTime(currentOptime);
replCoord->setMyLastDurableOpTime(currentOptime);
OpTime behindOptime(Timestamp(100, 1), 0);
@@ -1048,6 +1052,10 @@ TEST_F(TakeoverTest, SchedulesCatchupTakeoverIfBothTakeoversAnOption) {
OperationContextNoop opCtx;
OpTime currentOptime(Timestamp(200, 1), 0);
+ // Update the current term to simulate a scenario where an election has occurred
+ // and some other node became the new primary. Once you hear about a primary election
+ // in term 1, your term will be increased.
+ replCoord->updateTerm_forTest(1, nullptr);
replCoord->setMyLastAppliedOpTime(currentOptime);
replCoord->setMyLastDurableOpTime(currentOptime);
OpTime behindOptime(Timestamp(100, 1), 0);
@@ -1092,6 +1100,10 @@ TEST_F(TakeoverTest, CatchupTakeoverNotScheduledTwice) {
OperationContextNoop opCtx;
OpTime currentOptime(Timestamp(200, 1), 0);
+ // Update the current term to simulate a scenario where an election has occurred
+ // and some other node became the new primary. Once you hear about a primary election
+ // in term 1, your term will be increased.
+ replCoord->updateTerm_forTest(1, nullptr);
replCoord->setMyLastAppliedOpTime(currentOptime);
replCoord->setMyLastDurableOpTime(currentOptime);
OpTime behindOptime(Timestamp(100, 1), 0);
@@ -1147,6 +1159,10 @@ TEST_F(TakeoverTest, CatchupAndPriorityTakeoverNotScheduledAtSameTime) {
OperationContextNoop opCtx;
OpTime currentOptime(Timestamp(200, 1), 0);
+ // Update the current term to simulate a scenario where an election has occurred
+ // and some other node became the new primary. Once you hear about a primary election
+ // in term 1, your term will be increased.
+ replCoord->updateTerm_forTest(1, nullptr);
replCoord->setMyLastAppliedOpTime(currentOptime);
replCoord->setMyLastDurableOpTime(currentOptime);
OpTime behindOptime(Timestamp(100, 1), 0);
@@ -1167,18 +1183,16 @@ TEST_F(TakeoverTest, CatchupAndPriorityTakeoverNotScheduledAtSameTime) {
Milliseconds catchupTakeoverDelay = catchupTakeoverTime - now;
ASSERT_EQUALS(config.getCatchUpTakeoverDelay(), catchupTakeoverDelay);
- // Mock another heartbeat where the primary is now up to date
+ // Create a new OpTime so that the primary's last applied OpTime will be in the current term.
+ OpTime caughtupOptime(Timestamp(300, 1), 1);
+ // Mock another heartbeat where the primary is now up to date.
now = respondToHeartbeatsUntil(
- config, now + catchupTakeoverDelay / 2, HostAndPort("node2", 12345), currentOptime);
+ config, now + catchupTakeoverDelay / 2, HostAndPort("node2", 12345), caughtupOptime);
- // Since we are no longer ahead of the primary, we can't schedule a catchup
- // takeover anymore. But we are still higher priority than the primary, so
- // after the heartbeat we will try to schedule a priority takeover.
- // Because we can't schedule two takeovers at the same time and the
- // catchup takeover hasn't fired yet, make sure that we don't schedule a
- // priority takeover.
- ASSERT(replCoord->getCatchupTakeover_forTest());
- ASSERT_FALSE(replCoord->getPriorityTakeover_forTest());
+ // Since the primary has caught up, we cancel the scheduled catchup takeover.
+ // But we are still higher priority than the primary, so after the heartbeat
+ // we will schedule a priority takeover.
+ ASSERT(replCoord->getPriorityTakeover_forTest());
}
TEST_F(TakeoverTest, CatchupTakeoverCallbackCanceledIfElectionTimeoutRuns) {
@@ -1203,6 +1217,10 @@ TEST_F(TakeoverTest, CatchupTakeoverCallbackCanceledIfElectionTimeoutRuns) {
OperationContextNoop opCtx;
OpTime currentOptime(Timestamp(200, 1), 0);
+ // Update the current term to simulate a scenario where an election has occurred
+ // and some other node became the new primary. Once you hear about a primary election
+ // in term 1, your term will be increased.
+ replCoord->updateTerm_forTest(1, nullptr);
replCoord->setMyLastAppliedOpTime(currentOptime);
replCoord->setMyLastDurableOpTime(currentOptime);
OpTime behindOptime(Timestamp(100, 1), 0);
@@ -1273,6 +1291,10 @@ TEST_F(TakeoverTest, CatchupTakeoverCanceledIfTransitionToRollback) {
OperationContextNoop opCtx;
OpTime currentOptime(Timestamp(200, 1), 0);
+ // Update the current term to simulate a scenario where an election has occurred
+ // and some other node became the new primary. Once you hear about a primary election
+ // in term 1, your term will be increased.
+ replCoord->updateTerm_forTest(1, nullptr);
replCoord->setMyLastAppliedOpTime(currentOptime);
replCoord->setMyLastDurableOpTime(currentOptime);
OpTime behindOptime(Timestamp(100, 1), 0);
@@ -2483,4 +2505,4 @@ TEST_F(PrimaryCatchUpTest, ZeroTimeout) {
} // namespace
} // namespace repl
-} // namespace mongo
+} // namespace mongo
\ No newline at end of file
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index 8a72ffb7f2d..b542f445f90 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -232,6 +232,16 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
_catchupState->signalHeartbeatUpdate_inlock();
}
+ // Cancel catchup takeover if the last applied write by any node in the replica set was made
+ // in the current term, which implies that the primary has caught up.
+ bool catchupTakeoverScheduled = _catchupTakeoverCbh.isValid();
+ if (responseStatus.isOK() && catchupTakeoverScheduled && hbResponse.hasAppliedOpTime()) {
+ const auto& hbLastAppliedOpTime = hbResponse.getAppliedOpTime();
+ if (hbLastAppliedOpTime.getTerm() == _topCoord->getTerm()) {
+ _cancelCatchupTakeover_inlock();
+ }
+ }
+
_scheduleHeartbeatToTarget_inlock(
target, targetIndex, std::max(now, action.getNextHeartbeatStartDate()));