summary refs log tree commit diff
diff options
context:
space:
mode:
author Samy Lanka <samy.lanka@10gen.com> 2017-07-05 15:18:06 -0400
committer Samy Lanka <samy.lanka@10gen.com> 2017-08-02 15:29:26 -0400
commit 5f1ce8b6765a25d45ba5e35063db417b3069c8d6 (patch)
tree de942dd067b10bb2ae70f3a5b0f5601cd7fa87ec
parent 743119c879ab2e5d1d8ca05aadf6fe29b1526a94 (diff)
download mongo-5f1ce8b6765a25d45ba5e35063db417b3069c8d6.tar.gz
SERVER-29500 Call for catchup takeover election when catchup takeover timeout fires
-rw-r--r-- buildscripts/resmokeconfig/suites/replica_sets_pv0.yml 5
-rw-r--r-- jstests/libs/write_concern_util.js 8
-rw-r--r-- jstests/replsets/catchup_takeover_one_high_priority.js 84
-rw-r--r-- jstests/replsets/catchup_takeover_two_nodes_ahead.js 56
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 2
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.h 6
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_elect.cpp 4
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp 210
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp 38
-rw-r--r-- src/mongo/db/repl/topology_coordinator.h 9
-rw-r--r-- src/mongo/db/repl/topology_coordinator_impl.cpp 71
-rw-r--r-- src/mongo/db/repl/topology_coordinator_impl.h 10
-rw-r--r-- src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp 232
-rw-r--r-- src/mongo/shell/replsettest.js 5
14 files changed, 682 insertions, 58 deletions
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_pv0.yml b/buildscripts/resmokeconfig/suites/replica_sets_pv0.yml
index a5e8a37c1cd..b78e6fbb13e 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_pv0.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_pv0.yml
@@ -4,7 +4,6 @@ selector:
roots:
- jstests/replsets/*.js
exclude_files:
- - jstests/replsets/catchup.js
- jstests/replsets/config_server_checks.js
- jstests/replsets/disallow_adding_initialized_node1.js
- jstests/replsets/disallow_adding_initialized_node2.js
@@ -32,6 +31,10 @@ selector:
- jstests/replsets/command_response_operation_time.js
# Majority read concern not supported in PV0.
- jstests/replsets/operation_time_read_and_write_concern.js
+ # Catchup not supported in PV0.
+ - jstests/replsets/catchup.js
+ - jstests/replsets/catchup_takeover_one_high_priority.js
+ - jstests/replsets/catchup_takeover_two_nodes_ahead.js
executor:
config:
diff --git a/jstests/libs/write_concern_util.js b/jstests/libs/write_concern_util.js
index 8a846cd7f83..4bbf6ad4d29 100644
--- a/jstests/libs/write_concern_util.js
+++ b/jstests/libs/write_concern_util.js
@@ -35,8 +35,12 @@ function stopServerReplication(conn) {
assert.commandWorked(
conn.adminCommand({configureFailPoint: 'stopReplProducer', mode: 'alwaysOn'}), errMsg);
- // Wait until the fail point is actually hit.
- checkLog.contains(conn, 'bgsync - stopReplProducer fail point enabled');
+ // Wait until the fail point is actually hit. Don't wait if the node is the primary, because
+ // the fail point won't be hit until the node transitions from being the primary.
+ if (assert.commandWorked(conn.adminCommand('replSetGetStatus')).myState !=
+ ReplSetTest.State.PRIMARY) {
+ checkLog.contains(conn, 'bgsync - stopReplProducer fail point enabled');
+ }
}
// Stops replication at all replicaset secondaries.
diff --git a/jstests/replsets/catchup_takeover_one_high_priority.js b/jstests/replsets/catchup_takeover_one_high_priority.js
new file mode 100644
index 00000000000..ed6d7faee50
--- /dev/null
+++ b/jstests/replsets/catchup_takeover_one_high_priority.js
@@ -0,0 +1,84 @@
+// Test to ensure that a catchup takeover runs even if the node performing it isn't
+// the highest priority node and that once the high priority node is caught up,
+// it becomes primary again.
+
+// 3-node replica set with one high priority node (node 2).
+// Start replica set and wait for node 2 to become primary. Stop replication
+// on node 2 and isolate it, then make node 0 primary. Have the new
+// primary write something so node 2 is more than 2 seconds behind.
+// Stop replication on node 1 and write something else so that node 1 is also lagged.
+// Reconnect the high priority node to the other nodes and make
+// the lagged node (node 1) the next primary.
+// Confirm that the most up-to-date node (node 0) becomes primary.
+// Let the highest priority node catch up and then confirm
+// that it becomes primary.
+
+(function() {
+ 'use strict';
+
+ load('jstests/replsets/rslib.js');
+
+ var name = 'catchup_takeover_one_high_priority';
+ var replSet =
+ new ReplSetTest({name: name, nodes: [{}, {}, {rsConfig: {priority: 2}}], useBridge: true});
+ var nodes = replSet.startSet();
+ replSet.initiate();
+
+ // Wait until node 2 becomes primary.
+ replSet.waitForState(2, ReplSetTest.State.PRIMARY, replSet.kDefaultTimeoutMS);
+ jsTestLog('node 2 is now primary');
+
+ replSet.awaitReplication();
+
+ // Stop replication and disconnect node 2 so that it cannot do a priority takeover.
+ stopServerReplication(nodes[2]);
+ nodes[2].disconnect(nodes[1]);
+ nodes[2].disconnect(nodes[0]);
+
+ // Ensure that node 0 becomes primary.
+ assert.commandWorked(nodes[0].adminCommand({replSetStepUp: 1}));
+ replSet.awaitNodesAgreeOnPrimary(replSet.kDefaultTimeoutMS, nodes.slice(0, 2));
+ assert.eq(ReplSetTest.State.PRIMARY,
+ assert.commandWorked(nodes[0].adminCommand('replSetGetStatus')).myState,
+ nodes[0].host + " was not primary after step-up");
+ jsTestLog('node 0 is now primary');
+
+ // Sleep for a few seconds to ensure that node 2's optime is more than 2 seconds behind.
+ // This will ensure it can't do a priority takeover until it catches up.
+ sleep(3000);
+
+ var primary = replSet.getPrimary();
+ var writeConcern = {writeConcern: {w: 2, wtimeout: replSet.kDefaultTimeoutMS}};
+ assert.writeOK(primary.getDB(name).bar.insert({y: 100}, writeConcern));
+
+ // Write something so that node 0 is ahead of node 1.
+ stopServerReplication(nodes[1]);
+ writeConcern = {writeConcern: {w: 1, wtimeout: replSet.kDefaultTimeoutMS}};
+ assert.writeOK(primary.getDB(name).bar.insert({x: 100}, writeConcern));
+
+ nodes[2].reconnect(nodes[0]);
+ nodes[2].reconnect(nodes[1]);
+
+ // Step up a lagged node.
+ assert.commandWorked(nodes[1].adminCommand({replSetStepUp: 1}));
+ replSet.awaitNodesAgreeOnPrimary(replSet.kDefaultTimeoutMS, nodes);
+ assert.eq(ReplSetTest.State.PRIMARY,
+ assert.commandWorked(nodes[1].adminCommand('replSetGetStatus')).myState,
+ nodes[1].host + " was not primary after step-up");
+ jsTestLog('node 1 is now primary, but cannot accept writes');
+
+ // Confirm that the most up-to-date node becomes primary
+ // after the default catchup delay.
+ replSet.waitForState(0, ReplSetTest.State.PRIMARY, 60 * 1000);
+ jsTestLog('node 0 performed catchup takeover and is now primary');
+
+ // Let the nodes catch up.
+ restartServerReplication(nodes[1]);
+ restartServerReplication(nodes[2]);
+
+ // Confirm that the highest priority node becomes primary
+ // after catching up.
+ replSet.waitForState(2, ReplSetTest.State.PRIMARY, 30 * 1000);
+ jsTestLog('node 2 performed priority takeover and is now primary');
+
+})();
\ No newline at end of file
diff --git a/jstests/replsets/catchup_takeover_two_nodes_ahead.js b/jstests/replsets/catchup_takeover_two_nodes_ahead.js
new file mode 100644
index 00000000000..4be8fee9007
--- /dev/null
+++ b/jstests/replsets/catchup_takeover_two_nodes_ahead.js
@@ -0,0 +1,56 @@
+// Test to ensure that a catchup takeover happens when the primary is lagged.
+// Make sure that when two nodes are more caught up than the primary,
+// the most up-to-date node becomes the primary.
+
+// 5-node replica set
+// Start replica set. Ensure that node 0 becomes primary.
+// Stop the replication for some nodes and have the primary write something.
+// Stop replication for an up-to-date node and have the primary write something.
+// Now the primary is most-up-to-date and another node is more up-to-date than others.
+// Make a lagged node the next primary.
+// Confirm that the most up-to-date node becomes primary.
+
+(function() {
+ 'use strict';
+
+ load('jstests/replsets/rslib.js');
+
+ var name = 'catchup_takeover_two_nodes_ahead';
+ var replSet = new ReplSetTest({name: name, nodes: 5});
+ var nodes = replSet.startSet();
+ replSet.initiate();
+
+ // Wait until all nodes get the "no-op" of "new primary" after initial sync.
+ waitUntilAllNodesCaughtUp(nodes);
+
+ // Write something so that nodes 0 and 1 are ahead.
+ stopServerReplication(nodes.slice(2, 5));
+ var primary = replSet.getPrimary();
+ var writeConcern = {writeConcern: {w: 2, wtimeout: replSet.kDefaultTimeoutMS}};
+ assert.writeOK(primary.getDB(name).bar.insert({x: 100}, writeConcern));
+
+ // Write something so that node 0 is ahead of node 1.
+ stopServerReplication(nodes[1]);
+ writeConcern = {writeConcern: {w: 1, wtimeout: replSet.kDefaultTimeoutMS}};
+ assert.writeOK(primary.getDB(name).bar.insert({y: 100}, writeConcern));
+
+ // Step up one of the lagged nodes.
+ assert.commandWorked(nodes[2].adminCommand({replSetStepUp: 1}));
+ replSet.awaitNodesAgreeOnPrimary(replSet.kDefaultTimeoutMS, nodes);
+ assert.eq(ReplSetTest.State.PRIMARY,
+ assert.commandWorked(nodes[2].adminCommand('replSetGetStatus')).myState,
+ nodes[2].host + " was not primary after step-up");
+ jsTestLog('node 2 is now primary, but cannot accept writes');
+
+ // Make sure that node 2 cannot write anything. Because it is lagged and replication
+ // has been stopped, it shouldn't be able to become master.
+ assert.writeErrorWithCode(nodes[2].getDB(name).bar.insert({z: 100}, writeConcern),
+ ErrorCodes.NotMaster);
+
+ // Confirm that the most up-to-date node becomes primary
+ // after the default catchup delay.
+ replSet.waitForState(0, ReplSetTest.State.PRIMARY, 60 * 1000);
+
+ // Let the nodes catch up
+ restartServerReplication(nodes.slice(1, 5));
+})();
\ No newline at end of file
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index e1f995f4d6f..84f5cb58031 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -3375,7 +3375,7 @@ Status ReplicationCoordinatorImpl::stepUpIfEligible() {
"Step-up command is only supported by Protocol Version 1");
}
- _startElectSelfIfEligibleV1(StartElectionV1Reason::kStepUpRequest);
+ _startElectSelfIfEligibleV1(TopologyCoordinator::StartElectionReason::kStepUpRequest);
EventHandle finishEvent;
{
stdx::lock_guard<stdx::mutex> lk(_mutex);
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 1fbc2d059fc..27d698feb69 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -1056,12 +1056,8 @@ private:
/**
* Callback which starts an election if this node is electable and using protocolVersion 1.
- * "isPriorityTakeover" is used to determine if the caller was a priority takeover or not and
- * log messages accordingly.
*/
- enum StartElectionV1Reason { kElectionTimeout, kPriorityTakeover, kStepUpRequest };
-
- void _startElectSelfIfEligibleV1(StartElectionV1Reason reason);
+ void _startElectSelfIfEligibleV1(TopologyCoordinator::StartElectionReason reason);
/**
* Resets the term of last vote to 0 to prevent any node from voting for term 0.
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect.cpp
index 91b1c5dfc15..727d652630a 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect.cpp
@@ -284,7 +284,9 @@ void ReplicationCoordinatorImpl::_recoverFromElectionTie(
if (!status.isOK()) {
LOG(2) << "ReplicationCoordinatorImpl::_recoverFromElectionTie -- " << status.reason();
} else {
- fassertStatusOK(28817, _topCoord->becomeCandidateIfElectable(now, false));
+ fassertStatusOK(28817,
+ _topCoord->becomeCandidateIfElectable(
+ now, TopologyCoordinator::StartElectionReason::kElectionTimeout));
_startElectSelf_inlock();
}
}
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
index 1f75f7c9a75..ca7ef2f5d86 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
@@ -859,9 +859,11 @@ public:
return net->now();
}
- void performSuccessfulPriorityTakeover(Date_t priorityTakeoverTime) {
+ void performSuccessfulTakeover(Date_t takeoverTime,
+ TopologyCoordinator::StartElectionReason reason,
+ const LastVote& lastVoteExpected) {
startCapturingLogMessages();
- simulateSuccessfulV1ElectionAt(priorityTakeoverTime);
+ simulateSuccessfulV1ElectionAt(takeoverTime);
getReplCoord()->waitForElectionFinish_forTest();
stopCapturingLogMessages();
@@ -870,10 +872,13 @@ public:
// Check last vote
auto lastVote = getExternalState()->loadLocalLastVoteDocument(nullptr);
ASSERT(lastVote.isOK());
- ASSERT_EQ(0, lastVote.getValue().getCandidateIndex());
- ASSERT_EQ(1, lastVote.getValue().getTerm());
+ ASSERT_EQ(lastVoteExpected.getCandidateIndex(), lastVote.getValue().getCandidateIndex());
+ ASSERT_EQ(lastVoteExpected.getTerm(), lastVote.getValue().getTerm());
- ASSERT_EQUALS(1, countLogLinesContaining("Starting an election for a priority takeover"));
+ if (reason == TopologyCoordinator::StartElectionReason::kPriorityTakeover) {
+ ASSERT_EQUALS(1,
+ countLogLinesContaining("Starting an election for a priority takeover"));
+ }
ASSERT_EQUALS(1, countLogLinesContaining("election succeeded"));
}
@@ -894,11 +899,22 @@ private:
// Process all requests queued at the present time.
while (net->hasReadyRequests()) {
- auto noi = net->getNextReadyRequest();
+
+ // If we see that the next request isn't for a heartbeat, exit the function.
+ // This allows us to mock heartbeat responses with whatever info we want
+ // right up until another event happens (like an election). This is
+ // particularly important for simulating a catchup takeover because
+ // we need to know specific info about the primary.
+ auto noi = net->getFrontOfUnscheduledQueue();
+ auto&& nextRequest = noi->getRequest();
+ if (nextRequest.cmdObj.firstElement().fieldNameStringData() != "replSetHeartbeat") {
+ return;
+ }
+
+ noi = net->getNextReadyRequest();
auto&& request = noi->getRequest();
log() << request.target << " processing " << request.cmdObj;
- ASSERT_EQUALS("replSetHeartbeat", request.cmdObj.firstElement().fieldNameStringData());
// Make sure the heartbeat request is valid.
ReplSetHeartbeatArgsV1 hbArgs;
@@ -1290,7 +1306,7 @@ TEST_F(TakeoverTest, CatchupTakeoverCanceledIfTransitionToRollback) {
ASSERT_EQUALS(0, countLogLinesContaining("Starting an election for a catchup takeover"));
}
-TEST_F(TakeoverTest, CatchupTakeoverElectionIsANoop) {
+TEST_F(TakeoverTest, SuccessfulCatchupTakeover) {
BSONObj configObj = BSON("_id"
<< "mySet"
<< "version"
@@ -1318,6 +1334,11 @@ TEST_F(TakeoverTest, CatchupTakeoverElectionIsANoop) {
replCoord->setMyLastAppliedOpTime(currentOptime);
replCoord->setMyLastDurableOpTime(currentOptime);
+ // Update the term so that the current term is ahead of the term of
+ // the last applied op time. This means that the primary is still in
+ // catchup mode since it hasn't written anything this term.
+ ASSERT_EQUALS(ErrorCodes::StaleTerm, replCoord->updateTerm(&opCtx, replCoord->getTerm() + 1));
+
// Make sure we're secondary and that no takeover has been scheduled.
ASSERT_OK(replCoord->setFollowerMode(MemberState::RS_SECONDARY));
ASSERT_FALSE(replCoord->getCatchupTakeover_forTest());
@@ -1333,12 +1354,164 @@ TEST_F(TakeoverTest, CatchupTakeoverElectionIsANoop) {
ASSERT_EQUALS(config.getCatchUpTakeoverDelay(), catchupTakeoverDelay);
startCapturingLogMessages();
- now = respondToHeartbeatsUntil(config, catchupTakeoverTime, primaryHostAndPort, behindOptime);
+
+ // The catchup takeover will be scheduled at a time later than one election
+ // timeout after our initial heartbeat responses, so mock a few rounds of
+ // heartbeat responses to prevent a normal election timeout.
+ now = respondToHeartbeatsUntil(
+ config, catchupTakeoverTime, HostAndPort("node2", 12345), behindOptime);
+ stopCapturingLogMessages();
+
+ // Since the heartbeats go through the catchupTakeoverTimeout, this log
+ // message happens already (otherwise it would happen in performSuccessfulTakeover).
+ ASSERT_EQUALS(1, countLogLinesContaining("Starting an election for a catchup takeover"));
+
+ LastVote lastVoteExpected = LastVote(replCoord->getTerm() + 1, 0);
+ performSuccessfulTakeover(catchupTakeoverTime,
+ TopologyCoordinator::StartElectionReason::kCatchupTakeover,
+ lastVoteExpected);
+}
+
+TEST_F(TakeoverTest, PrimaryCatchesUpBeforeCatchupTakeover) {
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version"
+ << 1
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345"))
+ << "protocolVersion"
+ << 1);
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ ReplSetConfig config = assertMakeRSConfig(configObj);
+
+ auto replCoord = getReplCoord();
+ auto now = getNet()->now();
+
+ OperationContextNoop opCtx;
+ OpTime currentOptime(Timestamp(200, 1), 0);
+ replCoord->setMyLastAppliedOpTime(currentOptime);
+ replCoord->setMyLastDurableOpTime(currentOptime);
+ OpTime behindOptime(Timestamp(100, 1), 0);
+
+ // Update the term so that the current term is ahead of the term of
+ // the last applied op time.
+ ASSERT_EQUALS(ErrorCodes::StaleTerm, replCoord->updateTerm(&opCtx, replCoord->getTerm() + 1));
+
+ // Make sure we're secondary and that no catchup takeover has been scheduled.
+ ASSERT_OK(replCoord->setFollowerMode(MemberState::RS_SECONDARY));
+ ASSERT_FALSE(replCoord->getCatchupTakeover_forTest());
+
+ startCapturingLogMessages();
+
+ // Mock a first round of heartbeat responses, which should give us enough information to know
+ // that we are fresher than the current primary, prompting the scheduling of a catchup
+ // takeover.
+ now = respondToHeartbeatsUntil(config, now, HostAndPort("node2", 12345), behindOptime);
+
+ // Make sure that the catchup takeover has actually been scheduled and at the
+ // correct time.
+ ASSERT(replCoord->getCatchupTakeover_forTest());
+ auto catchupTakeoverTime = replCoord->getCatchupTakeover_forTest().get();
+ Milliseconds catchupTakeoverDelay = catchupTakeoverTime - now;
+ ASSERT_EQUALS(config.getCatchUpTakeoverDelay(), catchupTakeoverDelay);
+
+ // Mock another heartbeat where the primary is now up to date
+ // and run time through when catchup takeover was supposed to happen.
+ now = respondToHeartbeatsUntil(
+ config, now + catchupTakeoverDelay, HostAndPort("node2", 12345), currentOptime);
+
+ stopCapturingLogMessages();
+
+ // Make sure we're secondary and that no catchup takeover election happened.
+ ASSERT(replCoord->getMemberState().secondary());
+ ASSERT_FALSE(replCoord->getCatchupTakeover_forTest());
+ ASSERT_EQUALS(1, countLogLinesContaining("Not starting an election for a catchup takeover"));
+}
+
+TEST_F(TakeoverTest, PrimaryCatchesUpBeforeHighPriorityNodeCatchupTakeover) {
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version"
+ << 1
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345"
+ << "priority"
+ << 2)
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345"))
+ << "protocolVersion"
+ << 1);
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ ReplSetConfig config = assertMakeRSConfig(configObj);
+
+ auto replCoord = getReplCoord();
+ auto now = getNet()->now();
+
+ OperationContextNoop opCtx;
+ OpTime currentOptime(Timestamp(200, 1), 0);
+ replCoord->setMyLastAppliedOpTime(currentOptime);
+ replCoord->setMyLastDurableOpTime(currentOptime);
+ OpTime behindOptime(Timestamp(100, 1), 0);
+
+ // Update the term so that the current term is ahead of the term of
+ // the last applied op time.
+ ASSERT_EQUALS(ErrorCodes::StaleTerm, replCoord->updateTerm(&opCtx, replCoord->getTerm() + 1));
+
+ // Make sure we're secondary and that no catchup takeover has been scheduled.
+ ASSERT_OK(replCoord->setFollowerMode(MemberState::RS_SECONDARY));
+ ASSERT_FALSE(replCoord->getCatchupTakeover_forTest());
+
+ startCapturingLogMessages();
+
+ // Mock a first round of heartbeat responses, which should give us enough information to know
+ // that we are fresher than the current primary, prompting the scheduling of a catchup
+ // takeover.
+ now = respondToHeartbeatsUntil(config, now, HostAndPort("node2", 12345), behindOptime);
+
+ // Make sure that the catchup takeover has actually been scheduled and at the
+ // correct time.
+ ASSERT(replCoord->getCatchupTakeover_forTest());
+ auto catchupTakeoverTime = replCoord->getCatchupTakeover_forTest().get();
+ Milliseconds catchupTakeoverDelay = catchupTakeoverTime - now;
+ ASSERT_EQUALS(config.getCatchUpTakeoverDelay(), catchupTakeoverDelay);
+
+ // Mock another heartbeat where the primary is now up to date
+ // and run time through when catchup takeover was supposed to happen.
+ now = respondToHeartbeatsUntil(
+ config, now + catchupTakeoverDelay, HostAndPort("node2", 12345), currentOptime);
+
stopCapturingLogMessages();
- // Make sure that the catchup takeover fired as a NOOP.
+ // Make sure we're secondary and that no catchup takeover election happens.
ASSERT(replCoord->getMemberState().secondary());
- ASSERT_EQUALS(1, countLogLinesContaining("Starting an election for a catchup takeover [NOOP]"));
+ ASSERT_FALSE(replCoord->getCatchupTakeover_forTest());
+ ASSERT_EQUALS(1, countLogLinesContaining("Not starting an election for a catchup takeover"));
+
+ // Make sure that the priority takeover has now been scheduled and at the
+ // correct time.
+ ASSERT(replCoord->getPriorityTakeover_forTest());
+ auto priorityTakeoverTime = replCoord->getPriorityTakeover_forTest().get();
+ assertValidPriorityTakeoverDelay(config, now, priorityTakeoverTime, 0);
+
+ // The priority takeover might be scheduled at a time later than one election
+ // timeout after our initial heartbeat responses, so mock another round of
+ // heartbeat responses to prevent a normal election timeout.
+ Milliseconds halfElectionTimeout = config.getElectionTimeoutPeriod() / 2;
+ now = respondToHeartbeatsUntil(
+ config, now + halfElectionTimeout, HostAndPort("node2", 12345), currentOptime);
+
+ LastVote lastVoteExpected = LastVote(replCoord->getTerm() + 1, 0);
+ performSuccessfulTakeover(priorityTakeoverTime,
+ TopologyCoordinator::StartElectionReason::kPriorityTakeover,
+ lastVoteExpected);
}
TEST_F(TakeoverTest, SchedulesPriorityTakeoverIfNodeHasHigherPriorityThanCurrentPrimary) {
@@ -1437,7 +1610,10 @@ TEST_F(TakeoverTest, SuccessfulPriorityTakeover) {
now = respondToHeartbeatsUntil(
config, now + halfElectionTimeout, HostAndPort("node2", 12345), myOptime);
- performSuccessfulPriorityTakeover(priorityTakeoverTime);
+ LastVote lastVoteExpected = LastVote(replCoord->getTerm() + 1, 0);
+ performSuccessfulTakeover(priorityTakeoverTime,
+ TopologyCoordinator::StartElectionReason::kPriorityTakeover,
+ lastVoteExpected);
}
TEST_F(TakeoverTest, DontCallForPriorityTakeoverWhenLaggedSameSecond) {
@@ -1513,7 +1689,10 @@ TEST_F(TakeoverTest, DontCallForPriorityTakeoverWhenLaggedSameSecond) {
replCoord->setMyLastAppliedOpTime(closeEnoughOpTime);
replCoord->setMyLastDurableOpTime(closeEnoughOpTime);
- performSuccessfulPriorityTakeover(priorityTakeoverTime);
+ LastVote lastVoteExpected = LastVote(replCoord->getTerm() + 1, 0);
+ performSuccessfulTakeover(priorityTakeoverTime,
+ TopologyCoordinator::StartElectionReason::kPriorityTakeover,
+ lastVoteExpected);
}
TEST_F(TakeoverTest, DontCallForPriorityTakeoverWhenLaggedDifferentSecond) {
@@ -1588,7 +1767,10 @@ TEST_F(TakeoverTest, DontCallForPriorityTakeoverWhenLaggedDifferentSecond) {
replCoord->setMyLastAppliedOpTime(closeEnoughOpTime);
replCoord->setMyLastDurableOpTime(closeEnoughOpTime);
- performSuccessfulPriorityTakeover(priorityTakeoverTime);
+ LastVote lastVoteExpected = LastVote(replCoord->getTerm() + 1, 0);
+ performSuccessfulTakeover(priorityTakeoverTime,
+ TopologyCoordinator::StartElectionReason::kPriorityTakeover,
+ lastVoteExpected);
}
TEST_F(ReplCoordTest, NodeCancelsElectionUponReceivingANewConfigDuringDryRun) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index fbaeb9dca44..35199798605 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -286,7 +286,7 @@ stdx::unique_lock<stdx::mutex> ReplicationCoordinatorImpl::_handleHeartbeatRespo
_priorityTakeoverWhen,
stdx::bind(&ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1,
this,
- StartElectionV1Reason::kPriorityTakeover));
+ TopologyCoordinator::StartElectionReason::kPriorityTakeover));
}
break;
}
@@ -297,11 +297,10 @@ stdx::unique_lock<stdx::mutex> ReplicationCoordinatorImpl::_handleHeartbeatRespo
_catchupTakeoverWhen = _replExecutor->now() + catchupTakeoverDelay;
log() << "Scheduling catchup takeover at " << _catchupTakeoverWhen;
_catchupTakeoverCbh = _scheduleWorkAt(
- _catchupTakeoverWhen, [this](const executor::TaskExecutor::CallbackArgs& args) {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- _cancelCatchupTakeover_inlock();
- log() << "Starting an election for a catchup takeover [NOOP]";
- });
+ _catchupTakeoverWhen,
+ stdx::bind(&ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1,
+ this,
+ TopologyCoordinator::StartElectionReason::kCatchupTakeover));
}
break;
}
@@ -767,10 +766,11 @@ void ReplicationCoordinatorImpl::_cancelAndRescheduleElectionTimeout_inlock() {
_scheduleWorkAt(when,
stdx::bind(&ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1,
this,
- StartElectionV1Reason::kElectionTimeout));
+ TopologyCoordinator::StartElectionReason::kElectionTimeout));
}
-void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(StartElectionV1Reason reason) {
+void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(
+ TopologyCoordinator::StartElectionReason reason) {
if (!isV1ElectionProtocol()) {
return;
}
@@ -788,37 +788,43 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(StartElectionV1Reas
}
}
- const auto status = _topCoord->becomeCandidateIfElectable(
- _replExecutor->now(), reason == StartElectionV1Reason::kPriorityTakeover);
+ const auto status = _topCoord->becomeCandidateIfElectable(_replExecutor->now(), reason);
if (!status.isOK()) {
switch (reason) {
- case StartElectionV1Reason::kElectionTimeout:
+ case TopologyCoordinator::StartElectionReason::kElectionTimeout:
log() << "Not starting an election, since we are not electable due to: "
<< status.reason();
break;
- case StartElectionV1Reason::kPriorityTakeover:
+ case TopologyCoordinator::StartElectionReason::kPriorityTakeover:
log() << "Not starting an election for a priority takeover, "
<< "since we are not electable due to: " << status.reason();
break;
- case StartElectionV1Reason::kStepUpRequest:
+ case TopologyCoordinator::StartElectionReason::kStepUpRequest:
log() << "Not starting an election for a replSetStepUp request, "
<< "since we are not electable due to: " << status.reason();
break;
+ case TopologyCoordinator::StartElectionReason::kCatchupTakeover:
+ log() << "Not starting an election for a catchup takeover, "
+ << "since we are not electable due to: " << status.reason();
+ break;
}
return;
}
switch (reason) {
- case StartElectionV1Reason::kElectionTimeout:
+ case TopologyCoordinator::StartElectionReason::kElectionTimeout:
log() << "Starting an election, since we've seen no PRIMARY in the past "
<< _rsConfig.getElectionTimeoutPeriod();
break;
- case StartElectionV1Reason::kPriorityTakeover:
+ case TopologyCoordinator::StartElectionReason::kPriorityTakeover:
log() << "Starting an election for a priority takeover";
break;
- case StartElectionV1Reason::kStepUpRequest:
+ case TopologyCoordinator::StartElectionReason::kStepUpRequest:
log() << "Starting an election due to step up request";
break;
+ case TopologyCoordinator::StartElectionReason::kCatchupTakeover:
+ log() << "Starting an election for a catchup takeover";
+ break;
}
_startElectSelfV1_inlock();
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index 377959fc47e..3c2df705d93 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -587,10 +587,17 @@ public:
*/
virtual void setPrimaryIndex(long long primaryIndex) = 0;
+ enum StartElectionReason {
+ kElectionTimeout,
+ kPriorityTakeover,
+ kStepUpRequest,
+ kCatchupTakeover
+ };
+
/**
* Transitions to the candidate role if the node is electable.
*/
- virtual Status becomeCandidateIfElectable(const Date_t now, bool isPriorityTakeover) = 0;
+ virtual Status becomeCandidateIfElectable(const Date_t now, StartElectionReason reason) = 0;
/**
* Updates the storage engine read committed support in the TopologyCoordinator options after
diff --git a/src/mongo/db/repl/topology_coordinator_impl.cpp b/src/mongo/db/repl/topology_coordinator_impl.cpp
index 8d707362fda..2d17af00d7a 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl.cpp
@@ -740,7 +740,7 @@ Status TopologyCoordinatorImpl::prepareHeartbeatResponse(Date_t now,
const OpTime lastOpDurable = getMyLastDurableOpTime();
// Are we electable
- response->setElectable(!_getMyUnelectableReason(now, false));
+ response->setElectable(!_getMyUnelectableReason(now, StartElectionReason::kElectionTimeout));
// Heartbeat status message
response->setHbMsg(_getHbmsg(now));
@@ -1182,7 +1182,7 @@ HeartbeatResponseAction TopologyCoordinatorImpl::setMemberAsDown(Date_t now,
MemberData& hbData = _memberData.at(memberIndex);
hbData.setDownValues(now, "no response within election timeout period");
- if (CannotSeeMajority & _getMyUnelectableReason(now, false)) {
+ if (CannotSeeMajority & _getMyUnelectableReason(now, StartElectionReason::kElectionTimeout)) {
if (_stepDownPending) {
return HeartbeatResponseAction::makeNoAction();
}
@@ -1487,7 +1487,8 @@ HeartbeatResponseAction TopologyCoordinatorImpl::_updatePrimaryFromHBData(
// If we are primary, check if we can still see majority of the set;
// stepdown if we can't.
if (_iAmPrimary()) {
- if (CannotSeeMajority & _getMyUnelectableReason(now, false)) {
+ if (CannotSeeMajority &
+ _getMyUnelectableReason(now, StartElectionReason::kElectionTimeout)) {
if (_stepDownPending) {
return HeartbeatResponseAction::makeNoAction();
}
@@ -1522,7 +1523,7 @@ HeartbeatResponseAction TopologyCoordinatorImpl::_updatePrimaryFromHBData(
LOG(2) << "TopologyCoordinatorImpl::_updatePrimaryFromHBData - " << status.reason();
return HeartbeatResponseAction::makeNoAction();
}
- fassertStatusOK(28816, becomeCandidateIfElectable(now, false));
+ fassertStatusOK(28816, becomeCandidateIfElectable(now, StartElectionReason::kElectionTimeout));
return HeartbeatResponseAction::makeElectAction();
}
@@ -1536,7 +1537,8 @@ Status TopologyCoordinatorImpl::checkShouldStandForElection(Date_t now) const {
return {ErrorCodes::NodeNotElectable, "Not standing for election again; already candidate"};
}
- const UnelectableReasonMask unelectableReason = _getMyUnelectableReason(now, false);
+ const UnelectableReasonMask unelectableReason =
+ _getMyUnelectableReason(now, StartElectionReason::kElectionTimeout);
if (NotCloseEnoughToLatestOptime & unelectableReason) {
return {ErrorCodes::NodeNotElectable,
str::stream() << "Not standing for election because "
@@ -1633,6 +1635,40 @@ bool TopologyCoordinatorImpl::_amIFreshEnoughForPriorityTakeover() const {
}
}
+bool TopologyCoordinatorImpl::_amIFreshEnoughForCatchupTakeover() const {
+
+ const OpTime latestKnownOpTime = _latestKnownOpTime();
+
+ // Returns true only when all of these rules hold:
+ // - We must have the freshest optime of all the up nodes.
+ // - A primary must be known and our optime must be strictly fresher than its (can't be equal).
+ // - The term of our last applied op must be less than the current term. This ensures that no
+ // writes have happened since the most recent election and that the primary is still in
+ // catchup mode.
+
+ // There is no point to a catchup takeover if we aren't the freshest node because
+ // another node would immediately perform another catchup takeover when we become primary.
+ const OpTime ourLastOpApplied = getMyLastAppliedOpTime();
+ if (ourLastOpApplied < latestKnownOpTime) {
+ return false;
+ }
+
+ if (_currentPrimaryIndex == -1) {
+ return false;
+ }
+
+ // If we aren't ahead of the primary, there is no point to having a catchup takeover.
+ const OpTime primaryLastOpApplied = _memberData[_currentPrimaryIndex].getLastAppliedOpTime();
+
+ if (ourLastOpApplied <= primaryLastOpApplied) {
+ return false;
+ }
+
+ // If the term of our last applied op is less than the current term, the primary didn't write
+ // anything and it is still in catchup mode.
+ return ourLastOpApplied.getTerm() < _term;
+}
+
bool TopologyCoordinatorImpl::_iAmPrimary() const {
if (_role == Role::leader) {
invariant(_currentPrimaryIndex == _selfIndex);
@@ -1685,7 +1721,7 @@ int TopologyCoordinatorImpl::_getHighestPriorityElectableIndex(Date_t now) const
int maxIndex = -1;
for (int currentIndex = 0; currentIndex < _rsConfig.getNumMembers(); currentIndex++) {
UnelectableReasonMask reason = currentIndex == _selfIndex
- ? _getMyUnelectableReason(now, false)
+ ? _getMyUnelectableReason(now, StartElectionReason::kElectionTimeout)
: _getUnelectableReason(currentIndex);
if (None == reason && _isMemberHigherPriority(currentIndex, maxIndex)) {
maxIndex = currentIndex;
@@ -2310,7 +2346,7 @@ TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getUnel
}
TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getMyUnelectableReason(
- const Date_t now, bool isPriorityTakeover) const {
+ const Date_t now, StartElectionReason reason) const {
UnelectableReasonMask result = None;
const OpTime lastApplied = getMyLastAppliedOpTime();
if (lastApplied.isNull()) {
@@ -2351,9 +2387,15 @@ TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getMyUn
} else {
// Election rules only for protocol version 1.
invariant(_rsConfig.getProtocolVersion() == 1);
- if (isPriorityTakeover && !_amIFreshEnoughForPriorityTakeover()) {
+ if (reason == StartElectionReason::kPriorityTakeover &&
+ !_amIFreshEnoughForPriorityTakeover()) {
result |= NotCloseEnoughToLatestForPriorityTakeover;
}
+
+ if (reason == StartElectionReason::kCatchupTakeover &&
+ !_amIFreshEnoughForCatchupTakeover()) {
+ result |= NotFreshEnoughForCatchupTakeover;
+ }
}
return result;
}
@@ -2426,6 +2468,14 @@ std::string TopologyCoordinatorImpl::_getUnelectableReasonString(
"takeover - must be within "
<< priorityTakeoverFreshnessWindowSeconds << " seconds";
}
+ if (ur & NotFreshEnoughForCatchupTakeover) {
+ if (hasWrittenToStream) {
+ ss << "; ";
+ }
+ hasWrittenToStream = true;
+ ss << "member is either not the most up-to-date member or not ahead of the primary, and "
+ "therefore cannot call for catchup takeover";
+ }
if (ur & NotInitialized) {
if (hasWrittenToStream) {
ss << "; ";
@@ -2986,7 +3036,7 @@ void TopologyCoordinatorImpl::setPrimaryIndex(long long primaryIndex) {
}
Status TopologyCoordinatorImpl::becomeCandidateIfElectable(const Date_t now,
- bool isPriorityTakeover) {
+ StartElectionReason reason) {
if (_role == Role::leader) {
return {ErrorCodes::NodeNotElectable, "Not standing for election again; already primary"};
}
@@ -2995,8 +3045,7 @@ Status TopologyCoordinatorImpl::becomeCandidateIfElectable(const Date_t now,
return {ErrorCodes::NodeNotElectable, "Not standing for election again; already candidate"};
}
- const UnelectableReasonMask unelectableReason =
- _getMyUnelectableReason(now, isPriorityTakeover);
+ const UnelectableReasonMask unelectableReason = _getMyUnelectableReason(now, reason);
if (unelectableReason) {
return {ErrorCodes::NodeNotElectable,
str::stream() << "Not standing for election because "
diff --git a/src/mongo/db/repl/topology_coordinator_impl.h b/src/mongo/db/repl/topology_coordinator_impl.h
index f9f50f62871..5114f012be2 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.h
+++ b/src/mongo/db/repl/topology_coordinator_impl.h
@@ -247,7 +247,7 @@ public:
virtual MemberData* findMemberDataByMemberId(const int memberId);
virtual MemberData* findMemberDataByRid(const OID rid);
virtual MemberData* addSlaveMemberData(const OID rid);
- virtual Status becomeCandidateIfElectable(const Date_t now, bool isPriorityTakeover);
+ virtual Status becomeCandidateIfElectable(const Date_t now, StartElectionReason reason);
virtual void setStorageEngineSupportsReadCommitted(bool supported);
virtual void restartHeartbeats();
@@ -295,6 +295,7 @@ private:
VotedTooRecently = 1 << 8,
RefusesToStand = 1 << 9,
NotCloseEnoughToLatestForPriorityTakeover = 1 << 10,
+ NotFreshEnoughForCatchupTakeover = 1 << 11,
};
typedef int UnelectableReasonMask;
@@ -327,8 +328,13 @@ private:
// Is our optime close enough to the latest known optime to call for a priority takeover.
bool _amIFreshEnoughForPriorityTakeover() const;
+ // Is the primary node still in catchup mode and is our optime the latest
+ // known optime of all the up nodes.
+ bool _amIFreshEnoughForCatchupTakeover() const;
+
// Returns reason why "self" member is unelectable
- UnelectableReasonMask _getMyUnelectableReason(const Date_t now, bool isPriorityTakeover) const;
+ UnelectableReasonMask _getMyUnelectableReason(const Date_t now,
+ StartElectionReason reason) const;
// Returns reason why memberIndex is unelectable
UnelectableReasonMask _getUnelectableReason(int memberIndex) const;
diff --git a/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp
index 72dbefe0def..8370f3595de 100644
--- a/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp
@@ -3829,6 +3829,232 @@ TEST_F(HeartbeatResponseTestV1, UpdateHeartbeatDataTermPreventsPriorityTakeover)
ASSERT_EQUALS(2, getCurrentPrimaryIndex());
}
+TEST_F(TopoCoordTest, FreshestNodeDoesCatchupTakeover) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version"
+ << 5
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "host1:27017")
+ << BSON("_id" << 2 << "host"
+ << "host2:27017")
+ << BSON("_id" << 3 << "host"
+ << "host3:27017"))
+ << "protocolVersion"
+ << 1
+ << "settings"
+
+ << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ OpTime currentOptime(Timestamp(200, 1), 0);
+ OpTime behindOptime(Timestamp(100, 1), 0);
+
+ // Create a mock heartbeat response; it is sent for host3 as-is and then modified for host2.
+ // The latest heartbeat responses are looked at for determining the latest optime
+ // and therefore freshness for catchup takeover.
+ ReplSetHeartbeatResponse hbResp = ReplSetHeartbeatResponse();
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setAppliedOpTime(currentOptime);
+ hbResp.setTerm(1);
+
+ Date_t firstRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T13:00Z"));
+
+ getTopoCoord().prepareHeartbeatRequestV1(firstRequestDate, "rs0", HostAndPort("host2:27017"));
+ getTopoCoord().prepareHeartbeatRequestV1(firstRequestDate, "rs0", HostAndPort("host3:27017"));
+
+ // Set optimes so that host3 and I share the freshest optime, strictly ahead of primary host2.
+ getTopoCoord().getMyMemberData()->setLastAppliedOpTime(currentOptime, Date_t());
+ getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000),
+ Milliseconds(999),
+ HostAndPort("host3:27017"),
+ StatusWith<ReplSetHeartbeatResponse>(hbResp));
+ hbResp.setAppliedOpTime(behindOptime);
+ hbResp.setState(MemberState::RS_PRIMARY);
+ getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000),
+ Milliseconds(999),
+ HostAndPort("host2:27017"),
+ StatusWith<ReplSetHeartbeatResponse>(hbResp));
+ getTopoCoord().updateTerm(1, Date_t());
+
+ ASSERT_OK(getTopoCoord().becomeCandidateIfElectable(
+ Date_t(), TopologyCoordinator::StartElectionReason::kCatchupTakeover));
+}
+
+TEST_F(TopoCoordTest, StaleNodeDoesntDoCatchupTakeover) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version"
+ << 5
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "host1:27017")
+ << BSON("_id" << 2 << "host"
+ << "host2:27017")
+ << BSON("_id" << 3 << "host"
+ << "host3:27017"))
+ << "protocolVersion"
+ << 1
+ << "settings"
+
+ << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ OpTime currentOptime(Timestamp(200, 1), 0);
+ OpTime behindOptime(Timestamp(100, 1), 0);
+
+ // Create a mock heartbeat response; it is sent for host3 as-is and then modified for host2.
+ ReplSetHeartbeatResponse hbResp = ReplSetHeartbeatResponse();
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setAppliedOpTime(currentOptime);
+ hbResp.setTerm(1);
+
+ Date_t firstRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T13:00Z"));
+
+ getTopoCoord().prepareHeartbeatRequestV1(firstRequestDate, "rs0", HostAndPort("host2:27017"));
+ getTopoCoord().prepareHeartbeatRequestV1(firstRequestDate, "rs0", HostAndPort("host3:27017"));
+
+ // Set optimes so that host3 (a secondary) is ahead of me; primary host2 has my optime.
+ getTopoCoord().getMyMemberData()->setLastAppliedOpTime(behindOptime, Date_t());
+ getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000),
+ Milliseconds(999),
+ HostAndPort("host3:27017"),
+ StatusWith<ReplSetHeartbeatResponse>(hbResp));
+ hbResp.setAppliedOpTime(behindOptime);
+ hbResp.setState(MemberState::RS_PRIMARY);
+ getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000),
+ Milliseconds(999),
+ HostAndPort("host2:27017"),
+ StatusWith<ReplSetHeartbeatResponse>(hbResp));
+ getTopoCoord().updateTerm(1, Date_t());
+
+ Status result = getTopoCoord().becomeCandidateIfElectable(
+ Date_t(), TopologyCoordinator::StartElectionReason::kCatchupTakeover);
+ ASSERT_NOT_OK(result);
+ ASSERT_STRING_CONTAINS(result.reason(),
+ "member is either not the most up-to-date member or not ahead of the "
+ "primary, and therefore cannot call for catchup takeover");
+}
+
+TEST_F(TopoCoordTest, NodeDoesntDoCatchupTakeoverHeartbeatSaysPrimaryCaughtUp) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version"
+ << 5
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "host1:27017")
+ << BSON("_id" << 2 << "host"
+ << "host2:27017")
+ << BSON("_id" << 3 << "host"
+ << "host3:27017"))
+ << "protocolVersion"
+ << 1
+ << "settings"
+
+ << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ OpTime currentOptime(Timestamp(200, 1), 0);
+
+ // Create a mock heartbeat response; it is sent for host3 as-is and as PRIMARY for host2.
+ ReplSetHeartbeatResponse hbResp = ReplSetHeartbeatResponse();
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setAppliedOpTime(currentOptime);
+ hbResp.setTerm(1);
+
+ Date_t firstRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T13:00Z"));
+
+ getTopoCoord().prepareHeartbeatRequestV1(firstRequestDate, "rs0", HostAndPort("host2:27017"));
+ getTopoCoord().prepareHeartbeatRequestV1(firstRequestDate, "rs0", HostAndPort("host3:27017"));
+
+ // Set optimes so that every node, including the primary (host2), has the same optime as me.
+ getTopoCoord().getMyMemberData()->setLastAppliedOpTime(currentOptime, Date_t());
+ getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000),
+ Milliseconds(999),
+ HostAndPort("host3:27017"),
+ StatusWith<ReplSetHeartbeatResponse>(hbResp));
+ hbResp.setState(MemberState::RS_PRIMARY);
+ getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000),
+ Milliseconds(999),
+ HostAndPort("host2:27017"),
+ StatusWith<ReplSetHeartbeatResponse>(hbResp));
+ getTopoCoord().updateTerm(1, Date_t());
+
+ Status result = getTopoCoord().becomeCandidateIfElectable(
+ Date_t(), TopologyCoordinator::StartElectionReason::kCatchupTakeover);
+ ASSERT_NOT_OK(result);
+ ASSERT_STRING_CONTAINS(result.reason(),
+ "member is either not the most up-to-date member or not ahead of the "
+ "primary, and therefore cannot call for catchup takeover");
+}
+
+TEST_F(TopoCoordTest, NodeDoesntDoCatchupTakeoverIfTermNumbersSayPrimaryCaughtUp) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version"
+ << 5
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "host1:27017")
+ << BSON("_id" << 2 << "host"
+ << "host2:27017")
+ << BSON("_id" << 3 << "host"
+ << "host3:27017"))
+ << "protocolVersion"
+ << 1
+ << "settings"
+
+ << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ OpTime currentOptime(Timestamp(200, 1), 1);
+ OpTime behindOptime(Timestamp(100, 1), 0);
+
+ // Create a mock heartbeat response; it is sent for host3 as-is and then modified for host2.
+ ReplSetHeartbeatResponse hbResp = ReplSetHeartbeatResponse();
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setAppliedOpTime(currentOptime);
+ hbResp.setTerm(1);
+
+ Date_t firstRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T13:00Z"));
+
+ getTopoCoord().prepareHeartbeatRequestV1(firstRequestDate, "rs0", HostAndPort("host2:27017"));
+ getTopoCoord().prepareHeartbeatRequestV1(firstRequestDate, "rs0", HostAndPort("host3:27017"));
+
+ // Simulates a scenario where the last heartbeat from the primary (host2) shows it behind us,
+ // but our last applied op is already in the current term (1), so the primary must have
+ // written something since its election and is therefore considered caught up.
+ getTopoCoord().getMyMemberData()->setLastAppliedOpTime(currentOptime, Date_t());
+ getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000),
+ Milliseconds(999),
+ HostAndPort("host3:27017"),
+ StatusWith<ReplSetHeartbeatResponse>(hbResp));
+ hbResp.setAppliedOpTime(behindOptime);
+ hbResp.setState(MemberState::RS_PRIMARY);
+ getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000),
+ Milliseconds(999),
+ HostAndPort("host2:27017"),
+ StatusWith<ReplSetHeartbeatResponse>(hbResp));
+ getTopoCoord().updateTerm(1, Date_t());
+
+ Status result = getTopoCoord().becomeCandidateIfElectable(
+ Date_t(), TopologyCoordinator::StartElectionReason::kCatchupTakeover);
+ ASSERT_NOT_OK(result);
+ ASSERT_STRING_CONTAINS(result.reason(),
+ "member is either not the most up-to-date member or not ahead of the "
+ "primary, and therefore cannot call for catchup takeover");
+}
+
TEST_F(HeartbeatResponseTestV1,
ScheduleACatchupTakeoverWhenElectableAndReceiveHeartbeatFromPrimaryInCatchup) {
updateConfig(BSON("_id"
@@ -3973,7 +4199,8 @@ TEST_F(HeartbeatResponseTestV1,
ASSERT_NO_ACTION(nextAction.getAction());
ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
// We are electable now.
- ASSERT_OK(getTopoCoord().becomeCandidateIfElectable(now(), false));
+ ASSERT_OK(getTopoCoord().becomeCandidateIfElectable(
+ now(), TopologyCoordinator::StartElectionReason::kElectionTimeout));
ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
}
@@ -3998,7 +4225,8 @@ TEST_F(HeartbeatResponseTestV1, ScheduleElectionWhenPrimaryIsMarkedDownAndWeAreE
ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
// We are electable now.
- ASSERT_OK(getTopoCoord().becomeCandidateIfElectable(now(), false));
+ ASSERT_OK(getTopoCoord().becomeCandidateIfElectable(
+ now(), TopologyCoordinator::StartElectionReason::kElectionTimeout));
ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
}
diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js
index d0ceb2520d1..3666f6e1d21 100644
--- a/src/mongo/shell/replsettest.js
+++ b/src/mongo/shell/replsettest.js
@@ -602,8 +602,9 @@ var ReplSetTest = function(opts) {
};
/**
- * Blocking call, which will wait for a primary to be elected for some pre-defined timeout and
- * if primary is available will return a connection to it. Otherwise throws an exception.
+ * Blocking call, which will wait for a primary to be elected and become master for some
+ * pre-defined timeout. If a primary is available it will return a connection to it.
+ * Otherwise throws an exception.
*/
this.getPrimary = function(timeout) {
timeout = timeout || self.kDefaultTimeoutMS;