summaryrefslogtreecommitdiff
path: root/src/mongo/db/repl/replication_coordinator_impl_test.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/repl/replication_coordinator_impl_test.cpp')
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_test.cpp194
1 files changed, 163 insertions, 31 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index f730e4deccd..2c791030e76 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -1377,7 +1377,6 @@ private:
virtual void setUp() {
ReplCoordTest::setUp();
init("mySet/test1:1234,test2:1234,test3:1234");
-
assertStartSuccess(BSON("_id"
<< "mySet"
<< "version" << 1 << "members"
@@ -1393,6 +1392,7 @@ private:
}
};
+
TEST_F(ReplCoordTest, NodeReturnsBadValueWhenUpdateTermIsRunAgainstANonReplNode) {
init(ReplSettings());
ASSERT_TRUE(ReplicationCoordinator::modeNone == getReplCoord()->getReplicationMode());
@@ -1554,43 +1554,170 @@ TEST_F(StepDownTest,
ASSERT_TRUE(getReplCoord()->getMemberState().primary());
}
-TEST_F(StepDownTest,
- NodeTransitionsToSecondaryImmediatelyWhenStepDownIsRunAndAnUpToDateElectableNodeExists) {
+/* Step Down Test for a 5-node replica set */
+class StepDownTestFiveNode : public StepDownTest {
+protected:
+ /*
+ * Respond to every pending heartbeat request from the primary with a hand-built
+ * heartbeat response. The first 'numNodesCaughtUp' responses report 'optimePrimary'
+ * and have their electability set to 'caughtUpAreElectable'; every later response
+ * reports 'optimeLagged' and is always marked electable.
+ */
+ void simulateHeartbeatResponses(OpTime optimePrimary,
+ OpTime optimeLagged,
+ int numNodesCaughtUp,
+ bool caughtUpAreElectable) {
+ int hbNum = 1;
+ while (getNet()->hasReadyRequests()) {
+ NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
+ RemoteCommandRequest request = noi->getRequest();
+
+ // Only process heartbeat requests.
+ ASSERT_EQ(request.cmdObj.firstElement().fieldNameStringData().toString(),
+ "replSetHeartbeat");
+
+ ReplSetHeartbeatArgsV1 hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+
+ log() << request.target.toString() << " processing " << request.cmdObj;
+
+ // The first 'numNodesCaughtUp' responders are caught up; the rest report the lagged optime.
+ OpTime optimeResponse = (hbNum <= numNodesCaughtUp) ? optimePrimary : optimeLagged;
+ bool isElectable = (hbNum <= numNodesCaughtUp) ? caughtUpAreElectable : true;
+
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName(hbArgs.getSetName());
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setConfigVersion(hbArgs.getConfigVersion());
+ hbResp.setDurableOpTime(optimeResponse);
+ hbResp.setAppliedOpTime(optimeResponse);
+ hbResp.setElectable(isElectable);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj, false);
+ getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(respObj.obj()));
+ hbNum += 1;
+ }
+ }
+
+private:
+ virtual void setUp() {
+ ReplCoordTest::setUp();
+ init("mySet/test1:1234,test2:1234,test3:1234,test4:1234,test5:1234");
+
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234")
+ << BSON("_id" << 1 << "host"
+ << "test2:1234")
+ << BSON("_id" << 2 << "host"
+ << "test3:1234")
+ << BSON("_id" << 3 << "host"
+ << "test4:1234")
+ << BSON("_id" << 4 << "host"
+ << "test5:1234"))),
+ HostAndPort("test1", 1234));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ myRid = getReplCoord()->getMyRID();
+ }
+};
+
+TEST_F(
+ StepDownTestFiveNode,
+ NodeReturnsExceededTimeLimitWhenStepDownIsRunAndCaughtUpMajorityExistsButWithoutElectableNode) {
OperationContextReplMock txn;
- OpTimeWithTermZero optime1(100, 1);
+ OpTime optimeLagged(Timestamp(100, 1), 1);
+ OpTime optimePrimary(Timestamp(100, 2), 1);
+
// All nodes are caught up
- getReplCoord()->setMyLastAppliedOpTime(optime1);
- getReplCoord()->setMyLastDurableOpTime(optime1);
- ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime1));
- ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optime1));
+ getReplCoord()->setMyLastAppliedOpTime(optimePrimary);
+ getReplCoord()->setMyLastDurableOpTime(optimePrimary);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optimeLagged));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optimeLagged));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 3, optimeLagged));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 4, optimeLagged));
simulateSuccessfulV1Election();
enterNetwork();
getNet()->runUntil(getNet()->now() + Seconds(2));
ASSERT(getNet()->hasReadyRequests());
- NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
- RemoteCommandRequest request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgsV1 hbArgs;
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName(hbArgs.getSetName());
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setConfigVersion(hbArgs.getConfigVersion());
- hbResp.setDurableOpTime(optime1);
- hbResp.setAppliedOpTime(optime1);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj, false);
- getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(respObj.obj()));
- }
- while (getNet()->hasReadyRequests()) {
- getNet()->blackHole(getNet()->getNextReadyRequest());
- }
+
+ // Make sure a majority are caught up (i.e. 3 out of 5). We catch up two secondaries since
+ // the primary counts as one towards majority
+ int numNodesCaughtUp = 2;
+ simulateHeartbeatResponses(optimePrimary, optimeLagged, numNodesCaughtUp, false);
+ getNet()->runReadyNetworkOperations();
+ exitNetwork();
+
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+ auto status = getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(1000));
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status);
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+}
+
+TEST_F(StepDownTestFiveNode,
+ NodeReturnsExceededTimeLimitWhenStepDownIsRunAndNoCaughtUpMajorityExists) {
+ OperationContextReplMock txn;
+ OpTime optimeLagged(Timestamp(100, 1), 1);
+ OpTime optimePrimary(Timestamp(100, 2), 1);
+
+ // No secondary is caught up: every other node starts at the lagged optime
+ getReplCoord()->setMyLastAppliedOpTime(optimePrimary);
+ getReplCoord()->setMyLastDurableOpTime(optimePrimary);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optimeLagged));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optimeLagged));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 3, optimeLagged));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 4, optimeLagged));
+
+ simulateSuccessfulV1Election();
+
+ enterNetwork();
+ getNet()->runUntil(getNet()->now() + Seconds(2));
+ ASSERT(getNet()->hasReadyRequests());
+
+ // Make sure fewer than a majority are caught up (i.e. 2 out of 5). We catch up one secondary
+ // since the primary counts as one towards the majority.
+ int numNodesCaughtUp = 1;
+ simulateHeartbeatResponses(optimePrimary, optimeLagged, numNodesCaughtUp, true);
getNet()->runReadyNetworkOperations();
exitNetwork();
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+ auto status = getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(1000));
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status);
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+}
+
+TEST_F(
+ StepDownTestFiveNode,
+ NodeTransitionsToSecondaryImmediatelyWhenStepDownIsRunAndAnUpToDateMajorityWithElectableNodeExists) {
+ OperationContextReplMock txn;
+ OpTime optimeLagged(Timestamp(100, 1), 1);
+ OpTime optimePrimary(Timestamp(100, 2), 1);
+
+ // No secondary is caught up: every other node starts at the lagged optime
+ getReplCoord()->setMyLastAppliedOpTime(optimePrimary);
+ getReplCoord()->setMyLastDurableOpTime(optimePrimary);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optimeLagged));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optimeLagged));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 3, optimeLagged));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 4, optimeLagged));
+
+ simulateSuccessfulV1Election();
+
+ enterNetwork();
+ getNet()->runUntil(getNet()->now() + Seconds(2));
+ ASSERT(getNet()->hasReadyRequests());
+
+ // Make sure a majority are caught up (i.e. 3 out of 5). We catch up two secondaries since
+ // the primary counts as one towards majority
+ int numNodesCaughtUp = 2;
+ simulateHeartbeatResponses(optimePrimary, optimeLagged, numNodesCaughtUp, true);
+ getNet()->runReadyNetworkOperations();
+ exitNetwork();
ASSERT_TRUE(getReplCoord()->getMemberState().primary());
ASSERT_OK(getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(1000)));
@@ -1666,8 +1793,9 @@ TEST_F(StepDownTest,
TEST_F(StepDownTest,
NodeTransitionsToSecondaryWhenASecondaryCatchesUpAfterTheFirstRoundOfHeartbeats) {
OperationContextReplMock txn;
- OpTimeWithTermZero optime1(100, 1);
- OpTimeWithTermZero optime2(100, 2);
+ OpTime optime1(Timestamp(100, 1), 1);
+ OpTime optime2(Timestamp(100, 2), 1);
+
// No secondary is caught up
auto repl = getReplCoord();
repl->setMyLastAppliedOpTime(optime2);
@@ -1677,6 +1805,8 @@ TEST_F(StepDownTest,
simulateSuccessfulV1Election();
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+
// Step down where the secondary actually has to catch up before the stepDown can succeed.
// On entering the network, _stepDownContinue should cancel the heartbeats scheduled for
// T + 2 seconds and send out a new round of heartbeats immediately.
@@ -1713,6 +1843,7 @@ TEST_F(StepDownTest,
log() << "Blackholing network request " << noi->getRequest().cmdObj;
getNet()->blackHole(noi);
}
+
getNet()->runReadyNetworkOperations();
exitNetwork();
@@ -1724,8 +1855,9 @@ TEST_F(StepDownTest,
TEST_F(StepDownTest,
NodeTransitionsToSecondaryWhenASecondaryCatchesUpDuringStepDownsSecondaryCatchupPeriod) {
OperationContextReplMock txn;
- OpTimeWithTermZero optime1(100, 1);
- OpTimeWithTermZero optime2(100, 2);
+ OpTime optime1(Timestamp(100, 1), 1);
+ OpTime optime2(Timestamp(100, 2), 1);
+
// No secondary is caught up
auto repl = getReplCoord();
repl->setMyLastAppliedOpTime(optime2);