diff options
Diffstat (limited to 'src/mongo/db/repl/replication_coordinator_impl_test.cpp')
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_test.cpp | 194 |
1 files changed, 163 insertions, 31 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index f730e4deccd..2c791030e76 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -1377,7 +1377,6 @@ private: virtual void setUp() { ReplCoordTest::setUp(); init("mySet/test1:1234,test2:1234,test3:1234"); - assertStartSuccess(BSON("_id" << "mySet" << "version" << 1 << "members" @@ -1393,6 +1392,7 @@ private: } }; + TEST_F(ReplCoordTest, NodeReturnsBadValueWhenUpdateTermIsRunAgainstANonReplNode) { init(ReplSettings()); ASSERT_TRUE(ReplicationCoordinator::modeNone == getReplCoord()->getReplicationMode()); @@ -1554,43 +1554,170 @@ TEST_F(StepDownTest, ASSERT_TRUE(getReplCoord()->getMemberState().primary()); } -TEST_F(StepDownTest, - NodeTransitionsToSecondaryImmediatelyWhenStepDownIsRunAndAnUpToDateElectableNodeExists) { +/* Step Down Test for a 5-node replica set */ +class StepDownTestFiveNode : public StepDownTest { +protected: + /* + * Simulate a round of heartbeat requests from the primary by manually setting + * the heartbeat response messages from each node. 'numNodesCaughtUp' will + * determine how many nodes return an optime that is up to date with the + * primary's optime. Sets electability of all caught up nodes to 'caughtUpAreElectable' + */ + void simulateHeartbeatResponses(OpTime optimePrimary, + OpTime optimeLagged, + int numNodesCaughtUp, + bool caughtUpAreElectable) { + int hbNum = 1; + while (getNet()->hasReadyRequests()) { + NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest(); + RemoteCommandRequest request = noi->getRequest(); + + // Only process heartbeat requests. + ASSERT_EQ(request.cmdObj.firstElement().fieldNameStringData().toString(), + "replSetHeartbeat"); + + ReplSetHeartbeatArgsV1 hbArgs; + ASSERT_OK(hbArgs.initialize(request.cmdObj)); + + log() << request.target.toString() << " processing " << request.cmdObj; + + // Catch up 'numNodesCaughtUp' nodes out of 5. + OpTime optimeResponse = (hbNum <= numNodesCaughtUp) ? optimePrimary : optimeLagged; + bool isElectable = (hbNum <= numNodesCaughtUp) ? caughtUpAreElectable : true; + + ReplSetHeartbeatResponse hbResp; + hbResp.setSetName(hbArgs.getSetName()); + hbResp.setState(MemberState::RS_SECONDARY); + hbResp.setConfigVersion(hbArgs.getConfigVersion()); + hbResp.setDurableOpTime(optimeResponse); + hbResp.setAppliedOpTime(optimeResponse); + hbResp.setElectable(isElectable); + BSONObjBuilder respObj; + respObj << "ok" << 1; + hbResp.addToBSON(&respObj, false); + getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(respObj.obj())); + hbNum += 1; + } + } + +private: + virtual void setUp() { + ReplCoordTest::setUp(); + init("mySet/test1:1234,test2:1234,test3:1234,test4:1234,test5:1234"); + + assertStartSuccess(BSON("_id" + << "mySet" + << "version" << 1 << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "test1:1234") + << BSON("_id" << 1 << "host" + << "test2:1234") + << BSON("_id" << 2 << "host" + << "test3:1234") + << BSON("_id" << 3 << "host" + << "test4:1234") + << BSON("_id" << 4 << "host" + << "test5:1234"))), + HostAndPort("test1", 1234)); + ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); + myRid = getReplCoord()->getMyRID(); + } +}; + +TEST_F( + StepDownTestFiveNode, + NodeReturnsExceededTimeLimitWhenStepDownIsRunAndCaughtUpMajorityExistsButWithoutElectableNode) { OperationContextReplMock txn; - OpTimeWithTermZero optime1(100, 1); + OpTime optimeLagged(Timestamp(100, 1), 1); + OpTime optimePrimary(Timestamp(100, 2), 1); + // All nodes are caught up - getReplCoord()->setMyLastAppliedOpTime(optime1); - getReplCoord()->setMyLastDurableOpTime(optime1); - ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime1)); - ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optime1)); + getReplCoord()->setMyLastAppliedOpTime(optimePrimary); + getReplCoord()->setMyLastDurableOpTime(optimePrimary); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optimeLagged)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optimeLagged)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 3, optimeLagged)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 4, optimeLagged)); simulateSuccessfulV1Election(); enterNetwork(); getNet()->runUntil(getNet()->now() + Seconds(2)); ASSERT(getNet()->hasReadyRequests()); - NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest(); - RemoteCommandRequest request = noi->getRequest(); - log() << request.target.toString() << " processing " << request.cmdObj; - ReplSetHeartbeatArgsV1 hbArgs; - if (hbArgs.initialize(request.cmdObj).isOK()) { - ReplSetHeartbeatResponse hbResp; - hbResp.setSetName(hbArgs.getSetName()); - hbResp.setState(MemberState::RS_SECONDARY); - hbResp.setConfigVersion(hbArgs.getConfigVersion()); - hbResp.setDurableOpTime(optime1); - hbResp.setAppliedOpTime(optime1); - BSONObjBuilder respObj; - respObj << "ok" << 1; - hbResp.addToBSON(&respObj, false); - getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(respObj.obj())); - } - while (getNet()->hasReadyRequests()) { - getNet()->blackHole(getNet()->getNextReadyRequest()); - } + + // Make sure a majority are caught up (i.e. 3 out of 5). We catch up two secondaries since + // the primary counts as one towards majority + int numNodesCaughtUp = 2; + simulateHeartbeatResponses(optimePrimary, optimeLagged, numNodesCaughtUp, false); + getNet()->runReadyNetworkOperations(); + exitNetwork(); + + ASSERT_TRUE(getReplCoord()->getMemberState().primary()); + auto status = getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(1000)); + ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status); + ASSERT_TRUE(getReplCoord()->getMemberState().primary()); +} + +TEST_F(StepDownTestFiveNode, + NodeReturnsExceededTimeLimitWhenStepDownIsRunAndNoCaughtUpMajorityExists) { + OperationContextReplMock txn; + OpTime optimeLagged(Timestamp(100, 1), 1); + OpTime optimePrimary(Timestamp(100, 2), 1); + + // All nodes are caught up + getReplCoord()->setMyLastAppliedOpTime(optimePrimary); + getReplCoord()->setMyLastDurableOpTime(optimePrimary); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optimeLagged)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optimeLagged)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 3, optimeLagged)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 4, optimeLagged)); + + simulateSuccessfulV1Election(); + + enterNetwork(); + getNet()->runUntil(getNet()->now() + Seconds(2)); + ASSERT(getNet()->hasReadyRequests()); + + // Make sure less than a majority are caught up (i.e. 2 out of 5) We catch up one secondary + // since the primary counts as one towards majority + int numNodesCaughtUp = 1; + simulateHeartbeatResponses(optimePrimary, optimeLagged, numNodesCaughtUp, true); getNet()->runReadyNetworkOperations(); exitNetwork(); + ASSERT_TRUE(getReplCoord()->getMemberState().primary()); + auto status = getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(1000)); + ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status); + ASSERT_TRUE(getReplCoord()->getMemberState().primary()); +} + +TEST_F( + StepDownTestFiveNode, + NodeTransitionsToSecondaryImmediatelyWhenStepDownIsRunAndAnUpToDateMajorityWithElectableNodeExists) { + OperationContextReplMock txn; + OpTime optimeLagged(Timestamp(100, 1), 1); + OpTime optimePrimary(Timestamp(100, 2), 1); + + // All nodes are caught up + getReplCoord()->setMyLastAppliedOpTime(optimePrimary); + getReplCoord()->setMyLastDurableOpTime(optimePrimary); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optimeLagged)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optimeLagged)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 3, optimeLagged)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 4, optimeLagged)); + + simulateSuccessfulV1Election(); + + enterNetwork(); + getNet()->runUntil(getNet()->now() + Seconds(2)); + ASSERT(getNet()->hasReadyRequests()); + + // Make sure a majority are caught up (i.e. 3 out of 5). We catch up two secondaries since + // the primary counts as one towards majority + int numNodesCaughtUp = 2; + simulateHeartbeatResponses(optimePrimary, optimeLagged, numNodesCaughtUp, true); + getNet()->runReadyNetworkOperations(); + exitNetwork(); ASSERT_TRUE(getReplCoord()->getMemberState().primary()); ASSERT_OK(getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(1000))); @@ -1666,8 +1793,9 @@ TEST_F(StepDownTest, TEST_F(StepDownTest, NodeTransitionsToSecondaryWhenASecondaryCatchesUpAfterTheFirstRoundOfHeartbeats) { OperationContextReplMock txn; - OpTimeWithTermZero optime1(100, 1); - OpTimeWithTermZero optime2(100, 2); + OpTime optime1(Timestamp(100, 1), 1); + OpTime optime2(Timestamp(100, 2), 1); + // No secondary is caught up auto repl = getReplCoord(); repl->setMyLastAppliedOpTime(optime2); @@ -1677,6 +1805,8 @@ TEST_F(StepDownTest, simulateSuccessfulV1Election(); + ASSERT_TRUE(getReplCoord()->getMemberState().primary()); + // Step down where the secondary actually has to catch up before the stepDown can succeed. // On entering the network, _stepDownContinue should cancel the heartbeats scheduled for // T + 2 seconds and send out a new round of heartbeats immediately. @@ -1713,6 +1843,7 @@ TEST_F(StepDownTest, log() << "Blackholing network request " << noi->getRequest().cmdObj; getNet()->blackHole(noi); } + getNet()->runReadyNetworkOperations(); exitNetwork(); @@ -1724,8 +1855,9 @@ TEST_F(StepDownTest, TEST_F(StepDownTest, NodeTransitionsToSecondaryWhenASecondaryCatchesUpDuringStepDownsSecondaryCatchupPeriod) { OperationContextReplMock txn; - OpTimeWithTermZero optime1(100, 1); - OpTimeWithTermZero optime2(100, 2); + OpTime optime1(Timestamp(100, 1), 1); + OpTime optime2(Timestamp(100, 2), 1); + // No secondary is caught up auto repl = getReplCoord(); repl->setMyLastAppliedOpTime(optime2); |