From efec3cc4b253d02fa9e11947ce92d53b727181b0 Mon Sep 17 00:00:00 2001 From: Vishnu Kaushik Date: Tue, 8 Jun 2021 16:14:08 +0000 Subject: SERVER-55465 Response from all nodes means sufficient responses have been received even if primary gave bad response in catchup takeover dry run --- src/mongo/db/repl/vote_requester.cpp | 10 ++++++++-- src/mongo/db/repl/vote_requester_test.cpp | 31 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/src/mongo/db/repl/vote_requester.cpp b/src/mongo/db/repl/vote_requester.cpp index 368ef902e14..6a29e4785c8 100644 --- a/src/mongo/db/repl/vote_requester.cpp +++ b/src/mongo/db/repl/vote_requester.cpp @@ -172,12 +172,18 @@ bool VoteRequester::Algorithm::hasReceivedSufficientResponses() const { return true; } + // We require the primary's response during catchup takeover. An error response from the primary + // is not a yes, no or pending, but is still a response. Therefore, we handle the case in which + // we have received some response (even if error) from all nodes first. + if (_responsesProcessed == static_cast(_targets.size())) { + return true; + } + if (_primaryHost && _primaryVote == PrimaryVote::Pending) { return false; } - return _staleTerm || _votes >= _rsConfig.getMajorityVoteCount() || - _responsesProcessed == static_cast(_targets.size()); + return _staleTerm || _votes >= _rsConfig.getMajorityVoteCount(); } VoteRequester::Result VoteRequester::Algorithm::getResult() const { diff --git a/src/mongo/db/repl/vote_requester_test.cpp b/src/mongo/db/repl/vote_requester_test.cpp index 463d532759d..48c401dd876 100644 --- a/src/mongo/db/repl/vote_requester_test.cpp +++ b/src/mongo/db/repl/vote_requester_test.cpp @@ -651,6 +651,37 @@ TEST_F(VoteRequesterCatchupTakeoverDryRunTest, CatchupTakeoverPrimarySaysNoLoseE stopCapturingLogMessages(); } +TEST_F(VoteRequesterCatchupTakeoverDryRunTest, + CatchupTakeoverAllNodesRespondedMeansSufficientResponses) { + startCapturingLogMessages(); + ASSERT_FALSE(hasReceivedSufficientResponses()); + + // Getting a good response from the other secondaries is insufficient. + processResponse(requestFrom("host2"), votedYes()); + processResponse(requestFrom("host3"), votedYes()); + processResponse(requestFrom("host4"), votedYes()); + ASSERT_FALSE(hasReceivedSufficientResponses()); + + // Getting a bad response from the primary means all targets have responded, and so we have + // received sufficient responses. + processResponse(requestFrom("host1"), badRemoteCommandResponse()); + ASSERT_EQUALS( + 1, + countBSONFormatLogLinesIsSubset(BSON("attr" << BSON("failReason" + << "failed to receive response" + << "from" + << "host1:27017")))); + ASSERT_TRUE(hasReceivedSufficientResponses()); + + // A bad response from the primary is equivalent to it saying NO. + ASSERT(VoteRequester::Result::kPrimaryRespondedNo == getResult()); + + // Only the secondaries are counted; the primary is excluded from the responders since it gave a + // bad response. + ASSERT_EQUALS(3, getNumResponders()); + stopCapturingLogMessages(); +} + } // namespace } // namespace repl } // namespace mongo -- cgit v1.2.1