summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavi Vetriselvan <pavithra.vetriselvan@mongodb.com>2020-07-13 10:26:06 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-08-20 12:17:46 +0000
commit28d7f0bb30fd99a9be0a317e06f1099433ffe39c (patch)
tree96d99f1ff3b26ce1859d720d7b030d15f41a34cd
parente5f8be9675d78c8a82e0c3e60e51b11d3ec31ad0 (diff)
downloadmongo-28d7f0bb30fd99a9be0a317e06f1099433ffe39c.tar.gz
SERVER-48776 remove config term/version check in quorum checker
(cherry picked from commit 1c3532ee1941e37f934f6c14bdc7786619d6b258)
-rw-r--r--src/mongo/db/repl/check_quorum_for_config_change.cpp19
-rw-r--r--src/mongo/db/repl/check_quorum_for_config_change_test.cpp126
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp12
3 files changed, 13 insertions, 144 deletions
diff --git a/src/mongo/db/repl/check_quorum_for_config_change.cpp b/src/mongo/db/repl/check_quorum_for_config_change.cpp
index 40ae42d86f4..327e44e75cc 100644
--- a/src/mongo/db/repl/check_quorum_for_config_change.cpp
+++ b/src/mongo/db/repl/check_quorum_for_config_change.cpp
@@ -225,25 +225,6 @@ void QuorumChecker::_tabulateHeartbeatResponse(const RemoteCommandRequest& reque
return;
}
- if (!hbResp.getReplicaSetName().empty()) {
- if (hbResp.getConfigVersionAndTerm() >= _rsConfig->getConfigVersionAndTerm()) {
- static constexpr char message[] =
- "Our config (term, version) is no larger than that of the request target";
- _vetoStatus = {ErrorCodes::NewReplicaSetConfigurationIncompatible,
- str::stream() << message << ", rsConfig: "
- << _rsConfig->getConfigVersionAndTerm().toString()
- << ", requestTarget: " << request.target.toString()
- << ", requestTargetConfig: "
- << hbResp.getConfigVersionAndTerm().toString()};
- LOGV2_WARNING(23725,
- message,
- "rsConfig"_attr = _rsConfig->getConfigVersionAndTerm(),
- "requestTarget"_attr = request.target.toString(),
- "requestTargetConfig"_attr = hbResp.getConfigVersionAndTerm());
- return;
- }
- }
-
if (_rsConfig->hasReplicaSetId()) {
StatusWith<rpc::ReplSetMetadata> replMetadata =
rpc::ReplSetMetadata::readFromMetadata(response.data);
diff --git a/src/mongo/db/repl/check_quorum_for_config_change_test.cpp b/src/mongo/db/repl/check_quorum_for_config_change_test.cpp
index 4d8032a22ad..05c0679edae 100644
--- a/src/mongo/db/repl/check_quorum_for_config_change_test.cpp
+++ b/src/mongo/db/repl/check_quorum_for_config_change_test.cpp
@@ -482,124 +482,12 @@ TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToSetIdMismatch) {
ASSERT_NOT_REASON_CONTAINS(status, "h5:1");
}
-TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToInitializedNode) {
- // In this test, "we" are host "h3:1". All nodes respond
- // successfully to their heartbeat requests, but quorum check fails because
- // "h5" declares that it is already initialized.
-
- const ReplSetConfig rsConfig =
- assertMakeRSConfig(BSON("_id"
- << "rs0"
- << "version" << 1 << "protocolVersion" << 1 << "members"
- << BSON_ARRAY(BSON("_id" << 1 << "host"
- << "h1:1")
- << BSON("_id" << 2 << "host"
- << "h2:1")
- << BSON("_id" << 3 << "host"
- << "h3:1")
- << BSON("_id" << 4 << "host"
- << "h4:1")
- << BSON("_id" << 5 << "host"
- << "h5:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = getNet()->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- stdx::unordered_set<HostAndPort> seenHosts;
- getNet()->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_BSONOBJ_EQ(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second)
- << "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h5", 1)) {
- long long configVersion = 1;
- getNet()->scheduleResponse(
- noi,
- startDate + Milliseconds(10),
- makeHeartbeatResponse(rsConfig, Milliseconds(8), configVersion));
- } else {
- getNet()->scheduleResponse(noi,
- startDate + Milliseconds(10),
- makeHeartbeatResponse(rsConfig, Milliseconds(8)));
- }
- }
- getNet()->runUntil(startDate + Milliseconds(10));
- getNet()->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
- ASSERT_STRING_SEARCH_REGEX(status.reason(), "Our config .* is no larger");
- ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
- ASSERT_REASON_CONTAINS(status, "h5:1");
-}
-
-TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToInitializedNodeOnlyOneRespondent) {
- // In this test, "we" are host "h3:1". Only node "h5" responds before the test completes,
- // and quorum check fails because "h5" declares that it is already initialized.
- //
- // Compare to QuorumCheckFailedDueToInitializedNode, above.
-
- const ReplSetConfig rsConfig =
- assertMakeRSConfig(BSON("_id"
- << "rs0"
- << "version" << 1 << "protocolVersion" << 1 << "members"
- << BSON_ARRAY(BSON("_id" << 1 << "host"
- << "h1:1")
- << BSON("_id" << 2 << "host"
- << "h2:1")
- << BSON("_id" << 3 << "host"
- << "h3:1")
- << BSON("_id" << 4 << "host"
- << "h4:1")
- << BSON("_id" << 5 << "host"
- << "h5:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = getNet()->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- stdx::unordered_set<HostAndPort> seenHosts;
- getNet()->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_BSONOBJ_EQ(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second)
- << "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h5", 1)) {
- long long configVersion = 1;
- getNet()->scheduleResponse(
- noi,
- startDate + Milliseconds(10),
- makeHeartbeatResponse(rsConfig, Milliseconds(8), configVersion));
- } else {
- getNet()->blackHole(noi);
- }
- }
- getNet()->runUntil(startDate + Milliseconds(10));
- getNet()->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
- ASSERT_STRING_SEARCH_REGEX(status.reason(), "Our config .* is no larger");
- ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
- ASSERT_REASON_CONTAINS(status, "h5:1");
-}
-
-TEST_F(CheckQuorumForReconfig, QuorumCheckVetoedDueToHigherConfigVersion) {
+TEST_F(CheckQuorumForReconfig, QuorumCheckSucceedsWhenOtherNodesHaveHigherVersion) {
// In this test, "we" are host "h3:1". The request to "h2" does not arrive before the end
// of the test, and the request to "h1" comes back indicating a higher config version.
+ //
+ // Since The quorum checker does not compare config versions or terms of remotes nodes,
+ // this test should always succeed.
const ReplSetConfig rsConfig =
assertMakeRSConfig(BSON("_id"
@@ -639,11 +527,7 @@ TEST_F(CheckQuorumForReconfig, QuorumCheckVetoedDueToHigherConfigVersion) {
getNet()->runUntil(startDate + Milliseconds(10));
getNet()->exitNetwork();
Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
- ASSERT_STRING_SEARCH_REGEX(status.reason(), "Our config .* is no larger");
- ASSERT_REASON_CONTAINS(status, "h1:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+ ASSERT_OK(status);
}
TEST_F(CheckQuorumForReconfig, QuorumCheckVetoedDueToIncompatibleSetName) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
index 6f0a0efacc7..45037075cd4 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
@@ -329,7 +329,7 @@ void doReplSetReconfig(ReplicationCoordinatorImpl* replCoord,
*status = replCoord->processReplSetReconfig(opCtx, args, &garbage);
}
-TEST_F(ReplCoordTest, QuorumCheckFailsWhenOtherNodesHaveHigherTerm) {
+TEST_F(ReplCoordTest, QuorumCheckSucceedsWhenOtherNodesHaveHigherTerm) {
// Initiate a config without a term to avoid stepup auto-reconfig.
auto configObj =
configWithMembers(2, -1, BSON_ARRAY(member(1, "node1:12345") << member(2, "node2:12345")));
@@ -356,7 +356,9 @@ TEST_F(ReplCoordTest, QuorumCheckFailsWhenOtherNodesHaveHigherTerm) {
repl::ReplSetHeartbeatResponse hbResp;
hbResp.setSetName("mySet");
hbResp.setState(MemberState::RS_SECONDARY);
- // The config version from the remote is lower, but its term is higher.
+ // The config version from the remote is lower, but its term is higher. The quorum checker
+ // does not compare config versions or terms of remotes nodes, so this should always
+ // succeed.
hbResp.setConfigVersion(2);
hbResp.setConfigTerm(getReplCoord()->getTerm() + 1);
hbResp.setAppliedOpTimeAndWallTime({opTime, net->now()});
@@ -365,7 +367,7 @@ TEST_F(ReplCoordTest, QuorumCheckFailsWhenOtherNodesHaveHigherTerm) {
net->runReadyNetworkOperations();
net->exitNetwork();
reconfigThread.join();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
+ ASSERT_OK(status);
}
TEST_F(ReplCoordTest, QuorumCheckSucceedsWhenOtherNodesHaveLowerTerm) {
@@ -395,7 +397,9 @@ TEST_F(ReplCoordTest, QuorumCheckSucceedsWhenOtherNodesHaveLowerTerm) {
repl::ReplSetHeartbeatResponse hbResp;
hbResp.setSetName("mySet");
hbResp.setState(MemberState::RS_SECONDARY);
- // The config version from the remote is higher, but its term is lower.
+ // The config version from the remote is higher, but its term is lower. The quorum checker
+ // does not compare config versions or terms of remotes nodes, so this should always
+ // succeed.
hbResp.setConfigVersion(4);
hbResp.setConfigTerm(getReplCoord()->getTerm() - 1);
hbResp.setAppliedOpTimeAndWallTime({opTime, net->now()});