summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Russotto <matthew.russotto@mongodb.com>2022-03-10 13:27:10 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-03-15 15:49:05 +0000
commite43a65ada2aea66c9ab21134bbd72a71fa581758 (patch)
treef741bb1ca919884f12d896b33db6f89ba3a9f380
parent5bc5790be407657748307f20404af62de62cba31 (diff)
downloadmongo-e43a65ada2aea66c9ab21134bbd72a71fa581758.tar.gz
SERVER-63512 Use optimized (no isSelf calls) reconfiguration on heartbeat reconfig
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp18
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp109
2 files changed, 125 insertions, 2 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index dd7cb413540..e98b0462d07 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -722,8 +722,22 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigStore(
"config"_attr = configToApply);
}
- const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig(
- _externalState.get(), configToApply, getGlobalServiceContext());
+ const auto myIndex = [&]() -> StatusWith<int> {
+ // We always check the config when _selfIndex is not valid, in order to be able to
+ // recover from transient DNS errors.
+ if (_selfIndex >= 0 && sameConfigContents(_rsConfig, configToApply)) {
+ LOGV2_FOR_HEARTBEATS(
+ 6351200,
+ 2,
+ "New heartbeat config is only a version/term change, skipping validation checks",
+ "oldConfig"_attr = _rsConfig,
+ "newConfig"_attr = configToApply);
+ // If the configs are the same, so is our index.
+ return _selfIndex;
+ }
+ return validateConfigForHeartbeatReconfig(
+ _externalState.get(), configToApply, getGlobalServiceContext());
+ }();
if (myIndex.getStatus() == ErrorCodes::NodeNotFound) {
stdx::lock_guard<Latch> lk(_mutex);
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
index 36ce4bdaf08..6cb6e55609c 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
@@ -977,6 +977,114 @@ TEST_F(ReplCoordHBV1ReconfigTest,
ASSERT_EQ(myConfig.getConfigTerm(), UninitializedTerm);
}
+TEST_F(ReplCoordHBV1ReconfigTest, ConfigWithTermAndVersionChangeOnlyDoesntCallIsSelf) {
+ // Config with newer term and newer version.
+ ReplSetConfig rsConfig =
+ makeRSConfigWithVersionAndTerm(initConfigVersion + 1, initConfigTerm + 1);
+
+ // Receive a heartbeat request that tells us about a newer config.
+ receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1));
+
+ getNet()->enterNetwork();
+ ReplSetHeartbeatArgsV1 hbArgs;
+ auto noi = getNet()->getNextReadyRequest();
+ const RemoteCommandRequest& hbrequest = noi->getRequest();
+ ASSERT_EQUALS(HostAndPort("h1", 1), hbrequest.target);
+ ASSERT_OK(hbArgs.initialize(hbrequest.cmdObj));
+ ASSERT_EQUALS("mySet", hbArgs.getSetName());
+ ASSERT_EQUALS(initConfigVersion, hbArgs.getConfigVersion());
+ ASSERT_EQUALS(initConfigTerm, hbArgs.getConfigTerm());
+ ASSERT_EQUALS(OpTime::kInitialTerm, hbArgs.getTerm());
+
+ // Construct the heartbeat response containing the newer config.
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_PRIMARY);
+ hbResp.setConfigVersion(rsConfig.getConfigVersion());
+ hbResp.setConfigTerm(rsConfig.getConfigTerm());
+ // The smallest valid optime in PV1.
+ OpTime opTime(Timestamp(), 0);
+ hbResp.setAppliedOpTimeAndWallTime({opTime, Date_t()});
+ hbResp.setDurableOpTimeAndWallTime({opTime, Date_t()});
+ BSONObjBuilder responseBuilder;
+ responseBuilder << "ok" << 1;
+ hbResp.addToBSON(&responseBuilder);
+ // Add the raw config object.
+ responseBuilder << "config" << rsConfig.toBSON();
+ auto origResObj = responseBuilder.obj();
+
+ // Make isSelf not work.
+ getExternalState()->clearSelfHosts();
+
+ // Schedule and deliver the heartbeat response.
+ getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(origResObj));
+ getNet()->runReadyNetworkOperations();
+
+ ASSERT_EQ(getReplCoord()->getConfigTerm(), initConfigTerm + 1);
+ ASSERT_EQ(getReplCoord()->getConfigVersion(), initConfigVersion + 1);
+
+ // If we couldn't find ourselves in the config, we're REMOVED. That means we made an
+ // unnecessary isSelf call which failed because we cleared selfHosts.
+ ASSERT(getReplCoord()->getMemberState().secondary())
+ << "Member state is actually " << getReplCoord()->getMemberState();
+}
+
+TEST_F(ReplCoordHBV1ReconfigTest, ConfigWithSignificantChangeDoesCallIsSelf) {
+ // Config with members 1 (self) and 2 swapped.
+ ReplSetConfig rsConfig = makeRSConfigWithVersionAndTerm(initConfigVersion + 1, initConfigTerm);
+ auto mutableConfig = rsConfig.getMutable();
+ auto members = mutableConfig.getMembers();
+ std::swap(members[1], members[2]);
+ mutableConfig.setMembers(members);
+ rsConfig = ReplSetConfig(std::move(mutableConfig));
+
+ // Receive a heartbeat request that tells us about a newer config.
+ receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1));
+
+ getNet()->enterNetwork();
+ ReplSetHeartbeatArgsV1 hbArgs;
+ auto noi = getNet()->getNextReadyRequest();
+ const RemoteCommandRequest& hbrequest = noi->getRequest();
+ ASSERT_EQUALS(HostAndPort("h1", 1), hbrequest.target);
+ ASSERT_OK(hbArgs.initialize(hbrequest.cmdObj));
+ ASSERT_EQUALS("mySet", hbArgs.getSetName());
+ ASSERT_EQUALS(initConfigVersion, hbArgs.getConfigVersion());
+ ASSERT_EQUALS(initConfigTerm, hbArgs.getConfigTerm());
+ ASSERT_EQUALS(OpTime::kInitialTerm, hbArgs.getTerm());
+
+ // Construct the heartbeat response containing the newer config.
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_PRIMARY);
+ hbResp.setConfigVersion(rsConfig.getConfigVersion());
+ hbResp.setConfigTerm(rsConfig.getConfigTerm());
+ // The smallest valid optime in PV1.
+ OpTime opTime(Timestamp(), 0);
+ hbResp.setAppliedOpTimeAndWallTime({opTime, Date_t()});
+ hbResp.setDurableOpTimeAndWallTime({opTime, Date_t()});
+ BSONObjBuilder responseBuilder;
+ responseBuilder << "ok" << 1;
+ hbResp.addToBSON(&responseBuilder);
+ // Add the raw config object.
+ responseBuilder << "config" << rsConfig.toBSON();
+ auto origResObj = responseBuilder.obj();
+
+ // Schedule and deliver the heartbeat response.
+ getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(origResObj));
+ getNet()->runReadyNetworkOperations();
+
+ ASSERT_EQ(getReplCoord()->getConfigTerm(), initConfigTerm);
+ ASSERT_EQ(getReplCoord()->getConfigVersion(), initConfigVersion + 1);
+
+ // We should remain secondary.
+ ASSERT(getReplCoord()->getMemberState().secondary())
+ << "Member state is actually " << getReplCoord()->getMemberState();
+
+ // We should be the member in slot 2, not slot 1.
+ ASSERT_NE(rsConfig.getMemberAt(1).getId().getData(), getReplCoord()->getMyId());
+ ASSERT_EQ(rsConfig.getMemberAt(2).getId().getData(), getReplCoord()->getMyId());
+}
+
TEST_F(ReplCoordHBV1Test, RejectHeartbeatReconfigDuringElection) {
auto severityGuard = unittest::MinimumLoggedSeverityGuard{
logv2::LogComponent::kReplicationHeartbeats, logv2::LogSeverity::Debug(1)};
@@ -1793,6 +1901,7 @@ void HBStepdownAndReconfigTest::sendHBResponse(int targetIndex,
if (includeConfig) {
auto configDoc = MutableDocument(Document(_initialConfig));
configDoc["version"] = Value(configVersion);
+ configDoc["term"] = Value(configTerm);
auto newConfig = ReplSetConfig::parse(configDoc.freeze().toBson());
hbResp.setConfig(newConfig);
}