From e43a65ada2aea66c9ab21134bbd72a71fa581758 Mon Sep 17 00:00:00 2001 From: Matthew Russotto Date: Thu, 10 Mar 2022 13:27:10 -0500 Subject: SERVER-63512 Use optimized (no isSelf calls) reconfiguration on heartbeat reconfig --- .../replication_coordinator_impl_heartbeat.cpp | 18 +++- ...lication_coordinator_impl_heartbeat_v1_test.cpp | 109 +++++++++++++++++++++ 2 files changed, 125 insertions(+), 2 deletions(-) diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp index dd7cb413540..e98b0462d07 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp @@ -722,8 +722,22 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigStore( "config"_attr = configToApply); } - const StatusWith myIndex = validateConfigForHeartbeatReconfig( - _externalState.get(), configToApply, getGlobalServiceContext()); + const auto myIndex = [&]() -> StatusWith { + // We always check the config when _selfIndex is not valid, in order to be able to + // recover from transient DNS errors. + if (_selfIndex >= 0 && sameConfigContents(_rsConfig, configToApply)) { + LOGV2_FOR_HEARTBEATS( + 6351200, + 2, + "New heartbeat config is only a version/term change, skipping validation checks", + "oldConfig"_attr = _rsConfig, + "newConfig"_attr = configToApply); + // If the configs are the same, so is our index. + return _selfIndex; + } + return validateConfigForHeartbeatReconfig( + _externalState.get(), configToApply, getGlobalServiceContext()); + }(); if (myIndex.getStatus() == ErrorCodes::NodeNotFound) { stdx::lock_guard lk(_mutex); diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp index 36ce4bdaf08..6cb6e55609c 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp @@ -977,6 +977,114 @@ TEST_F(ReplCoordHBV1ReconfigTest, ASSERT_EQ(myConfig.getConfigTerm(), UninitializedTerm); } +TEST_F(ReplCoordHBV1ReconfigTest, ConfigWithTermAndVersionChangeOnlyDoesntCallIsSelf) { + // Config with newer term and newer version. + ReplSetConfig rsConfig = + makeRSConfigWithVersionAndTerm(initConfigVersion + 1, initConfigTerm + 1); + + // Receive a heartbeat request that tells us about a newer config. + receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1)); + + getNet()->enterNetwork(); + ReplSetHeartbeatArgsV1 hbArgs; + auto noi = getNet()->getNextReadyRequest(); + const RemoteCommandRequest& hbrequest = noi->getRequest(); + ASSERT_EQUALS(HostAndPort("h1", 1), hbrequest.target); + ASSERT_OK(hbArgs.initialize(hbrequest.cmdObj)); + ASSERT_EQUALS("mySet", hbArgs.getSetName()); + ASSERT_EQUALS(initConfigVersion, hbArgs.getConfigVersion()); + ASSERT_EQUALS(initConfigTerm, hbArgs.getConfigTerm()); + ASSERT_EQUALS(OpTime::kInitialTerm, hbArgs.getTerm()); + + // Construct the heartbeat response containing the newer config. + ReplSetHeartbeatResponse hbResp; + hbResp.setSetName("mySet"); + hbResp.setState(MemberState::RS_PRIMARY); + hbResp.setConfigVersion(rsConfig.getConfigVersion()); + hbResp.setConfigTerm(rsConfig.getConfigTerm()); + // The smallest valid optime in PV1. + OpTime opTime(Timestamp(), 0); + hbResp.setAppliedOpTimeAndWallTime({opTime, Date_t()}); + hbResp.setDurableOpTimeAndWallTime({opTime, Date_t()}); + BSONObjBuilder responseBuilder; + responseBuilder << "ok" << 1; + hbResp.addToBSON(&responseBuilder); + // Add the raw config object. + responseBuilder << "config" << rsConfig.toBSON(); + auto origResObj = responseBuilder.obj(); + + // Make isSelf not work. + getExternalState()->clearSelfHosts(); + + // Schedule and deliver the heartbeat response. + getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(origResObj)); + getNet()->runReadyNetworkOperations(); + + ASSERT_EQ(getReplCoord()->getConfigTerm(), initConfigTerm + 1); + ASSERT_EQ(getReplCoord()->getConfigVersion(), initConfigVersion + 1); + + // If we couldn't find ourselves in the config, we're REMOVED. That means we made an + // unnecessary isSelf call which failed because we cleared selfHosts. + ASSERT(getReplCoord()->getMemberState().secondary()) + << "Member state is actually " << getReplCoord()->getMemberState(); +} + +TEST_F(ReplCoordHBV1ReconfigTest, ConfigWithSignificantChangeDoesCallIsSelf) { + // Config with members 1 (self) and 2 swapped. + ReplSetConfig rsConfig = makeRSConfigWithVersionAndTerm(initConfigVersion + 1, initConfigTerm); + auto mutableConfig = rsConfig.getMutable(); + auto members = mutableConfig.getMembers(); + std::swap(members[1], members[2]); + mutableConfig.setMembers(members); + rsConfig = ReplSetConfig(std::move(mutableConfig)); + + // Receive a heartbeat request that tells us about a newer config. + receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1)); + + getNet()->enterNetwork(); + ReplSetHeartbeatArgsV1 hbArgs; + auto noi = getNet()->getNextReadyRequest(); + const RemoteCommandRequest& hbrequest = noi->getRequest(); + ASSERT_EQUALS(HostAndPort("h1", 1), hbrequest.target); + ASSERT_OK(hbArgs.initialize(hbrequest.cmdObj)); + ASSERT_EQUALS("mySet", hbArgs.getSetName()); + ASSERT_EQUALS(initConfigVersion, hbArgs.getConfigVersion()); + ASSERT_EQUALS(initConfigTerm, hbArgs.getConfigTerm()); + ASSERT_EQUALS(OpTime::kInitialTerm, hbArgs.getTerm()); + + // Construct the heartbeat response containing the newer config. + ReplSetHeartbeatResponse hbResp; + hbResp.setSetName("mySet"); + hbResp.setState(MemberState::RS_PRIMARY); + hbResp.setConfigVersion(rsConfig.getConfigVersion()); + hbResp.setConfigTerm(rsConfig.getConfigTerm()); + // The smallest valid optime in PV1. + OpTime opTime(Timestamp(), 0); + hbResp.setAppliedOpTimeAndWallTime({opTime, Date_t()}); + hbResp.setDurableOpTimeAndWallTime({opTime, Date_t()}); + BSONObjBuilder responseBuilder; + responseBuilder << "ok" << 1; + hbResp.addToBSON(&responseBuilder); + // Add the raw config object. + responseBuilder << "config" << rsConfig.toBSON(); + auto origResObj = responseBuilder.obj(); + + // Schedule and deliver the heartbeat response. + getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(origResObj)); + getNet()->runReadyNetworkOperations(); + + ASSERT_EQ(getReplCoord()->getConfigTerm(), initConfigTerm); + ASSERT_EQ(getReplCoord()->getConfigVersion(), initConfigVersion + 1); + + // We should remain secondary. + ASSERT(getReplCoord()->getMemberState().secondary()) + << "Member state is actually " << getReplCoord()->getMemberState(); + + // We should be the member in slot 2, not slot 1. + ASSERT_NE(rsConfig.getMemberAt(1).getId().getData(), getReplCoord()->getMyId()); + ASSERT_EQ(rsConfig.getMemberAt(2).getId().getData(), getReplCoord()->getMyId()); +} + TEST_F(ReplCoordHBV1Test, RejectHeartbeatReconfigDuringElection) { auto severityGuard = unittest::MinimumLoggedSeverityGuard{ logv2::LogComponent::kReplicationHeartbeats, logv2::LogSeverity::Debug(1)}; @@ -1793,6 +1901,7 @@ void HBStepdownAndReconfigTest::sendHBResponse(int targetIndex, if (includeConfig) { auto configDoc = MutableDocument(Document(_initialConfig)); configDoc["version"] = Value(configVersion); + configDoc["term"] = Value(configTerm); auto newConfig = ReplSetConfig::parse(configDoc.freeze().toBson()); hbResp.setConfig(newConfig); } -- cgit v1.2.1