From 1d8aaed7f9286ce90317d03fc1815bb58b43e31d Mon Sep 17 00:00:00 2001 From: Kshitij Gupta Date: Mon, 17 Jan 2022 19:35:13 +0000 Subject: SERVER-59384: Should provide ability to perform periodic DNS health checks --- src/mongo/db/commands/server_status_servers.cpp | 8 +- src/mongo/db/process_health/SConscript | 2 + .../db/process_health/dns_health_observer.cpp | 123 +++++++++++++++++++++ src/mongo/db/process_health/dns_health_observer.h | 65 +++++++++++ .../db/process_health/fault_manager_config.cpp | 3 + src/mongo/db/process_health/fault_manager_config.h | 5 + .../db/process_health/fault_manager_test_suite.h | 3 + .../db/process_health/fault_state_machine_test.cpp | 47 ++++++++ 8 files changed, 253 insertions(+), 3 deletions(-) create mode 100644 src/mongo/db/process_health/dns_health_observer.cpp create mode 100644 src/mongo/db/process_health/dns_health_observer.h (limited to 'src/mongo/db') diff --git a/src/mongo/db/commands/server_status_servers.cpp b/src/mongo/db/commands/server_status_servers.cpp index 82ea2b95f12..34814f3a3df 100644 --- a/src/mongo/db/commands/server_status_servers.cpp +++ b/src/mongo/db/commands/server_status_servers.cpp @@ -178,9 +178,11 @@ public: void appendSection(OperationContext* opCtx, const BSONElement& configElement, BSONObjBuilder* out) const override { - out->append( - "advisoryHostFQDNs", - getHostFQDNs(getHostNameCached(), HostnameCanonicalizationMode::kForwardAndReverse)); + auto statusWith = + getHostFQDNs(getHostNameCached(), HostnameCanonicalizationMode::kForwardAndReverse); + if (statusWith.isOK()) { + out->append("advisoryHostFQDNs", statusWith.getValue()); + } } } advisoryHostFQDNs; } // namespace diff --git a/src/mongo/db/process_health/SConscript b/src/mongo/db/process_health/SConscript index 9cd8847e168..4fc5b9e855b 100644 --- a/src/mongo/db/process_health/SConscript +++ b/src/mongo/db/process_health/SConscript @@ -7,6 +7,7 @@ env = env.Clone() env.Library( target='fault_manager', source=[ + 'dns_health_observer.cpp', 'fault.cpp', 'fault_facet_impl.cpp', 'fault_manager.cpp', @@ -27,6 +28,7 @@ env.Library( '$BUILD_DIR/mongo/executor/network_interface_factory', '$BUILD_DIR/mongo/executor/network_interface_thread_pool', '$BUILD_DIR/mongo/executor/thread_pool_task_executor', + '$BUILD_DIR/mongo/s/grid', ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/process_health_feature_flag', diff --git a/src/mongo/db/process_health/dns_health_observer.cpp b/src/mongo/db/process_health/dns_health_observer.cpp new file mode 100644 index 00000000000..ef414513aff --- /dev/null +++ b/src/mongo/db/process_health/dns_health_observer.cpp @@ -0,0 +1,123 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kProcessHealth + +#include "mongo/db/process_health/dns_health_observer.h" + +#include "mongo/db/process_health/health_observer_registration.h" +#include "mongo/logv2/log.h" +#include "mongo/s/grid.h" +#include "mongo/util/dns_name.h" +#include "mongo/util/net/hostandport.h" +#include "mongo/util/net/hostname_canonicalization.h" +#include +#include + +namespace mongo { +namespace process_health { + +MONGO_FAIL_POINT_DEFINE(dnsHealthObserverFp); + +Future DnsHealthObserver::periodicCheckImpl( + PeriodicHealthCheckContext&& periodicCheckContext) noexcept { + LOGV2_DEBUG(5938401, 2, "DNS health observer executing"); + + auto makeFailedHealthCheckFuture = [this](const Status& status) { + return Future::makeReady( + makeSimpleFailedStatus(Severity::kFailure, {status})); + }; + + ConnectionString connString; + auto isFailPointActive = false; + if (MONGO_unlikely(dnsHealthObserverFp.shouldFail())) { + isFailPointActive = true; + dnsHealthObserverFp.executeIf( + [this, &connString](const BSONObj& data) { + auto fpHostname = data["hostname"].String(); + connString = ConnectionString::forReplicaSet("serverWithBadHostName", + {HostAndPort(fpHostname, 27017)}); + }, + [&](const BSONObj& data) { return !data.isEmpty(); }); + } + + if (!isFailPointActive) { + auto client = _svcCtx->makeClient("DNSHealthObserver"); + auto opCtx = client->makeOperationContext(); + auto const shardRegistry = Grid::get(_svcCtx)->shardRegistry(); + auto shardIds = shardRegistry->getAllShardIds(opCtx.get()); + + if (shardIds.size() == 0) { + connString = shardRegistry->getConfigServerConnectionString(); + } else { + auto shardSW = + shardRegistry->getShard(opCtx.get(), shardIds.at(rand() % shardIds.size())); + auto shardSWStatus = shardSW.getStatus(); + if (shardSWStatus.isOK()) { + connString = shardSW.getValue()->getConnString(); + } else { + return makeFailedHealthCheckFuture(shardSWStatus); + } + } + } + + auto servers = connString.getServers(); + if (servers.empty()) { + return makeFailedHealthCheckFuture( + Status(ErrorCodes::NetworkTimeout, "No hostnames for DNS health check")); + } + + std::shuffle(servers.begin(), servers.end(), _random.urbg()); + + auto completionPf = makePromiseFuture(); + + auto status = periodicCheckContext.taskExecutor->scheduleWork( + [this, servers, promise = std::move(completionPf.promise)]( + const executor::TaskExecutor::CallbackArgs& cbArgs) mutable { + auto statusWith = + getHostFQDNs(servers.front().host(), HostnameCanonicalizationMode::kForward); + if (statusWith.isOK() && !statusWith.getValue().empty()) { + promise.emplaceValue(makeHealthyStatus()); + } else { + promise.emplaceValue( + makeSimpleFailedStatus(Severity::kFailure, {statusWith.getStatus()})); + } + }); + + return std::move(completionPf.future); +} + +namespace { +MONGO_INITIALIZER(DnsHealthObserver)(InitializerContext*) { + HealthObserverRegistration::registerObserverFactory( + [](ServiceContext* svcCtx) { return std::make_unique(svcCtx); }); +} +} // namespace + +} // namespace process_health +} // namespace mongo diff --git a/src/mongo/db/process_health/dns_health_observer.h b/src/mongo/db/process_health/dns_health_observer.h new file mode 100644 index 00000000000..2640c9024f7 --- /dev/null +++ b/src/mongo/db/process_health/dns_health_observer.h @@ -0,0 +1,65 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ +#pragma once + +#include "mongo/db/process_health/health_observer_base.h" +#include "mongo/platform/random.h" + +namespace mongo { +namespace process_health { +class DnsHealthObserver final : public HealthObserverBase { +public: + DnsHealthObserver(ServiceContext* svcCtx) + : HealthObserverBase(svcCtx), _random(PseudoRandom(SecureRandom().nextInt64())){}; + +protected: + FaultFacetType getType() const override { + return FaultFacetType::kDns; + } + + Milliseconds healthCheckJitter() const override { + return Milliseconds(5); + } + + Milliseconds getObserverTimeout() const override { + return Milliseconds(Seconds(10)); + } + + bool isConfigured() const override { + return true; + } + + Future periodicCheckImpl( + PeriodicHealthCheckContext&& periodicCheckContext) noexcept override; + +private: + mutable PseudoRandom _random; +}; +} // namespace process_health +} // namespace mongo diff --git a/src/mongo/db/process_health/fault_manager_config.cpp b/src/mongo/db/process_health/fault_manager_config.cpp index 4b3b3695459..b5e3cc57d46 100644 --- a/src/mongo/db/process_health/fault_manager_config.cpp +++ b/src/mongo/db/process_health/fault_manager_config.cpp @@ -40,6 +40,7 @@ namespace { constexpr auto inline kDefaultObserverInterval = Milliseconds{10000}; constexpr auto inline kDefaultLdapObserverInterval = Milliseconds{30000}; constexpr auto inline kDefaultConfigServerObserverInterval = Milliseconds{30000}; +constexpr auto inline kDefaultDNSObserverInterval = Milliseconds{30000}; constexpr auto inline kDefaultTestObserverInterval = Milliseconds{1000}; } // namespace @@ -49,6 +50,8 @@ Milliseconds FaultManagerConfig::_getDefaultObserverInterval(FaultFacetType type return kDefaultLdapObserverInterval; case FaultFacetType::kConfigServer: return kDefaultConfigServerObserverInterval; + case FaultFacetType::kDns: + return kDefaultDNSObserverInterval; case FaultFacetType::kMock1: case FaultFacetType::kMock2: case FaultFacetType::kTestObserver: diff --git a/src/mongo/db/process_health/fault_manager_config.h b/src/mongo/db/process_health/fault_manager_config.h index ac4f4ba0a75..83d87b1d67d 100644 --- a/src/mongo/db/process_health/fault_manager_config.h +++ b/src/mongo/db/process_health/fault_manager_config.h @@ -110,6 +110,11 @@ public: auto getIntensity = [this, intensities](FaultFacetType type) { auto observerType = toObserverType(type); if (observerType) { + stdx::lock_guard lock(_mutex); + if (_facetToIntensityMapForTest.contains(type)) { + return _facetToIntensityMapForTest.at(type); + } + auto x = intensities->_data->getValues(); if (x) { for (auto setting : *x) { diff --git a/src/mongo/db/process_health/fault_manager_test_suite.h b/src/mongo/db/process_health/fault_manager_test_suite.h index 38235658923..b9a5ab8aeea 100644 --- a/src/mongo/db/process_health/fault_manager_test_suite.h +++ b/src/mongo/db/process_health/fault_manager_test_suite.h @@ -32,6 +32,7 @@ #include "mongo/db/process_health/fault_manager.h" +#include "mongo/db/concurrency/locker_noop_client_observer.h" #include "mongo/db/process_health/health_observer_mock.h" #include "mongo/db/process_health/health_observer_registration.h" #include "mongo/executor/network_interface_factory.h" @@ -136,6 +137,8 @@ public: _svcCtx->setFastClockSource(std::make_unique()); _svcCtx->setPreciseClockSource(std::make_unique()); _svcCtx->setTickSource(std::make_unique>()); + _svcCtx->registerClientObserver( + std::make_unique()); advanceTime(Seconds(100)); } } diff --git a/src/mongo/db/process_health/fault_state_machine_test.cpp b/src/mongo/db/process_health/fault_state_machine_test.cpp index 111d62cb5d6..0a08b20cb8d 100644 --- a/src/mongo/db/process_health/fault_state_machine_test.cpp +++ b/src/mongo/db/process_health/fault_state_machine_test.cpp @@ -29,6 +29,7 @@ #include "mongo/db/process_health/fault_manager.h" +#include "mongo/db/process_health/dns_health_observer.h" #include "mongo/db/process_health/fault_manager_test_suite.h" #include "mongo/db/process_health/health_check_status.h" #include "mongo/executor/thread_pool_task_executor_test_fixture.h" @@ -359,6 +360,52 @@ TEST_F(FaultManagerTest, HealthCheckWithOffFacetCreatesNoFaultInOk) { ASSERT_EQ(manager().getFaultState(), FaultState::kOk); } +TEST_F(FaultManagerTest, DNSHealthCheckWithBadHostNameFailsAndGoodHostNameSuccess) { + RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; + const auto faultFacetType = FaultFacetType::kDns; + auto config = std::make_unique(); + config->setIntensityForType(faultFacetType, HealthObserverIntensityEnum::kCritical); + resetManager(std::move(config)); + + auto serverParam = + ServerParameterSet::getNodeParameterSet()->get( + "healthMonitoringIntervals"); + auto bsonOBj = BSON("values" << BSON_ARRAY(BSON("type" + << "dns" + << "interval" << 1000))); + const BSONObj newParameterObj = BSON("key" << bsonOBj); + auto element = newParameterObj.getField("key"); + uassertStatusOK(serverParam->set(element)); + + registerHealthObserver(); + globalFailPointRegistry() + .find("dnsHealthObserverFp") + ->setMode(FailPoint::alwaysOn, + 0, + BSON("hostname" + << "yahoo.com")); + + auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); + assertSoon([this]() { return manager().getFaultState() == FaultState::kOk; }); + + globalFailPointRegistry() + .find("dnsHealthObserverFp") + ->setMode(FailPoint::alwaysOn, + 0, + BSON("hostname" + << "badhostname.invalid")); + sleepFor(Seconds(1)); + assertSoon([this]() { return manager().getFaultState() == FaultState::kTransientFault; }); + + globalFailPointRegistry() + .find("dnsHealthObserverFp") + ->setMode(FailPoint::alwaysOn, + 0, + BSON("hostname" + << "yahoo.com")); + assertSoon([this]() { return manager().getFaultState() == FaultState::kOk; }); +} + } // namespace } // namespace process_health } // namespace mongo -- cgit v1.2.1