diff options
author | Davis Haupt <davis.haupt@mongodb.com> | 2021-12-20 16:57:27 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-12-29 00:52:32 +0000 |
commit | 7fb0f31550e53174aac9f54d98d2bf900e3444db (patch) | |
tree | 2e124866715cd2e7401c40b7f6a369527c7c452b /src | |
parent | 9f3704ba0ea0e4572b615e98e7829bc7d6019538 (diff) | |
download | mongo-7fb0f31550e53174aac9f54d98d2bf900e3444db.tar.gz |
SERVER-60944 Simplify Fault class hierarchy and interface for updating fault facets
(cherry picked from commit 3b11d6fa60ff69f9ae52a90690cd05404625284e)
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/process_health/SConscript | 4 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault.cpp (renamed from src/mongo/db/process_health/fault_impl.cpp) | 57 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault.h | 78 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_facet_container.h | 97 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_facets_container.h | 90 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_impl.h | 84 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_manager.cpp | 66 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_manager.h | 22 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_manager_config.h | 2 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_manager_test_suite.h | 10 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_state_machine_test.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_test.cpp (renamed from src/mongo/db/process_health/fault_impl_test.cpp) | 22 | ||||
-rw-r--r-- | src/mongo/db/process_health/health_observer.h | 4 | ||||
-rw-r--r-- | src/mongo/db/process_health/health_observer_base.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/process_health/health_observer_base.h | 1 |
15 files changed, 152 insertions, 392 deletions
diff --git a/src/mongo/db/process_health/SConscript b/src/mongo/db/process_health/SConscript index a18e492245a..f30a0a018e8 100644 --- a/src/mongo/db/process_health/SConscript +++ b/src/mongo/db/process_health/SConscript @@ -7,8 +7,8 @@ env = env.Clone() env.Library( target='fault_manager', source=[ + 'fault.cpp', 'fault_facet_impl.cpp', - 'fault_impl.cpp', 'fault_manager.cpp', 'fault_manager_config.cpp', 'health_monitoring_server_parameters.idl', @@ -37,10 +37,10 @@ env.Library( env.CppUnitTest( target='fault_base_classes_test', source=[ - 'fault_impl_test.cpp', 'fault_facet_test.cpp', 'fault_manager_test.cpp', 'fault_state_machine_test.cpp', + 'fault_test.cpp', 'health_observer_test.cpp', 'state_machine_test.cpp', ], diff --git a/src/mongo/db/process_health/fault_impl.cpp b/src/mongo/db/process_health/fault.cpp index 0fe95df330b..6b47d92e525 100644 --- a/src/mongo/db/process_health/fault_impl.cpp +++ b/src/mongo/db/process_health/fault.cpp @@ -27,21 +27,21 @@ * it in the license file. */ -#include "mongo/db/process_health/fault_impl.h" +#include "mongo/db/process_health/fault.h" namespace mongo { namespace process_health { -FaultImpl::FaultImpl(ClockSource* clockSource) +Fault::Fault(ClockSource* clockSource) : _clockSource(clockSource), _startTime(_clockSource->now()) { invariant(clockSource); // Will crash before this line, just for readability. } -UUID FaultImpl::getId() const { +UUID Fault::getId() const { return _id; } -double FaultImpl::getSeverity() const { +double Fault::getSeverity() const { auto facets = getFacets(); // Simple algo to compute aggregate severity: take the max from all facets. @@ -56,17 +56,17 @@ double FaultImpl::getSeverity() const { return severity; } -Milliseconds FaultImpl::getDuration() const { +Milliseconds Fault::getDuration() const { return Milliseconds(_clockSource->now() - _startTime); } -std::vector<FaultFacetPtr> FaultImpl::getFacets() const { +std::vector<FaultFacetPtr> Fault::getFacets() const { auto lk = stdx::lock_guard(_mutex); std::vector<FaultFacetPtr> result(_facets.begin(), _facets.end()); return result; } -FaultFacetPtr FaultImpl::getFaultFacet(FaultFacetType type) { +FaultFacetPtr Fault::getFaultFacet(FaultFacetType type) { auto lk = stdx::lock_guard(_mutex); auto it = std::find_if(_facets.begin(), _facets.end(), [type](const FaultFacetPtr& facet) { return facet->getType() == type; @@ -77,34 +77,31 @@ FaultFacetPtr FaultImpl::getFaultFacet(FaultFacetType type) { return *it; } -void FaultImpl::updateWithSuppliedFacet(FaultFacetType type, FaultFacetPtr facet) { +void Fault::removeFacet(FaultFacetType type) { auto lk = stdx::lock_guard(_mutex); + _facets.erase( + std::remove_if(_facets.begin(), + _facets.end(), + [this, type](const FaultFacetPtr& f) { return f->getType() == type; }), + _facets.end()); +} - if (!facet) { - // Delete existing. - _facets.erase( - std::remove_if(_facets.begin(), - _facets.end(), - [this, type](const FaultFacetPtr& f) { return f->getType() == type; }), - _facets.end()); - return; - } - - invariant(type == facet->getType()); - // Update or insert. +void Fault::upsertFacet(FaultFacetPtr facet) { + invariant(facet); + auto type = facet->getType(); + auto lk = stdx::lock_guard(_mutex); for (auto& existing : _facets) { invariant(existing); if (existing->getType() == type) { - existing = facet; + existing->update(facet->getStatus()); return; } } - // We are here if existing was not found - insert new. _facets.push_back(std::move(facet)); } -void FaultImpl::garbageCollectResolvedFacets() { +void Fault::garbageCollectResolvedFacets() { auto lk = stdx::lock_guard(_mutex); _facets.erase(std::remove_if(_facets.begin(), _facets.end(), @@ -116,7 +113,7 @@ void FaultImpl::garbageCollectResolvedFacets() { _facets.end()); } -void FaultImpl::appendDescription(BSONObjBuilder* builder) const { +void Fault::appendDescription(BSONObjBuilder* builder) const { builder->append("id", getId().toBSON()); builder->append("severity", getSeverity()); builder->append("duration", getDuration().toBSON()); @@ -129,5 +126,17 @@ void FaultImpl::appendDescription(BSONObjBuilder* builder) const { builder->append("numFacets", static_cast<int>(_facets.size())); } +bool Fault::hasCriticalFacet(const FaultManagerConfig& config) const { + const auto& facets = this->getFacets(); + for (const auto& facet : facets) { + auto facetType = facet->getType(); + if (config.getHealthObserverIntensity(facetType) == + HealthObserverIntensityEnum::kCritical) { + return true; + } + } + return false; +} + } // namespace process_health } // namespace mongo diff --git a/src/mongo/db/process_health/fault.h b/src/mongo/db/process_health/fault.h index 383c4121e5e..a3fc1dbf577 100644 --- a/src/mongo/db/process_health/fault.h +++ b/src/mongo/db/process_health/fault.h @@ -28,29 +28,31 @@ */ #pragma once -#include <memory> - -#include "mongo/bson/bsonobjbuilder.h" -#include "mongo/db/process_health/fault_facets_container.h" +#include "mongo/db/process_health/fault_facet.h" +#include "mongo/db/service_context.h" +#include "mongo/util/clock_source.h" #include "mongo/util/duration.h" -#include "mongo/util/uuid.h" +#include "mongo/util/timer.h" namespace mongo { namespace process_health { /** - * Detailed description of the current fault. - * @see FaultManager for more details. + * Internal implementation of the Fault class. + * @see Fault */ class Fault : public std::enable_shared_from_this<Fault> { Fault(const Fault&) = delete; Fault& operator=(const Fault&) = delete; public: - Fault() = default; - virtual ~Fault() = default; + explicit Fault(ClockSource* clockSource); + + ~Fault() = default; - virtual UUID getId() const = 0; + // Fault interface. + + UUID getId() const; /** * The fault severity value is an aggregate severity calculated @@ -61,36 +63,70 @@ public: * (0, 1.0): Transient fault condition * [1.0, Inf): Active fault condition */ - virtual double getSeverity() const = 0; + double getSeverity() const; /** * @return The lifetime of this fault from the moment it was created. * Invariant: getDuration() >= getActiveFaultDuration() */ - virtual Milliseconds getDuration() const = 0; + Milliseconds getDuration() const; /** * Describes the current fault. */ - virtual void appendDescription(BSONObjBuilder* builder) const = 0; + void appendDescription(BSONObjBuilder* builder) const; BSONObj toBSON() const { BSONObjBuilder builder; appendDescription(&builder); return builder.obj(); } -}; -using FaultConstPtr = std::shared_ptr<const Fault>; + std::vector<FaultFacetPtr> getFacets() const; -/** - * Internal Fault interface that has accessors to manage Facets this Fault owns. - */ -class FaultInternal : public Fault, public FaultFacetsContainer { -public: - ~FaultInternal() override = default; + /** + * Checks that a Facet of a given type already exists and returns it. + * + * @returns existing facet or null. + */ + FaultFacetPtr getFaultFacet(FaultFacetType type); + + /** + * Update the fault with supplied facet. + * + * @param facet new value to insert/replace or nullptr to delete. + */ + void upsertFacet(FaultFacetPtr facet); + + + /** + * Delete a facet from this fault by its type. + * + * @param type type of facet to remove. + */ + void removeFacet(FaultFacetType type); + + /** + * Performs necessary actions to delete all resolved facets. + */ + void garbageCollectResolvedFacets(); + + bool hasCriticalFacet(const FaultManagerConfig& config) const; + +private: + const UUID _id = UUID::gen(); + + ClockSource* const _clockSource; + const Date_t _startTime; + + mutable Mutex _mutex = MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "Fault::_mutex"); + // We don't need a map by type because we expect to have only few facets. + // Linear search is much faster, we want to avoid any lock contention here. + std::deque<FaultFacetPtr> _facets; }; +using FaultPtr = std::shared_ptr<Fault>; +using FaultConstPtr = std::shared_ptr<const Fault>; } // namespace process_health } // namespace mongo diff --git a/src/mongo/db/process_health/fault_facet_container.h b/src/mongo/db/process_health/fault_facet_container.h deleted file mode 100644 index 1dca1ac2397..00000000000 --- a/src/mongo/db/process_health/fault_facet_container.h +++ /dev/null @@ -1,97 +0,0 @@ -/** - * Copyright (C) 2021-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * <http://www.mongodb.com/licensing/server-side-public-license>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ -#pragma once - -#include <memory> - -#include "mongo/db/process_health/fault_facet.h" - -namespace mongo { -namespace process_health { - -/** - * Interface for the container of Fault facets. - */ -class FaultFacetContainer { -public: - /** - * We do not allow the facets added to this container to be immediately deleted. This - * is the minimal lifetime before a fully resolved facet could be deleted. - */ - static constexpr Milliseconds kMinimalFacetLifetimeToDelete = Milliseconds(10000); - - virtual ~FaultFacetContainer() = default; - - virtual std::vector<FaultFacetPtr> getFacets() const = 0; - - /** - * Checks that a Facet of a given type already exists and returns it. - */ - virtual FaultFacetPtr getFaultFacet(FaultFacetType type) = 0; - - /** - * Getter that takes a create callback in case the facet of a given type is missing. - * We do not have a separate create factory interface to avoid having the registration - * mechanism for those factories, which is not necessary. - * - * @param createCb The callback is invoked only if the facet of this type does not exist. - */ - virtual FaultFacetPtr getOrCreateFaultFacet(FaultFacetType type, - std::function<FaultFacetPtr()> createCb) = 0; - - /** - * Performs necessary actions to delete all resolved facets with lifetime of - * at least kMinimalFacetLifetimeToDelete. - * - * The interface for deleting facets is not provided because the container should - * garbage collect them. - */ - virtual void garbageCollectResolvedFacets() = 0; -}; - -using FaultFacetContainerPtr = std::shared_ptr<FaultFacetContainer>; - -/** - * Interface to get or create a FaultFacetContainer. - * The implementor of this interface owns the singleton instance. - */ -class FaultFacetContainerFactory { -public: - virtual ~FaultFacetContainerFactory() = default; - - /** - * @return FaultFacetContainer or null pointer if it doesn't exist. - */ - virtual FaultFacetContainerPtr getFaultFacetContainer() = 0; - - virtual FaultFacetContainerPtr getOrCreateFaultFacetContainer() = 0; -}; - -} // namespace process_health -} // namespace mongo diff --git a/src/mongo/db/process_health/fault_facets_container.h b/src/mongo/db/process_health/fault_facets_container.h deleted file mode 100644 index 59f14896eb4..00000000000 --- a/src/mongo/db/process_health/fault_facets_container.h +++ /dev/null @@ -1,90 +0,0 @@ -/** - * Copyright (C) 2021-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * <http://www.mongodb.com/licensing/server-side-public-license>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ -#pragma once - -#include <memory> - -#include "mongo/db/process_health/fault_facet.h" - -namespace mongo { -namespace process_health { - -/** - * Interface for the container of Fault facets. - */ -class FaultFacetsContainer { -public: - virtual ~FaultFacetsContainer() = default; - - virtual std::vector<FaultFacetPtr> getFacets() const = 0; - - /** - * Checks that a Facet of a given type already exists and returns it. - * - * @returns existing facet or null. - */ - virtual FaultFacetPtr getFaultFacet(FaultFacetType type) = 0; - - /** - * Update the container with supplied facet. If the optional contains no - * value, remove the existing facet from the container. - * - * @param facet new value to insert/replace or nullptr to delete. - */ - virtual void updateWithSuppliedFacet(FaultFacetType type, FaultFacetPtr facet) = 0; - - /** - * Performs necessary actions to delete all resolved facets. - */ - virtual void garbageCollectResolvedFacets() = 0; -}; - -using FaultFacetsContainerPtr = std::shared_ptr<FaultFacetsContainer>; - -/** - * Interface to get or create a FaultFacetsContainer. - * The implementor of this interface owns the singleton instance. - */ -class FaultFacetsContainerFactory { -public: - virtual ~FaultFacetsContainerFactory() = default; - - virtual FaultFacetsContainerPtr getFaultFacetsContainer() const = 0; - - virtual FaultFacetsContainerPtr getOrCreateFaultFacetsContainer() = 0; - - /** - * Update the container with supplied check result. - * Create or delete existing facet depending on the status. - */ - virtual void updateWithCheckStatus(HealthCheckStatus&& checkStatus) = 0; -}; - -} // namespace process_health -} // namespace mongo diff --git a/src/mongo/db/process_health/fault_impl.h b/src/mongo/db/process_health/fault_impl.h deleted file mode 100644 index de60212aa42..00000000000 --- a/src/mongo/db/process_health/fault_impl.h +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Copyright (C) 2021-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * <http://www.mongodb.com/licensing/server-side-public-license>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ -#pragma once - -#include "mongo/db/process_health/fault.h" - -#include "mongo/db/service_context.h" -#include "mongo/util/clock_source.h" -#include "mongo/util/duration.h" -#include "mongo/util/timer.h" - -namespace mongo { -namespace process_health { - -/** - * Internal implementation of the Fault class. - * @see Fault - */ -class FaultImpl : public FaultInternal { -public: - explicit FaultImpl(ClockSource* clockSource); - - ~FaultImpl() override = default; - - // Fault interface. - - UUID getId() const override; - - double getSeverity() const override; - - Milliseconds getDuration() const override; - - void appendDescription(BSONObjBuilder* builder) const override; - - // FaultFacetsContainer interface. - - std::vector<FaultFacetPtr> getFacets() const override; - - FaultFacetPtr getFaultFacet(FaultFacetType type) override; - - void updateWithSuppliedFacet(FaultFacetType type, FaultFacetPtr facet) override; - - void garbageCollectResolvedFacets() override; - -private: - const UUID _id = UUID::gen(); - - ClockSource* const _clockSource; - const Date_t _startTime; - - mutable Mutex _mutex = MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "FaultImpl::_mutex"); - // We don't need a map by type because we expect to have only few facets. - // Linear search is much faster, we want to avoid any lock contention here. - std::deque<FaultFacetPtr> _facets; -}; - -} // namespace process_health -} // namespace mongo diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp index c916270fe86..71b82141aa5 100644 --- a/src/mongo/db/process_health/fault_manager.cpp +++ b/src/mongo/db/process_health/fault_manager.cpp @@ -35,8 +35,8 @@ #include <algorithm> +#include "mongo/db/process_health/fault.h" #include "mongo/db/process_health/fault_facet_impl.h" -#include "mongo/db/process_health/fault_impl.h" #include "mongo/db/process_health/fault_manager_config.h" #include "mongo/db/process_health/health_monitoring_feature_flag.h" #include "mongo/db/process_health/health_monitoring_gen.h" @@ -274,7 +274,7 @@ boost::optional<FaultState> FaultManager::handleStartupCheck(const OptionalMessa } updateWithCheckStatus(HealthCheckStatus(status)); - auto optionalFault = getFaultFacetsContainer(); + auto optionalFault = getFault(); if (optionalFault) { optionalFault->garbageCollectResolvedFacets(); } @@ -284,7 +284,10 @@ boost::optional<FaultState> FaultManager::handleStartupCheck(const OptionalMessa FaultState::kStartupCheck, FaultState::kStartupCheck, boost::none); } - std::shared_ptr<FaultInternal> faultToDelete; + // If the whole fault becomes resolved, garbage collect it + // with proper locking. + std::shared_ptr<Fault> faultToDelete; + { auto lk = stdx::lock_guard(_mutex); if (_fault && _fault->getFacets().empty()) { @@ -336,7 +339,7 @@ boost::optional<FaultState> FaultManager::handleTransientFault(const OptionalMes updateWithCheckStatus(HealthCheckStatus(status)); - auto optionalActiveFault = getFaultFacetsContainer(); + auto optionalActiveFault = getFault(); if (optionalActiveFault) { optionalActiveFault->garbageCollectResolvedFacets(); } @@ -377,7 +380,7 @@ void FaultManager::logCurrentState(FaultState, FaultState newState, const Option } void FaultManager::setTransientFaultDeadline(FaultState, FaultState, const OptionalMessageType&) { - if (hasCriticalFacet(_fault.get()) && !_transientFaultDeadline) { + if (_fault->hasCriticalFacet(getConfig()) && !_transientFaultDeadline) { _transientFaultDeadline = std::make_unique<TransientFaultDeadline>( this, _taskExecutor, _config->getActiveFaultDuration()); } @@ -481,21 +484,27 @@ Date_t FaultManager::getLastTransitionTime() const { FaultConstPtr FaultManager::currentFault() const { auto lk = stdx::lock_guard(_mutex); - return std::static_pointer_cast<const Fault>(_fault); + return _fault; +} + +FaultPtr FaultManager::getFault() const { + auto lk = stdx::lock_guard(_mutex); + return _fault; } -FaultFacetsContainerPtr FaultManager::getFaultFacetsContainer() const { +FaultPtr FaultManager::createFault() { auto lk = stdx::lock_guard(_mutex); - return std::static_pointer_cast<FaultFacetsContainer>(_fault); + _fault = std::make_shared<Fault>(_svcCtx->getFastClockSource()); + return _fault; } -FaultFacetsContainerPtr FaultManager::getOrCreateFaultFacetsContainer() { +FaultPtr FaultManager::getOrCreateFault() { auto lk = stdx::lock_guard(_mutex); if (!_fault) { // Create a new one. - _fault = std::make_shared<FaultImpl>(_svcCtx->getFastClockSource()); + _fault = std::make_shared<Fault>(_svcCtx->getFastClockSource()); } - return std::static_pointer_cast<FaultFacetsContainer>(_fault); + return _fault; } void FaultManager::healthCheck(HealthObserver* observer, std::shared_ptr<AtomicWord<bool>> token) { @@ -568,7 +577,7 @@ void FaultManager::healthCheck(HealthObserver* observer, std::shared_ptr<AtomicW } // Run asynchronous health check. Send output to the state machine. Schedule next run. - auto healthCheckFuture = observer->periodicCheck(*this, _taskExecutor, token) + auto healthCheckFuture = observer->periodicCheck(_taskExecutor, token) .thenRunOn(_taskExecutor) .onCompletion([this, acceptNotOKStatus, schedulerCb]( StatusWith<HealthCheckStatus> status) { @@ -592,37 +601,22 @@ void FaultManager::healthCheck(HealthObserver* observer, std::shared_ptr<AtomicW } void FaultManager::updateWithCheckStatus(HealthCheckStatus&& checkStatus) { + auto fault = getFault(); + // Remove resolved facet from the fault. if (HealthCheckStatus::isResolved(checkStatus.getSeverity())) { - auto container = getFaultFacetsContainer(); - if (container) { - container->updateWithSuppliedFacet(checkStatus.getType(), nullptr); + if (fault) { + fault->removeFacet(checkStatus.getType()); } - return; } - auto container = getOrCreateFaultFacetsContainer(); - auto facet = container->getFaultFacet(checkStatus.getType()); - if (!facet) { - const auto type = checkStatus.getType(); - auto newFacet = - new FaultFacetImpl(type, _svcCtx->getFastClockSource(), std::move(checkStatus)); - container->updateWithSuppliedFacet(type, FaultFacetPtr(newFacet)); - } else { - facet->update(std::move(checkStatus)); + if (!_fault) { + fault = createFault(); // Create fault if it doesn't exist. } -} -bool FaultManager::hasCriticalFacet(const FaultInternal* fault) const { - invariant(fault); - const auto& facets = fault->getFacets(); - for (const auto& facet : facets) { - auto facetType = facet->getType(); - if (_config->getHealthObserverIntensity(facetType) == - HealthObserverIntensityEnum::kCritical) - return true; - } - return false; + const auto type = checkStatus.getType(); + fault->upsertFacet(std::make_shared<FaultFacetImpl>( + type, _svcCtx->getFastClockSource(), std::move(checkStatus))); } FaultManagerConfig FaultManager::getConfig() const { diff --git a/src/mongo/db/process_health/fault_manager.h b/src/mongo/db/process_health/fault_manager.h index 89eac503cd0..a9f8969b66b 100644 --- a/src/mongo/db/process_health/fault_manager.h +++ b/src/mongo/db/process_health/fault_manager.h @@ -32,7 +32,6 @@ #include "mongo/db/process_health/fault.h" #include "mongo/db/process_health/fault_facet.h" -#include "mongo/db/process_health/fault_facet_container.h" #include "mongo/db/process_health/fault_manager_config.h" #include "mongo/db/process_health/health_monitoring_server_parameters_gen.h" #include "mongo/db/process_health/health_observer.h" @@ -56,8 +55,7 @@ namespace process_health { * * If an active fault state persists, FaultManager will terminate the server process. */ -class FaultManager : protected StateMachine<HealthCheckStatus, FaultState>, - protected FaultFacetsContainerFactory { +class FaultManager : protected StateMachine<HealthCheckStatus, FaultState> { FaultManager(const FaultManager&) = delete; FaultManager& operator=(const FaultManager&) = delete; @@ -131,20 +129,20 @@ protected: // run. virtual void healthCheck(HealthObserver* observer, std::shared_ptr<AtomicWord<bool>> token); - // Protected interface FaultFacetsContainerFactory implementation. + FaultPtr getFault() const; - // The interface FaultFacetsContainerFactory is implemented by the member '_fault'. - FaultFacetsContainerPtr getFaultFacetsContainer() const override; + FaultPtr createFault(); - FaultFacetsContainerPtr getOrCreateFaultFacetsContainer() override; + FaultPtr getOrCreateFault(); - void updateWithCheckStatus(HealthCheckStatus&& checkStatus) override; + /** + * Update the active fault with supplied check result. + * Create or delete existing facet depending on the status. + */ + void updateWithCheckStatus(HealthCheckStatus&& checkStatus); void schedulePeriodicHealthCheckThread(); - // TODO: move this into fault class; refactor to remove FaultInternal - bool hasCriticalFacet(const FaultInternal* fault) const; - void progressMonitorCheckForTests(std::function<void(std::string cause)> crashCb); private: @@ -160,7 +158,7 @@ private: mutable Mutex _mutex = MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(5), "FaultManager::_mutex"); - std::shared_ptr<FaultInternal> _fault; + std::shared_ptr<Fault> _fault; // This source is canceled before the _taskExecutor shutdown(). It // can be used to check for the start of the shutdown sequence. // In later versions, this is using the cancellation token. diff --git a/src/mongo/db/process_health/fault_manager_config.h b/src/mongo/db/process_health/fault_manager_config.h index db218853da0..cb92353798f 100644 --- a/src/mongo/db/process_health/fault_manager_config.h +++ b/src/mongo/db/process_health/fault_manager_config.h @@ -90,7 +90,7 @@ public: /* Maximum possible jitter added to the time between health checks */ static auto inline constexpr kPeriodicHealthCheckMaxJitter{Milliseconds{100}}; - HealthObserverIntensityEnum getHealthObserverIntensity(FaultFacetType type) { + HealthObserverIntensityEnum getHealthObserverIntensity(FaultFacetType type) const { auto intensities = _getHealthObserverIntensities(); auto toObserverType = [](FaultFacetType type) -> boost::optional<HealthObserverTypeEnum> { diff --git a/src/mongo/db/process_health/fault_manager_test_suite.h b/src/mongo/db/process_health/fault_manager_test_suite.h index 9d2063be39b..3d087de65ea 100644 --- a/src/mongo/db/process_health/fault_manager_test_suite.h +++ b/src/mongo/db/process_health/fault_manager_test_suite.h @@ -96,14 +96,14 @@ public: return getHealthObservers(); } - FaultFacetsContainerPtr getOrCreateFaultFacetsContainerTest() { - return getOrCreateFaultFacetsContainer(); + FaultPtr getOrCreateFaultTest() { + return getOrCreateFault(); } - FaultInternal& getFault() { - FaultFacetsContainerPtr fault = getFaultFacetsContainer(); + Fault& getFault() { + FaultPtr fault = FaultManager::getFault(); invariant(fault); - return *(static_cast<FaultInternal*>(fault.get())); + return *(static_cast<Fault*>(fault.get())); } void progressMonitorCheckTest(std::function<void(std::string cause)> crashCb) { diff --git a/src/mongo/db/process_health/fault_state_machine_test.cpp b/src/mongo/db/process_health/fault_state_machine_test.cpp index ec381d92404..8de270878ba 100644 --- a/src/mongo/db/process_health/fault_state_machine_test.cpp +++ b/src/mongo/db/process_health/fault_state_machine_test.cpp @@ -201,12 +201,10 @@ TEST_F(FaultManagerTest, OneFacetIsResolved) { ASSERT(manager().getFaultState() == FaultState::kStartupCheck); manager().acceptTest(HealthCheckStatus(FaultFacetType::kMock1, 1.1, "failing health check 1")); manager().acceptTest(HealthCheckStatus(FaultFacetType::kMock2, 1.1, "failing health check 2")); - assertSoon([this] { - return manager().getOrCreateFaultFacetsContainerTest()->getFacets().size() == 2; - }); + assertSoon([this] { return manager().getOrCreateFaultTest()->getFacets().size() == 2; }); manager().acceptTest(HealthCheckStatus(FaultFacetType::kMock1)); assertSoon([this] { - return manager().getOrCreateFaultFacetsContainerTest()->getFacets().front()->getType() == + return manager().getOrCreateFaultTest()->getFacets().front()->getType() == FaultFacetType::kMock2; }); ASSERT(manager().getFaultState() == FaultState::kStartupCheck); diff --git a/src/mongo/db/process_health/fault_impl_test.cpp b/src/mongo/db/process_health/fault_test.cpp index d7ec9c0e0da..9f8d1fc6d70 100644 --- a/src/mongo/db/process_health/fault_impl_test.cpp +++ b/src/mongo/db/process_health/fault_test.cpp @@ -27,7 +27,7 @@ * it in the license file. */ -#include "mongo/db/process_health/fault_impl.h" +#include "mongo/db/process_health/fault.h" #include "mongo/db/process_health/fault_facet_mock.h" #include "mongo/unittest/unittest.h" @@ -37,37 +37,37 @@ namespace mongo { namespace process_health { namespace { -class FaultImplTest : public unittest::Test { +class FaultTest : public unittest::Test { public: void setUp() override { _svcCtx = ServiceContext::make(); _svcCtx->setFastClockSource(std::make_unique<ClockSourceMock>()); - _faultImpl = std::make_unique<FaultImpl>(_svcCtx->getFastClockSource()); + _faultImpl = std::make_unique<Fault>(_svcCtx->getFastClockSource()); } ClockSourceMock& clockSource() { return *static_cast<ClockSourceMock*>(_svcCtx->getFastClockSource()); } - FaultImpl& fault() { + Fault& fault() { return *_faultImpl; } private: ServiceContext::UniqueServiceContext _svcCtx; - std::unique_ptr<FaultImpl> _faultImpl; + std::unique_ptr<Fault> _faultImpl; }; -TEST_F(FaultImplTest, TimeSourceWorks) { +TEST_F(FaultTest, TimeSourceWorks) { // Fault was just created, duration should be zero. ASSERT_EQ(Milliseconds(0), fault().getDuration()); clockSource().advance(Milliseconds(1)); ASSERT_EQ(Milliseconds(1), fault().getDuration()); } -TEST_F(FaultImplTest, SeverityLevelHelpersWork) { +TEST_F(FaultTest, SeverityLevelHelpersWork) { FaultFacetMock resolvedFacet(FaultFacetType::kMock1, &clockSource(), [] { return 0; }); ASSERT_TRUE(HealthCheckStatus::isResolved(resolvedFacet.getStatus().getSeverity())); @@ -78,26 +78,26 @@ TEST_F(FaultImplTest, SeverityLevelHelpersWork) { ASSERT_TRUE(HealthCheckStatus::isActiveFault(faultyFacet.getStatus().getSeverity())); } -TEST_F(FaultImplTest, FindFacetByType) { +TEST_F(FaultTest, FindFacetByType) { ASSERT_EQ(0, fault().getFacets().size()); ASSERT_FALSE(fault().getFaultFacet(FaultFacetType::kMock1)); FaultFacetPtr newFacet = std::make_shared<FaultFacetMock>(FaultFacetType::kMock1, &clockSource(), [] { return 0; }); - fault().updateWithSuppliedFacet(FaultFacetType::kMock1, newFacet); + fault().upsertFacet(newFacet); auto facet = fault().getFaultFacet(FaultFacetType::kMock1); ASSERT_TRUE(facet); auto status = facet->getStatus(); ASSERT_EQ(FaultFacetType::kMock1, status.getType()); } -TEST_F(FaultImplTest, CanCreateAndGarbageCollectFacets) { +TEST_F(FaultTest, CanCreateAndGarbageCollectFacets) { AtomicDouble severity{0.1}; ASSERT_EQ(0, fault().getFacets().size()); FaultFacetPtr newFacet = std::make_shared<FaultFacetMock>( FaultFacetType::kMock1, &clockSource(), [&severity] { return severity.load(); }); - fault().updateWithSuppliedFacet(FaultFacetType::kMock1, newFacet); + fault().upsertFacet(newFacet); // New facet was added successfully. ASSERT_EQ(1, fault().getFacets().size()); diff --git a/src/mongo/db/process_health/health_observer.h b/src/mongo/db/process_health/health_observer.h index f6178889faa..a992e79de46 100644 --- a/src/mongo/db/process_health/health_observer.h +++ b/src/mongo/db/process_health/health_observer.h @@ -29,7 +29,6 @@ #pragma once #include "mongo/db/process_health/fault_facet.h" -#include "mongo/db/process_health/fault_facets_container.h" #include "mongo/db/process_health/fault_manager_config.h" #include "mongo/executor/task_executor.h" #include "mongo/util/future.h" @@ -74,10 +73,9 @@ public: * Triggers health check. The implementation should not block to wait for the completion * of this check. * - * @param factory Interface to get or create the factory of facets container. + * @param factory Interface to get or create the factory of faults. */ virtual SharedSemiFuture<HealthCheckStatus> periodicCheck( - FaultFacetsContainerFactory& factory, std::shared_ptr<executor::TaskExecutor> taskExecutor, std::shared_ptr<AtomicWord<bool>> cancellationToken) = 0; diff --git a/src/mongo/db/process_health/health_observer_base.cpp b/src/mongo/db/process_health/health_observer_base.cpp index c7be7035b16..3e40bcf7484 100644 --- a/src/mongo/db/process_health/health_observer_base.cpp +++ b/src/mongo/db/process_health/health_observer_base.cpp @@ -42,7 +42,6 @@ HealthObserverBase::HealthObserverBase(ServiceContext* svcCtx) : _svcCtx(svcCtx), _rand(PseudoRandom(SecureRandom().nextInt64())) {} SharedSemiFuture<HealthCheckStatus> HealthObserverBase::periodicCheck( - FaultFacetsContainerFactory& factory, std::shared_ptr<executor::TaskExecutor> taskExecutor, std::shared_ptr<AtomicWord<bool>> cancellationToken) { // If we have reached here, the intensity of this health observer must not be off diff --git a/src/mongo/db/process_health/health_observer_base.h b/src/mongo/db/process_health/health_observer_base.h index 82ca9643754..fc47d36e806 100644 --- a/src/mongo/db/process_health/health_observer_base.h +++ b/src/mongo/db/process_health/health_observer_base.h @@ -62,7 +62,6 @@ public: // Implements the common logic for periodic checks. // Every observer should implement periodicCheckImpl() for specific tests. SharedSemiFuture<HealthCheckStatus> periodicCheck( - FaultFacetsContainerFactory& factory, std::shared_ptr<executor::TaskExecutor> taskExecutor, std::shared_ptr<AtomicWord<bool>> cancellationToken) override; |