summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavis Haupt <davis.haupt@mongodb.com>2021-12-20 16:57:27 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-12-20 17:56:55 +0000
commit3b11d6fa60ff69f9ae52a90690cd05404625284e (patch)
tree367b356b63e23f3c05f0f92efbaa2cc1380db798
parent3b81b9afedd26f9ff6352441e9e9eb075a43b3cf (diff)
downloadmongo-3b11d6fa60ff69f9ae52a90690cd05404625284e.tar.gz
SERVER-60944 Simplify Fault class hierarchy and interface for updating fault facets
-rw-r--r--src/mongo/db/process_health/SConscript4
-rw-r--r--src/mongo/db/process_health/fault.cpp (renamed from src/mongo/db/process_health/fault_impl.cpp)57
-rw-r--r--src/mongo/db/process_health/fault.h78
-rw-r--r--src/mongo/db/process_health/fault_facet_container.h97
-rw-r--r--src/mongo/db/process_health/fault_facets_container.h90
-rw-r--r--src/mongo/db/process_health/fault_impl.h84
-rw-r--r--src/mongo/db/process_health/fault_manager.cpp66
-rw-r--r--src/mongo/db/process_health/fault_manager.h22
-rw-r--r--src/mongo/db/process_health/fault_manager_config.h2
-rw-r--r--src/mongo/db/process_health/fault_manager_test_suite.h10
-rw-r--r--src/mongo/db/process_health/fault_state_machine_test.cpp6
-rw-r--r--src/mongo/db/process_health/fault_test.cpp (renamed from src/mongo/db/process_health/fault_impl_test.cpp)22
-rw-r--r--src/mongo/db/process_health/health_observer.h7
-rw-r--r--src/mongo/db/process_health/health_observer_base.cpp4
-rw-r--r--src/mongo/db/process_health/health_observer_base.h4
15 files changed, 155 insertions, 398 deletions
diff --git a/src/mongo/db/process_health/SConscript b/src/mongo/db/process_health/SConscript
index a225b2d8470..86bae3abe45 100644
--- a/src/mongo/db/process_health/SConscript
+++ b/src/mongo/db/process_health/SConscript
@@ -7,8 +7,8 @@ env = env.Clone()
env.Library(
target='fault_manager',
source=[
+ 'fault.cpp',
'fault_facet_impl.cpp',
- 'fault_impl.cpp',
'fault_manager.cpp',
'fault_manager_config.cpp',
'health_monitoring_server_parameters.idl',
@@ -37,10 +37,10 @@ env.Library(
env.CppUnitTest(
target='fault_base_classes_test',
source=[
- 'fault_impl_test.cpp',
'fault_facet_test.cpp',
'fault_manager_test.cpp',
'fault_state_machine_test.cpp',
+ 'fault_test.cpp',
'health_observer_test.cpp',
'state_machine_test.cpp',
],
diff --git a/src/mongo/db/process_health/fault_impl.cpp b/src/mongo/db/process_health/fault.cpp
index 0fe95df330b..6b47d92e525 100644
--- a/src/mongo/db/process_health/fault_impl.cpp
+++ b/src/mongo/db/process_health/fault.cpp
@@ -27,21 +27,21 @@
* it in the license file.
*/
-#include "mongo/db/process_health/fault_impl.h"
+#include "mongo/db/process_health/fault.h"
namespace mongo {
namespace process_health {
-FaultImpl::FaultImpl(ClockSource* clockSource)
+Fault::Fault(ClockSource* clockSource)
: _clockSource(clockSource), _startTime(_clockSource->now()) {
invariant(clockSource); // Will crash before this line, just for readability.
}
-UUID FaultImpl::getId() const {
+UUID Fault::getId() const {
return _id;
}
-double FaultImpl::getSeverity() const {
+double Fault::getSeverity() const {
auto facets = getFacets();
// Simple algo to compute aggregate severity: take the max from all facets.
@@ -56,17 +56,17 @@ double FaultImpl::getSeverity() const {
return severity;
}
-Milliseconds FaultImpl::getDuration() const {
+Milliseconds Fault::getDuration() const {
return Milliseconds(_clockSource->now() - _startTime);
}
-std::vector<FaultFacetPtr> FaultImpl::getFacets() const {
+std::vector<FaultFacetPtr> Fault::getFacets() const {
auto lk = stdx::lock_guard(_mutex);
std::vector<FaultFacetPtr> result(_facets.begin(), _facets.end());
return result;
}
-FaultFacetPtr FaultImpl::getFaultFacet(FaultFacetType type) {
+FaultFacetPtr Fault::getFaultFacet(FaultFacetType type) {
auto lk = stdx::lock_guard(_mutex);
auto it = std::find_if(_facets.begin(), _facets.end(), [type](const FaultFacetPtr& facet) {
return facet->getType() == type;
@@ -77,34 +77,31 @@ FaultFacetPtr FaultImpl::getFaultFacet(FaultFacetType type) {
return *it;
}
-void FaultImpl::updateWithSuppliedFacet(FaultFacetType type, FaultFacetPtr facet) {
+void Fault::removeFacet(FaultFacetType type) {
auto lk = stdx::lock_guard(_mutex);
+ _facets.erase(
+ std::remove_if(_facets.begin(),
+ _facets.end(),
+ [this, type](const FaultFacetPtr& f) { return f->getType() == type; }),
+ _facets.end());
+}
- if (!facet) {
- // Delete existing.
- _facets.erase(
- std::remove_if(_facets.begin(),
- _facets.end(),
- [this, type](const FaultFacetPtr& f) { return f->getType() == type; }),
- _facets.end());
- return;
- }
-
- invariant(type == facet->getType());
- // Update or insert.
+void Fault::upsertFacet(FaultFacetPtr facet) {
+ invariant(facet);
+ auto type = facet->getType();
+ auto lk = stdx::lock_guard(_mutex);
for (auto& existing : _facets) {
invariant(existing);
if (existing->getType() == type) {
- existing = facet;
+ existing->update(facet->getStatus());
return;
}
}
-
// We are here if existing was not found - insert new.
_facets.push_back(std::move(facet));
}
-void FaultImpl::garbageCollectResolvedFacets() {
+void Fault::garbageCollectResolvedFacets() {
auto lk = stdx::lock_guard(_mutex);
_facets.erase(std::remove_if(_facets.begin(),
_facets.end(),
@@ -116,7 +113,7 @@ void FaultImpl::garbageCollectResolvedFacets() {
_facets.end());
}
-void FaultImpl::appendDescription(BSONObjBuilder* builder) const {
+void Fault::appendDescription(BSONObjBuilder* builder) const {
builder->append("id", getId().toBSON());
builder->append("severity", getSeverity());
builder->append("duration", getDuration().toBSON());
@@ -129,5 +126,17 @@ void FaultImpl::appendDescription(BSONObjBuilder* builder) const {
builder->append("numFacets", static_cast<int>(_facets.size()));
}
+bool Fault::hasCriticalFacet(const FaultManagerConfig& config) const {
+ const auto& facets = this->getFacets();
+ for (const auto& facet : facets) {
+ auto facetType = facet->getType();
+ if (config.getHealthObserverIntensity(facetType) ==
+ HealthObserverIntensityEnum::kCritical) {
+ return true;
+ }
+ }
+ return false;
+}
+
} // namespace process_health
} // namespace mongo
diff --git a/src/mongo/db/process_health/fault.h b/src/mongo/db/process_health/fault.h
index 383c4121e5e..a3fc1dbf577 100644
--- a/src/mongo/db/process_health/fault.h
+++ b/src/mongo/db/process_health/fault.h
@@ -28,29 +28,31 @@
*/
#pragma once
-#include <memory>
-
-#include "mongo/bson/bsonobjbuilder.h"
-#include "mongo/db/process_health/fault_facets_container.h"
+#include "mongo/db/process_health/fault_facet.h"
+#include "mongo/db/service_context.h"
+#include "mongo/util/clock_source.h"
#include "mongo/util/duration.h"
-#include "mongo/util/uuid.h"
+#include "mongo/util/timer.h"
namespace mongo {
namespace process_health {
/**
- * Detailed description of the current fault.
- * @see FaultManager for more details.
+ * Detailed description of the current fault, which also owns the facets that make it up.
+ * @see FaultManager for more details.
*/
class Fault : public std::enable_shared_from_this<Fault> {
Fault(const Fault&) = delete;
Fault& operator=(const Fault&) = delete;
public:
- Fault() = default;
- virtual ~Fault() = default;
+ explicit Fault(ClockSource* clockSource);
+
+ ~Fault() = default;
- virtual UUID getId() const = 0;
+ // Fault interface.
+
+ UUID getId() const;
/**
* The fault severity value is an aggregate severity calculated
@@ -61,36 +63,70 @@ public:
* (0, 1.0): Transient fault condition
* [1.0, Inf): Active fault condition
*/
- virtual double getSeverity() const = 0;
+ double getSeverity() const;
/**
* @return The lifetime of this fault from the moment it was created.
* Invariant: getDuration() >= getActiveFaultDuration()
*/
- virtual Milliseconds getDuration() const = 0;
+ Milliseconds getDuration() const;
/**
* Describes the current fault.
*/
- virtual void appendDescription(BSONObjBuilder* builder) const = 0;
+ void appendDescription(BSONObjBuilder* builder) const;
BSONObj toBSON() const {
BSONObjBuilder builder;
appendDescription(&builder);
return builder.obj();
}
-};
-using FaultConstPtr = std::shared_ptr<const Fault>;
+ std::vector<FaultFacetPtr> getFacets() const;
-/**
- * Internal Fault interface that has accessors to manage Facets this Fault owns.
- */
-class FaultInternal : public Fault, public FaultFacetsContainer {
-public:
- ~FaultInternal() override = default;
+ /**
+ * Checks that a Facet of a given type already exists and returns it.
+ *
+ * @returns existing facet or null.
+ */
+ FaultFacetPtr getFaultFacet(FaultFacetType type);
+
+ /**
+ * Update the fault with supplied facet.
+ *
+ * @param facet facet to insert, or whose status updates the existing facet of the same type; must not be null.
+ */
+ void upsertFacet(FaultFacetPtr facet);
+
+
+ /**
+ * Delete a facet from this fault by its type.
+ *
+ * @param type type of facet to remove.
+ */
+ void removeFacet(FaultFacetType type);
+
+ /**
+ * Performs necessary actions to delete all resolved facets.
+ */
+ void garbageCollectResolvedFacets();
+
+ bool hasCriticalFacet(const FaultManagerConfig& config) const;
+
+private:
+ const UUID _id = UUID::gen();
+
+ ClockSource* const _clockSource;
+ const Date_t _startTime;
+
+ mutable Mutex _mutex = MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "Fault::_mutex");
+ // We don't need a map by type because we expect to have only a few facets.
+ // Linear search is much faster at this size, and we want to avoid any lock contention here.
+ std::deque<FaultFacetPtr> _facets;
};
+using FaultPtr = std::shared_ptr<Fault>;
+using FaultConstPtr = std::shared_ptr<const Fault>;
} // namespace process_health
} // namespace mongo
diff --git a/src/mongo/db/process_health/fault_facet_container.h b/src/mongo/db/process_health/fault_facet_container.h
deleted file mode 100644
index 1dca1ac2397..00000000000
--- a/src/mongo/db/process_health/fault_facet_container.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/**
- * Copyright (C) 2021-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-#pragma once
-
-#include <memory>
-
-#include "mongo/db/process_health/fault_facet.h"
-
-namespace mongo {
-namespace process_health {
-
-/**
- * Interface for the container of Fault facets.
- */
-class FaultFacetContainer {
-public:
- /**
- * We do not allow the facets added to this container to be immediately deleted. This
- * is the minimal lifetime before a fully resolved facet could be deleted.
- */
- static constexpr Milliseconds kMinimalFacetLifetimeToDelete = Milliseconds(10000);
-
- virtual ~FaultFacetContainer() = default;
-
- virtual std::vector<FaultFacetPtr> getFacets() const = 0;
-
- /**
- * Checks that a Facet of a given type already exists and returns it.
- */
- virtual FaultFacetPtr getFaultFacet(FaultFacetType type) = 0;
-
- /**
- * Getter that takes a create callback in case the facet of a given type is missing.
- * We do not have a separate create factory interface to avoid having the registration
- * mechanism for those factories, which is not necessary.
- *
- * @param createCb The callback is invoked only if the facet of this type does not exist.
- */
- virtual FaultFacetPtr getOrCreateFaultFacet(FaultFacetType type,
- std::function<FaultFacetPtr()> createCb) = 0;
-
- /**
- * Performs necessary actions to delete all resolved facets with lifetime of
- * at least kMinimalFacetLifetimeToDelete.
- *
- * The interface for deleting facets is not provided because the container should
- * garbage collect them.
- */
- virtual void garbageCollectResolvedFacets() = 0;
-};
-
-using FaultFacetContainerPtr = std::shared_ptr<FaultFacetContainer>;
-
-/**
- * Interface to get or create a FaultFacetContainer.
- * The implementor of this interface owns the singleton instance.
- */
-class FaultFacetContainerFactory {
-public:
- virtual ~FaultFacetContainerFactory() = default;
-
- /**
- * @return FaultFacetContainer or null pointer if it doesn't exist.
- */
- virtual FaultFacetContainerPtr getFaultFacetContainer() = 0;
-
- virtual FaultFacetContainerPtr getOrCreateFaultFacetContainer() = 0;
-};
-
-} // namespace process_health
-} // namespace mongo
diff --git a/src/mongo/db/process_health/fault_facets_container.h b/src/mongo/db/process_health/fault_facets_container.h
deleted file mode 100644
index 59f14896eb4..00000000000
--- a/src/mongo/db/process_health/fault_facets_container.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- * Copyright (C) 2021-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-#pragma once
-
-#include <memory>
-
-#include "mongo/db/process_health/fault_facet.h"
-
-namespace mongo {
-namespace process_health {
-
-/**
- * Interface for the container of Fault facets.
- */
-class FaultFacetsContainer {
-public:
- virtual ~FaultFacetsContainer() = default;
-
- virtual std::vector<FaultFacetPtr> getFacets() const = 0;
-
- /**
- * Checks that a Facet of a given type already exists and returns it.
- *
- * @returns existing facet or null.
- */
- virtual FaultFacetPtr getFaultFacet(FaultFacetType type) = 0;
-
- /**
- * Update the container with supplied facet. If the optional contains no
- * value, remove the existing facet from the container.
- *
- * @param facet new value to insert/replace or nullptr to delete.
- */
- virtual void updateWithSuppliedFacet(FaultFacetType type, FaultFacetPtr facet) = 0;
-
- /**
- * Performs necessary actions to delete all resolved facets.
- */
- virtual void garbageCollectResolvedFacets() = 0;
-};
-
-using FaultFacetsContainerPtr = std::shared_ptr<FaultFacetsContainer>;
-
-/**
- * Interface to get or create a FaultFacetsContainer.
- * The implementor of this interface owns the singleton instance.
- */
-class FaultFacetsContainerFactory {
-public:
- virtual ~FaultFacetsContainerFactory() = default;
-
- virtual FaultFacetsContainerPtr getFaultFacetsContainer() const = 0;
-
- virtual FaultFacetsContainerPtr getOrCreateFaultFacetsContainer() = 0;
-
- /**
- * Update the container with supplied check result.
- * Create or delete existing facet depending on the status.
- */
- virtual void updateWithCheckStatus(HealthCheckStatus&& checkStatus) = 0;
-};
-
-} // namespace process_health
-} // namespace mongo
diff --git a/src/mongo/db/process_health/fault_impl.h b/src/mongo/db/process_health/fault_impl.h
deleted file mode 100644
index de60212aa42..00000000000
--- a/src/mongo/db/process_health/fault_impl.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/**
- * Copyright (C) 2021-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-#pragma once
-
-#include "mongo/db/process_health/fault.h"
-
-#include "mongo/db/service_context.h"
-#include "mongo/util/clock_source.h"
-#include "mongo/util/duration.h"
-#include "mongo/util/timer.h"
-
-namespace mongo {
-namespace process_health {
-
-/**
- * Internal implementation of the Fault class.
- * @see Fault
- */
-class FaultImpl : public FaultInternal {
-public:
- explicit FaultImpl(ClockSource* clockSource);
-
- ~FaultImpl() override = default;
-
- // Fault interface.
-
- UUID getId() const override;
-
- double getSeverity() const override;
-
- Milliseconds getDuration() const override;
-
- void appendDescription(BSONObjBuilder* builder) const override;
-
- // FaultFacetsContainer interface.
-
- std::vector<FaultFacetPtr> getFacets() const override;
-
- FaultFacetPtr getFaultFacet(FaultFacetType type) override;
-
- void updateWithSuppliedFacet(FaultFacetType type, FaultFacetPtr facet) override;
-
- void garbageCollectResolvedFacets() override;
-
-private:
- const UUID _id = UUID::gen();
-
- ClockSource* const _clockSource;
- const Date_t _startTime;
-
- mutable Mutex _mutex = MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "FaultImpl::_mutex");
- // We don't need a map by type because we expect to have only few facets.
- // Linear search is much faster, we want to avoid any lock contention here.
- std::deque<FaultFacetPtr> _facets;
-};
-
-} // namespace process_health
-} // namespace mongo
diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp
index 4091546a8b9..22ebedbdda0 100644
--- a/src/mongo/db/process_health/fault_manager.cpp
+++ b/src/mongo/db/process_health/fault_manager.cpp
@@ -35,8 +35,8 @@
#include <algorithm>
+#include "mongo/db/process_health/fault.h"
#include "mongo/db/process_health/fault_facet_impl.h"
-#include "mongo/db/process_health/fault_impl.h"
#include "mongo/db/process_health/fault_manager_config.h"
#include "mongo/db/process_health/health_monitoring_gen.h"
#include "mongo/db/process_health/health_observer_registration.h"
@@ -269,7 +269,7 @@ boost::optional<FaultState> FaultManager::handleStartupCheck(const OptionalMessa
}
updateWithCheckStatus(HealthCheckStatus(status));
- auto optionalFault = getFaultFacetsContainer();
+ auto optionalFault = getFault();
if (optionalFault) {
optionalFault->garbageCollectResolvedFacets();
}
@@ -279,7 +279,10 @@ boost::optional<FaultState> FaultManager::handleStartupCheck(const OptionalMessa
FaultState::kStartupCheck, FaultState::kStartupCheck, boost::none);
}
- std::shared_ptr<FaultInternal> faultToDelete;
+ // If the whole fault becomes resolved, garbage collect it
+ // with proper locking.
+ std::shared_ptr<Fault> faultToDelete;
+
{
auto lk = stdx::lock_guard(_mutex);
if (_fault && _fault->getFacets().empty()) {
@@ -331,7 +334,7 @@ boost::optional<FaultState> FaultManager::handleTransientFault(const OptionalMes
updateWithCheckStatus(HealthCheckStatus(status));
- auto optionalActiveFault = getFaultFacetsContainer();
+ auto optionalActiveFault = getFault();
if (optionalActiveFault) {
optionalActiveFault->garbageCollectResolvedFacets();
}
@@ -372,7 +375,7 @@ void FaultManager::logCurrentState(FaultState, FaultState newState, const Option
}
void FaultManager::setTransientFaultDeadline(FaultState, FaultState, const OptionalMessageType&) {
- if (hasCriticalFacet(_fault.get()) && !_transientFaultDeadline) {
+ if (_fault->hasCriticalFacet(getConfig()) && !_transientFaultDeadline) {
_transientFaultDeadline = std::make_unique<TransientFaultDeadline>(
this, _taskExecutor, _config->getActiveFaultDuration());
}
@@ -480,21 +483,27 @@ Date_t FaultManager::getLastTransitionTime() const {
FaultConstPtr FaultManager::currentFault() const {
auto lk = stdx::lock_guard(_mutex);
- return std::static_pointer_cast<const Fault>(_fault);
+ return _fault;
+}
+
+FaultPtr FaultManager::getFault() const {
+ auto lk = stdx::lock_guard(_mutex);
+ return _fault;
}
-FaultFacetsContainerPtr FaultManager::getFaultFacetsContainer() const {
+FaultPtr FaultManager::createFault() {
auto lk = stdx::lock_guard(_mutex);
- return std::static_pointer_cast<FaultFacetsContainer>(_fault);
+ _fault = std::make_shared<Fault>(_svcCtx->getFastClockSource());
+ return _fault;
}
-FaultFacetsContainerPtr FaultManager::getOrCreateFaultFacetsContainer() {
+FaultPtr FaultManager::getOrCreateFault() {
auto lk = stdx::lock_guard(_mutex);
if (!_fault) {
// Create a new one.
- _fault = std::make_shared<FaultImpl>(_svcCtx->getFastClockSource());
+ _fault = std::make_shared<Fault>(_svcCtx->getFastClockSource());
}
- return std::static_pointer_cast<FaultFacetsContainer>(_fault);
+ return _fault;
}
void FaultManager::healthCheck(HealthObserver* observer, CancellationToken token) {
@@ -565,7 +574,7 @@ void FaultManager::healthCheck(HealthObserver* observer, CancellationToken token
}
// Run asynchronous health check. Send output to the state machine. Schedule next run.
- auto healthCheckFuture = observer->periodicCheck(*this, _taskExecutor, token)
+ auto healthCheckFuture = observer->periodicCheck(_taskExecutor, token)
.thenRunOn(_taskExecutor)
.onCompletion([this, acceptNotOKStatus, schedulerCb](
StatusWith<HealthCheckStatus> status) {
@@ -589,37 +598,22 @@ void FaultManager::healthCheck(HealthObserver* observer, CancellationToken token
}
void FaultManager::updateWithCheckStatus(HealthCheckStatus&& checkStatus) {
+ auto fault = getFault();
+ // Remove resolved facet from the fault.
if (HealthCheckStatus::isResolved(checkStatus.getSeverity())) {
- auto container = getFaultFacetsContainer();
- if (container) {
- container->updateWithSuppliedFacet(checkStatus.getType(), nullptr);
+ if (fault) {
+ fault->removeFacet(checkStatus.getType());
}
-
return;
}
- auto container = getOrCreateFaultFacetsContainer();
- auto facet = container->getFaultFacet(checkStatus.getType());
- if (!facet) {
- const auto type = checkStatus.getType();
- auto newFacet =
- new FaultFacetImpl(type, _svcCtx->getFastClockSource(), std::move(checkStatus));
- container->updateWithSuppliedFacet(type, FaultFacetPtr(newFacet));
- } else {
- facet->update(std::move(checkStatus));
+ if (!_fault) {
+ fault = createFault(); // Create fault if it doesn't exist.
}
-}
-bool FaultManager::hasCriticalFacet(const FaultInternal* fault) const {
- invariant(fault);
- const auto& facets = fault->getFacets();
- for (const auto& facet : facets) {
- auto facetType = facet->getType();
- if (_config->getHealthObserverIntensity(facetType) ==
- HealthObserverIntensityEnum::kCritical)
- return true;
- }
- return false;
+ const auto type = checkStatus.getType();
+ fault->upsertFacet(std::make_shared<FaultFacetImpl>(
+ type, _svcCtx->getFastClockSource(), std::move(checkStatus)));
}
FaultManagerConfig FaultManager::getConfig() const {
diff --git a/src/mongo/db/process_health/fault_manager.h b/src/mongo/db/process_health/fault_manager.h
index c627c18ee1d..e53daefac86 100644
--- a/src/mongo/db/process_health/fault_manager.h
+++ b/src/mongo/db/process_health/fault_manager.h
@@ -32,7 +32,6 @@
#include "mongo/db/process_health/fault.h"
#include "mongo/db/process_health/fault_facet.h"
-#include "mongo/db/process_health/fault_facet_container.h"
#include "mongo/db/process_health/fault_manager_config.h"
#include "mongo/db/process_health/health_monitoring_server_parameters_gen.h"
#include "mongo/db/process_health/health_observer.h"
@@ -56,8 +55,7 @@ namespace process_health {
*
* If an active fault state persists, FaultManager will terminate the server process.
*/
-class FaultManager : protected StateMachine<HealthCheckStatus, FaultState>,
- protected FaultFacetsContainerFactory {
+class FaultManager : protected StateMachine<HealthCheckStatus, FaultState> {
FaultManager(const FaultManager&) = delete;
FaultManager& operator=(const FaultManager&) = delete;
@@ -131,20 +129,20 @@ protected:
// run.
virtual void healthCheck(HealthObserver* observer, CancellationToken token);
- // Protected interface FaultFacetsContainerFactory implementation.
+ FaultPtr getFault() const;
- // The interface FaultFacetsContainerFactory is implemented by the member '_fault'.
- FaultFacetsContainerPtr getFaultFacetsContainer() const override;
+ FaultPtr createFault();
- FaultFacetsContainerPtr getOrCreateFaultFacetsContainer() override;
+ FaultPtr getOrCreateFault();
- void updateWithCheckStatus(HealthCheckStatus&& checkStatus) override;
+ /**
+ * Update the active fault with supplied check result.
+ * Creates, updates, or removes the matching facet depending on the status.
+ */
+ void updateWithCheckStatus(HealthCheckStatus&& checkStatus);
void schedulePeriodicHealthCheckThread();
- // TODO: move this into fault class; refactor to remove FaultInternal
- bool hasCriticalFacet(const FaultInternal* fault) const;
-
void progressMonitorCheckForTests(std::function<void(std::string cause)> crashCb);
private:
@@ -160,7 +158,7 @@ private:
mutable Mutex _mutex =
MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(5), "FaultManager::_mutex");
- std::shared_ptr<FaultInternal> _fault;
+ std::shared_ptr<Fault> _fault;
// This source is canceled before the _taskExecutor shutdown(). It
// can be used to check for the start of the shutdown sequence.
CancellationSource _managerShuttingDownCancellationSource;
diff --git a/src/mongo/db/process_health/fault_manager_config.h b/src/mongo/db/process_health/fault_manager_config.h
index db218853da0..cb92353798f 100644
--- a/src/mongo/db/process_health/fault_manager_config.h
+++ b/src/mongo/db/process_health/fault_manager_config.h
@@ -90,7 +90,7 @@ public:
/* Maximum possible jitter added to the time between health checks */
static auto inline constexpr kPeriodicHealthCheckMaxJitter{Milliseconds{100}};
- HealthObserverIntensityEnum getHealthObserverIntensity(FaultFacetType type) {
+ HealthObserverIntensityEnum getHealthObserverIntensity(FaultFacetType type) const {
auto intensities = _getHealthObserverIntensities();
auto toObserverType = [](FaultFacetType type) -> boost::optional<HealthObserverTypeEnum> {
diff --git a/src/mongo/db/process_health/fault_manager_test_suite.h b/src/mongo/db/process_health/fault_manager_test_suite.h
index 59d937846c7..fb9ce56f2c4 100644
--- a/src/mongo/db/process_health/fault_manager_test_suite.h
+++ b/src/mongo/db/process_health/fault_manager_test_suite.h
@@ -92,14 +92,14 @@ public:
return getHealthObservers();
}
- FaultFacetsContainerPtr getOrCreateFaultFacetsContainerTest() {
- return getOrCreateFaultFacetsContainer();
+ FaultPtr getOrCreateFaultTest() {
+ return getOrCreateFault();
}
- FaultInternal& getFault() {
- FaultFacetsContainerPtr fault = getFaultFacetsContainer();
+ Fault& getFault() {
+ FaultPtr fault = FaultManager::getFault();
invariant(fault);
- return *(static_cast<FaultInternal*>(fault.get()));
+ return *(static_cast<Fault*>(fault.get()));
}
void progressMonitorCheckTest(std::function<void(std::string cause)> crashCb) {
diff --git a/src/mongo/db/process_health/fault_state_machine_test.cpp b/src/mongo/db/process_health/fault_state_machine_test.cpp
index 8a4b639d576..f58d869ccb4 100644
--- a/src/mongo/db/process_health/fault_state_machine_test.cpp
+++ b/src/mongo/db/process_health/fault_state_machine_test.cpp
@@ -200,12 +200,10 @@ TEST_F(FaultManagerTest, OneFacetIsResolved) {
ASSERT(manager().getFaultState() == FaultState::kStartupCheck);
manager().acceptTest(HealthCheckStatus(FaultFacetType::kMock1, 1.1, "failing health check 1"));
manager().acceptTest(HealthCheckStatus(FaultFacetType::kMock2, 1.1, "failing health check 2"));
- assertSoon([this] {
- return manager().getOrCreateFaultFacetsContainerTest()->getFacets().size() == 2;
- });
+ assertSoon([this] { return manager().getOrCreateFaultTest()->getFacets().size() == 2; });
manager().acceptTest(HealthCheckStatus(FaultFacetType::kMock1));
assertSoon([this] {
- return manager().getOrCreateFaultFacetsContainerTest()->getFacets().front()->getType() ==
+ return manager().getOrCreateFaultTest()->getFacets().front()->getType() ==
FaultFacetType::kMock2;
});
ASSERT(manager().getFaultState() == FaultState::kStartupCheck);
diff --git a/src/mongo/db/process_health/fault_impl_test.cpp b/src/mongo/db/process_health/fault_test.cpp
index d7ec9c0e0da..9f8d1fc6d70 100644
--- a/src/mongo/db/process_health/fault_impl_test.cpp
+++ b/src/mongo/db/process_health/fault_test.cpp
@@ -27,7 +27,7 @@
* it in the license file.
*/
-#include "mongo/db/process_health/fault_impl.h"
+#include "mongo/db/process_health/fault.h"
#include "mongo/db/process_health/fault_facet_mock.h"
#include "mongo/unittest/unittest.h"
@@ -37,37 +37,37 @@ namespace mongo {
namespace process_health {
namespace {
-class FaultImplTest : public unittest::Test {
+class FaultTest : public unittest::Test {
public:
void setUp() override {
_svcCtx = ServiceContext::make();
_svcCtx->setFastClockSource(std::make_unique<ClockSourceMock>());
- _faultImpl = std::make_unique<FaultImpl>(_svcCtx->getFastClockSource());
+ _faultImpl = std::make_unique<Fault>(_svcCtx->getFastClockSource());
}
ClockSourceMock& clockSource() {
return *static_cast<ClockSourceMock*>(_svcCtx->getFastClockSource());
}
- FaultImpl& fault() {
+ Fault& fault() {
return *_faultImpl;
}
private:
ServiceContext::UniqueServiceContext _svcCtx;
- std::unique_ptr<FaultImpl> _faultImpl;
+ std::unique_ptr<Fault> _faultImpl;
};
-TEST_F(FaultImplTest, TimeSourceWorks) {
+TEST_F(FaultTest, TimeSourceWorks) {
// Fault was just created, duration should be zero.
ASSERT_EQ(Milliseconds(0), fault().getDuration());
clockSource().advance(Milliseconds(1));
ASSERT_EQ(Milliseconds(1), fault().getDuration());
}
-TEST_F(FaultImplTest, SeverityLevelHelpersWork) {
+TEST_F(FaultTest, SeverityLevelHelpersWork) {
FaultFacetMock resolvedFacet(FaultFacetType::kMock1, &clockSource(), [] { return 0; });
ASSERT_TRUE(HealthCheckStatus::isResolved(resolvedFacet.getStatus().getSeverity()));
@@ -78,26 +78,26 @@ TEST_F(FaultImplTest, SeverityLevelHelpersWork) {
ASSERT_TRUE(HealthCheckStatus::isActiveFault(faultyFacet.getStatus().getSeverity()));
}
-TEST_F(FaultImplTest, FindFacetByType) {
+TEST_F(FaultTest, FindFacetByType) {
ASSERT_EQ(0, fault().getFacets().size());
ASSERT_FALSE(fault().getFaultFacet(FaultFacetType::kMock1));
FaultFacetPtr newFacet =
std::make_shared<FaultFacetMock>(FaultFacetType::kMock1, &clockSource(), [] { return 0; });
- fault().updateWithSuppliedFacet(FaultFacetType::kMock1, newFacet);
+ fault().upsertFacet(newFacet);
auto facet = fault().getFaultFacet(FaultFacetType::kMock1);
ASSERT_TRUE(facet);
auto status = facet->getStatus();
ASSERT_EQ(FaultFacetType::kMock1, status.getType());
}
-TEST_F(FaultImplTest, CanCreateAndGarbageCollectFacets) {
+TEST_F(FaultTest, CanCreateAndGarbageCollectFacets) {
AtomicDouble severity{0.1};
ASSERT_EQ(0, fault().getFacets().size());
FaultFacetPtr newFacet = std::make_shared<FaultFacetMock>(
FaultFacetType::kMock1, &clockSource(), [&severity] { return severity.load(); });
- fault().updateWithSuppliedFacet(FaultFacetType::kMock1, newFacet);
+ fault().upsertFacet(newFacet);
// New facet was added successfully.
ASSERT_EQ(1, fault().getFacets().size());
diff --git a/src/mongo/db/process_health/health_observer.h b/src/mongo/db/process_health/health_observer.h
index fd44b035a23..cdfeeb81f08 100644
--- a/src/mongo/db/process_health/health_observer.h
+++ b/src/mongo/db/process_health/health_observer.h
@@ -29,7 +29,6 @@
#pragma once
#include "mongo/db/process_health/fault_facet.h"
-#include "mongo/db/process_health/fault_facets_container.h"
#include "mongo/db/process_health/fault_manager_config.h"
#include "mongo/executor/task_executor.h"
#include "mongo/util/future.h"
@@ -74,12 +73,10 @@ public:
* Triggers health check. The implementation should not block to wait for the completion
* of this check.
*
- * @param factory Interface to get or create the factory of facets container.
+ * @param factory Interface to get or create the factory of faults.
*/
virtual SharedSemiFuture<HealthCheckStatus> periodicCheck(
- FaultFacetsContainerFactory& factory,
- std::shared_ptr<executor::TaskExecutor> taskExecutor,
- CancellationToken token) = 0;
+ std::shared_ptr<executor::TaskExecutor> taskExecutor, CancellationToken token) = 0;
virtual HealthObserverLivenessStats getStats() const = 0;
diff --git a/src/mongo/db/process_health/health_observer_base.cpp b/src/mongo/db/process_health/health_observer_base.cpp
index 47831d5fa56..949bbe5e5bd 100644
--- a/src/mongo/db/process_health/health_observer_base.cpp
+++ b/src/mongo/db/process_health/health_observer_base.cpp
@@ -42,9 +42,7 @@ HealthObserverBase::HealthObserverBase(ServiceContext* svcCtx)
: _svcCtx(svcCtx), _rand(PseudoRandom(SecureRandom().nextInt64())) {}
SharedSemiFuture<HealthCheckStatus> HealthObserverBase::periodicCheck(
- FaultFacetsContainerFactory& factory,
- std::shared_ptr<executor::TaskExecutor> taskExecutor,
- CancellationToken token) {
+ std::shared_ptr<executor::TaskExecutor> taskExecutor, CancellationToken token) {
// If we have reached here, the intensity of this health observer must not be off
{
auto lk = stdx::lock_guard(_mutex);
diff --git a/src/mongo/db/process_health/health_observer_base.h b/src/mongo/db/process_health/health_observer_base.h
index 6232d7306f3..10012ca0a46 100644
--- a/src/mongo/db/process_health/health_observer_base.h
+++ b/src/mongo/db/process_health/health_observer_base.h
@@ -62,9 +62,7 @@ public:
// Implements the common logic for periodic checks.
// Every observer should implement periodicCheckImpl() for specific tests.
SharedSemiFuture<HealthCheckStatus> periodicCheck(
- FaultFacetsContainerFactory& factory,
- std::shared_ptr<executor::TaskExecutor> taskExecutor,
- CancellationToken token) override;
+ std::shared_ptr<executor::TaskExecutor> taskExecutor, CancellationToken token) override;
HealthCheckStatus makeHealthyStatus() const;
HealthCheckStatus makeSimpleFailedStatus(double severity, std::vector<Status>&& failures) const;