From 05f90673ab20eca2857e673b5ef8d3d26e3bbb21 Mon Sep 17 00:00:00 2001 From: Andrew Shuvalov Date: Thu, 21 Oct 2021 01:50:32 +0000 Subject: SERVER-60587 Implement FaultFacet and make necessary changes in HealthCheckStatus --- src/mongo/db/process_health/SConscript | 1 + src/mongo/db/process_health/fault_facet.h | 8 +-- src/mongo/db/process_health/fault_facet_impl.cpp | 58 ++++++++++++++++++ src/mongo/db/process_health/fault_facet_impl.h | 68 ++++++++++++++++++++++ src/mongo/db/process_health/fault_facet_mock.h | 23 ++------ src/mongo/db/process_health/fault_facet_test.cpp | 58 +++++++++++++++++- src/mongo/db/process_health/fault_manager.cpp | 27 +++------ src/mongo/db/process_health/health_check_status.h | 55 ++++------------- .../db/process_health/health_observer_base.cpp | 22 ++++++- src/mongo/db/process_health/health_observer_base.h | 6 ++ src/mongo/db/process_health/health_observer_mock.h | 3 +- .../db/process_health/health_observer_test.cpp | 7 +++ 12 files changed, 247 insertions(+), 89 deletions(-) create mode 100644 src/mongo/db/process_health/fault_facet_impl.cpp create mode 100644 src/mongo/db/process_health/fault_facet_impl.h (limited to 'src/mongo/db/process_health') diff --git a/src/mongo/db/process_health/SConscript b/src/mongo/db/process_health/SConscript index 129d6d0982c..d866191a258 100644 --- a/src/mongo/db/process_health/SConscript +++ b/src/mongo/db/process_health/SConscript @@ -7,6 +7,7 @@ env = env.Clone() env.Library( target='fault_manager', source=[ + 'fault_facet_impl.cpp', 'fault_impl.cpp', 'fault_manager.cpp', 'health_observer_base.cpp', diff --git a/src/mongo/db/process_health/fault_facet.h b/src/mongo/db/process_health/fault_facet.h index 1a46f2686bf..ad7fb6673d7 100644 --- a/src/mongo/db/process_health/fault_facet.h +++ b/src/mongo/db/process_health/fault_facet.h @@ -52,10 +52,10 @@ public: */ virtual HealthCheckStatus getStatus() const = 0; - // This interface contains no methods to change the state of the Facet - // because this update is handled by the particular implementation of - // the HealthObserver interface, updating its matching Facet. - // Please do not add any non-const methods here. + /** + * Change the state of this Facet with health check result. + */ + virtual void update(HealthCheckStatus status) = 0; }; using FaultFacetPtr = std::shared_ptr; diff --git a/src/mongo/db/process_health/fault_facet_impl.cpp b/src/mongo/db/process_health/fault_facet_impl.cpp new file mode 100644 index 00000000000..f2975b6b559 --- /dev/null +++ b/src/mongo/db/process_health/fault_facet_impl.cpp @@ -0,0 +1,58 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/process_health/fault_facet_impl.h" + +namespace mongo { +namespace process_health { + +FaultFacetImpl::FaultFacetImpl(FaultFacetType type, + ClockSource* clockSource, + HealthCheckStatus status) + : _type(type), _clockSource(clockSource) { + update(status); +} + +FaultFacetType FaultFacetImpl::getType() const { + return _type; +} + +HealthCheckStatus FaultFacetImpl::getStatus() const { + auto lk = stdx::lock_guard(_mutex); + return HealthCheckStatus(getType(), _severity, _description); +} + +void FaultFacetImpl::update(HealthCheckStatus status) { + auto lk = stdx::lock_guard(_mutex); + _severity = status.getSeverity(); + _description = status.getShortDescription().toString(); +} + +} // namespace process_health +} // namespace mongo diff --git a/src/mongo/db/process_health/fault_facet_impl.h b/src/mongo/db/process_health/fault_facet_impl.h new file mode 100644 index 00000000000..76279052470 --- /dev/null +++ b/src/mongo/db/process_health/fault_facet_impl.h @@ -0,0 +1,68 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ +#pragma once + +#include "mongo/db/process_health/fault_facet.h" + +#include "mongo/db/process_health/health_check_status.h" +#include "mongo/db/process_health/health_observer.h" +#include "mongo/util/clock_source.h" + +namespace mongo { +namespace process_health { + +// Internal representation of the Facet. +class FaultFacetImpl : public FaultFacet { +public: + FaultFacetImpl(FaultFacetType type, ClockSource* clockSource, HealthCheckStatus status); + + ~FaultFacetImpl() override = default; + + // Public interface methods. + + FaultFacetType getType() const override; + + HealthCheckStatus getStatus() const override; + + void update(HealthCheckStatus status) override; + +private: + const FaultFacetType _type; + ClockSource* const _clockSource; + + const Date_t _startTime = _clockSource->now(); + + mutable Mutex _mutex = + MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(1), "FaultFacetImpl::_mutex"); + double _severity = 0; + std::string _description; +}; + +} // namespace process_health +} // namespace mongo diff --git a/src/mongo/db/process_health/fault_facet_mock.h b/src/mongo/db/process_health/fault_facet_mock.h index c607f50939c..6035a5c89df 100644 --- a/src/mongo/db/process_health/fault_facet_mock.h +++ b/src/mongo/db/process_health/fault_facet_mock.h @@ -59,34 +59,23 @@ public: } HealthCheckStatus getStatus() const override { - double severity = _callback(); + const double severity = _callback(); - auto lk = stdx::lock_guard(_mutex); - if (HealthCheckStatus::isActiveFault(severity)) { - if (_activeFaultTime == Date_t::max()) { - _activeFaultTime = _clockSource->now(); - } - } else { - _activeFaultTime = Date_t::max(); - } - - auto now = _clockSource->now(); - HealthCheckStatus healthCheckStatus( - _mockType, severity, "Mock facet", now - _activeFaultTime, now - _startTime); + auto healthCheckStatus = HealthCheckStatus(_mockType, severity, "Mock facet"); LOGV2(5956702, "Mock fault facet status", "status"_attr = healthCheckStatus); return healthCheckStatus; } + void update(HealthCheckStatus status) override { + MONGO_UNREACHABLE; // Don't use this in mock. + } + private: const FaultFacetType _mockType; ClockSource* const _clockSource; const Date_t _startTime = _clockSource->now(); const MockCallback _callback; - - mutable Mutex _mutex; - // We also update the _activeFaultTime inside the const getStatus(). - mutable Date_t _activeFaultTime = Date_t::max(); }; } // namespace process_health diff --git a/src/mongo/db/process_health/fault_facet_test.cpp b/src/mongo/db/process_health/fault_facet_test.cpp index 8251848e2e6..537c7db37e7 100644 --- a/src/mongo/db/process_health/fault_facet_test.cpp +++ b/src/mongo/db/process_health/fault_facet_test.cpp @@ -28,6 +28,7 @@ */ #include "mongo/db/process_health/fault_facet.h" +#include "mongo/db/process_health/fault_facet_impl.h" #include "mongo/db/process_health/fault_facet_mock.h" #include "mongo/unittest/unittest.h" @@ -36,7 +37,8 @@ namespace process_health { namespace { -class FaultFacetTest : public unittest::Test { +// Using the Mock facet. +class FaultFacetTestWithMock : public unittest::Test { public: void startMock(FaultFacetMock::MockCallback callback) { _svcCtx = ServiceContext::make(); @@ -54,12 +56,64 @@ private: std::unique_ptr _facetMock; }; -TEST_F(FaultFacetTest, FacetWithFailure) { +TEST_F(FaultFacetTestWithMock, FacetWithFailure) { startMock([] { return 0.5; }); auto status = getStatus(); ASSERT_APPROX_EQUAL(0.5, status.getSeverity(), 0.001); } +// Using the FaultFacetImpl. +class FaultFacetImplTest : public unittest::Test { +public: + static inline const auto kNoFailures = HealthCheckStatus(FaultFacetType::kMock1, 0.0, "test"); + static inline const auto kFailure = HealthCheckStatus(FaultFacetType::kMock1, 0.5, "test"); + + void setUp() { + _svcCtx = ServiceContext::make(); + _svcCtx->setFastClockSource(std::make_unique()); + } + + void createWithStatus(HealthCheckStatus status) { + _facet = std::make_unique( + FaultFacetType::kMock1, _svcCtx->getFastClockSource(), status); + } + + void update(HealthCheckStatus status) { + _facet->update(status); + } + + HealthCheckStatus getStatus() const { + return _facet->getStatus(); + } + + ClockSourceMock& clockSource() { + return *static_cast(_svcCtx->getFastClockSource()); + } + + template + void advanceTime(Duration d) { + clockSource().advance(d); + } + +private: + ServiceContext::UniqueServiceContext _svcCtx; + std::unique_ptr _facet; +}; + +TEST_F(FaultFacetImplTest, Simple) { + createWithStatus(kFailure); + const auto status = getStatus(); + ASSERT_GT(status.getSeverity(), 0); + ASSERT_EQ(status.getType(), FaultFacetType::kMock1); + ASSERT_EQ(status.getShortDescription(), "test"_sd); +} + +TEST_F(FaultFacetImplTest, Update) { + createWithStatus(kFailure); + update(kNoFailures); + ASSERT_EQ(getStatus().getSeverity(), 0); +} + } // namespace } // namespace process_health } // namespace mongo diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp index ce442cfd275..60c00c1ccdf 100644 --- a/src/mongo/db/process_health/fault_manager.cpp +++ b/src/mongo/db/process_health/fault_manager.cpp @@ -33,6 +33,7 @@ #include "mongo/db/process_health/fault_manager.h" +#include "mongo/db/process_health/fault_facet_impl.h" #include "mongo/db/process_health/fault_impl.h" #include "mongo/db/process_health/health_monitoring_gen.h" #include "mongo/db/process_health/health_observer_registration.h" @@ -207,23 +208,6 @@ void FaultManager::updateWithCheckStatus(HealthCheckStatus&& checkStatus) { return; } - // TODO(SERVER-60587): implement Facet properly. - class Impl : public FaultFacet { - public: - Impl(HealthCheckStatus status) : _status(status) {} - - FaultFacetType getType() const override { - return _status.getType(); - } - - HealthCheckStatus getStatus() const override { - return _status; - } - - private: - HealthCheckStatus _status; - }; - if (!container) { // Need to create container first. container = getOrCreateFaultFacetsContainer(); @@ -231,10 +215,13 @@ void FaultManager::updateWithCheckStatus(HealthCheckStatus&& checkStatus) { auto facet = container->getFaultFacet(checkStatus.getType()); if (!facet) { - auto newFacet = FaultFacetPtr(new Impl(checkStatus)); - container->updateWithSuppliedFacet(checkStatus.getType(), newFacet); + const auto type = checkStatus.getType(); + auto newFacet = + new FaultFacetImpl(type, _svcCtx->getFastClockSource(), std::move(checkStatus)); + container->updateWithSuppliedFacet(type, FaultFacetPtr(newFacet)); + } else { + facet->update(std::move(checkStatus)); } - // TODO(SERVER-60587): update facet with new check status. } void FaultManager::checkForStateTransition() { diff --git a/src/mongo/db/process_health/health_check_status.h b/src/mongo/db/process_health/health_check_status.h index 9dffed1f10e..c05ae48d8bb 100644 --- a/src/mongo/db/process_health/health_check_status.h +++ b/src/mongo/db/process_health/health_check_status.h @@ -43,7 +43,7 @@ namespace process_health { enum class FaultFacetType { kMock1 = 0, kMock2, kLdap }; /** - * The class representing current status of an ongoing fault tracked by facet. + * Immutable class representing current status of an ongoing fault tracked by facet. */ class HealthCheckStatus { public: @@ -54,27 +54,17 @@ public: // avoid rounding problems and be sure that severity of 1.0 is guaranteed to be an active fault. static constexpr double kActiveFaultSeverityEpsilon = 0.000001; - HealthCheckStatus(FaultFacetType type, - double severity, - StringData description, - Milliseconds activeFaultDuration, - Milliseconds duration) - : _type(type), - _severity(severity), - _description(description), - _activeFaultDuration(activeFaultDuration), - _duration(duration) { - uassert(5949601, - str::stream() << "Active fault duration " << _activeFaultDuration - << " cannot be longer than duration " << _duration, - _duration >= _activeFaultDuration); - } + using Severity = double; + + HealthCheckStatus(FaultFacetType type, Severity severity, StringData description) + : _type(type), _severity(severity), _description(description) {} // Constructs a resolved status (no fault detected). - HealthCheckStatus(FaultFacetType type) : _type(type), _severity(0), _description("resolved") {} + explicit HealthCheckStatus(FaultFacetType type) + : _type(type), _severity(0), _description("resolved"_sd) {} HealthCheckStatus(const HealthCheckStatus&) = default; - HealthCheckStatus& operator=(const HealthCheckStatus&) = default; + HealthCheckStatus& operator=(const HealthCheckStatus&) = delete; /** * @return FaultFacetType of this status. @@ -86,31 +76,14 @@ public: /** * The fault severity value if any. * - * @return Current fault severity. The expected values: - * 0: Ok - * (0, 1.0): Transient fault condition - * [1.0, Inf): Active fault condition + * @return Current fault severity */ - double getSeverity() const { + Severity getSeverity() const { return _severity; } - /** - * Gets the duration of an active fault, if any. - * This is the time from the moment the severity reached the 1.0 value - * and stayed on or above 1.0. - * - * Note: each time the severity drops below 1.0 the duration is reset. - */ - Milliseconds getActiveFaultDuration() const { - return _activeFaultDuration; - } - - /** - * @return duration of the fault facet or fault from the moment it was created. - */ - Milliseconds getDuration() const { - return _duration; + StringData getShortDescription() const { + return _description; } void appendDescription(BSONObjBuilder* builder) const; @@ -141,8 +114,6 @@ private: const FaultFacetType _type; const double _severity; const std::string _description; - const Milliseconds _activeFaultDuration; - const Milliseconds _duration; }; inline StringBuilder& operator<<(StringBuilder& s, const FaultFacetType& type) { @@ -167,8 +138,6 @@ inline void HealthCheckStatus::appendDescription(BSONObjBuilder* builder) const builder->append("type", _type); builder->append("description", _description); builder->append("severity", _severity); - builder->append("activeFaultDuration", _activeFaultDuration.toString()); - builder->append("duration", _duration.toString()); } inline StringBuilder& operator<<(StringBuilder& s, const HealthCheckStatus& hcs) { diff --git a/src/mongo/db/process_health/health_observer_base.cpp b/src/mongo/db/process_health/health_observer_base.cpp index 097f5dd5aab..918093b34b4 100644 --- a/src/mongo/db/process_health/health_observer_base.cpp +++ b/src/mongo/db/process_health/health_observer_base.cpp @@ -66,7 +66,7 @@ void HealthObserverBase::periodicCheck(FaultFacetsContainerFactory& factory, // Do the health check. taskExecutor->schedule([this, &factory](Status status) { periodicCheckImpl({}) - .then([this, &factory](HealthCheckStatus checkStatus) { + .then([this, &factory](HealthCheckStatus&& checkStatus) mutable { factory.updateWithCheckStatus(std::move(checkStatus)); }) .onCompletion([this](Status status) { @@ -87,5 +87,25 @@ HealthObserverIntensity HealthObserverBase::getIntensity() { return _intensity; } +HealthCheckStatus HealthObserverBase::makeHealthyStatus() const { + return HealthCheckStatus(getType()); +} + +HealthCheckStatus HealthObserverBase::makeSimpleFailedStatus(double severity, + std::vector&& failures) const { + if (severity <= 0) { + LOGV2_WARNING(6007903, + "Creating faulty health check status requires positive severity", + "observerType"_attr = getType()); + } + StringBuilder sb; + for (const auto& s : failures) { + sb.append(s.toString()); + sb.append(" "); + } + + return HealthCheckStatus(getType(), severity, sb.stringData()); +} + } // namespace process_health } // namespace mongo diff --git a/src/mongo/db/process_health/health_observer_base.h b/src/mongo/db/process_health/health_observer_base.h index d8f13737bc7..5c227e6355c 100644 --- a/src/mongo/db/process_health/health_observer_base.h +++ b/src/mongo/db/process_health/health_observer_base.h @@ -79,6 +79,12 @@ protected: HealthObserverIntensity getIntensity() override; + // Helper method to create a status without errors. + HealthCheckStatus makeHealthyStatus() const; + + // Make a generic error status. + HealthCheckStatus makeSimpleFailedStatus(double severity, std::vector&& failures) const; + ClockSource* const _clockSource; TickSource* const _tickSource; diff --git a/src/mongo/db/process_health/health_observer_mock.h b/src/mongo/db/process_health/health_observer_mock.h index dafbbc3608d..96c984960ae 100644 --- a/src/mongo/db/process_health/health_observer_mock.h +++ b/src/mongo/db/process_health/health_observer_mock.h @@ -66,8 +66,7 @@ protected: if (HealthCheckStatus::isResolved(severity)) { completionPf.promise.emplaceValue(HealthCheckStatus(getType())); } else { - completionPf.promise.emplaceValue( - HealthCheckStatus(getType(), severity, "failed", Milliseconds(0), Milliseconds(0))); + completionPf.promise.emplaceValue(HealthCheckStatus(getType(), severity, "failed")); } return std::move(completionPf.future); } diff --git a/src/mongo/db/process_health/health_observer_test.cpp b/src/mongo/db/process_health/health_observer_test.cpp index c53e54c6f77..9780c45209e 100644 --- a/src/mongo/db/process_health/health_observer_test.cpp +++ b/src/mongo/db/process_health/health_observer_test.cpp @@ -100,6 +100,8 @@ TEST_F(FaultManagerTest, SeverityIsMaxFromAllFacetsSeverity) { manager().healthCheckTest(); do { waitForFaultBeingCreated(); + advanceTime(Milliseconds(100)); + manager().healthCheckTest(); } while (manager().getFault().getFacets().size() != 2); // Race between two facets. auto currentFault = manager().currentFault(); @@ -121,6 +123,7 @@ TEST_F(FaultManagerTest, HealthCheckCreatesFacetThenIsGarbageCollectedAndStateTr ASSERT_FALSE(manager().currentFault()); // State is transitioned. ASSERT_EQ(FaultState::kOk, manager().getFaultState()); + resetManager(); // Before atomic fields above go out of scope. } TEST_F(FaultManagerTest, HealthCheckCreates2FacetsThenIsGarbageCollected) { @@ -133,6 +136,8 @@ TEST_F(FaultManagerTest, HealthCheckCreates2FacetsThenIsGarbageCollected) { while (manager().getFault().getFacets().size() != 2) { sleepFor(Milliseconds(1)); + advanceTime(Milliseconds(100)); + manager().healthCheckTest(); } // Resolve one facet and it should be garbage collected. @@ -149,6 +154,7 @@ TEST_F(FaultManagerTest, HealthCheckCreates2FacetsThenIsGarbageCollected) { } ASSERT_FALSE(internalFault.getFaultFacet(FaultFacetType::kMock1)); ASSERT_TRUE(internalFault.getFaultFacet(FaultFacetType::kMock2)); + resetManager(); // Before atomic fields above go out of scope. } TEST_F(FaultManagerTest, HealthCheckWithOffFacetCreatesNoFault) { @@ -178,6 +184,7 @@ TEST_F(FaultManagerTest, DoesNotRestartCheckBeforeIntervalExpired) { waitForFaultBeingCreated(); currentFault = manager().currentFault(); ASSERT_TRUE(currentFault); // The fault was created. + resetManager(); // Before atomic fields above go out of scope. } TEST_F(FaultManagerTest, HealthCheckWithCriticalFacetCreatesFault) { -- cgit v1.2.1