diff options
-rw-r--r-- | src/mongo/db/process_health/SConscript | 2 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_manager.cpp | 49 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_manager.h | 17 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_manager_test.cpp | 16 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_manager_test_suite.h | 101 | ||||
-rw-r--r-- | src/mongo/db/process_health/health_observer.h | 2 | ||||
-rw-r--r-- | src/mongo/db/process_health/health_observer_base.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/process_health/health_observer_base.h | 8 | ||||
-rw-r--r-- | src/mongo/db/process_health/health_observer_mock.h | 18 | ||||
-rw-r--r-- | src/mongo/db/process_health/health_observer_test.cpp | 39 |
10 files changed, 211 insertions, 48 deletions
diff --git a/src/mongo/db/process_health/SConscript b/src/mongo/db/process_health/SConscript index 00241d2a62e..c291f09aeb4 100644 --- a/src/mongo/db/process_health/SConscript +++ b/src/mongo/db/process_health/SConscript @@ -25,8 +25,8 @@ env.CppUnitTest( source=[ 'fault_impl_test.cpp', 'fault_facet_test.cpp', - 'health_observer_test.cpp', 'fault_manager_test.cpp', + 'health_observer_test.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/base', diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp index 8a3e559fe71..d37edc75350 100644 --- a/src/mongo/db/process_health/fault_manager.cpp +++ b/src/mongo/db/process_health/fault_manager.cpp @@ -32,6 +32,7 @@ #include "mongo/db/process_health/fault_manager.h" #include "mongo/db/process_health/fault_impl.h" +#include "mongo/db/process_health/health_observer_registration.h" #include "mongo/logv2/log.h" namespace mongo { @@ -91,7 +92,10 @@ FaultFacetsContainerPtr FaultManager::getOrCreateFaultFacetsContainer() { return std::static_pointer_cast<FaultFacetsContainer>(_fault); } -void FaultManager::healthCheck() {} +void FaultManager::healthCheck() { + // One time init. + _initHealthObserversIfNeeded(); +} Status FaultManager::transitionToState(FaultState newState) { Status status = Status::OK(); @@ -111,8 +115,9 @@ Status FaultManager::transitionToState(FaultState newState) { break; } - if (status.isOK()) + if (status.isOK()) { LOGV2_DEBUG(5936201, 1, "Transitioned fault manager state", "newState"_attr = newState); + } return status; } @@ -152,5 +157,45 @@ Status FaultManager::_transitionToKActiveFault() { return Status::OK(); } +void FaultManager::_initHealthObserversIfNeeded() { + if (_initializedAllHealthObservers.load()) { + return; + } + + stdx::lock_guard<Latch> lk(_mutex); + // One more time under lock to avoid race. + if (_initializedAllHealthObservers.load()) { + return; + } + _initializedAllHealthObservers.store(true); + + HealthObserverRegistration& registration = *HealthObserverRegistration::get(_svcCtx); + _observers = registration.instantiateAllObservers(); + + // Verify that all observer types are unique. + std::set<FaultFacetType> allTypes; + for (const auto& observer : _observers) { + allTypes.insert(observer->getType()); + } + invariant(allTypes.size() == _observers.size()); + + stdx::lock_guard<Latch> lk2(_stateMutex); + LOGV2(5956701, + "Instantiated health observers, periodic health checking starts", + "managerState"_attr = _currentState, + "observersCount"_attr = _observers.size()); +} + +std::vector<HealthObserver*> FaultManager::getHealthObservers() { + std::vector<HealthObserver*> result; + stdx::lock_guard<Latch> lk(_mutex); + result.reserve(_observers.size()); + std::transform(_observers.cbegin(), + _observers.cend(), + std::back_inserter(result), + [](const std::unique_ptr<HealthObserver>& value) { return value.get(); }); + return result; +} + } // namespace process_health } // namespace mongo diff --git a/src/mongo/db/process_health/fault_manager.h b/src/mongo/db/process_health/fault_manager.h index eebbe4b60fe..f9b608a442b 100644 --- a/src/mongo/db/process_health/fault_manager.h +++ b/src/mongo/db/process_health/fault_manager.h @@ -32,8 +32,10 @@ #include "mongo/db/process_health/fault.h" #include "mongo/db/process_health/fault_facet.h" -#include "mongo/db/process_health/fault_facets_container.h" +#include "mongo/db/process_health/fault_facet_container.h" +#include "mongo/db/process_health/health_observer.h" #include "mongo/db/service_context.h" +#include "mongo/platform/atomic_word.h" #include "mongo/platform/mutex.h" namespace mongo { @@ -92,6 +94,10 @@ protected: virtual Status transitionToState(FaultState newState); + // All observers remain valid for the manager lifetime, thus returning + // just pointers is safe, as long as they are used while manager exists. + std::vector<HealthObserver*> getHealthObservers(); + // Protected interface FaultFacetsContainerFactory implementation. // The interface FaultFacetsContainerFactory is implemented by the member '_fault'. @@ -104,11 +110,18 @@ private: Status _transitionToKTransientFault(); Status _transitionToKActiveFault(); + // One time init. + void _initHealthObserversIfNeeded(); + ServiceContext* const _svcCtx; mutable Mutex _mutex = - MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "FaultManager::_mutex"); + MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(1), "FaultManager::_mutex"); std::shared_ptr<FaultInternal> _fault; + // We lazily init all health observers. + AtomicWord<bool> _initializedAllHealthObservers{false}; + // Manager owns all observer instances. + std::vector<std::unique_ptr<HealthObserver>> _observers; mutable Mutex _stateMutex = MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "FaultManager::_stateMutex"); diff --git a/src/mongo/db/process_health/fault_manager_test.cpp b/src/mongo/db/process_health/fault_manager_test.cpp index 267035b4fdc..1564ae8e5ea 100644 --- a/src/mongo/db/process_health/fault_manager_test.cpp +++ b/src/mongo/db/process_health/fault_manager_test.cpp @@ -29,12 +29,15 @@ #include "mongo/db/process_health/fault_manager.h" +#include "mongo/db/process_health/fault_manager_test_suite.h" #include "mongo/unittest/unittest.h" namespace mongo { namespace process_health { +using test::FaultManagerTestImpl; + namespace { TEST(FaultManagerTest, Registration) { @@ -42,19 +45,6 @@ TEST(FaultManagerTest, Registration) { ASSERT_TRUE(FaultManager::get(serviceCtx.get())); } -class FaultManagerTestImpl : public FaultManager { -public: - FaultManagerTestImpl(ServiceContext* svcCtx) : FaultManager(svcCtx) {} - - Status transitionStateTest(FaultState newState) { - return transitionToState(newState); - } - - FaultState getFaultStateTest() { - return getFaultState(); - } -}; - // State machine tests. TEST(FaultManagerForTest, StateTransitionsFromOk) { auto serviceCtx = ServiceContext::make(); diff --git a/src/mongo/db/process_health/fault_manager_test_suite.h b/src/mongo/db/process_health/fault_manager_test_suite.h new file mode 100644 index 00000000000..dfc39b1ffec --- /dev/null +++ b/src/mongo/db/process_health/fault_manager_test_suite.h @@ -0,0 +1,101 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ +#pragma once + +#include <memory> + +#include "mongo/db/process_health/fault_manager.h" + +#include "mongo/db/process_health/health_observer_mock.h" +#include "mongo/db/process_health/health_observer_registration.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { + +namespace process_health { + +namespace test { + +/** + * Test wrapper class for FaultManager that has access to protected methods + * for testing. + */ +class FaultManagerTestImpl : public FaultManager { +public: + FaultManagerTestImpl(ServiceContext* svcCtx) : FaultManager(svcCtx) {} + + Status transitionStateTest(FaultState newState) { + return transitionToState(newState); + } + + FaultState getFaultStateTest() { + return getFaultState(); + } + + void healthCheckTest() { + healthCheck(); + } + + std::vector<HealthObserver*> getHealthObserversTest() { + return getHealthObservers(); + } +}; + +/** + * Test suite for fault manager. + */ +class FaultManagerTest : public unittest::Test { +public: + void setUp() override { + _svcCtx = ServiceContext::make(); + FaultManager::set(_svcCtx.get(), std::make_unique<FaultManagerTestImpl>(_svcCtx.get())); + } + + void registerMockHealthObserver(std::function<double()> getSeverityCallback) { + HealthObserverRegistration* reg = HealthObserverRegistration::get(_svcCtx.get()); + reg->registerObserverFactory([getSeverityCallback](ServiceContext* svcCtx) { + return std::make_unique<HealthObserverMock>(svcCtx, getSeverityCallback); + }); + } + + FaultManagerTestImpl& manager() { + return *static_cast<FaultManagerTestImpl*>(FaultManager::get(_svcCtx.get())); + } + + HealthObserverRegistration& healthObserverRegistration() { + return *HealthObserverRegistration::get(_svcCtx.get()); + } + +private: + ServiceContext::UniqueServiceContext _svcCtx; +}; + +} // namespace test +} // namespace process_health +} // namespace mongo diff --git a/src/mongo/db/process_health/health_observer.h b/src/mongo/db/process_health/health_observer.h index 89b2823850a..6b2771c148f 100644 --- a/src/mongo/db/process_health/health_observer.h +++ b/src/mongo/db/process_health/health_observer.h @@ -54,6 +54,8 @@ public: * Triggers health check. * It should be safe to invoke this method arbitrary often, the implementation * should prorate the invocations to avoid DoS. + * The implementation may or may not block for the completion of the check, this remains + * unspecified. */ virtual void periodicCheck() = 0; }; diff --git a/src/mongo/db/process_health/health_observer_base.cpp b/src/mongo/db/process_health/health_observer_base.cpp index b2aa6567f58..ff6aaaeeb4c 100644 --- a/src/mongo/db/process_health/health_observer_base.cpp +++ b/src/mongo/db/process_health/health_observer_base.cpp @@ -36,7 +36,12 @@ namespace process_health { HealthObserverBase::HealthObserverBase(ServiceContext* svcCtx) : _svcCtx(svcCtx) {} -void HealthObserverBase::periodicCheck() {} +void HealthObserverBase::periodicCheck() { + double severity = periodicCheckImpl(); + + if (HealthCheckStatus::isResolved(severity)) { + } +} } // namespace process_health } // namespace mongo diff --git a/src/mongo/db/process_health/health_observer_base.h b/src/mongo/db/process_health/health_observer_base.h index 82cb3a0f668..6ce7569552a 100644 --- a/src/mongo/db/process_health/health_observer_base.h +++ b/src/mongo/db/process_health/health_observer_base.h @@ -45,9 +45,15 @@ public: HealthObserverBase(ServiceContext* svcCtx); virtual ~HealthObserverBase() = default; - void periodicCheck() override; + // Implements the common logic for periodic checks. + // Every observer should implement periodicCheckImpl() for specific tests. + void periodicCheck() final; protected: + // Returns the severity after the check. + // TODO(SERVER-59592): futurize this. + virtual double periodicCheckImpl() = 0; + ServiceContext* const _svcCtx; }; diff --git a/src/mongo/db/process_health/health_observer_mock.h b/src/mongo/db/process_health/health_observer_mock.h index 516b47ee433..d94c3ce675f 100644 --- a/src/mongo/db/process_health/health_observer_mock.h +++ b/src/mongo/db/process_health/health_observer_mock.h @@ -28,26 +28,34 @@ */ #pragma once +#include <functional> + #include "mongo/db/process_health/health_observer_base.h" namespace mongo { namespace process_health { /** - * Interface to conduct periodic health checks. - * Every instance of health observer is wired internally to update the state of the FaultManager - * when a problem is detected. + * Mocked health observer is using a test callback to fetch the next + * fault severity value every time the periodic check is invoked. */ class HealthObserverMock : public HealthObserverBase { public: - HealthObserverMock(ServiceContext* svcCtx) : HealthObserverBase(svcCtx) {} + HealthObserverMock(ServiceContext* svcCtx, std::function<double()> getSeverityCallback) + : HealthObserverBase(svcCtx), _getSeverityCallback(getSeverityCallback) {} + virtual ~HealthObserverMock() = default; FaultFacetType getType() const override { return FaultFacetType::kMock; } - void periodicCheck() override {} + double periodicCheckImpl() override { + return _getSeverityCallback(); + } + +private: + std::function<double()> _getSeverityCallback; }; } // namespace process_health diff --git a/src/mongo/db/process_health/health_observer_test.cpp b/src/mongo/db/process_health/health_observer_test.cpp index 6c7e97763b8..6e1ab126004 100644 --- a/src/mongo/db/process_health/health_observer_test.cpp +++ b/src/mongo/db/process_health/health_observer_test.cpp @@ -29,6 +29,7 @@ #include "mongo/db/process_health/health_observer.h" +#include "mongo/db/process_health/fault_manager_test_suite.h" #include "mongo/db/process_health/health_observer_mock.h" #include "mongo/db/process_health/health_observer_registration.h" #include "mongo/db/service_context.h" @@ -38,35 +39,27 @@ namespace mongo { namespace process_health { -namespace { - -class HealthObserverTest : public unittest::Test { -public: - void setUp() override { - _svcCtx = ServiceContext::make(); - } - - void registerMock() { - HealthObserverRegistration* reg = HealthObserverRegistration::get(_svcCtx.get()); - reg->registerObserverFactory( - [](ServiceContext* svcCtx) { return std::make_unique<HealthObserverMock>(svcCtx); }); - } - - HealthObserverRegistration* registration() { - return HealthObserverRegistration::get(_svcCtx.get()); - } +// Using the common fault manager test suite. +using test::FaultManagerTest; -private: - ServiceContext::UniqueServiceContext _svcCtx; -}; +namespace { -TEST_F(HealthObserverTest, Registration) { - registerMock(); - auto allObservers = registration()->instantiateAllObservers(); +TEST_F(FaultManagerTest, Registration) { + registerMockHealthObserver([] { return 0; }); + auto allObservers = healthObserverRegistration().instantiateAllObservers(); ASSERT_EQ(1, allObservers.size()); ASSERT_EQ(FaultFacetType::kMock, allObservers[0]->getType()); } +TEST_F(FaultManagerTest, HealthCheckCreatesObservers) { + registerMockHealthObserver([] { return 0.1; }); + ASSERT_EQ(0, manager().getHealthObserversTest().size()); + + // Trigger periodic health check. + manager().healthCheckTest(); + ASSERT_EQ(1, manager().getHealthObserversTest().size()); +} + } // namespace } // namespace process_health } // namespace mongo |