summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/mongo/db/process_health/SConscript2
-rw-r--r--src/mongo/db/process_health/fault_manager.cpp49
-rw-r--r--src/mongo/db/process_health/fault_manager.h17
-rw-r--r--src/mongo/db/process_health/fault_manager_test.cpp16
-rw-r--r--src/mongo/db/process_health/fault_manager_test_suite.h101
-rw-r--r--src/mongo/db/process_health/health_observer.h2
-rw-r--r--src/mongo/db/process_health/health_observer_base.cpp7
-rw-r--r--src/mongo/db/process_health/health_observer_base.h8
-rw-r--r--src/mongo/db/process_health/health_observer_mock.h18
-rw-r--r--src/mongo/db/process_health/health_observer_test.cpp39
10 files changed, 211 insertions, 48 deletions
diff --git a/src/mongo/db/process_health/SConscript b/src/mongo/db/process_health/SConscript
index 00241d2a62e..c291f09aeb4 100644
--- a/src/mongo/db/process_health/SConscript
+++ b/src/mongo/db/process_health/SConscript
@@ -25,8 +25,8 @@ env.CppUnitTest(
source=[
'fault_impl_test.cpp',
'fault_facet_test.cpp',
- 'health_observer_test.cpp',
'fault_manager_test.cpp',
+ 'health_observer_test.cpp',
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp
index 8a3e559fe71..d37edc75350 100644
--- a/src/mongo/db/process_health/fault_manager.cpp
+++ b/src/mongo/db/process_health/fault_manager.cpp
@@ -32,6 +32,7 @@
#include "mongo/db/process_health/fault_manager.h"
#include "mongo/db/process_health/fault_impl.h"
+#include "mongo/db/process_health/health_observer_registration.h"
#include "mongo/logv2/log.h"
namespace mongo {
@@ -91,7 +92,10 @@ FaultFacetsContainerPtr FaultManager::getOrCreateFaultFacetsContainer() {
return std::static_pointer_cast<FaultFacetsContainer>(_fault);
}
-void FaultManager::healthCheck() {}
+void FaultManager::healthCheck() {
+ // One-time lazy init: the helper returns early once its initialized flag is set.
+ _initHealthObserversIfNeeded();
+}
Status FaultManager::transitionToState(FaultState newState) {
Status status = Status::OK();
@@ -111,8 +115,9 @@ Status FaultManager::transitionToState(FaultState newState) {
break;
}
- if (status.isOK())
+ if (status.isOK()) {
LOGV2_DEBUG(5936201, 1, "Transitioned fault manager state", "newState"_attr = newState);
+ }
return status;
}
@@ -152,5 +157,45 @@ Status FaultManager::_transitionToKActiveFault() {
return Status::OK();
}
+// Instantiates all registered health observers on the first call. Later calls return early
+// once '_initializedAllHealthObservers' is set.
+void FaultManager::_initHealthObserversIfNeeded() {
+    // Fast path: skip taking the lock when initialization has already been published.
+    if (_initializedAllHealthObservers.load()) {
+        return;
+    }
+
+    stdx::lock_guard<Latch> lk(_mutex);
+    // One more time under lock to avoid race.
+    if (_initializedAllHealthObservers.load()) {
+        return;
+    }
+
+    HealthObserverRegistration& registration = *HealthObserverRegistration::get(_svcCtx);
+    _observers = registration.instantiateAllObservers();
+
+    // Verify that all observer types are unique.
+    std::set<FaultFacetType> allTypes;
+    for (const auto& observer : _observers) {
+        allTypes.insert(observer->getType());
+    }
+    invariant(allTypes.size() == _observers.size());
+
+    // Publish the flag only after '_observers' is populated and verified. A caller on the
+    // lock-free fast path then never treats the manager as initialized while initialization
+    // is still running, and the flag is not left set if instantiation fails part-way.
+    _initializedAllHealthObservers.store(true);
+
+    // '_stateMutex' guards the read of '_currentState' for the log line below.
+    stdx::lock_guard<Latch> lk2(_stateMutex);
+    LOGV2(5956701,
+          "Instantiated health observers, periodic health checking starts",
+          "managerState"_attr = _currentState,
+          "observersCount"_attr = _observers.size());
+}
+
+// Returns non-owning pointers to every observer currently stored in '_observers'.
+// The list is built while holding '_mutex'; ownership stays with the manager.
+std::vector<HealthObserver*> FaultManager::getHealthObservers() {
+    std::vector<HealthObserver*> observerPtrs;
+    stdx::lock_guard<Latch> lk(_mutex);
+    observerPtrs.reserve(_observers.size());
+    for (const std::unique_ptr<HealthObserver>& owned : _observers) {
+        observerPtrs.push_back(owned.get());
+    }
+    return observerPtrs;
+}
+
} // namespace process_health
} // namespace mongo
diff --git a/src/mongo/db/process_health/fault_manager.h b/src/mongo/db/process_health/fault_manager.h
index eebbe4b60fe..f9b608a442b 100644
--- a/src/mongo/db/process_health/fault_manager.h
+++ b/src/mongo/db/process_health/fault_manager.h
@@ -32,8 +32,10 @@
#include "mongo/db/process_health/fault.h"
#include "mongo/db/process_health/fault_facet.h"
-#include "mongo/db/process_health/fault_facets_container.h"
+#include "mongo/db/process_health/fault_facet_container.h"
+#include "mongo/db/process_health/health_observer.h"
#include "mongo/db/service_context.h"
+#include "mongo/platform/atomic_word.h"
#include "mongo/platform/mutex.h"
namespace mongo {
@@ -92,6 +94,10 @@ protected:
virtual Status transitionToState(FaultState newState);
+ // All observers remain valid for the manager's lifetime, so returning
+ // raw pointers is safe as long as they are only used while the manager exists.
+ std::vector<HealthObserver*> getHealthObservers();
+
// Protected interface FaultFacetsContainerFactory implementation.
// The interface FaultFacetsContainerFactory is implemented by the member '_fault'.
@@ -104,11 +110,18 @@ private:
Status _transitionToKTransientFault();
Status _transitionToKActiveFault();
+ // One time init.
+ void _initHealthObserversIfNeeded();
+
ServiceContext* const _svcCtx;
mutable Mutex _mutex =
- MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "FaultManager::_mutex");
+ MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(1), "FaultManager::_mutex");
std::shared_ptr<FaultInternal> _fault;
+ // We lazily init all health observers.
+ AtomicWord<bool> _initializedAllHealthObservers{false};
+ // Manager owns all observer instances.
+ std::vector<std::unique_ptr<HealthObserver>> _observers;
mutable Mutex _stateMutex =
MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "FaultManager::_stateMutex");
diff --git a/src/mongo/db/process_health/fault_manager_test.cpp b/src/mongo/db/process_health/fault_manager_test.cpp
index 267035b4fdc..1564ae8e5ea 100644
--- a/src/mongo/db/process_health/fault_manager_test.cpp
+++ b/src/mongo/db/process_health/fault_manager_test.cpp
@@ -29,12 +29,15 @@
#include "mongo/db/process_health/fault_manager.h"
+#include "mongo/db/process_health/fault_manager_test_suite.h"
#include "mongo/unittest/unittest.h"
namespace mongo {
namespace process_health {
+using test::FaultManagerTestImpl;
+
namespace {
TEST(FaultManagerTest, Registration) {
@@ -42,19 +45,6 @@ TEST(FaultManagerTest, Registration) {
ASSERT_TRUE(FaultManager::get(serviceCtx.get()));
}
-class FaultManagerTestImpl : public FaultManager {
-public:
- FaultManagerTestImpl(ServiceContext* svcCtx) : FaultManager(svcCtx) {}
-
- Status transitionStateTest(FaultState newState) {
- return transitionToState(newState);
- }
-
- FaultState getFaultStateTest() {
- return getFaultState();
- }
-};
-
// State machine tests.
TEST(FaultManagerForTest, StateTransitionsFromOk) {
auto serviceCtx = ServiceContext::make();
diff --git a/src/mongo/db/process_health/fault_manager_test_suite.h b/src/mongo/db/process_health/fault_manager_test_suite.h
new file mode 100644
index 00000000000..dfc39b1ffec
--- /dev/null
+++ b/src/mongo/db/process_health/fault_manager_test_suite.h
@@ -0,0 +1,101 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+#pragma once
+
+#include <memory>
+
+#include "mongo/db/process_health/fault_manager.h"
+
+#include "mongo/db/process_health/health_observer_mock.h"
+#include "mongo/db/process_health/health_observer_registration.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+
+namespace process_health {
+
+namespace test {
+
+/**
+ * Test wrapper class for FaultManager that has access to protected methods
+ * for testing.
+ */
+class FaultManagerTestImpl : public FaultManager {
+public:
+ FaultManagerTestImpl(ServiceContext* svcCtx) : FaultManager(svcCtx) {}
+
+ Status transitionStateTest(FaultState newState) {  // Public pass-through for tests.
+ return transitionToState(newState);
+ }
+
+ FaultState getFaultStateTest() {  // Public pass-through for tests.
+ return getFaultState();
+ }
+
+ void healthCheckTest() {  // Runs one health check; see FaultManager::healthCheck().
+ healthCheck();
+ }
+
+ std::vector<HealthObserver*> getHealthObserversTest() {  // Non-owning observer pointers.
+ return getHealthObservers();
+ }
+};
+
+/**
+ * Test suite for fault manager.
+ */
+class FaultManagerTest : public unittest::Test {
+public:
+ void setUp() override {
+ _svcCtx = ServiceContext::make();  // Fresh service context each time setUp() runs.
+ FaultManager::set(_svcCtx.get(), std::make_unique<FaultManagerTestImpl>(_svcCtx.get()));
+ }
+
+ void registerMockHealthObserver(std::function<double()> getSeverityCallback) {
+ HealthObserverRegistration* reg = HealthObserverRegistration::get(_svcCtx.get());
+ reg->registerObserverFactory([getSeverityCallback](ServiceContext* svcCtx) {  // Callback is copied into the factory.
+ return std::make_unique<HealthObserverMock>(svcCtx, getSeverityCallback);
+ });
+ }
+
+ FaultManagerTestImpl& manager() {  // Downcast relies on setUp() having installed a FaultManagerTestImpl.
+ return *static_cast<FaultManagerTestImpl*>(FaultManager::get(_svcCtx.get()));
+ }
+
+ HealthObserverRegistration& healthObserverRegistration() {  // Registration of this fixture's service context.
+ return *HealthObserverRegistration::get(_svcCtx.get());
+ }
+
+private:
+ ServiceContext::UniqueServiceContext _svcCtx;  // Owns the service context created in setUp().
+};
+
+} // namespace test
+} // namespace process_health
+} // namespace mongo
diff --git a/src/mongo/db/process_health/health_observer.h b/src/mongo/db/process_health/health_observer.h
index 89b2823850a..6b2771c148f 100644
--- a/src/mongo/db/process_health/health_observer.h
+++ b/src/mongo/db/process_health/health_observer.h
@@ -54,6 +54,8 @@ public:
* Triggers health check.
* It should be safe to invoke this method arbitrarily often, the implementation
* should prorate the invocations to avoid DoS.
+ * The implementation may or may not block for the completion of the check, this remains
+ * unspecified.
*/
virtual void periodicCheck() = 0;
};
diff --git a/src/mongo/db/process_health/health_observer_base.cpp b/src/mongo/db/process_health/health_observer_base.cpp
index b2aa6567f58..ff6aaaeeb4c 100644
--- a/src/mongo/db/process_health/health_observer_base.cpp
+++ b/src/mongo/db/process_health/health_observer_base.cpp
@@ -36,7 +36,12 @@ namespace process_health {
HealthObserverBase::HealthObserverBase(ServiceContext* svcCtx) : _svcCtx(svcCtx) {}
-void HealthObserverBase::periodicCheck() {}
+void HealthObserverBase::periodicCheck() {
+ double severity = periodicCheckImpl();  // Observer-specific check supplies the severity.
+
+ if (HealthCheckStatus::isResolved(severity)) {  // NOTE(review): empty branch, severity is not acted on yet; presumably a placeholder, confirm.
+ }
+}
} // namespace process_health
} // namespace mongo
diff --git a/src/mongo/db/process_health/health_observer_base.h b/src/mongo/db/process_health/health_observer_base.h
index 82cb3a0f668..6ce7569552a 100644
--- a/src/mongo/db/process_health/health_observer_base.h
+++ b/src/mongo/db/process_health/health_observer_base.h
@@ -45,9 +45,15 @@ public:
HealthObserverBase(ServiceContext* svcCtx);
virtual ~HealthObserverBase() = default;
- void periodicCheck() override;
+ // Implements the common logic for periodic checks.
+ // Every observer should implement periodicCheckImpl() for specific tests.
+ void periodicCheck() final;
protected:
+ // Returns the severity after the check.
+ // TODO(SERVER-59592): futurize this.
+ virtual double periodicCheckImpl() = 0;
+
ServiceContext* const _svcCtx;
};
diff --git a/src/mongo/db/process_health/health_observer_mock.h b/src/mongo/db/process_health/health_observer_mock.h
index 516b47ee433..d94c3ce675f 100644
--- a/src/mongo/db/process_health/health_observer_mock.h
+++ b/src/mongo/db/process_health/health_observer_mock.h
@@ -28,26 +28,34 @@
*/
#pragma once
+#include <functional>
+
#include "mongo/db/process_health/health_observer_base.h"
namespace mongo {
namespace process_health {
/**
- * Interface to conduct periodic health checks.
- * Every instance of health observer is wired internally to update the state of the FaultManager
- * when a problem is detected.
+ * The mocked health observer uses a test callback to fetch the next
+ * fault severity value every time the periodic check is invoked.
*/
class HealthObserverMock : public HealthObserverBase {
public:
- HealthObserverMock(ServiceContext* svcCtx) : HealthObserverBase(svcCtx) {}
+ HealthObserverMock(ServiceContext* svcCtx, std::function<double()> getSeverityCallback)
+ : HealthObserverBase(svcCtx), _getSeverityCallback(getSeverityCallback) {}  // Stores a copy of the callback.
+
virtual ~HealthObserverMock() = default;
FaultFacetType getType() const override {
return FaultFacetType::kMock;
}
- void periodicCheck() override {}
+ double periodicCheckImpl() override {
+ return _getSeverityCallback();  // Severity comes straight from the test-supplied callback.
+ }
+
+private:
+ std::function<double()> _getSeverityCallback;  // Invoked on every periodicCheckImpl() call.
};
} // namespace process_health
diff --git a/src/mongo/db/process_health/health_observer_test.cpp b/src/mongo/db/process_health/health_observer_test.cpp
index 6c7e97763b8..6e1ab126004 100644
--- a/src/mongo/db/process_health/health_observer_test.cpp
+++ b/src/mongo/db/process_health/health_observer_test.cpp
@@ -29,6 +29,7 @@
#include "mongo/db/process_health/health_observer.h"
+#include "mongo/db/process_health/fault_manager_test_suite.h"
#include "mongo/db/process_health/health_observer_mock.h"
#include "mongo/db/process_health/health_observer_registration.h"
#include "mongo/db/service_context.h"
@@ -38,35 +39,27 @@ namespace mongo {
namespace process_health {
-namespace {
-
-class HealthObserverTest : public unittest::Test {
-public:
- void setUp() override {
- _svcCtx = ServiceContext::make();
- }
-
- void registerMock() {
- HealthObserverRegistration* reg = HealthObserverRegistration::get(_svcCtx.get());
- reg->registerObserverFactory(
- [](ServiceContext* svcCtx) { return std::make_unique<HealthObserverMock>(svcCtx); });
- }
-
- HealthObserverRegistration* registration() {
- return HealthObserverRegistration::get(_svcCtx.get());
- }
+// Using the common fault manager test suite.
+using test::FaultManagerTest;
-private:
- ServiceContext::UniqueServiceContext _svcCtx;
-};
+namespace {
-TEST_F(HealthObserverTest, Registration) {
- registerMock();
- auto allObservers = registration()->instantiateAllObservers();
+TEST_F(FaultManagerTest, Registration) {  // NOTE(review): fault_manager_test.cpp also declares TEST(FaultManagerTest, Registration) in the same test target; confirm the name clash is harmless.
+ registerMockHealthObserver([] { return 0; });
+ auto allObservers = healthObserverRegistration().instantiateAllObservers();  // Instantiates directly via the registration, bypassing the manager.
ASSERT_EQ(1, allObservers.size());
ASSERT_EQ(FaultFacetType::kMock, allObservers[0]->getType());
}
+TEST_F(FaultManagerTest, HealthCheckCreatesObservers) {
+ registerMockHealthObserver([] { return 0.1; });
+ ASSERT_EQ(0, manager().getHealthObserversTest().size());  // Registering a factory alone instantiates nothing.
+
+ // The first health check lazily instantiates the registered observers.
+ manager().healthCheckTest();
+ ASSERT_EQ(1, manager().getHealthObserversTest().size());
+}
+
} // namespace
} // namespace process_health
} // namespace mongo