summary refs log tree commit diff
diff options
context:
space:
mode:
author Andrew Shuvalov <andrew.shuvalov@mongodb.com> 2021-11-10 22:44:09 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-12-28 21:25:36 +0000
commit 57ecb25d950f9249a55fc7802a16e4588dbacffc (patch)
tree d25a12d906febee47df68a521c4dcc10d0aaa98d
parent a4c83e4ec032a6cabfd1ae8f3eaaee5c31f8a0f1 (diff)
download mongo-57ecb25d950f9249a55fc7802a16e4588dbacffc.tar.gz
SERVER-61368 SERVER-61315 Ldap health check executor should support aborted tasks; test refactorings
-rw-r--r-- src/mongo/db/process_health/fault_manager.cpp 7
-rw-r--r-- src/mongo/db/process_health/fault_manager_config.h 24
-rw-r--r-- src/mongo/db/process_health/fault_manager_test_suite.h 111
-rw-r--r-- src/mongo/db/process_health/health_observer_base.cpp 4
-rw-r--r-- src/mongo/db/process_health/health_observer_base.h 3
-rw-r--r-- src/mongo/db/process_health/health_observer_test.cpp 12
6 files changed, 123 insertions, 38 deletions
diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp
index fef9ab103d4..9fc4465c2c8 100644
--- a/src/mongo/db/process_health/fault_manager.cpp
+++ b/src/mongo/db/process_health/fault_manager.cpp
@@ -179,6 +179,7 @@ FaultManager::~FaultManager() {
if (!_initialHealthCheckCompletedPromise.getFuture().isReady()) {
_initialHealthCheckCompletedPromise.emplaceValue();
}
+ LOGV2_DEBUG(6136801, 1, "Done shutting down periodic health checks");
}
void FaultManager::startPeriodicHealthChecks() {
@@ -225,7 +226,11 @@ void FaultManager::healthCheck() {
// One time init.
_firstTimeInitIfNeeded();
- ON_BLOCK_EXIT([this] { schedulePeriodicHealthCheckThread(); });
+ ON_BLOCK_EXIT([this] {
+ if (!_config->periodicChecksDisabledForTests()) {
+ schedulePeriodicHealthCheckThread();
+ }
+ });
std::vector<HealthObserver*> observers = FaultManager::getHealthObservers();
diff --git a/src/mongo/db/process_health/fault_manager_config.h b/src/mongo/db/process_health/fault_manager_config.h
index 2cf26e2f7cf..251f7c1cf7f 100644
--- a/src/mongo/db/process_health/fault_manager_config.h
+++ b/src/mongo/db/process_health/fault_manager_config.h
@@ -68,6 +68,8 @@ enum class FaultFacetType { kMock1 = 0, kMock2, kLdap, kDns };
class FaultManagerConfig {
public:
+ static auto inline constexpr kPeriodicHealthCheckInterval{Milliseconds(50)};
+
HealthObserverIntensityEnum getHealthObserverIntensity(FaultFacetType type) {
auto intensities = getHealthObserverIntensities();
switch (type) {
@@ -90,11 +92,11 @@ public:
}
Milliseconds getActiveFaultDuration() const {
- return kActiveFaultDuration;
+ return _activeFaultDuration;
}
Milliseconds getPeriodicHealthCheckInterval() const {
- return Milliseconds(50);
+ return kPeriodicHealthCheckInterval;
}
Milliseconds getPeriodicLivenessCheckInterval() const {
@@ -105,6 +107,21 @@ public:
return Seconds(300);
}
+ /** @returns true if the periodic checks are disabled for testing purposes. This is
+ * always false in production.
+ */
+ bool periodicChecksDisabledForTests() const {
+ return _periodicChecksDisabledForTests;
+ }
+
+ void disablePeriodicChecksForTests() {
+ _periodicChecksDisabledForTests = true;
+ }
+
+ void setActiveFaultDurationForTests(Milliseconds duration) {
+ _activeFaultDuration = duration;
+ }
+
protected:
// If the server persists in TransientFault for more than this duration
// it will move to the ActiveFault state and terminate.
@@ -115,6 +132,9 @@ private:
return ServerParameterSet::getGlobal()->get<HealthMonitoringIntensitiesServerParameter>(
"healthMonitoring");
}
+
+ bool _periodicChecksDisabledForTests = false;
+ Milliseconds _activeFaultDuration = kActiveFaultDuration;
};
} // namespace process_health
diff --git a/src/mongo/db/process_health/fault_manager_test_suite.h b/src/mongo/db/process_health/fault_manager_test_suite.h
index 590ab2b1966..da84e2b0518 100644
--- a/src/mongo/db/process_health/fault_manager_test_suite.h
+++ b/src/mongo/db/process_health/fault_manager_test_suite.h
@@ -35,8 +35,10 @@
#include "mongo/db/process_health/health_monitoring_feature_flag.h"
#include "mongo/db/process_health/health_observer_mock.h"
#include "mongo/db/process_health/health_observer_registration.h"
+#include "mongo/executor/network_interface_factory.h"
#include "mongo/executor/thread_pool_task_executor_test_fixture.h"
#include "mongo/unittest/unittest.h"
+#include "mongo/util/concurrency/thread_pool.h"
#include "mongo/util/tick_source_mock.h"
namespace mongo {
@@ -48,6 +50,12 @@ namespace process_health {
namespace test {
+static inline std::unique_ptr<FaultManagerConfig> getConfigWithDisabledPeriodicChecks() {
+ auto config = std::make_unique<FaultManagerConfig>();
+ config->disablePeriodicChecksForTests();
+ return config;
+}
+
/**
* Test wrapper class for FaultManager that has access to protected methods
* for testing.
@@ -55,14 +63,22 @@ namespace test {
class FaultManagerTestImpl : public FaultManager {
public:
FaultManagerTestImpl(ServiceContext* svcCtx,
- std::shared_ptr<executor::TaskExecutor> taskExecutor)
- : FaultManager(
- svcCtx, taskExecutor, std::make_unique<FaultManagerConfig>(), [](std::string cause) {
- // In tests, do not crash.
- LOGV2(5936606,
- "Fault manager progress monitor triggered the termination",
- "cause"_attr = cause);
- }) {}
+ std::shared_ptr<executor::TaskExecutor> taskExecutor,
+ std::unique_ptr<FaultManagerConfig> config)
+ : FaultManager(svcCtx,
+ taskExecutor,
+ [&config]() -> std::unique_ptr<FaultManagerConfig> {
+ if (config)
+ return std::move(config);
+ else
+ return getConfigWithDisabledPeriodicChecks();
+ }(),
+ [](std::string cause) {
+ // In tests, do not crash.
+ LOGV2(5936606,
+ "Fault manager progress monitor triggered the termination",
+ "cause"_attr = cause);
+ }) {}
void transitionStateTest(FaultState newState) {
transitionToState(newState);
@@ -112,13 +128,28 @@ public:
feature_flags::gFeatureFlagHealthMonitoring = true;
HealthObserverRegistration::resetObserverFactoriesForTest();
- _svcCtx = ServiceContext::make();
- _svcCtx->setFastClockSource(std::make_unique<ClockSourceMock>());
- _svcCtx->setPreciseClockSource(std::make_unique<ClockSourceMock>());
- _svcCtx->setTickSource(std::make_unique<TickSourceMock<Milliseconds>>());
-
+ createServiceContextIfNeeded();
+ bumpUpLogging();
resetManager();
- _executor->startup();
+ }
+
+ void createServiceContextIfNeeded() {
+ if (!_svcCtx) {
+ // Reset only once because the Ldap connection reaper is running asynchronously
+ // and is using the simulated clock, which should not go out of scope.
+ _svcCtx = ServiceContext::make();
+ _svcCtx->setFastClockSource(std::make_unique<ClockSourceMock>());
+ _svcCtx->setPreciseClockSource(std::make_unique<ClockSourceMock>());
+ _svcCtx->setTickSource(std::make_unique<TickSourceMock<Milliseconds>>());
+ advanceTime(Seconds(100));
+ }
+ }
+
+ void bumpUpLogging() {
+ logv2::LogManager::global().getGlobalSettings().setMinimumLoggedSeverity(
+ mongo::logv2::LogComponent::kProcessHealth, logv2::LogSeverity::Debug(3));
+ logv2::LogManager::global().getGlobalSettings().setMinimumLoggedSeverity(
+ mongo::logv2::LogComponent::kAccessControl, logv2::LogSeverity::Debug(3));
}
void tearDown() override {
@@ -127,15 +158,22 @@ public:
resetManager();
}
- void resetManager() {
- // Construct task executor
- auto network = std::make_unique<executor::NetworkInterfaceMock>();
- _net = network.get();
- _executor = makeSharedThreadPoolTestExecutor(std::move(network));
+ void constructTaskExecutor() {
+ auto network = std::shared_ptr<executor::NetworkInterface>(
+ executor::makeNetworkInterface("FaultManagerTest").release());
+ ThreadPool::Options options;
+ auto pool = std::make_unique<ThreadPool>(options);
- invariant(_svcCtx->getFastClockSource());
- FaultManager::set(_svcCtx.get(),
- std::make_unique<FaultManagerTestImpl>(_svcCtx.get(), _executor));
+ _executor =
+ std::make_unique<executor::ThreadPoolTaskExecutor>(std::move(pool), std::move(network));
+ _executor->startup();
+ }
+
+ void resetManager(std::unique_ptr<FaultManagerConfig> config = nullptr) {
+ constructTaskExecutor();
+ FaultManager::set(
+ _svcCtx.get(),
+ std::make_unique<FaultManagerTestImpl>(_svcCtx.get(), _executor, std::move(config)));
}
void registerMockHealthObserver(FaultFacetType mockType,
@@ -146,6 +184,12 @@ public:
});
}
+ template <typename Observer>
+ void registerHealthObserver() {
+ HealthObserverRegistration::registerObserverFactory(
+ [](ServiceContext* svcCtx) { return std::make_unique<Observer>(svcCtx); });
+ }
+
FaultManagerTestImpl& manager() {
return *static_cast<FaultManagerTestImpl*>(FaultManager::get(_svcCtx.get()));
}
@@ -162,15 +206,25 @@ public:
return *static_cast<TickSourceMock<Milliseconds>*>(_svcCtx->getTickSource());
}
- template <typename Duration>
- void advanceTime(Duration d) {
- executor::NetworkInterfaceMock::InNetworkGuard guard(_net);
- _net->advanceTime(_net->now() + d);
- advanceClockSourcesTime(d);
+ template <typename Observer>
+ Observer& observer(FaultFacetType type) {
+ std::vector<HealthObserver*> observers = manager().getHealthObserversTest();
+ ASSERT_TRUE(!observers.empty());
+ auto it = std::find_if(observers.begin(), observers.end(), [type](const HealthObserver* o) {
+ return o->getType() == type;
+ });
+ ASSERT_TRUE(it != observers.end());
+ return *static_cast<Observer*>(*it);
+ }
+
+ HealthObserverBase::PeriodicHealthCheckContext checkContext() {
+ HealthObserverBase::PeriodicHealthCheckContext ctx{CancellationToken::uncancelable(),
+ _executor};
+ return ctx;
}
template <typename Duration>
- void advanceClockSourcesTime(Duration d) {
+ void advanceTime(Duration d) {
clockSource().advance(d);
static_cast<ClockSourceMock*>(_svcCtx->getPreciseClockSource())->advance(d);
tickSource().advance(d);
@@ -231,7 +285,6 @@ public:
private:
ServiceContext::UniqueServiceContext _svcCtx;
- executor::NetworkInterfaceMock* _net;
std::shared_ptr<executor::ThreadPoolTaskExecutor> _executor;
};
diff --git a/src/mongo/db/process_health/health_observer_base.cpp b/src/mongo/db/process_health/health_observer_base.cpp
index 0edc49ae259..25801dda696 100644
--- a/src/mongo/db/process_health/health_observer_base.cpp
+++ b/src/mongo/db/process_health/health_observer_base.cpp
@@ -51,6 +51,10 @@ void HealthObserverBase::periodicCheck(FaultFacetsContainerFactory& factory,
const auto now = _svcCtx->getPreciseClockSource()->now();
if (now - _lastTimeTheCheckWasRun < minimalCheckInterval()) {
+ LOGV2_DEBUG(6136802,
+ 3,
+ "Safety interval prevented new health check",
+ "observerType"_attr = getType());
return;
}
_lastTimeTheCheckWasRun = now;
diff --git a/src/mongo/db/process_health/health_observer_base.h b/src/mongo/db/process_health/health_observer_base.h
index 60727965c71..ff8b58f29c5 100644
--- a/src/mongo/db/process_health/health_observer_base.h
+++ b/src/mongo/db/process_health/health_observer_base.h
@@ -72,12 +72,13 @@ public:
HealthObserverLivenessStats getStats() const override;
-protected:
+ // Common params for every health check.
struct PeriodicHealthCheckContext {
std::shared_ptr<AtomicWord<bool>> cancellationToken;
std::shared_ptr<executor::TaskExecutor> taskExecutor;
};
+protected:
/**
* The main method every health observer should implement for a particular
* health check it does.
diff --git a/src/mongo/db/process_health/health_observer_test.cpp b/src/mongo/db/process_health/health_observer_test.cpp
index 38b7b8f8633..61b8d76487c 100644
--- a/src/mongo/db/process_health/health_observer_test.cpp
+++ b/src/mongo/db/process_health/health_observer_test.cpp
@@ -181,8 +181,7 @@ TEST_F(FaultManagerTest, DoesNotRestartCheckBeforeIntervalExpired) {
ASSERT_TRUE(!currentFault);
advanceTime(Milliseconds(100));
- manager().healthCheckTest();
- waitForFaultBeingCreated();
+ assertSoonWithHealthCheck([this]() { return hasFault(); });
currentFault = manager().currentFault();
ASSERT_TRUE(currentFault); // The fault was created.
resetManager(); // Before atomic fields above go out of scope.
@@ -201,7 +200,7 @@ TEST_F(FaultManagerTest, InitialHealthCheckDoesNotBlockIfTransitionToOkSucceeds)
feature_flags::gFeatureFlagHealthMonitoring = true;
registerMockHealthObserver(FaultFacetType::kMock1, [] { return 0.0; });
- manager().startPeriodicHealthChecks();
+ manager().healthCheckTest();
auto currentFault = manager().currentFault();
ASSERT_TRUE(!currentFault); // Is not created.
@@ -255,7 +254,7 @@ TEST_F(FaultManagerTest, ProgressMonitorCheck) {
manager().progressMonitorCheckTest(crashCb);
// The progress check passed because the simulated time did not advance.
ASSERT_FALSE(crashTriggered);
- advanceClockSourcesTime(manager().getConfig().getPeriodicLivenessDeadline() + Seconds(1));
+ advanceTime(manager().getConfig().getPeriodicLivenessDeadline() + Seconds(1));
manager().progressMonitorCheckTest(crashCb);
// The progress check simulated a crash.
ASSERT_TRUE(crashTriggered);
@@ -264,10 +263,13 @@ TEST_F(FaultManagerTest, ProgressMonitorCheck) {
}
TEST_F(FaultManagerTest, TransitionsToActiveFaultAfterTimeout) {
+ auto config = test::getConfigWithDisabledPeriodicChecks();
+ config->setActiveFaultDurationForTests(Milliseconds(10));
+ resetManager(std::move(config));
registerMockHealthObserver(FaultFacetType::kMock1, [] { return 1.1; });
waitForTransitionIntoState(FaultState::kTransientFault);
ASSERT_TRUE(manager().getFaultState() == FaultState::kTransientFault);
- advanceTime(manager().getConfig().getActiveFaultDuration() + Milliseconds(1));
+ advanceTime(Milliseconds(10));
waitForTransitionIntoState(FaultState::kActiveFault);
}