diff options
author | Andrew Shuvalov <andrew.shuvalov@mongodb.com> | 2021-11-10 22:44:09 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-11-10 23:18:15 +0000 |
commit | 7673b195c85582a7a8295f508ae61cc50bb9c628 (patch) | |
tree | cb6c647ea3e382c9955381a4ddcf9115c70015f9 /src/mongo | |
parent | 195bfdd18bdfba1b98beb28628cef19d48d335de (diff) | |
download | mongo-7673b195c85582a7a8295f508ae61cc50bb9c628.tar.gz |
SERVER-61368 SERVER-61315 Ldap health check executor should support aborted tasks; test refactorings
Diffstat (limited to 'src/mongo')
6 files changed, 123 insertions, 39 deletions
diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp index 86d7b4406bc..2c4494b97db 100644 --- a/src/mongo/db/process_health/fault_manager.cpp +++ b/src/mongo/db/process_health/fault_manager.cpp @@ -175,6 +175,7 @@ FaultManager::~FaultManager() { if (!_initialHealthCheckCompletedPromise.getFuture().isReady()) { _initialHealthCheckCompletedPromise.emplaceValue(); } + LOGV2_DEBUG(6136801, 1, "Done shutting down periodic health checks"); } void FaultManager::startPeriodicHealthChecks() { @@ -222,7 +223,11 @@ void FaultManager::healthCheck() { // One time init. _firstTimeInitIfNeeded(); - ON_BLOCK_EXIT([this] { schedulePeriodicHealthCheckThread(); }); + ON_BLOCK_EXIT([this] { + if (!_config->periodicChecksDisabledForTests()) { + schedulePeriodicHealthCheckThread(); + } + }); std::vector<HealthObserver*> observers = FaultManager::getHealthObservers(); diff --git a/src/mongo/db/process_health/fault_manager_config.h b/src/mongo/db/process_health/fault_manager_config.h index 2cf26e2f7cf..251f7c1cf7f 100644 --- a/src/mongo/db/process_health/fault_manager_config.h +++ b/src/mongo/db/process_health/fault_manager_config.h @@ -68,6 +68,8 @@ enum class FaultFacetType { kMock1 = 0, kMock2, kLdap, kDns }; class FaultManagerConfig { public: + static auto inline constexpr kPeriodicHealthCheckInterval{Milliseconds(50)}; + HealthObserverIntensityEnum getHealthObserverIntensity(FaultFacetType type) { auto intensities = getHealthObserverIntensities(); switch (type) { @@ -90,11 +92,11 @@ public: } Milliseconds getActiveFaultDuration() const { - return kActiveFaultDuration; + return _activeFaultDuration; } Milliseconds getPeriodicHealthCheckInterval() const { - return Milliseconds(50); + return kPeriodicHealthCheckInterval; } Milliseconds getPeriodicLivenessCheckInterval() const { @@ -105,6 +107,21 @@ public: return Seconds(300); } + /** @returns true if the periodic checks are disabled for testing purposes. This is + * always false in production. + */ + bool periodicChecksDisabledForTests() const { + return _periodicChecksDisabledForTests; + } + + void disablePeriodicChecksForTests() { + _periodicChecksDisabledForTests = true; + } + + void setActiveFaultDurationForTests(Milliseconds duration) { + _activeFaultDuration = duration; + } + protected: // If the server persists in TransientFault for more than this duration // it will move to the ActiveFault state and terminate. @@ -115,6 +132,9 @@ private: return ServerParameterSet::getGlobal()->get<HealthMonitoringIntensitiesServerParameter>( "healthMonitoring"); } + + bool _periodicChecksDisabledForTests = false; + Milliseconds _activeFaultDuration = kActiveFaultDuration; }; } // namespace process_health diff --git a/src/mongo/db/process_health/fault_manager_test_suite.h b/src/mongo/db/process_health/fault_manager_test_suite.h index 5f7a59ca538..d45bc97bd1d 100644 --- a/src/mongo/db/process_health/fault_manager_test_suite.h +++ b/src/mongo/db/process_health/fault_manager_test_suite.h @@ -34,9 +34,11 @@ #include "mongo/db/process_health/health_observer_mock.h" #include "mongo/db/process_health/health_observer_registration.h" +#include "mongo/executor/network_interface_factory.h" #include "mongo/executor/thread_pool_task_executor_test_fixture.h" #include "mongo/idl/server_parameter_test_util.h" #include "mongo/unittest/unittest.h" +#include "mongo/util/concurrency/thread_pool.h" #include "mongo/util/tick_source_mock.h" namespace mongo { @@ -48,6 +50,12 @@ namespace process_health { namespace test { +static inline std::unique_ptr<FaultManagerConfig> getConfigWithDisabledPeriodicChecks() { + auto config = std::make_unique<FaultManagerConfig>(); + config->disablePeriodicChecksForTests(); + return config; +} + /** * Test wrapper class for FaultManager that has access to protected methods * for testing. @@ -55,14 +63,22 @@ namespace test { class FaultManagerTestImpl : public FaultManager { public: FaultManagerTestImpl(ServiceContext* svcCtx, - std::shared_ptr<executor::TaskExecutor> taskExecutor) - : FaultManager( - svcCtx, taskExecutor, std::make_unique<FaultManagerConfig>(), [](std::string cause) { - // In tests, do not crash. - LOGV2(5936606, - "Fault manager progress monitor triggered the termination", - "cause"_attr = cause); - }) {} + std::shared_ptr<executor::TaskExecutor> taskExecutor, + std::unique_ptr<FaultManagerConfig> config) + : FaultManager(svcCtx, + taskExecutor, + [&config]() -> std::unique_ptr<FaultManagerConfig> { + if (config) + return std::move(config); + else + return getConfigWithDisabledPeriodicChecks(); + }(), + [](std::string cause) { + // In tests, do not crash. + LOGV2(5936606, + "Fault manager progress monitor triggered the termination", + "cause"_attr = cause); + }) {} void transitionStateTest(FaultState newState) { transitionToState(newState); @@ -112,13 +128,28 @@ public: RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; HealthObserverRegistration::resetObserverFactoriesForTest(); - _svcCtx = ServiceContext::make(); - _svcCtx->setFastClockSource(std::make_unique<ClockSourceMock>()); - _svcCtx->setPreciseClockSource(std::make_unique<ClockSourceMock>()); - _svcCtx->setTickSource(std::make_unique<TickSourceMock<Milliseconds>>()); - + createServiceContextIfNeeded(); + bumpUpLogging(); resetManager(); - _executor->startup(); + } + + void createServiceContextIfNeeded() { + if (!_svcCtx) { + // Reset only once because the Ldap connection reaper is running asynchronously + // and is using the simulated clock, which should not go out of scope. + _svcCtx = ServiceContext::make(); + _svcCtx->setFastClockSource(std::make_unique<ClockSourceMock>()); + _svcCtx->setPreciseClockSource(std::make_unique<ClockSourceMock>()); + _svcCtx->setTickSource(std::make_unique<TickSourceMock<Milliseconds>>()); + advanceTime(Seconds(100)); + } + } + + void bumpUpLogging() { + logv2::LogManager::global().getGlobalSettings().setMinimumLoggedSeverity( + mongo::logv2::LogComponent::kProcessHealth, logv2::LogSeverity::Debug(3)); + logv2::LogManager::global().getGlobalSettings().setMinimumLoggedSeverity( + mongo::logv2::LogComponent::kAccessControl, logv2::LogSeverity::Debug(3)); } void tearDown() override { @@ -127,15 +158,22 @@ public: resetManager(); } - void resetManager() { - // Construct task executor - auto network = std::make_unique<executor::NetworkInterfaceMock>(); - _net = network.get(); - _executor = makeSharedThreadPoolTestExecutor(std::move(network)); + void constructTaskExecutor() { + auto network = std::shared_ptr<executor::NetworkInterface>( + executor::makeNetworkInterface("FaultManagerTest").release()); + ThreadPool::Options options; + auto pool = std::make_unique<ThreadPool>(options); - invariant(_svcCtx->getFastClockSource()); - FaultManager::set(_svcCtx.get(), - std::make_unique<FaultManagerTestImpl>(_svcCtx.get(), _executor)); + _executor = + std::make_unique<executor::ThreadPoolTaskExecutor>(std::move(pool), std::move(network)); + _executor->startup(); + } + + void resetManager(std::unique_ptr<FaultManagerConfig> config = nullptr) { + constructTaskExecutor(); + FaultManager::set( + _svcCtx.get(), + std::make_unique<FaultManagerTestImpl>(_svcCtx.get(), _executor, std::move(config))); } void registerMockHealthObserver(FaultFacetType mockType, @@ -146,6 +184,12 @@ public: }); } + template <typename Observer> + void registerHealthObserver() { + HealthObserverRegistration::registerObserverFactory( + [](ServiceContext* svcCtx) { return std::make_unique<Observer>(svcCtx); }); + } + FaultManagerTestImpl& manager() { return *static_cast<FaultManagerTestImpl*>(FaultManager::get(_svcCtx.get())); } @@ -162,15 +206,25 @@ public: return *static_cast<TickSourceMock<Milliseconds>*>(_svcCtx->getTickSource()); } - template <typename Duration> - void advanceTime(Duration d) { - executor::NetworkInterfaceMock::InNetworkGuard guard(_net); - _net->advanceTime(_net->now() + d); - advanceClockSourcesTime(d); + template <typename Observer> + Observer& observer(FaultFacetType type) { + std::vector<HealthObserver*> observers = manager().getHealthObserversTest(); + ASSERT_TRUE(!observers.empty()); + auto it = std::find_if(observers.begin(), observers.end(), [type](const HealthObserver* o) { + return o->getType() == type; + }); + ASSERT_TRUE(it != observers.end()); + return *static_cast<Observer*>(*it); + } + + HealthObserverBase::PeriodicHealthCheckContext checkContext() { + HealthObserverBase::PeriodicHealthCheckContext ctx{CancellationToken::uncancelable(), + _executor}; + return ctx; } template <typename Duration> - void advanceClockSourcesTime(Duration d) { + void advanceTime(Duration d) { clockSource().advance(d); static_cast<ClockSourceMock*>(_svcCtx->getPreciseClockSource())->advance(d); tickSource().advance(d); @@ -231,7 +285,6 @@ public: private: ServiceContext::UniqueServiceContext _svcCtx; - executor::NetworkInterfaceMock* _net; std::shared_ptr<executor::ThreadPoolTaskExecutor> _executor; }; diff --git a/src/mongo/db/process_health/health_observer_base.cpp b/src/mongo/db/process_health/health_observer_base.cpp index 04f38f51daa..dbe49da4ed4 100644 --- a/src/mongo/db/process_health/health_observer_base.cpp +++ b/src/mongo/db/process_health/health_observer_base.cpp @@ -42,7 +42,6 @@ HealthObserverBase::HealthObserverBase(ServiceContext* svcCtx) : _svcCtx(svcCtx) void HealthObserverBase::periodicCheck(FaultFacetsContainerFactory& factory, std::shared_ptr<executor::TaskExecutor> taskExecutor, CancellationToken token) { - // If we have reached here, the intensity of this health observer must not be off { auto lk = stdx::lock_guard(_mutex); @@ -52,6 +51,10 @@ void HealthObserverBase::periodicCheck(FaultFacetsContainerFactory& factory, const auto now = _svcCtx->getPreciseClockSource()->now(); if (now - _lastTimeTheCheckWasRun < minimalCheckInterval()) { + LOGV2_DEBUG(6136802, + 3, + "Safety interval prevented new health check", + "observerType"_attr = getType()); return; } _lastTimeTheCheckWasRun = now; diff --git a/src/mongo/db/process_health/health_observer_base.h b/src/mongo/db/process_health/health_observer_base.h index 070e145d38a..99425808fab 100644 --- a/src/mongo/db/process_health/health_observer_base.h +++ b/src/mongo/db/process_health/health_observer_base.h @@ -72,12 +72,13 @@ public: HealthObserverLivenessStats getStats() const override; -protected: + // Common params for every health check. struct PeriodicHealthCheckContext { CancellationToken cancellationToken; std::shared_ptr<executor::TaskExecutor> taskExecutor; }; +protected: /** * The main method every health observer should implement for a particular * health check it does. diff --git a/src/mongo/db/process_health/health_observer_test.cpp b/src/mongo/db/process_health/health_observer_test.cpp index f8ea21439f4..ea133e04e3b 100644 --- a/src/mongo/db/process_health/health_observer_test.cpp +++ b/src/mongo/db/process_health/health_observer_test.cpp @@ -181,8 +181,7 @@ TEST_F(FaultManagerTest, DoesNotRestartCheckBeforeIntervalExpired) { ASSERT_TRUE(!currentFault); advanceTime(Milliseconds(100)); - manager().healthCheckTest(); - waitForFaultBeingCreated(); + assertSoonWithHealthCheck([this]() { return hasFault(); }); currentFault = manager().currentFault(); ASSERT_TRUE(currentFault); // The fault was created. resetManager(); // Before atomic fields above go out of scope. @@ -201,7 +200,7 @@ TEST_F(FaultManagerTest, InitialHealthCheckDoesNotBlockIfTransitionToOkSucceeds) RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; registerMockHealthObserver(FaultFacetType::kMock1, [] { return 0.0; }); - manager().startPeriodicHealthChecks(); + manager().healthCheckTest(); auto currentFault = manager().currentFault(); ASSERT_TRUE(!currentFault); // Is not created. @@ -254,7 +253,7 @@ TEST_F(FaultManagerTest, ProgressMonitorCheck) { manager().progressMonitorCheckTest(crashCb); // The progress check passed because the simulated time did not advance. ASSERT_FALSE(crashTriggered); - advanceClockSourcesTime(manager().getConfig().getPeriodicLivenessDeadline() + Seconds(1)); + advanceTime(manager().getConfig().getPeriodicLivenessDeadline() + Seconds(1)); manager().progressMonitorCheckTest(crashCb); // The progress check simulated a crash. ASSERT_TRUE(crashTriggered); @@ -263,10 +262,13 @@ TEST_F(FaultManagerTest, ProgressMonitorCheck) { } TEST_F(FaultManagerTest, TransitionsToActiveFaultAfterTimeout) { + auto config = test::getConfigWithDisabledPeriodicChecks(); + config->setActiveFaultDurationForTests(Milliseconds(10)); + resetManager(std::move(config)); registerMockHealthObserver(FaultFacetType::kMock1, [] { return 1.1; }); waitForTransitionIntoState(FaultState::kTransientFault); ASSERT_TRUE(manager().getFaultState() == FaultState::kTransientFault); - advanceTime(manager().getConfig().getActiveFaultDuration() + Milliseconds(1)); + advanceTime(Milliseconds(10)); waitForTransitionIntoState(FaultState::kActiveFault); } |