summary | refs | log | tree | commit | diff
path: root/src/mongo
diff options
context:
space:
mode:
author: Andrew Shuvalov <andrew.shuvalov@mongodb.com> 2021-11-10 22:44:09 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-11-10 23:18:15 +0000
commit: 7673b195c85582a7a8295f508ae61cc50bb9c628 (patch)
tree: cb6c647ea3e382c9955381a4ddcf9115c70015f9 /src/mongo
parent: 195bfdd18bdfba1b98beb28628cef19d48d335de (diff)
download: mongo-7673b195c85582a7a8295f508ae61cc50bb9c628.tar.gz
SERVER-61368 SERVER-61315 Ldap health check executor should support aborted tasks; test refactorings
Diffstat (limited to 'src/mongo')
-rw-r--r-- src/mongo/db/process_health/fault_manager.cpp | 7
-rw-r--r-- src/mongo/db/process_health/fault_manager_config.h | 24
-rw-r--r-- src/mongo/db/process_health/fault_manager_test_suite.h | 111
-rw-r--r-- src/mongo/db/process_health/health_observer_base.cpp | 5
-rw-r--r-- src/mongo/db/process_health/health_observer_base.h | 3
-rw-r--r-- src/mongo/db/process_health/health_observer_test.cpp | 12
6 files changed, 123 insertions, 39 deletions
diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp
index 86d7b4406bc..2c4494b97db 100644
--- a/src/mongo/db/process_health/fault_manager.cpp
+++ b/src/mongo/db/process_health/fault_manager.cpp
@@ -175,6 +175,7 @@ FaultManager::~FaultManager() {
if (!_initialHealthCheckCompletedPromise.getFuture().isReady()) {
_initialHealthCheckCompletedPromise.emplaceValue();
}
+ LOGV2_DEBUG(6136801, 1, "Done shutting down periodic health checks");
}
void FaultManager::startPeriodicHealthChecks() {
@@ -222,7 +223,11 @@ void FaultManager::healthCheck() {
// One time init.
_firstTimeInitIfNeeded();
- ON_BLOCK_EXIT([this] { schedulePeriodicHealthCheckThread(); });
+ ON_BLOCK_EXIT([this] {
+ if (!_config->periodicChecksDisabledForTests()) {
+ schedulePeriodicHealthCheckThread();
+ }
+ });
std::vector<HealthObserver*> observers = FaultManager::getHealthObservers();
diff --git a/src/mongo/db/process_health/fault_manager_config.h b/src/mongo/db/process_health/fault_manager_config.h
index 2cf26e2f7cf..251f7c1cf7f 100644
--- a/src/mongo/db/process_health/fault_manager_config.h
+++ b/src/mongo/db/process_health/fault_manager_config.h
@@ -68,6 +68,8 @@ enum class FaultFacetType { kMock1 = 0, kMock2, kLdap, kDns };
class FaultManagerConfig {
public:
+ static auto inline constexpr kPeriodicHealthCheckInterval{Milliseconds(50)};
+
HealthObserverIntensityEnum getHealthObserverIntensity(FaultFacetType type) {
auto intensities = getHealthObserverIntensities();
switch (type) {
@@ -90,11 +92,11 @@ public:
}
Milliseconds getActiveFaultDuration() const {
- return kActiveFaultDuration;
+ return _activeFaultDuration;
}
Milliseconds getPeriodicHealthCheckInterval() const {
- return Milliseconds(50);
+ return kPeriodicHealthCheckInterval;
}
Milliseconds getPeriodicLivenessCheckInterval() const {
@@ -105,6 +107,21 @@ public:
return Seconds(300);
}
+ /** @returns true if the periodic checks are disabled for testing purposes. This is
+ * always false in production.
+ */
+ bool periodicChecksDisabledForTests() const {
+ return _periodicChecksDisabledForTests;
+ }
+
+ void disablePeriodicChecksForTests() {
+ _periodicChecksDisabledForTests = true;
+ }
+
+ void setActiveFaultDurationForTests(Milliseconds duration) {
+ _activeFaultDuration = duration;
+ }
+
protected:
// If the server persists in TransientFault for more than this duration
// it will move to the ActiveFault state and terminate.
@@ -115,6 +132,9 @@ private:
return ServerParameterSet::getGlobal()->get<HealthMonitoringIntensitiesServerParameter>(
"healthMonitoring");
}
+
+ bool _periodicChecksDisabledForTests = false;
+ Milliseconds _activeFaultDuration = kActiveFaultDuration;
};
} // namespace process_health
diff --git a/src/mongo/db/process_health/fault_manager_test_suite.h b/src/mongo/db/process_health/fault_manager_test_suite.h
index 5f7a59ca538..d45bc97bd1d 100644
--- a/src/mongo/db/process_health/fault_manager_test_suite.h
+++ b/src/mongo/db/process_health/fault_manager_test_suite.h
@@ -34,9 +34,11 @@
#include "mongo/db/process_health/health_observer_mock.h"
#include "mongo/db/process_health/health_observer_registration.h"
+#include "mongo/executor/network_interface_factory.h"
#include "mongo/executor/thread_pool_task_executor_test_fixture.h"
#include "mongo/idl/server_parameter_test_util.h"
#include "mongo/unittest/unittest.h"
+#include "mongo/util/concurrency/thread_pool.h"
#include "mongo/util/tick_source_mock.h"
namespace mongo {
@@ -48,6 +50,12 @@ namespace process_health {
namespace test {
+static inline std::unique_ptr<FaultManagerConfig> getConfigWithDisabledPeriodicChecks() {
+ auto config = std::make_unique<FaultManagerConfig>();
+ config->disablePeriodicChecksForTests();
+ return config;
+}
+
/**
* Test wrapper class for FaultManager that has access to protected methods
* for testing.
@@ -55,14 +63,22 @@ namespace test {
class FaultManagerTestImpl : public FaultManager {
public:
FaultManagerTestImpl(ServiceContext* svcCtx,
- std::shared_ptr<executor::TaskExecutor> taskExecutor)
- : FaultManager(
- svcCtx, taskExecutor, std::make_unique<FaultManagerConfig>(), [](std::string cause) {
- // In tests, do not crash.
- LOGV2(5936606,
- "Fault manager progress monitor triggered the termination",
- "cause"_attr = cause);
- }) {}
+ std::shared_ptr<executor::TaskExecutor> taskExecutor,
+ std::unique_ptr<FaultManagerConfig> config)
+ : FaultManager(svcCtx,
+ taskExecutor,
+ [&config]() -> std::unique_ptr<FaultManagerConfig> {
+ if (config)
+ return std::move(config);
+ else
+ return getConfigWithDisabledPeriodicChecks();
+ }(),
+ [](std::string cause) {
+ // In tests, do not crash.
+ LOGV2(5936606,
+ "Fault manager progress monitor triggered the termination",
+ "cause"_attr = cause);
+ }) {}
void transitionStateTest(FaultState newState) {
transitionToState(newState);
@@ -112,13 +128,28 @@ public:
RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true};
HealthObserverRegistration::resetObserverFactoriesForTest();
- _svcCtx = ServiceContext::make();
- _svcCtx->setFastClockSource(std::make_unique<ClockSourceMock>());
- _svcCtx->setPreciseClockSource(std::make_unique<ClockSourceMock>());
- _svcCtx->setTickSource(std::make_unique<TickSourceMock<Milliseconds>>());
-
+ createServiceContextIfNeeded();
+ bumpUpLogging();
resetManager();
- _executor->startup();
+ }
+
+ void createServiceContextIfNeeded() {
+ if (!_svcCtx) {
+ // Reset only once because the Ldap connection reaper is running asynchronously
+ // and is using the simulated clock, which should not go out of scope.
+ _svcCtx = ServiceContext::make();
+ _svcCtx->setFastClockSource(std::make_unique<ClockSourceMock>());
+ _svcCtx->setPreciseClockSource(std::make_unique<ClockSourceMock>());
+ _svcCtx->setTickSource(std::make_unique<TickSourceMock<Milliseconds>>());
+ advanceTime(Seconds(100));
+ }
+ }
+
+ void bumpUpLogging() {
+ logv2::LogManager::global().getGlobalSettings().setMinimumLoggedSeverity(
+ mongo::logv2::LogComponent::kProcessHealth, logv2::LogSeverity::Debug(3));
+ logv2::LogManager::global().getGlobalSettings().setMinimumLoggedSeverity(
+ mongo::logv2::LogComponent::kAccessControl, logv2::LogSeverity::Debug(3));
}
void tearDown() override {
@@ -127,15 +158,22 @@ public:
resetManager();
}
- void resetManager() {
- // Construct task executor
- auto network = std::make_unique<executor::NetworkInterfaceMock>();
- _net = network.get();
- _executor = makeSharedThreadPoolTestExecutor(std::move(network));
+ void constructTaskExecutor() {
+ auto network = std::shared_ptr<executor::NetworkInterface>(
+ executor::makeNetworkInterface("FaultManagerTest").release());
+ ThreadPool::Options options;
+ auto pool = std::make_unique<ThreadPool>(options);
- invariant(_svcCtx->getFastClockSource());
- FaultManager::set(_svcCtx.get(),
- std::make_unique<FaultManagerTestImpl>(_svcCtx.get(), _executor));
+ _executor =
+ std::make_unique<executor::ThreadPoolTaskExecutor>(std::move(pool), std::move(network));
+ _executor->startup();
+ }
+
+ void resetManager(std::unique_ptr<FaultManagerConfig> config = nullptr) {
+ constructTaskExecutor();
+ FaultManager::set(
+ _svcCtx.get(),
+ std::make_unique<FaultManagerTestImpl>(_svcCtx.get(), _executor, std::move(config)));
}
void registerMockHealthObserver(FaultFacetType mockType,
@@ -146,6 +184,12 @@ public:
});
}
+ template <typename Observer>
+ void registerHealthObserver() {
+ HealthObserverRegistration::registerObserverFactory(
+ [](ServiceContext* svcCtx) { return std::make_unique<Observer>(svcCtx); });
+ }
+
FaultManagerTestImpl& manager() {
return *static_cast<FaultManagerTestImpl*>(FaultManager::get(_svcCtx.get()));
}
@@ -162,15 +206,25 @@ public:
return *static_cast<TickSourceMock<Milliseconds>*>(_svcCtx->getTickSource());
}
- template <typename Duration>
- void advanceTime(Duration d) {
- executor::NetworkInterfaceMock::InNetworkGuard guard(_net);
- _net->advanceTime(_net->now() + d);
- advanceClockSourcesTime(d);
+ template <typename Observer>
+ Observer& observer(FaultFacetType type) {
+ std::vector<HealthObserver*> observers = manager().getHealthObserversTest();
+ ASSERT_TRUE(!observers.empty());
+ auto it = std::find_if(observers.begin(), observers.end(), [type](const HealthObserver* o) {
+ return o->getType() == type;
+ });
+ ASSERT_TRUE(it != observers.end());
+ return *static_cast<Observer*>(*it);
+ }
+
+ HealthObserverBase::PeriodicHealthCheckContext checkContext() {
+ HealthObserverBase::PeriodicHealthCheckContext ctx{CancellationToken::uncancelable(),
+ _executor};
+ return ctx;
}
template <typename Duration>
- void advanceClockSourcesTime(Duration d) {
+ void advanceTime(Duration d) {
clockSource().advance(d);
static_cast<ClockSourceMock*>(_svcCtx->getPreciseClockSource())->advance(d);
tickSource().advance(d);
@@ -231,7 +285,6 @@ public:
private:
ServiceContext::UniqueServiceContext _svcCtx;
- executor::NetworkInterfaceMock* _net;
std::shared_ptr<executor::ThreadPoolTaskExecutor> _executor;
};
diff --git a/src/mongo/db/process_health/health_observer_base.cpp b/src/mongo/db/process_health/health_observer_base.cpp
index 04f38f51daa..dbe49da4ed4 100644
--- a/src/mongo/db/process_health/health_observer_base.cpp
+++ b/src/mongo/db/process_health/health_observer_base.cpp
@@ -42,7 +42,6 @@ HealthObserverBase::HealthObserverBase(ServiceContext* svcCtx) : _svcCtx(svcCtx)
void HealthObserverBase::periodicCheck(FaultFacetsContainerFactory& factory,
std::shared_ptr<executor::TaskExecutor> taskExecutor,
CancellationToken token) {
-
// If we have reached here, the intensity of this health observer must not be off
{
auto lk = stdx::lock_guard(_mutex);
@@ -52,6 +51,10 @@ void HealthObserverBase::periodicCheck(FaultFacetsContainerFactory& factory,
const auto now = _svcCtx->getPreciseClockSource()->now();
if (now - _lastTimeTheCheckWasRun < minimalCheckInterval()) {
+ LOGV2_DEBUG(6136802,
+ 3,
+ "Safety interval prevented new health check",
+ "observerType"_attr = getType());
return;
}
_lastTimeTheCheckWasRun = now;
diff --git a/src/mongo/db/process_health/health_observer_base.h b/src/mongo/db/process_health/health_observer_base.h
index 070e145d38a..99425808fab 100644
--- a/src/mongo/db/process_health/health_observer_base.h
+++ b/src/mongo/db/process_health/health_observer_base.h
@@ -72,12 +72,13 @@ public:
HealthObserverLivenessStats getStats() const override;
-protected:
+ // Common params for every health check.
struct PeriodicHealthCheckContext {
CancellationToken cancellationToken;
std::shared_ptr<executor::TaskExecutor> taskExecutor;
};
+protected:
/**
* The main method every health observer should implement for a particular
* health check it does.
diff --git a/src/mongo/db/process_health/health_observer_test.cpp b/src/mongo/db/process_health/health_observer_test.cpp
index f8ea21439f4..ea133e04e3b 100644
--- a/src/mongo/db/process_health/health_observer_test.cpp
+++ b/src/mongo/db/process_health/health_observer_test.cpp
@@ -181,8 +181,7 @@ TEST_F(FaultManagerTest, DoesNotRestartCheckBeforeIntervalExpired) {
ASSERT_TRUE(!currentFault);
advanceTime(Milliseconds(100));
- manager().healthCheckTest();
- waitForFaultBeingCreated();
+ assertSoonWithHealthCheck([this]() { return hasFault(); });
currentFault = manager().currentFault();
ASSERT_TRUE(currentFault); // The fault was created.
resetManager(); // Before atomic fields above go out of scope.
@@ -201,7 +200,7 @@ TEST_F(FaultManagerTest, InitialHealthCheckDoesNotBlockIfTransitionToOkSucceeds)
RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true};
registerMockHealthObserver(FaultFacetType::kMock1, [] { return 0.0; });
- manager().startPeriodicHealthChecks();
+ manager().healthCheckTest();
auto currentFault = manager().currentFault();
ASSERT_TRUE(!currentFault); // Is not created.
@@ -254,7 +253,7 @@ TEST_F(FaultManagerTest, ProgressMonitorCheck) {
manager().progressMonitorCheckTest(crashCb);
// The progress check passed because the simulated time did not advance.
ASSERT_FALSE(crashTriggered);
- advanceClockSourcesTime(manager().getConfig().getPeriodicLivenessDeadline() + Seconds(1));
+ advanceTime(manager().getConfig().getPeriodicLivenessDeadline() + Seconds(1));
manager().progressMonitorCheckTest(crashCb);
// The progress check simulated a crash.
ASSERT_TRUE(crashTriggered);
@@ -263,10 +262,13 @@ TEST_F(FaultManagerTest, ProgressMonitorCheck) {
}
TEST_F(FaultManagerTest, TransitionsToActiveFaultAfterTimeout) {
+ auto config = test::getConfigWithDisabledPeriodicChecks();
+ config->setActiveFaultDurationForTests(Milliseconds(10));
+ resetManager(std::move(config));
registerMockHealthObserver(FaultFacetType::kMock1, [] { return 1.1; });
waitForTransitionIntoState(FaultState::kTransientFault);
ASSERT_TRUE(manager().getFaultState() == FaultState::kTransientFault);
- advanceTime(manager().getConfig().getActiveFaultDuration() + Milliseconds(1));
+ advanceTime(Milliseconds(10));
waitForTransitionIntoState(FaultState::kActiveFault);
}