diff options
author | Andrew Witten <andrew.witten@mongodb.com> | 2022-06-14 20:38:42 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-24 16:33:14 +0000 |
commit | edce02c5c24d122bf0688dcbae3d5ff699bd21bd (patch) | |
tree | 09fc58d943a49b0e45743c6224532f9b377338b6 | |
parent | edd77b9a408d5286e849b2dd171e12e48c119fb3 (diff) | |
download | mongo-edce02c5c24d122bf0688dcbae3d5ff699bd21bd.tar.gz |
SERVER-67280 wrap periodicCheckImpl with try catch
(cherry picked from commit 3e5d353f846896ea81fca853bc4a03b94fe91df5)
9 files changed, 115 insertions, 34 deletions
diff --git a/src/mongo/db/process_health/config_server_health_observer.cpp b/src/mongo/db/process_health/config_server_health_observer.cpp index 37f1a8644d4..e981e9a268b 100644 --- a/src/mongo/db/process_health/config_server_health_observer.cpp +++ b/src/mongo/db/process_health/config_server_health_observer.cpp @@ -85,7 +85,7 @@ public: * previous one is filled, thus synchronization can be relaxed. */ Future<HealthCheckStatus> periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept override; + PeriodicHealthCheckContext&& periodicCheckContext) override; private: // Collects the results of one check. @@ -144,7 +144,7 @@ ConfigServerHealthObserver::ConfigServerHealthObserver(ServiceContext* svcCtx) : HealthObserverBase(svcCtx) {} Future<HealthCheckStatus> ConfigServerHealthObserver::periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept { + PeriodicHealthCheckContext&& periodicCheckContext) { // The chain is not capturing 'this' for the case the network call outlives the observer. return _checkImpl(std::move(periodicCheckContext)) .then([type = getType()](CheckResult result) mutable -> Future<HealthCheckStatus> { diff --git a/src/mongo/db/process_health/dns_health_observer.cpp b/src/mongo/db/process_health/dns_health_observer.cpp index ef414513aff..390a7aed738 100644 --- a/src/mongo/db/process_health/dns_health_observer.cpp +++ b/src/mongo/db/process_health/dns_health_observer.cpp @@ -45,9 +45,10 @@ namespace process_health { MONGO_FAIL_POINT_DEFINE(dnsHealthObserverFp); Future<HealthCheckStatus> DnsHealthObserver::periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept { + PeriodicHealthCheckContext&& periodicCheckContext) { LOGV2_DEBUG(5938401, 2, "DNS health observer executing"); + auto makeFailedHealthCheckFuture = [this](const Status& status) { return Future<HealthCheckStatus>::makeReady( makeSimpleFailedStatus(Severity::kFailure, {status})); @@ -99,13 +100,17 @@ Future<HealthCheckStatus> DnsHealthObserver::periodicCheckImpl( auto status = periodicCheckContext.taskExecutor->scheduleWork( [this, servers, promise = std::move(completionPf.promise)]( const executor::TaskExecutor::CallbackArgs& cbArgs) mutable { - auto statusWith = - getHostFQDNs(servers.front().host(), HostnameCanonicalizationMode::kForward); - if (statusWith.isOK() && !statusWith.getValue().empty()) { - promise.emplaceValue(makeHealthyStatus()); - } else { - promise.emplaceValue( - makeSimpleFailedStatus(Severity::kFailure, {statusWith.getStatus()})); + try { + auto statusWith = + getHostFQDNs(servers.front().host(), HostnameCanonicalizationMode::kForward); + if (statusWith.isOK() && !statusWith.getValue().empty()) { + promise.emplaceValue(makeHealthyStatus()); + } else { + promise.emplaceValue( + makeSimpleFailedStatus(Severity::kFailure, {statusWith.getStatus()})); + } + } catch (const DBException& e) { + promise.emplaceValue(makeSimpleFailedStatus(Severity::kFailure, {e.toStatus()})); } }); diff --git a/src/mongo/db/process_health/dns_health_observer.h b/src/mongo/db/process_health/dns_health_observer.h index 2640c9024f7..11f54ad01bd 100644 --- a/src/mongo/db/process_health/dns_health_observer.h +++ b/src/mongo/db/process_health/dns_health_observer.h @@ -56,7 +56,7 @@ protected: } Future<HealthCheckStatus> periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept override; + PeriodicHealthCheckContext&& periodicCheckContext) override; private: mutable PseudoRandom _random; diff --git a/src/mongo/db/process_health/health_observer_base.cpp b/src/mongo/db/process_health/health_observer_base.cpp index fe3509835df..beb1a1fba83 100644 --- a/src/mongo/db/process_health/health_observer_base.cpp +++ b/src/mongo/db/process_health/health_observer_base.cpp @@ -54,25 +54,38 @@ SharedSemiFuture<HealthCheckStatus> HealthObserverBase::periodicCheck( _currentlyRunningHealthCheck = true; } + Future<HealthCheckStatus> healthCheckResult; + + try { + healthCheckResult = periodicCheckImpl({token, taskExecutor}); + } catch (const DBException& e) { + LOGV2_DEBUG(6728001, + 2, + "Health observer failed due to an exception", + "observerType"_attr = getType(), + "errorCode"_attr = e.code(), + "reason"_attr = e.reason()); + + healthCheckResult = makeSimpleFailedStatus(Severity::kFailure, {e.toStatus()}); + } + _deadlineFuture = DeadlineFuture<HealthCheckStatus>::create( taskExecutor, - periodicCheckImpl({token, taskExecutor}) - .onCompletion([this](StatusWith<HealthCheckStatus> status) { - const auto now = _svcCtx->getPreciseClockSource()->now(); - - auto lk = stdx::lock_guard(_mutex); - ++_completedChecksCount; - invariant(_currentlyRunningHealthCheck); - _currentlyRunningHealthCheck = false; - _lastTimeCheckCompleted = now; - - if (!status.isOK() || - !HealthCheckStatus::isResolved(status.getValue().getSeverity())) { - ++_completedChecksWithFaultCount; - } - - return status; - }), + std::move(healthCheckResult).onCompletion([this](StatusWith<HealthCheckStatus> status) { + const auto now = _svcCtx->getPreciseClockSource()->now(); + + auto lk = stdx::lock_guard(_mutex); + ++_completedChecksCount; + invariant(_currentlyRunningHealthCheck); + _currentlyRunningHealthCheck = false; + _lastTimeCheckCompleted = now; + + if (!status.isOK() || !HealthCheckStatus::isResolved(status.getValue().getSeverity())) { + ++_completedChecksWithFaultCount; + } + + return status; + }), getObserverTimeout()); return _deadlineFuture->get(); diff --git a/src/mongo/db/process_health/health_observer_base.h b/src/mongo/db/process_health/health_observer_base.h index 18f24eb8540..ef7900f640f 100644 --- a/src/mongo/db/process_health/health_observer_base.h +++ b/src/mongo/db/process_health/health_observer_base.h @@ -91,7 +91,7 @@ protected: * @return The result of a complete health check */ virtual Future<HealthCheckStatus> periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept = 0; + PeriodicHealthCheckContext&& periodicCheckContext) = 0; HealthObserverLivenessStats getStatsLocked(WithLock) const; diff --git a/src/mongo/db/process_health/health_observer_mock.h b/src/mongo/db/process_health/health_observer_mock.h index 80051dc5eac..996d046cad9 100644 --- a/src/mongo/db/process_health/health_observer_mock.h +++ b/src/mongo/db/process_health/health_observer_mock.h @@ -37,8 +37,11 @@ namespace mongo { namespace process_health { /** - * Mocked health observer is using a test callback to fetch the next - * fault severity value every time the periodic check is invoked. + * Mocked health observer has two modes of operation (depending on constructor called): + * 1. Passing a callback that runs on an executor and returns a severity + * 2. Passing an implementation of periodicCheckImpl + * + * See unit test HealthCheckThrowingExceptionMakesFailedStatus for an example of the second mode. */ class HealthObserverMock : public HealthObserverBase { public: @@ -51,6 +54,16 @@ public: _getSeverityCallback(getSeverityCallback), _observerTimeout(observerTimeout) {} + HealthObserverMock( + FaultFacetType mockType, + ServiceContext* svcCtx, + std::function<Future<HealthCheckStatus>(PeriodicHealthCheckContext&&)> periodicCheckImpl, + Milliseconds observerTimeout) + : HealthObserverBase(svcCtx), + _mockType(mockType), + _periodicCheckImpl(periodicCheckImpl), + _observerTimeout(observerTimeout) {} + virtual ~HealthObserverMock() = default; bool isConfigured() const override { @@ -67,7 +80,11 @@ protected: } Future<HealthCheckStatus> periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept override { + PeriodicHealthCheckContext&& periodicCheckContext) override { + + if (_periodicCheckImpl.has_value()) { + return (*_periodicCheckImpl)(std::move(periodicCheckContext)); + } auto completionPf = makePromiseFuture<HealthCheckStatus>(); @@ -96,6 +113,8 @@ protected: private: const FaultFacetType _mockType; std::function<Severity()> _getSeverityCallback; + boost::optional<std::function<Future<HealthCheckStatus>(PeriodicHealthCheckContext&&)>> + _periodicCheckImpl; const Milliseconds _observerTimeout; }; diff --git a/src/mongo/db/process_health/health_observer_test.cpp b/src/mongo/db/process_health/health_observer_test.cpp index f10d2683967..a2187f83e52 100644 --- a/src/mongo/db/process_health/health_observer_test.cpp +++ b/src/mongo/db/process_health/health_observer_test.cpp @@ -42,6 +42,7 @@ namespace process_health { // Using the common fault manager test suite. using test::FaultManagerTest; +using PeriodicHealthCheckContext = HealthObserverBase::PeriodicHealthCheckContext; namespace { // Tests that the mock observer is registered properly. @@ -250,6 +251,49 @@ TEST_F(FaultManagerTest, SchedulingDuplicateHealthChecksRejected) { LOGV2(6418205, "Total completed checks count", "count"_attr = totalCompletedCount); } +TEST_F(FaultManagerTest, HealthCheckThrowingExceptionMakesFailedStatus) { + resetManager(std::make_unique<FaultManagerConfig>()); + + FaultFacetType facetType = FaultFacetType::kMock1; + AtomicWord<bool> shouldThrow{false}; + + std::string logMsg = "Failed due to exception"; + + auto periodicCheckImpl = + [facetType, &shouldThrow, logMsg]( + PeriodicHealthCheckContext&& periodicHealthCheckCtx) -> Future<HealthCheckStatus> { + if (shouldThrow.load()) { + uasserted(ErrorCodes::InternalError, logMsg); + } + auto completionPf = makePromiseFuture<HealthCheckStatus>(); + completionPf.promise.emplaceValue(HealthCheckStatus(facetType, Severity::kOk, "success")); + return std::move(completionPf.future); + }; + + HealthObserverRegistration::registerObserverFactory( + [facetType, periodicCheckImpl](ServiceContext* svcCtx) { + return std::make_unique<HealthObserverMock>( + facetType, svcCtx, periodicCheckImpl, Milliseconds(Seconds(30))); + }); + + assertSoon([this] { return (manager().getFaultState() == FaultState::kStartupCheck); }); + + auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); + assertSoon([this] { return (manager().getFaultState() == FaultState::kOk); }); + + auto observer = manager().getHealthObserversTest().front(); + ASSERT_EQ(observer->getStats().completedChecksWithFaultCount, 0); + + shouldThrow.store(true); + assertSoon([this] { return (manager().getFaultState() == FaultState::kTransientFault); }); + + ASSERT_EQ(manager().currentFault()->toBSON()["facets"]["mock1"]["description"].String(), + "InternalError: Failed due to exception "); + + ASSERT_GTE(observer->getStats().completedChecksWithFaultCount, 1); + resetManager(); +} + } // namespace } // namespace process_health } // namespace mongo diff --git a/src/mongo/db/process_health/test_health_observer.cpp b/src/mongo/db/process_health/test_health_observer.cpp index ece51a11165..ec440754cbf 100644 --- a/src/mongo/db/process_health/test_health_observer.cpp +++ b/src/mongo/db/process_health/test_health_observer.cpp @@ -41,7 +41,7 @@ MONGO_FAIL_POINT_DEFINE(testHealthObserver); MONGO_FAIL_POINT_DEFINE(badConfigTestHealthObserver); MONGO_FAIL_POINT_DEFINE(statusFailureTestHealthObserver); Future<HealthCheckStatus> TestHealthObserver::periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept { + PeriodicHealthCheckContext&& periodicCheckContext) { LOGV2_DEBUG(5936801, 2, "Test health observer executing"); hangTestHealthObserver.pauseWhileSet(); diff --git a/src/mongo/db/process_health/test_health_observer.h b/src/mongo/db/process_health/test_health_observer.h index 428d57f8e9d..0c23df7fb42 100644 --- a/src/mongo/db/process_health/test_health_observer.h +++ b/src/mongo/db/process_health/test_health_observer.h @@ -50,7 +50,7 @@ protected: } Future<HealthCheckStatus> periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept override; + PeriodicHealthCheckContext&& periodicCheckContext) override; bool isConfigured() const override; }; |