From 3e5d353f846896ea81fca853bc4a03b94fe91df5 Mon Sep 17 00:00:00 2001 From: Andrew Witten Date: Tue, 14 Jun 2022 20:38:42 +0000 Subject: SERVER-67280 wrap periodicCheckImpl with try catch --- .../config_server_health_observer.cpp | 4 +- .../db/process_health/dns_health_observer.cpp | 21 ++++++---- src/mongo/db/process_health/dns_health_observer.h | 2 +- .../db/process_health/health_observer_base.cpp | 47 ++++++++++++++-------- src/mongo/db/process_health/health_observer_base.h | 2 +- src/mongo/db/process_health/health_observer_mock.h | 25 ++++++++++-- .../db/process_health/health_observer_test.cpp | 44 ++++++++++++++++++++ .../db/process_health/test_health_observer.cpp | 2 +- src/mongo/db/process_health/test_health_observer.h | 2 +- 9 files changed, 115 insertions(+), 34 deletions(-) diff --git a/src/mongo/db/process_health/config_server_health_observer.cpp b/src/mongo/db/process_health/config_server_health_observer.cpp index 5e8a9ada390..bf011d28472 100644 --- a/src/mongo/db/process_health/config_server_health_observer.cpp +++ b/src/mongo/db/process_health/config_server_health_observer.cpp @@ -87,7 +87,7 @@ public: * previous one is filled, thus synchronization can be relaxed. */ Future periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept override; + PeriodicHealthCheckContext&& periodicCheckContext) override; private: // Collects the results of one check. @@ -146,7 +146,7 @@ ConfigServerHealthObserver::ConfigServerHealthObserver(ServiceContext* svcCtx) : HealthObserverBase(svcCtx) {} Future ConfigServerHealthObserver::periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept { + PeriodicHealthCheckContext&& periodicCheckContext) { // The chain is not capturing 'this' for the case the network call outlives the observer. return _checkImpl(std::move(periodicCheckContext)) .then([type = getType()](CheckResult result) mutable -> Future { diff --git a/src/mongo/db/process_health/dns_health_observer.cpp b/src/mongo/db/process_health/dns_health_observer.cpp index 6f41e5e2785..ff6611d10ac 100644 --- a/src/mongo/db/process_health/dns_health_observer.cpp +++ b/src/mongo/db/process_health/dns_health_observer.cpp @@ -47,9 +47,10 @@ namespace process_health { MONGO_FAIL_POINT_DEFINE(dnsHealthObserverFp); Future DnsHealthObserver::periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept { + PeriodicHealthCheckContext&& periodicCheckContext) { LOGV2_DEBUG(5938401, 2, "DNS health observer executing"); + auto makeFailedHealthCheckFuture = [this](const Status& status) { return Future::makeReady( makeSimpleFailedStatus(Severity::kFailure, {status})); @@ -101,13 +102,17 @@ Future DnsHealthObserver::periodicCheckImpl( auto status = periodicCheckContext.taskExecutor->scheduleWork( [this, servers, promise = std::move(completionPf.promise)]( const executor::TaskExecutor::CallbackArgs& cbArgs) mutable { - auto statusWith = - getHostFQDNs(servers.front().host(), HostnameCanonicalizationMode::kForward); - if (statusWith.isOK() && !statusWith.getValue().empty()) { - promise.emplaceValue(makeHealthyStatus()); - } else { - promise.emplaceValue( - makeSimpleFailedStatus(Severity::kFailure, {statusWith.getStatus()})); + try { + auto statusWith = + getHostFQDNs(servers.front().host(), HostnameCanonicalizationMode::kForward); + if (statusWith.isOK() && !statusWith.getValue().empty()) { + promise.emplaceValue(makeHealthyStatus()); + } else { + promise.emplaceValue( + makeSimpleFailedStatus(Severity::kFailure, {statusWith.getStatus()})); + } + } catch (const DBException& e) { + promise.emplaceValue(makeSimpleFailedStatus(Severity::kFailure, {e.toStatus()})); } }); diff --git a/src/mongo/db/process_health/dns_health_observer.h b/src/mongo/db/process_health/dns_health_observer.h index 2640c9024f7..11f54ad01bd 100644 --- a/src/mongo/db/process_health/dns_health_observer.h +++ b/src/mongo/db/process_health/dns_health_observer.h @@ -56,7 +56,7 @@ protected: } Future periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept override; + PeriodicHealthCheckContext&& periodicCheckContext) override; private: mutable PseudoRandom _random; diff --git a/src/mongo/db/process_health/health_observer_base.cpp b/src/mongo/db/process_health/health_observer_base.cpp index 243a9cf7937..01d90bf3db6 100644 --- a/src/mongo/db/process_health/health_observer_base.cpp +++ b/src/mongo/db/process_health/health_observer_base.cpp @@ -56,25 +56,38 @@ SharedSemiFuture HealthObserverBase::periodicCheck( _currentlyRunningHealthCheck = true; } + Future healthCheckResult; + + try { + healthCheckResult = periodicCheckImpl({token, taskExecutor}); + } catch (const DBException& e) { + LOGV2_DEBUG(6728001, + 2, + "Health observer failed due to an exception", + "observerType"_attr = getType(), + "errorCode"_attr = e.code(), + "reason"_attr = e.reason()); + + healthCheckResult = makeSimpleFailedStatus(Severity::kFailure, {e.toStatus()}); + } + _deadlineFuture = DeadlineFuture::create( taskExecutor, - periodicCheckImpl({token, taskExecutor}) - .onCompletion([this](StatusWith status) { - const auto now = _svcCtx->getPreciseClockSource()->now(); - - auto lk = stdx::lock_guard(_mutex); - ++_completedChecksCount; - invariant(_currentlyRunningHealthCheck); - _currentlyRunningHealthCheck = false; - _lastTimeCheckCompleted = now; - - if (!status.isOK() || - !HealthCheckStatus::isResolved(status.getValue().getSeverity())) { - ++_completedChecksWithFaultCount; - } - - return status; - }), + std::move(healthCheckResult).onCompletion([this](StatusWith status) { + const auto now = _svcCtx->getPreciseClockSource()->now(); + + auto lk = stdx::lock_guard(_mutex); + ++_completedChecksCount; + invariant(_currentlyRunningHealthCheck); + _currentlyRunningHealthCheck = false; + _lastTimeCheckCompleted = now; + + if (!status.isOK() || !HealthCheckStatus::isResolved(status.getValue().getSeverity())) { + ++_completedChecksWithFaultCount; + } + + return status; + }), getObserverTimeout()); return _deadlineFuture->get(); diff --git a/src/mongo/db/process_health/health_observer_base.h b/src/mongo/db/process_health/health_observer_base.h index 18f24eb8540..ef7900f640f 100644 --- a/src/mongo/db/process_health/health_observer_base.h +++ b/src/mongo/db/process_health/health_observer_base.h @@ -91,7 +91,7 @@ protected: * @return The result of a complete health check */ virtual Future periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept = 0; + PeriodicHealthCheckContext&& periodicCheckContext) = 0; HealthObserverLivenessStats getStatsLocked(WithLock) const; diff --git a/src/mongo/db/process_health/health_observer_mock.h b/src/mongo/db/process_health/health_observer_mock.h index e7a500bdf8c..b44fd35a368 100644 --- a/src/mongo/db/process_health/health_observer_mock.h +++ b/src/mongo/db/process_health/health_observer_mock.h @@ -40,8 +40,11 @@ namespace mongo { namespace process_health { /** - * Mocked health observer is using a test callback to fetch the next - * fault severity value every time the periodic check is invoked. + * Mocked health observer has two modes of operation (depending on constructor called): + * 1. Passing a callback that runs on an executor and returns a severity + * 2. Passing an implementation of periodicCheckImpl + * + * See unit test HealthCheckThrowingExceptionMakesFailedStatus for an example of the second mode. */ class HealthObserverMock : public HealthObserverBase { public: @@ -54,6 +57,16 @@ public: _getSeverityCallback(getSeverityCallback), _observerTimeout(observerTimeout) {} + HealthObserverMock( + FaultFacetType mockType, + ServiceContext* svcCtx, + std::function(PeriodicHealthCheckContext&&)> periodicCheckImpl, + Milliseconds observerTimeout) + : HealthObserverBase(svcCtx), + _mockType(mockType), + _periodicCheckImpl(periodicCheckImpl), + _observerTimeout(observerTimeout) {} + virtual ~HealthObserverMock() = default; bool isConfigured() const override { @@ -70,7 +83,11 @@ protected: } Future periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept override { + PeriodicHealthCheckContext&& periodicCheckContext) override { + + if (_periodicCheckImpl.has_value()) { + return (*_periodicCheckImpl)(std::move(periodicCheckContext)); + } auto completionPf = makePromiseFuture(); @@ -99,6 +116,8 @@ protected: private: const FaultFacetType _mockType; std::function _getSeverityCallback; + boost::optional(PeriodicHealthCheckContext&&)>> + _periodicCheckImpl; const Milliseconds _observerTimeout; }; diff --git a/src/mongo/db/process_health/health_observer_test.cpp b/src/mongo/db/process_health/health_observer_test.cpp index 66f5c8a6e99..cd79db11ebc 100644 --- a/src/mongo/db/process_health/health_observer_test.cpp +++ b/src/mongo/db/process_health/health_observer_test.cpp @@ -46,6 +46,7 @@ namespace process_health { // Using the common fault manager test suite. using test::FaultManagerTest; +using PeriodicHealthCheckContext = HealthObserverBase::PeriodicHealthCheckContext; namespace { // Tests that the mock observer is registered properly. @@ -254,6 +255,49 @@ TEST_F(FaultManagerTest, SchedulingDuplicateHealthChecksRejected) { LOGV2(6418205, "Total completed checks count", "count"_attr = totalCompletedCount); } +TEST_F(FaultManagerTest, HealthCheckThrowingExceptionMakesFailedStatus) { + resetManager(std::make_unique()); + + FaultFacetType facetType = FaultFacetType::kMock1; + AtomicWord shouldThrow{false}; + + std::string logMsg = "Failed due to exception"; + + auto periodicCheckImpl = + [facetType, &shouldThrow, logMsg]( + PeriodicHealthCheckContext&& periodicHealthCheckCtx) -> Future { + if (shouldThrow.load()) { + uasserted(ErrorCodes::InternalError, logMsg); + } + auto completionPf = makePromiseFuture(); + completionPf.promise.emplaceValue(HealthCheckStatus(facetType, Severity::kOk, "success")); + return std::move(completionPf.future); + }; + + HealthObserverRegistration::registerObserverFactory( + [facetType, periodicCheckImpl](ServiceContext* svcCtx) { + return std::make_unique( + facetType, svcCtx, periodicCheckImpl, Milliseconds(Seconds(30))); + }); + + assertSoon([this] { return (manager().getFaultState() == FaultState::kStartupCheck); }); + + auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); + assertSoon([this] { return (manager().getFaultState() == FaultState::kOk); }); + + auto observer = manager().getHealthObserversTest().front(); + ASSERT_EQ(observer->getStats().completedChecksWithFaultCount, 0); + + shouldThrow.store(true); + assertSoon([this] { return (manager().getFaultState() == FaultState::kTransientFault); }); + + ASSERT_EQ(manager().currentFault()->toBSON()["facets"]["mock1"]["description"].String(), + "InternalError: Failed due to exception "); + + ASSERT_GTE(observer->getStats().completedChecksWithFaultCount, 1); + resetManager(); +} + } // namespace } // namespace process_health } // namespace mongo diff --git a/src/mongo/db/process_health/test_health_observer.cpp b/src/mongo/db/process_health/test_health_observer.cpp index 70572c48851..01224117baa 100644 --- a/src/mongo/db/process_health/test_health_observer.cpp +++ b/src/mongo/db/process_health/test_health_observer.cpp @@ -43,7 +43,7 @@ MONGO_FAIL_POINT_DEFINE(testHealthObserver); MONGO_FAIL_POINT_DEFINE(badConfigTestHealthObserver); MONGO_FAIL_POINT_DEFINE(statusFailureTestHealthObserver); Future TestHealthObserver::periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept { + PeriodicHealthCheckContext&& periodicCheckContext) { LOGV2_DEBUG(5936801, 2, "Test health observer executing"); hangTestHealthObserver.pauseWhileSet(); diff --git a/src/mongo/db/process_health/test_health_observer.h b/src/mongo/db/process_health/test_health_observer.h index 428d57f8e9d..0c23df7fb42 100644 --- a/src/mongo/db/process_health/test_health_observer.h +++ b/src/mongo/db/process_health/test_health_observer.h @@ -50,7 +50,7 @@ protected: } Future periodicCheckImpl( - PeriodicHealthCheckContext&& periodicCheckContext) noexcept override; + PeriodicHealthCheckContext&& periodicCheckContext) override; bool isConfigured() const override; }; -- cgit v1.2.1