From 5a243e404806f10a5e55db5013d42b4321992c09 Mon Sep 17 00:00:00 2001 From: Davis Haupt Date: Tue, 4 Jan 2022 18:25:37 +0000 Subject: SERVER-60846 replace double severity with enum type --- .../health_monitor/server_status_health.js | 3 +- src/mongo/db/process_health/fault.cpp | 16 ----- src/mongo/db/process_health/fault.h | 11 ---- src/mongo/db/process_health/fault_facet_impl.h | 2 +- src/mongo/db/process_health/fault_facet_mock.h | 4 +- src/mongo/db/process_health/fault_facet_test.cpp | 15 ++--- src/mongo/db/process_health/fault_manager.cpp | 3 +- .../db/process_health/fault_manager_test_suite.h | 4 +- .../db/process_health/fault_state_machine_test.cpp | 68 ++++++++++++---------- src/mongo/db/process_health/fault_test.cpp | 19 +++--- src/mongo/db/process_health/health_check_status.h | 37 +++++------- .../db/process_health/health_observer_base.cpp | 6 +- src/mongo/db/process_health/health_observer_base.h | 3 +- src/mongo/db/process_health/health_observer_mock.h | 4 +- .../db/process_health/health_observer_test.cpp | 16 ++--- .../db/process_health/test_health_observer.cpp | 3 +- 16 files changed, 95 insertions(+), 119 deletions(-) diff --git a/jstests/sharding/health_monitor/server_status_health.js b/jstests/sharding/health_monitor/server_status_health.js index 77c1c68485d..169c7a1865b 100644 --- a/jstests/sharding/health_monitor/server_status_health.js +++ b/jstests/sharding/health_monitor/server_status_health.js @@ -57,14 +57,13 @@ assert(result.enteredStateAtTime); assert(result.faultInformation); const faultInformation = result.faultInformation; -assert.eq(faultInformation.severity, 1); +// TODO: SERVER-60973 check fault severity assert(faultInformation.duration); assert(faultInformation.facets); assert.eq(faultInformation.numFacets, 1); assert(faultInformation.facets.kTestObserver); const kTestObserverFacet = faultInformation.facets.kTestObserver; -assert.eq(kTestObserverFacet.severity, faultInformation.severity); assert.eq(kTestObserverFacet.duration, faultInformation.duration); assert(kTestObserverFacet.description.includes("InternalError: test msg")); diff --git a/src/mongo/db/process_health/fault.cpp b/src/mongo/db/process_health/fault.cpp index 6b47d92e525..a98b73bd551 100644 --- a/src/mongo/db/process_health/fault.cpp +++ b/src/mongo/db/process_health/fault.cpp @@ -41,21 +41,6 @@ UUID Fault::getId() const { return _id; } -double Fault::getSeverity() const { - auto facets = getFacets(); - - // Simple algo to compute aggregate severity: take the max from all facets. - double severity = 0; - for (auto& facet : facets) { - invariant(facet); - HealthCheckStatus status = facet->getStatus(); - if (status.getSeverity() > severity) { - severity = status.getSeverity(); - } - } - return severity; -} - Milliseconds Fault::getDuration() const { return Milliseconds(_clockSource->now() - _startTime); } @@ -115,7 +100,6 @@ void Fault::garbageCollectResolvedFacets() { void Fault::appendDescription(BSONObjBuilder* builder) const { builder->append("id", getId().toBSON()); - builder->append("severity", getSeverity()); builder->append("duration", getDuration().toBSON()); BSONObjBuilder facetsBuilder; for (auto& facet : _facets) { diff --git a/src/mongo/db/process_health/fault.h b/src/mongo/db/process_health/fault.h index a3fc1dbf577..dbc9a0317b3 100644 --- a/src/mongo/db/process_health/fault.h +++ b/src/mongo/db/process_health/fault.h @@ -54,17 +54,6 @@ public: UUID getId() const; - /** - * The fault severity value is an aggregate severity calculated - * from all facets currently owned by this instance. - * - * @return Current fault severity. The expected values: - * 0: Ok - * (0, 1.0): Transient fault condition - * [1.0, Inf): Active fault condition - */ - double getSeverity() const; - /** * @return The lifetime of this fault from the moment it was created. * Invariant: getDuration() >= getActiveFaultDuration() diff --git a/src/mongo/db/process_health/fault_facet_impl.h b/src/mongo/db/process_health/fault_facet_impl.h index aa48b2343f5..e956a573ce2 100644 --- a/src/mongo/db/process_health/fault_facet_impl.h +++ b/src/mongo/db/process_health/fault_facet_impl.h @@ -64,7 +64,7 @@ private: mutable Mutex _mutex = MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(1), "FaultFacetImpl::_mutex"); - double _severity = 0; + Severity _severity = Severity::kOk; std::string _description; }; diff --git a/src/mongo/db/process_health/fault_facet_mock.h b/src/mongo/db/process_health/fault_facet_mock.h index 29a3c3a4184..b1f1589d320 100644 --- a/src/mongo/db/process_health/fault_facet_mock.h +++ b/src/mongo/db/process_health/fault_facet_mock.h @@ -45,7 +45,7 @@ namespace process_health { class FaultFacetMock : public FaultFacet { public: // Testing callback to fill up mocked values. - using MockCallback = std::function; + using MockCallback = std::function; FaultFacetMock(FaultFacetType mockType, ClockSource* clockSource, MockCallback callback) : _mockType(mockType), _clockSource(clockSource), _callback(callback) { @@ -59,7 +59,7 @@ public: } HealthCheckStatus getStatus() const override { - const double severity = _callback(); + const Severity severity = _callback(); auto healthCheckStatus = HealthCheckStatus(_mockType, severity, "Mock facet"); LOGV2(5956702, "Mock fault facet status", "status"_attr = healthCheckStatus); diff --git a/src/mongo/db/process_health/fault_facet_test.cpp b/src/mongo/db/process_health/fault_facet_test.cpp index 537c7db37e7..33ad76a5226 100644 --- a/src/mongo/db/process_health/fault_facet_test.cpp +++ b/src/mongo/db/process_health/fault_facet_test.cpp @@ -57,16 +57,17 @@ private: }; TEST_F(FaultFacetTestWithMock, FacetWithFailure) { - startMock([] { return 0.5; }); - auto status = getStatus(); - ASSERT_APPROX_EQUAL(0.5, status.getSeverity(), 0.001); + startMock([] { return Severity::kFailure; }); + ASSERT_EQUALS(Severity::kFailure, getStatus().getSeverity()); } // Using the FaultFacetImpl. class FaultFacetImplTest : public unittest::Test { public: - static inline const auto kNoFailures = HealthCheckStatus(FaultFacetType::kMock1, 0.0, "test"); - static inline const auto kFailure = HealthCheckStatus(FaultFacetType::kMock1, 0.5, "test"); + static inline const auto kNoFailures = + HealthCheckStatus(FaultFacetType::kMock1, Severity::kOk, "test"); + static inline const auto kFailure = + HealthCheckStatus(FaultFacetType::kMock1, Severity::kFailure, "test"); void setUp() { _svcCtx = ServiceContext::make(); @@ -103,7 +104,7 @@ private: TEST_F(FaultFacetImplTest, Simple) { createWithStatus(kFailure); const auto status = getStatus(); - ASSERT_GT(status.getSeverity(), 0); + ASSERT_GT(status.getSeverity(), Severity::kOk); ASSERT_EQ(status.getType(), FaultFacetType::kMock1); ASSERT_EQ(status.getShortDescription(), "test"_sd); } @@ -111,7 +112,7 @@ TEST_F(FaultFacetImplTest, Simple) { TEST_F(FaultFacetImplTest, Update) { createWithStatus(kFailure); update(kNoFailures); - ASSERT_EQ(getStatus().getSeverity(), 0); + ASSERT_EQ(getStatus().getSeverity(), Severity::kOk); } } // namespace diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp index d0b776ead4c..1cc855e9b65 100644 --- a/src/mongo/db/process_health/fault_manager.cpp +++ b/src/mongo/db/process_health/fault_manager.cpp @@ -552,7 +552,8 @@ void FaultManager::healthCheck(HealthObserver* observer, CancellationToken token }; auto acceptNotOKStatus = [this, observer](Status s) { - auto healthCheckStatus = HealthCheckStatus(observer->getType(), 1.0, s.reason()); + auto healthCheckStatus = + HealthCheckStatus(observer->getType(), Severity::kFailure, s.reason()); LOGV2_ERROR( 6007901, "Unexpected failure during health check", "status"_attr = healthCheckStatus); accept(healthCheckStatus); diff --git a/src/mongo/db/process_health/fault_manager_test_suite.h b/src/mongo/db/process_health/fault_manager_test_suite.h index fb9ce56f2c4..62332ffb235 100644 --- a/src/mongo/db/process_health/fault_manager_test_suite.h +++ b/src/mongo/db/process_health/fault_manager_test_suite.h @@ -176,7 +176,7 @@ public: } void registerMockHealthObserver(FaultFacetType mockType, - std::function getSeverityCallback, + std::function getSeverityCallback, Milliseconds timeout) { HealthObserverRegistration::registerObserverFactory( [mockType, getSeverityCallback, timeout](ServiceContext* svcCtx) { @@ -186,7 +186,7 @@ public: } void registerMockHealthObserver(FaultFacetType mockType, - std::function getSeverityCallback) { + std::function getSeverityCallback) { registerMockHealthObserver(mockType, getSeverityCallback, Milliseconds(Seconds(30))); } diff --git a/src/mongo/db/process_health/fault_state_machine_test.cpp b/src/mongo/db/process_health/fault_state_machine_test.cpp index 6e845f3fe3d..7dfa76ab1b0 100644 --- a/src/mongo/db/process_health/fault_state_machine_test.cpp +++ b/src/mongo/db/process_health/fault_state_machine_test.cpp @@ -46,8 +46,8 @@ namespace { TEST_F(FaultManagerTest, TransitionsFromStartupCheckToOkWhenAllObserversAreSuccessful) { RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; - registerMockHealthObserver(FaultFacetType::kMock1, [] { return 0; }); - registerMockHealthObserver(FaultFacetType::kMock2, [] { return 0; }); + registerMockHealthObserver(FaultFacetType::kMock1, [] { return Severity::kOk; }); + registerMockHealthObserver(FaultFacetType::kMock2, [] { return Severity::kOk; }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); std::vector faultFacetTypes{FaultFacetType::kMock1, FaultFacetType::kMock2}; @@ -71,12 +71,12 @@ TEST_F(FaultManagerTest, TransitionsFromStartupCheckToOkWhenAllObserversAreSucce TEST_F(FaultManagerTest, TransitionsFromStartupCheckToOkAfterFailureThenSuccess) { RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; const auto faultFacetType = FaultFacetType::kMock1; - registerMockHealthObserver(faultFacetType, [] { return 0; }); + registerMockHealthObserver(faultFacetType, [] { return Severity::kOk; }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); ASSERT(manager().getFaultState() == FaultState::kStartupCheck); - manager().acceptTest(HealthCheckStatus(faultFacetType, 1.0, "error")); + manager().acceptTest(HealthCheckStatus(faultFacetType, Severity::kFailure, "error")); advanceTime(Milliseconds(100)); ASSERT(manager().getFaultState() == FaultState::kStartupCheck); ASSERT(hasFault()); @@ -92,7 +92,7 @@ TEST_F(FaultManagerTest, TransitionsFromOkToTransientFaultAfterSuccessThenFailur RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; const auto faultFacetType = FaultFacetType::kMock1; - registerMockHealthObserver(faultFacetType, [] { return 0; }); + registerMockHealthObserver(faultFacetType, [] { return Severity::kOk; }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); @@ -104,7 +104,7 @@ TEST_F(FaultManagerTest, TransitionsFromOkToTransientFaultAfterSuccessThenFailur assertSoon([this]() { return manager().getFaultState() == FaultState::kOk; }); ASSERT(initialHealthCheckFuture.isReady()); - manager().acceptTest(HealthCheckStatus(faultFacetType, 1.0, "error")); + manager().acceptTest(HealthCheckStatus(faultFacetType, Severity::kFailure, "error")); advanceTime(Milliseconds(100)); assertSoon([this]() { return hasFault() && manager().getFaultState() == FaultState::kTransientFault; @@ -114,7 +114,7 @@ TEST_F(FaultManagerTest, TransitionsFromOkToTransientFaultAfterSuccessThenFailur TEST_F(FaultManagerTest, StaysInOkOnSuccess) { RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; const auto faultFacetType = FaultFacetType::kMock1; - registerMockHealthObserver(faultFacetType, [] { return 0; }); + registerMockHealthObserver(faultFacetType, [] { return Severity::kOk; }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); @@ -136,7 +136,7 @@ TEST_F(FaultManagerTest, StaysInOkOnSuccess) { TEST_F(FaultManagerTest, StaysInTransientFault) { RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; const auto faultFacetType = FaultFacetType::kMock1; - registerMockHealthObserver(faultFacetType, [] { return 0; }); + registerMockHealthObserver(faultFacetType, [] { return Severity::kOk; }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); @@ -148,13 +148,13 @@ TEST_F(FaultManagerTest, StaysInTransientFault) { assertSoon([this]() { return manager().getFaultState() == FaultState::kOk; }); ASSERT(initialHealthCheckFuture.isReady()); - manager().acceptTest(HealthCheckStatus(faultFacetType, 1.0, "error")); + manager().acceptTest(HealthCheckStatus(faultFacetType, Severity::kFailure, "error")); advanceTime(Milliseconds(100)); assertSoon([this]() { return hasFault() && manager().getFaultState() == FaultState::kTransientFault; }); - manager().acceptTest(HealthCheckStatus(faultFacetType, 1.0, "error")); + manager().acceptTest(HealthCheckStatus(faultFacetType, Severity::kFailure, "error")); advanceTime(Milliseconds(100)); assertSoon([this]() { return hasFault() && manager().getFaultState() == FaultState::kTransientFault; @@ -164,7 +164,7 @@ TEST_F(FaultManagerTest, StaysInTransientFault) { TEST_F(FaultManagerTest, TransitionsFromTransientFaultToOkOnFailureThenSuccess) { RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; const auto faultFacetType = FaultFacetType::kMock1; - registerMockHealthObserver(faultFacetType, [] { return 0; }); + registerMockHealthObserver(faultFacetType, [] { return Severity::kOk; }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); @@ -176,7 +176,7 @@ TEST_F(FaultManagerTest, TransitionsFromTransientFaultToOkOnFailureThenSuccess) assertSoon([this]() { return manager().getFaultState() == FaultState::kOk; }); ASSERT(initialHealthCheckFuture.isReady()); - manager().acceptTest(HealthCheckStatus(faultFacetType, 1.0, "error")); + manager().acceptTest(HealthCheckStatus(faultFacetType, Severity::kFailure, "error")); advanceTime(Milliseconds(100)); assertSoon([this]() { return hasFault() && manager().getFaultState() == FaultState::kTransientFault; @@ -191,16 +191,19 @@ TEST_F(FaultManagerTest, TransitionsFromTransientFaultToOkOnFailureThenSuccess) TEST_F(FaultManagerTest, OneFacetIsResolved) { RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; - registerMockHealthObserver(FaultFacetType::kMock1, [] { return 1.1; }); - registerMockHealthObserver(FaultFacetType::kMock2, [] { return 1.1; }); + registerMockHealthObserver(FaultFacetType::kMock1, [] { return Severity::kFailure; }); + registerMockHealthObserver(FaultFacetType::kMock2, [] { return Severity::kFailure; }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); ASSERT(manager().getFaultState() == FaultState::kStartupCheck); - manager().acceptTest(HealthCheckStatus(FaultFacetType::kMock1, 1.1, "failing health check 1")); - manager().acceptTest(HealthCheckStatus(FaultFacetType::kMock2, 1.1, "failing health check 2")); + manager().acceptTest( + HealthCheckStatus(FaultFacetType::kMock1, Severity::kFailure, "failing health check 1")); + manager().acceptTest( + HealthCheckStatus(FaultFacetType::kMock2, Severity::kFailure, "failing health check 2")); assertSoon([this] { return manager().getOrCreateFaultTest()->getFacets().size() == 2; }); + manager().acceptTest(HealthCheckStatus(FaultFacetType::kMock1)); assertSoon([this] { return manager().getOrCreateFaultTest()->getFacets().front()->getType() == @@ -213,12 +216,12 @@ DEATH_TEST_F(FaultManagerTest, TransitionsToActiveFaultAfterTimeoutFromTransient RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; auto faultFacetType = FaultFacetType::kMock1; - registerMockHealthObserver(faultFacetType, [] { return 1.1; }); + registerMockHealthObserver(faultFacetType, [] { return Severity::kFailure; }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); manager().acceptTest(HealthCheckStatus(faultFacetType)); ASSERT(manager().getFaultState() == FaultState::kOk); - manager().acceptTest(HealthCheckStatus(faultFacetType, 1.0, "error")); + manager().acceptTest(HealthCheckStatus(faultFacetType, Severity::kFailure, "error")); ASSERT(manager().getFaultState() == FaultState::kTransientFault); advanceTime(Seconds(kActiveFaultDurationSecs)); @@ -233,12 +236,12 @@ TEST_F(FaultManagerTest, config->setIntensityForType(faultFacetType, HealthObserverIntensityEnum::kNonCritical); resetManager(std::move(config)); - registerMockHealthObserver(faultFacetType, [] { return 1.1; }); + registerMockHealthObserver(faultFacetType, [] { return Severity::kFailure; }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); manager().acceptTest(HealthCheckStatus(faultFacetType)); ASSERT(manager().getFaultState() == FaultState::kOk); - manager().acceptTest(HealthCheckStatus(faultFacetType, 1.0, "error")); + manager().acceptTest(HealthCheckStatus(faultFacetType, Severity::kFailure, "error")); ASSERT(manager().getFaultState() == FaultState::kTransientFault); advanceTime(Seconds(kActiveFaultDurationSecs)); @@ -251,9 +254,9 @@ DEATH_TEST_F(FaultManagerTest, TransitionsToActiveFaultAfterTimeoutFromStartupCh RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; auto faultFacetType = FaultFacetType::kMock1; - registerMockHealthObserver(faultFacetType, [] { return 1.1; }); + registerMockHealthObserver(faultFacetType, [] { return Severity::kFailure; }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); - manager().acceptTest(HealthCheckStatus(faultFacetType, 1.0, "error")); + manager().acceptTest(HealthCheckStatus(faultFacetType, Severity::kFailure, "error")); ASSERT(manager().getFaultState() == FaultState::kStartupCheck); advanceTime(Seconds(kActiveFaultDurationSecs)); @@ -268,9 +271,9 @@ TEST_F(FaultManagerTest, config->setIntensityForType(faultFacetType, HealthObserverIntensityEnum::kNonCritical); resetManager(std::move(config)); - registerMockHealthObserver(faultFacetType, [] { return 1.1; }); + registerMockHealthObserver(faultFacetType, [] { return Severity::kFailure; }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); - manager().acceptTest(HealthCheckStatus(faultFacetType, 1.0, "error")); + manager().acceptTest(HealthCheckStatus(faultFacetType, Severity::kFailure, "error")); ASSERT(manager().getFaultState() == FaultState::kStartupCheck); advanceTime(Seconds(kActiveFaultDurationSecs) * 10); @@ -283,12 +286,12 @@ TEST_F(FaultManagerTest, DoesNotTransitionToActiveFaultIfResolved) { RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; auto faultFacetType = FaultFacetType::kMock1; - registerMockHealthObserver(faultFacetType, [] { return 1.1; }); + registerMockHealthObserver(faultFacetType, [] { return Severity::kFailure; }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); manager().acceptTest(HealthCheckStatus(faultFacetType)); ASSERT(manager().getFaultState() == FaultState::kOk); - manager().acceptTest(HealthCheckStatus(faultFacetType, 1.0, "error")); + manager().acceptTest(HealthCheckStatus(faultFacetType, Severity::kFailure, "error")); ASSERT(manager().getFaultState() == FaultState::kTransientFault); advanceTime(Seconds(kActiveFaultDurationSecs / 2)); @@ -306,13 +309,16 @@ TEST_F(FaultManagerTest, HealthCheckWithOffFacetCreatesNoFault) { config->setIntensityForType(faultFacetType, HealthObserverIntensityEnum::kOff); resetManager(std::move(config)); - registerMockHealthObserver(faultFacetType, [] { return 1.0; }); + registerMockHealthObserver(faultFacetType, [] { return Severity::kFailure; }); // kSystem is enabled. - registerMockHealthObserver(FaultFacetType::kSystem, [] { return 0; }); + registerMockHealthObserver(FaultFacetType::kSystem, [] { return Severity::kOk; }); ASSERT(manager().getFaultState() == FaultState::kStartupCheck); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); + ASSERT(manager().getFaultState() == FaultState::kStartupCheck); + manager().acceptTest(HealthCheckStatus(faultFacetType, Severity::kFailure, "error")); + advanceTime(Milliseconds(100)); assertSoon([this]() { return manager().getFaultState() == FaultState::kOk; }); ASSERT(initialHealthCheckFuture.isReady()); } @@ -324,7 +330,7 @@ TEST_F(FaultManagerTest, AllOffFacetsSkipStartupCheck) { config->setIntensityForType(faultFacetType, HealthObserverIntensityEnum::kOff); resetManager(std::move(config)); - registerMockHealthObserver(faultFacetType, [] { return 0; }); + registerMockHealthObserver(faultFacetType, [] { return Severity::kOk; }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); @@ -339,7 +345,7 @@ TEST_F(FaultManagerTest, HealthCheckWithOffFacetCreatesNoFaultInOk) { auto configPtr = config.get(); resetManager(std::move(config)); - registerMockHealthObserver(faultFacetType, [] { return 0; }); + registerMockHealthObserver(faultFacetType, [] { return Severity::kOk; }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); @@ -352,7 +358,7 @@ TEST_F(FaultManagerTest, HealthCheckWithOffFacetCreatesNoFaultInOk) { ASSERT(initialHealthCheckFuture.isReady()); configPtr->setIntensityForType(faultFacetType, HealthObserverIntensityEnum::kOff); - manager().acceptTest(HealthCheckStatus(faultFacetType, 1.0, "error")); + manager().acceptTest(HealthCheckStatus(faultFacetType, Severity::kFailure, "error")); ASSERT(manager().getFaultState() == FaultState::kOk); } diff --git a/src/mongo/db/process_health/fault_test.cpp b/src/mongo/db/process_health/fault_test.cpp index 9f8d1fc6d70..c894ec6af64 100644 --- a/src/mongo/db/process_health/fault_test.cpp +++ b/src/mongo/db/process_health/fault_test.cpp @@ -68,13 +68,12 @@ TEST_F(FaultTest, TimeSourceWorks) { } TEST_F(FaultTest, SeverityLevelHelpersWork) { - FaultFacetMock resolvedFacet(FaultFacetType::kMock1, &clockSource(), [] { return 0; }); + FaultFacetMock resolvedFacet( + FaultFacetType::kMock1, &clockSource(), [] { return Severity::kOk; }); ASSERT_TRUE(HealthCheckStatus::isResolved(resolvedFacet.getStatus().getSeverity())); - FaultFacetMock transientFacet(FaultFacetType::kMock1, &clockSource(), [] { return 0.99; }); - ASSERT_TRUE(HealthCheckStatus::isTransientFault(transientFacet.getStatus().getSeverity())); - - FaultFacetMock faultyFacet(FaultFacetType::kMock1, &clockSource(), [] { return 1.0; }); + FaultFacetMock faultyFacet( + FaultFacetType::kMock1, &clockSource(), [] { return Severity::kFailure; }); ASSERT_TRUE(HealthCheckStatus::isActiveFault(faultyFacet.getStatus().getSeverity())); } @@ -82,8 +81,8 @@ TEST_F(FaultTest, FindFacetByType) { ASSERT_EQ(0, fault().getFacets().size()); ASSERT_FALSE(fault().getFaultFacet(FaultFacetType::kMock1)); - FaultFacetPtr newFacet = - std::make_shared(FaultFacetType::kMock1, &clockSource(), [] { return 0; }); + FaultFacetPtr newFacet = std::make_shared( + FaultFacetType::kMock1, &clockSource(), [] { return Severity::kOk; }); fault().upsertFacet(newFacet); auto facet = fault().getFaultFacet(FaultFacetType::kMock1); ASSERT_TRUE(facet); @@ -92,11 +91,11 @@ TEST_F(FaultTest, FindFacetByType) { } TEST_F(FaultTest, CanCreateAndGarbageCollectFacets) { - AtomicDouble severity{0.1}; + auto severity = synchronized_value(Severity::kFailure); ASSERT_EQ(0, fault().getFacets().size()); FaultFacetPtr newFacet = std::make_shared( - FaultFacetType::kMock1, &clockSource(), [&severity] { return severity.load(); }); + FaultFacetType::kMock1, &clockSource(), [&severity] { return *severity; }); fault().upsertFacet(newFacet); // New facet was added successfully. ASSERT_EQ(1, fault().getFacets().size()); @@ -105,7 +104,7 @@ TEST_F(FaultTest, CanCreateAndGarbageCollectFacets) { fault().garbageCollectResolvedFacets(); ASSERT_EQ(1, fault().getFacets().size()); - severity.store(0); + *severity = Severity::kOk; fault().garbageCollectResolvedFacets(); ASSERT_EQ(0, fault().getFacets().size()); } diff --git a/src/mongo/db/process_health/health_check_status.h b/src/mongo/db/process_health/health_check_status.h index 5e5e26ff97d..8b69c08c947 100644 --- a/src/mongo/db/process_health/health_check_status.h +++ b/src/mongo/db/process_health/health_check_status.h @@ -38,30 +38,30 @@ namespace mongo { namespace process_health { +enum class Severity { kOk, kFailure }; +static const StringData SeverityStrings[] = {"kOk", "kFailure"}; +inline StringBuilder& operator<<(StringBuilder& s, const Severity& sev) { + return s << SeverityStrings[static_cast(sev)]; +} + +inline std::ostream& operator<<(std::ostream& s, const Severity& sev) { + return s << SeverityStrings[static_cast(sev)]; +} /** * Immutable class representing current status of an ongoing fault tracked by facet. */ class HealthCheckStatus { public: - static constexpr double kResolvedSeverity = 0; - // The range for active fault is inclusive: [ 1, Inf ). - static constexpr double kActiveFaultSeverity = 1.0; - // We chose to subtract a small 'epsilon' value from 1.0 to - // avoid rounding problems and be sure that severity of 1.0 is guaranteed to be an active fault. - static constexpr double kActiveFaultSeverityEpsilon = 0.000001; - - using Severity = double; - HealthCheckStatus(FaultFacetType type, Severity severity, StringData description) : _type(type), _severity(severity), _description(description) {} // Constructs a resolved status (no fault detected). explicit HealthCheckStatus(FaultFacetType type) - : _type(type), _severity(0), _description("resolved"_sd) {} + : _type(type), _severity(Severity::kOk), _description("resolved"_sd) {} explicit HealthCheckStatus(HealthObserverTypeEnum type) - : _type(toFaultFacetType(type)), _severity(0), _description("resolved"_sd) {} + : _type(toFaultFacetType(type)), _severity(Severity::kOk), _description("resolved"_sd) {} HealthCheckStatus(const HealthCheckStatus&) = default; HealthCheckStatus& operator=(const HealthCheckStatus&) = default; @@ -96,17 +96,12 @@ public: // Helpers for severity levels. - static bool isResolved(double severity) { - return severity <= kResolvedSeverity; - } - - static bool isTransientFault(double severity) { - return severity > kResolvedSeverity && severity < kActiveFaultSeverity; + static bool isResolved(Severity severity) { + return severity == Severity::kOk; } - static bool isActiveFault(double severity) { - // Range is inclusive. - return severity >= 1.0; + static bool isActiveFault(Severity severity) { + return severity == Severity::kFailure; } bool isActiveFault() const { @@ -119,7 +114,7 @@ private: FaultFacetType _type; - double _severity; + Severity _severity; std::string _description; }; diff --git a/src/mongo/db/process_health/health_observer_base.cpp b/src/mongo/db/process_health/health_observer_base.cpp index 331fae535c4..a253dfa4561 100644 --- a/src/mongo/db/process_health/health_observer_base.cpp +++ b/src/mongo/db/process_health/health_observer_base.cpp @@ -86,11 +86,11 @@ HealthCheckStatus HealthObserverBase::makeHealthyStatus() const { return HealthCheckStatus(getType()); } -HealthCheckStatus HealthObserverBase::makeSimpleFailedStatus(double severity, +HealthCheckStatus HealthObserverBase::makeSimpleFailedStatus(Severity severity, std::vector&& failures) const { - if (severity <= 0) { + if (severity == Severity::kOk) { LOGV2_WARNING(6007903, - "Creating faulty health check status requires positive severity", + "Creating faulty health check status requires non-ok severity", "observerType"_attr = getType()); } StringBuilder sb; diff --git a/src/mongo/db/process_health/health_observer_base.h b/src/mongo/db/process_health/health_observer_base.h index 2ed68374ac6..3985efce0ff 100644 --- a/src/mongo/db/process_health/health_observer_base.h +++ b/src/mongo/db/process_health/health_observer_base.h @@ -65,7 +65,8 @@ public: std::shared_ptr taskExecutor, CancellationToken token) override; HealthCheckStatus makeHealthyStatus() const; - HealthCheckStatus makeSimpleFailedStatus(double severity, std::vector&& failures) const; + HealthCheckStatus makeSimpleFailedStatus(Severity severity, + std::vector&& failures) const; HealthObserverLivenessStats getStats() const override; Milliseconds healthCheckJitter() const override; diff --git a/src/mongo/db/process_health/health_observer_mock.h b/src/mongo/db/process_health/health_observer_mock.h index 1929d0fde79..5ab2e65d7f1 100644 --- a/src/mongo/db/process_health/health_observer_mock.h +++ b/src/mongo/db/process_health/health_observer_mock.h @@ -44,7 +44,7 @@ class HealthObserverMock : public HealthObserverBase { public: HealthObserverMock(FaultFacetType mockType, ServiceContext* svcCtx, - std::function getSeverityCallback, + std::function getSeverityCallback, Milliseconds observerTimeout) : HealthObserverBase(svcCtx), _mockType(mockType), @@ -91,7 +91,7 @@ protected: private: const FaultFacetType _mockType; - std::function _getSeverityCallback; + std::function _getSeverityCallback; const Milliseconds _observerTimeout; }; diff --git a/src/mongo/db/process_health/health_observer_test.cpp b/src/mongo/db/process_health/health_observer_test.cpp index 8e0808d4b57..6937f5a70c1 100644 --- a/src/mongo/db/process_health/health_observer_test.cpp +++ b/src/mongo/db/process_health/health_observer_test.cpp @@ -49,7 +49,7 @@ namespace { // are not linked with this test, otherwise the count of observers returned // by the instantiate method below will be greater than expected. TEST_F(FaultManagerTest, Registration) { - registerMockHealthObserver(FaultFacetType::kMock1, [] { return 0; }); + registerMockHealthObserver(FaultFacetType::kMock1, [] { return Severity::kOk; }); auto allObservers = HealthObserverRegistration::instantiateAllObservers(svcCtx()); ASSERT_EQ(1, allObservers.size()); ASSERT_EQ(FaultFacetType::kMock1, allObservers[0]->getType()); @@ -59,7 +59,7 @@ TEST_F(FaultManagerTest, InitialHealthCheckDoesNotRunIfFeatureFlagNotEnabled) { resetManager(); RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", false}; - registerMockHealthObserver(FaultFacetType::kMock1, [] { return 0.0; }); + registerMockHealthObserver(FaultFacetType::kMock1, [] { return Severity::kOk; }); static_cast(manager().schedulePeriodicHealthCheckThreadTest()); auto currentFault = manager().currentFault(); @@ -70,7 +70,7 @@ TEST_F(FaultManagerTest, InitialHealthCheckDoesNotRunIfFeatureFlagNotEnabled) { TEST_F(FaultManagerTest, Stats) { RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; auto faultFacetType = FaultFacetType::kMock1; - registerMockHealthObserver(faultFacetType, [] { return 0.1; }); + registerMockHealthObserver(faultFacetType, [] { return Severity::kFailure; }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); auto observer = manager().getHealthObserversTest()[0]; @@ -110,7 +110,7 @@ TEST_F(FaultManagerTest, ProgressMonitorCheck) { while (shouldBlock.load()) { sleepFor(Milliseconds(1)); } - return 0.1; + return Severity::kFailure; }); // Health check should get stuck here. @@ -144,7 +144,7 @@ TEST_F(FaultManagerTest, HealthCheckRunsPeriodically) { << "interval" << 1)))}; RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; auto faultFacetType = FaultFacetType::kMock1; - int severity = 0; + auto severity = Severity::kOk; registerMockHealthObserver(faultFacetType, [&severity] { return severity; }); assertSoon([this] { return (manager().getFaultState() == FaultState::kStartupCheck); }); @@ -152,7 +152,7 @@ TEST_F(FaultManagerTest, HealthCheckRunsPeriodically) { auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); assertSoon([this] { return (manager().getFaultState() == FaultState::kOk); }); - severity = 1; + severity = Severity::kFailure; assertSoon([this] { return (manager().getFaultState() == FaultState::kTransientFault); }); resetManager(); // Before fields above go out of scope. } @@ -164,7 +164,7 @@ TEST_F(FaultManagerTest, PeriodicHealthCheckOnErrorMakesBadHealthStatus) { registerMockHealthObserver(faultFacetType, [] { uassert(ErrorCodes::InternalError, "test exception", false); - return 0.1; + return Severity::kFailure; }); ASSERT_TRUE(manager().getFaultState() == FaultState::kStartupCheck); @@ -192,7 +192,7 @@ TEST_F(FaultManagerTest, while (shouldBlock.load()) { sleepFor(Milliseconds(1)); } - return 0.0; + return Severity::kOk; }, Milliseconds(100)); diff --git a/src/mongo/db/process_health/test_health_observer.cpp b/src/mongo/db/process_health/test_health_observer.cpp index ae5747895ea..6ab40e5e799 100644 --- a/src/mongo/db/process_health/test_health_observer.cpp +++ b/src/mongo/db/process_health/test_health_observer.cpp @@ -50,7 +50,8 @@ Future TestHealthObserver::periodicCheckImpl( auto code = data["code"].checkAndGetStringData(); auto msg = data["msg"].checkAndGetStringData(); result = Future::makeReady(makeSimpleFailedStatus( - 1.0, {Status(ErrorCodes::fromString(code.toString()), msg.toString())})); + Severity::kFailure, + {Status(ErrorCodes::fromString(code.toString()), msg.toString())})); }, [&](const BSONObj& data) { return !data.isEmpty(); }); -- cgit v1.2.1