diff options
author | LaMont Nelson <lamont.nelson@mongodb.com> | 2022-02-07 17:00:28 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-02-07 21:05:54 +0000 |
commit | c7fa719098c99bd0b94c478e5c74842ba11e92b5 (patch) | |
tree | 9d3e44329e7ebc6ffae4e581fcefcf5d66ad2ace /src/mongo/db | |
parent | 522da292b8e89d6288916f744f318cb1f32ac8a8 (diff) | |
download | mongo-c7fa719098c99bd0b94c478e5c74842ba11e92b5.tar.gz |
SERVER-63319: Refactor FaultManager Stats test to allow health check thread to run in background
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/process_health/health_observer.h | 10 | ||||
-rw-r--r-- | src/mongo/db/process_health/health_observer_test.cpp | 58 |
2 files changed, 44 insertions, 24 deletions
diff --git a/src/mongo/db/process_health/health_observer.h b/src/mongo/db/process_health/health_observer.h index a5a48306929..f85ff1c91f1 100644 --- a/src/mongo/db/process_health/health_observer.h +++ b/src/mongo/db/process_health/health_observer.h @@ -52,6 +52,16 @@ struct HealthObserverLivenessStats { // Incremented when check completed with fault. // This doesn't take into account critical vs non-critical. int completedChecksWithFaultCount = 0; + + BSONObj toBSON() const { + BSONObjBuilder builder; + builder.append("currentlyRunningHealthCheck", currentlyRunningHealthCheck); + builder.append("lastTimeCheckStarted", lastTimeCheckStarted); + builder.append("lastTimeCheckCompleted", lastTimeCheckCompleted); + builder.append("completedChecksCount", completedChecksCount); + builder.append("completedChecksWithFaultCount", completedChecksWithFaultCount); + return builder.obj(); + } }; /** diff --git a/src/mongo/db/process_health/health_observer_test.cpp b/src/mongo/db/process_health/health_observer_test.cpp index 5725fb5b8c0..3cfe1dbe608 100644 --- a/src/mongo/db/process_health/health_observer_test.cpp +++ b/src/mongo/db/process_health/health_observer_test.cpp @@ -69,39 +69,49 @@ TEST_F(FaultManagerTest, InitialHealthCheckDoesNotRunIfFeatureFlagNotEnabled) { TEST_F(FaultManagerTest, Stats) { RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true}; + resetManager(std::make_unique<FaultManagerConfig>()); auto faultFacetType = FaultFacetType::kMock1; - registerMockHealthObserver(faultFacetType, [] { return Severity::kFailure; }); + AtomicWord<Severity> mockResult(Severity::kFailure); + registerMockHealthObserver(faultFacetType, [&mockResult] { return mockResult.load(); }); auto initialHealthCheckFuture = manager().startPeriodicHealthChecks(); auto observer = manager().getHealthObserversTest()[0]; - manager().healthCheckTest(observer, CancellationToken::uncancelable()); - assertSoon([this] { return static_cast<bool>(manager().currentFault()); }); - assertSoon([&observer] { return !observer->getStats().currentlyRunningHealthCheck; }); + // Initial checks should fail; There must have been at least 1 to generate the fault. + assertSoon([this, &observer] { + return hasFault() && !observer->getStats().currentlyRunningHealthCheck; + }); + + // Make sure we are still in startup check state. + waitForTransitionIntoState(FaultState::kStartupCheck); - auto stats = observer->getStats(); + auto initialStats = observer->getStats(); + LOGV2_DEBUG(6331901, 0, "stats after detecting fault", "stats"_attr = initialStats); ASSERT_TRUE(manager().getConfig().isHealthObserverEnabled(observer->getType())); - ASSERT_EQ(stats.lastTimeCheckStarted, clockSource().now()); - ASSERT_EQ(stats.lastTimeCheckCompleted, stats.lastTimeCheckStarted); - ASSERT_GTE(stats.completedChecksCount, 1); - ASSERT_GTE(stats.completedChecksWithFaultCount, 1); + ASSERT_EQ(initialStats.lastTimeCheckStarted, clockSource().now()); + ASSERT_EQ(initialStats.lastTimeCheckCompleted, initialStats.lastTimeCheckStarted); + ASSERT_GTE(initialStats.completedChecksCount, 1); + ASSERT_GTE(initialStats.completedChecksWithFaultCount, 1); // To complete initial health check. - manager().acceptTest(HealthCheckStatus(faultFacetType)); - - advanceTime(Milliseconds(200)); - auto prevStats = stats; - do { - manager().healthCheckTest(observer, CancellationToken::uncancelable()); - sleepmillis(1); - observer = manager().getHealthObserversTest()[0]; - stats = observer->getStats(); - } while (stats.completedChecksCount <= prevStats.completedChecksCount); - - ASSERT_GT(stats.lastTimeCheckStarted, prevStats.lastTimeCheckStarted); - ASSERT_GT(stats.lastTimeCheckCompleted, prevStats.lastTimeCheckCompleted); - ASSERT_GTE(stats.completedChecksCount, 2); - ASSERT_GTE(stats.completedChecksWithFaultCount, 2); + mockResult.store(Severity::kOk); + + waitForTransitionIntoState(FaultState::kOk); + auto okStats = observer->getStats(); + LOGV2_DEBUG(6331902, 0, "stats after ok state", "stats"_attr = okStats); + advanceTime(Milliseconds(100)); + + assertSoon([observer, okStats]() { + auto stats = observer->getStats(); + return stats.completedChecksCount > okStats.completedChecksCount; + }); + + auto finalStats = observer->getStats(); + LOGV2_DEBUG(6331903, 0, "stats after final state", "stats"_attr = finalStats); + ASSERT_GT(finalStats.lastTimeCheckStarted, okStats.lastTimeCheckStarted); + ASSERT_GT(finalStats.lastTimeCheckCompleted, okStats.lastTimeCheckCompleted); + ASSERT_GTE(finalStats.completedChecksCount, okStats.completedChecksCount); + ASSERT_GTE(finalStats.completedChecksWithFaultCount, okStats.completedChecksWithFaultCount); } TEST_F(FaultManagerTest, ProgressMonitorCheck) { |