diff options
author | Davis Haupt <davis.haupt@mongodb.com> | 2021-12-15 21:26:52 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-12-28 23:18:25 +0000 |
commit | c53285719d4b039f0e973cb120c2e90874c303b8 (patch) | |
tree | 07f2b412edfaf068211e51307843e30818abe33b | |
parent | 86de78310d82cd0bafc6dbd5fcdbacfed6223c8d (diff) | |
download | mongo-c53285719d4b039f0e973cb120c2e90874c303b8.tar.gz |
SERVER-61220 Integration test for progress monitor
-rw-r--r-- | jstests/sharding/health_monitor/progress_monitor.js | 50 | ||||
-rw-r--r-- | src/mongo/db/process_health/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_manager_config.h | 2 | ||||
-rw-r--r-- | src/mongo/db/process_health/test_health_observer.cpp | 9 |
4 files changed, 59 insertions, 3 deletions
diff --git a/jstests/sharding/health_monitor/progress_monitor.js b/jstests/sharding/health_monitor/progress_monitor.js new file mode 100644 index 00000000000..fc243da4401 --- /dev/null +++ b/jstests/sharding/health_monitor/progress_monitor.js @@ -0,0 +1,50 @@ +const PROGRESS_TIMEOUT_SECONDS = 5; +const CHECK_PING_SECONDS = 1; +(function() { +'use strict'; + +const params = { + setParameter: { + healthMonitoringIntensities: tojson({test: "non-critical", ldap: "off", dns: "off"}), + healthMonitoringIntervals: tojson({test: 500}), + progressMonitor: tojson({deadline: PROGRESS_TIMEOUT_SECONDS}), + featureFlagHealthMonitoring: true + } +}; +let st = new ShardingTest({ + mongos: [params, params], + shards: 1, +}); +// After cluster startup, make sure both mongos's are available. +assert.commandWorked(st.s0.adminCommand({"ping": 1})); +assert.commandWorked(st.s1.adminCommand({"ping": 1})); + +// Set the failpoint on one of the mongos's to pause its healthchecks. +assert.commandWorked( + st.s1.adminCommand({"configureFailPoint": 'hangTestHealthObserver', "mode": "alwaysOn"})); +sleep(CHECK_PING_SECONDS * 1000); +// Make sure the failpoint on its own doesn't bring down the server. +assert.commandWorked(st.s1.adminCommand({"ping": 1})); +// Wait for the progress monitor timeout to elapse. +sleep(PROGRESS_TIMEOUT_SECONDS * 1000); + +assert.soon(() => { + try { + assert.commandWorked(st.s0.adminCommand({"ping": 1})); // Ensure s0 is unaffected. + st.s1.adminCommand( + {"ping": 1}); // This should throw an error because s1 is no longer reachable. + assert(false, "ping command to s1 should fail."); + } catch (e) { + // This might seem brittle to rely on the string message for the error, but the same check + // appears in the implementation for runCommand(). + if (e.message.indexOf("network error") >= 0) { + return true; + } else { + throw (e); + } + } +}, "Pinging faulty mongos should fail with network error.", PROGRESS_TIMEOUT_SECONDS * 1000); +// Don't validate exit codes, since a mongos will exit on its own with a non-zero exit code. + +st.stop({skipValidatingExitCode: true}); +})(); diff --git a/src/mongo/db/process_health/SConscript b/src/mongo/db/process_health/SConscript index ce02fc06253..a18e492245a 100644 --- a/src/mongo/db/process_health/SConscript +++ b/src/mongo/db/process_health/SConscript @@ -21,6 +21,7 @@ env.Library( ], LIBDEPS=[ '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/commands/test_commands_enabled', '$BUILD_DIR/mongo/db/service_context', '$BUILD_DIR/mongo/executor/network_interface', '$BUILD_DIR/mongo/executor/network_interface_factory', diff --git a/src/mongo/db/process_health/fault_manager_config.h b/src/mongo/db/process_health/fault_manager_config.h index 1ba938a3d84..18b15a78589 100644 --- a/src/mongo/db/process_health/fault_manager_config.h +++ b/src/mongo/db/process_health/fault_manager_config.h @@ -109,7 +109,7 @@ public: } Seconds getPeriodicLivenessDeadline() const { - return Seconds(_getLivenessConfig()->_data->getInterval()); + return Seconds(_getLivenessConfig()->_data->getDeadline()); } /** @returns true if the periodic checks are disabled for testing purposes. This is diff --git a/src/mongo/db/process_health/test_health_observer.cpp b/src/mongo/db/process_health/test_health_observer.cpp index 2251f9590d0..0c69c43d42c 100644 --- a/src/mongo/db/process_health/test_health_observer.cpp +++ b/src/mongo/db/process_health/test_health_observer.cpp @@ -29,6 +29,7 @@ #include "mongo/base/init.h" #include "mongo/db/process_health/test_health_observer.h" +#include "mongo/db/commands/test_commands_enabled.h" #include "mongo/db/process_health/health_observer_registration.h" #include "mongo/util/fail_point.h" @@ -56,8 +57,12 @@ Future<HealthCheckStatus> TestHealthObserver::periodicCheckImpl( namespace { MONGO_INITIALIZER(TestHealthObserver)(InitializerContext*) { - HealthObserverRegistration::registerObserverFactory( - [](ServiceContext* svcCtx) { return std::make_unique<TestHealthObserver>(svcCtx); }); + // Failpoints can only be set when test commands are enabled, and so the test health observer + // is only useful in that case. + if (getTestCommandsEnabled()) { + HealthObserverRegistration::registerObserverFactory( + [](ServiceContext* svcCtx) { return std::make_unique<TestHealthObserver>(svcCtx); }); + } return Status::OK(); } } // namespace |