summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavis Haupt <davis.haupt@mongodb.com>2021-12-15 21:26:52 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-12-28 23:18:25 +0000
commitc53285719d4b039f0e973cb120c2e90874c303b8 (patch)
tree07f2b412edfaf068211e51307843e30818abe33b
parent86de78310d82cd0bafc6dbd5fcdbacfed6223c8d (diff)
downloadmongo-c53285719d4b039f0e973cb120c2e90874c303b8.tar.gz
SERVER-61220 Integration test for progress monitor
-rw-r--r--jstests/sharding/health_monitor/progress_monitor.js50
-rw-r--r--src/mongo/db/process_health/SConscript1
-rw-r--r--src/mongo/db/process_health/fault_manager_config.h2
-rw-r--r--src/mongo/db/process_health/test_health_observer.cpp9
4 files changed, 59 insertions, 3 deletions
diff --git a/jstests/sharding/health_monitor/progress_monitor.js b/jstests/sharding/health_monitor/progress_monitor.js
new file mode 100644
index 00000000000..fc243da4401
--- /dev/null
+++ b/jstests/sharding/health_monitor/progress_monitor.js
@@ -0,0 +1,50 @@
+const PROGRESS_TIMEOUT_SECONDS = 5;
+const CHECK_PING_SECONDS = 1;
+(function() {
+'use strict';
+
+const params = {
+ setParameter: {
+ healthMonitoringIntensities: tojson({test: "non-critical", ldap: "off", dns: "off"}),
+ healthMonitoringIntervals: tojson({test: 500}),
+ progressMonitor: tojson({deadline: PROGRESS_TIMEOUT_SECONDS}),
+ featureFlagHealthMonitoring: true
+ }
+};
+let st = new ShardingTest({
+ mongos: [params, params],
+ shards: 1,
+});
+// After cluster startup, make sure both mongos's are available.
+assert.commandWorked(st.s0.adminCommand({"ping": 1}));
+assert.commandWorked(st.s1.adminCommand({"ping": 1}));
+
+// Set the failpoint on one of the mongos's to pause its healthchecks.
+assert.commandWorked(
+ st.s1.adminCommand({"configureFailPoint": 'hangTestHealthObserver', "mode": "alwaysOn"}));
+sleep(CHECK_PING_SECONDS * 1000);
+// Make sure the failpoint on its own doesn't bring down the server.
+assert.commandWorked(st.s1.adminCommand({"ping": 1}));
+// Wait for the progress monitor timeout to elapse.
+sleep(PROGRESS_TIMEOUT_SECONDS * 1000);
+
+assert.soon(() => {
+ try {
+ assert.commandWorked(st.s0.adminCommand({"ping": 1})); // Ensure s0 is unaffected.
+ st.s1.adminCommand(
+ {"ping": 1}); // This should throw an error because s1 is no longer reachable.
+ assert(false, "ping command to s1 should fail.");
+ } catch (e) {
+ // This might seem brittle to rely on the string message for the error, but the same check
+ // appears in the implementation for runCommand().
+ if (e.message.indexOf("network error") >= 0) {
+ return true;
+ } else {
+ throw (e);
+ }
+ }
+}, "Pinging faulty mongos should fail with network error.", PROGRESS_TIMEOUT_SECONDS * 1000);
+// Don't validate exit codes, since a mongos will exit on its own with a non-zero exit code.
+
+st.stop({skipValidatingExitCode: true});
+})();
diff --git a/src/mongo/db/process_health/SConscript b/src/mongo/db/process_health/SConscript
index ce02fc06253..a18e492245a 100644
--- a/src/mongo/db/process_health/SConscript
+++ b/src/mongo/db/process_health/SConscript
@@ -21,6 +21,7 @@ env.Library(
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
+ '$BUILD_DIR/mongo/db/commands/test_commands_enabled',
'$BUILD_DIR/mongo/db/service_context',
'$BUILD_DIR/mongo/executor/network_interface',
'$BUILD_DIR/mongo/executor/network_interface_factory',
diff --git a/src/mongo/db/process_health/fault_manager_config.h b/src/mongo/db/process_health/fault_manager_config.h
index 1ba938a3d84..18b15a78589 100644
--- a/src/mongo/db/process_health/fault_manager_config.h
+++ b/src/mongo/db/process_health/fault_manager_config.h
@@ -109,7 +109,7 @@ public:
}
Seconds getPeriodicLivenessDeadline() const {
- return Seconds(_getLivenessConfig()->_data->getInterval());
+ return Seconds(_getLivenessConfig()->_data->getDeadline());
}
/** @returns true if the periodic checks are disabled for testing purposes. This is
diff --git a/src/mongo/db/process_health/test_health_observer.cpp b/src/mongo/db/process_health/test_health_observer.cpp
index 2251f9590d0..0c69c43d42c 100644
--- a/src/mongo/db/process_health/test_health_observer.cpp
+++ b/src/mongo/db/process_health/test_health_observer.cpp
@@ -29,6 +29,7 @@
#include "mongo/base/init.h"
#include "mongo/db/process_health/test_health_observer.h"
+#include "mongo/db/commands/test_commands_enabled.h"
#include "mongo/db/process_health/health_observer_registration.h"
#include "mongo/util/fail_point.h"
@@ -56,8 +57,12 @@ Future<HealthCheckStatus> TestHealthObserver::periodicCheckImpl(
namespace {
MONGO_INITIALIZER(TestHealthObserver)(InitializerContext*) {
- HealthObserverRegistration::registerObserverFactory(
- [](ServiceContext* svcCtx) { return std::make_unique<TestHealthObserver>(svcCtx); });
+ // Failpoints can only be set when test commands are enabled, and so the test health observer
+ // is only useful in that case.
+ if (getTestCommandsEnabled()) {
+ HealthObserverRegistration::registerObserverFactory(
+ [](ServiceContext* svcCtx) { return std::make_unique<TestHealthObserver>(svcCtx); });
+ }
return Status::OK();
}
} // namespace