diff options
-rw-r--r-- | jstests/sharding/health_monitor/config_fault.js | 46 | ||||
-rw-r--r-- | jstests/sharding/health_monitor/parameters.js | 17 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_manager.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/process_health/health_observer.h | 7 | ||||
-rw-r--r-- | src/mongo/db/process_health/health_observer_mock.h | 4 | ||||
-rw-r--r-- | src/mongo/db/process_health/test_health_observer.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/process_health/test_health_observer.h | 2 |
7 files changed, 86 insertions, 8 deletions
diff --git a/jstests/sharding/health_monitor/config_fault.js b/jstests/sharding/health_monitor/config_fault.js new file mode 100644 index 00000000000..219e7183126 --- /dev/null +++ b/jstests/sharding/health_monitor/config_fault.js @@ -0,0 +1,46 @@ +/* + * @tags: [requires_fcv_53] + */ + +(function() { +'use strict'; +const params = { + healthMonitoringIntensities: tojson({ + values: [ + {type: "test", intensity: "critical"}, + ] + }), +}; +const setFailPoint = { + 'failpoint.badConfigTestHealthObserver': "{'mode':'alwaysOn'}" +}; + +let st = new ShardingTest({ + mongos: [ + {setParameter: params}, + {setParameter: Object.assign({}, params, setFailPoint), waitForConnect: false} + ] +}); +assert.commandWorked(st.s0.adminCommand({"ping": 1})); // Ensure s0 is unaffected. +try { + conn = new Mongo("127.0.0.1:" + st.s1.port); + assert(false); +} catch (e) { + assert( + tojson(e).indexOf("network error") >= 0 || tojson(e).indexOf("couldn't connect to server"), + "connection should fail with network error: " + tojson(e)); +} +st.stop({skipValidatingExitCode: true}); + +// Verify that configuration doesn't matter when a health observer is off. +const offParams = { + healthMonitoringIntensities: tojson({ + values: [ + {type: "test", intensity: "off"}, + ] + }) +}; +st = new ShardingTest({mongos: [{setParameter: Object.assign({}, offParams, setFailPoint)}]}); +assert.commandWorked(st.s0.adminCommand({"ping": 1})); +st.stop(); +})(); diff --git a/jstests/sharding/health_monitor/parameters.js b/jstests/sharding/health_monitor/parameters.js index b73eb8f5446..624a6eafdd2 100644 --- a/jstests/sharding/health_monitor/parameters.js +++ b/jstests/sharding/health_monitor/parameters.js @@ -15,8 +15,8 @@ var st = new ShardingTest({ healthMonitoringIntensities: tojson({ values: [ {type: "dns", intensity: "off"}, - {type: "ldap", intensity: "critical"}, - {type: "test", intensity: "off"} + {type: "ldap", intensity: "off"}, + {type: "test", intensity: "critical"} ] }), } @@ -50,11 +50,12 @@ let getIntensity = (result, typeOfObserver) => { }; assert.eq(getIntensity(result, "dns"), "off"); -assert.eq(getIntensity(result, "ldap"), "critical"); +assert.eq(getIntensity(result, "ldap"), "off"); +assert.eq(getIntensity(result, "test"), "critical"); assert.commandWorked(st.s0.adminCommand({ "setParameter": 1, - healthMonitoringIntensities: {values: [{type: "dns", intensity: "critical"}]} + healthMonitoringIntensities: {values: [{type: "test", intensity: "non-critical"}]} })); assert.commandFailed(st.s0.adminCommand({ "setParameter": 1, @@ -65,16 +66,16 @@ assert.commandFailed(st.s0.adminCommand({ healthMonitoringIntensities: {values: [{type: "invalid", intensity: "off"}]} })); -// Tests that ldap param is unchanged after dns was changed. +// Tests that test param is unchanged after dns was changed. result = assert.commandWorked(st.s0.adminCommand({"getParameter": 1, healthMonitoringIntensities: 1})); -assert.eq(getIntensity(result, "dns"), "critical"); -assert.eq(getIntensity(result, "ldap"), "critical"); +assert.eq(getIntensity(result, "dns"), "off"); +assert.eq(getIntensity(result, "test"), "non-critical"); assert.commandWorked(st.s0.adminCommand({ "setParameter": 1, healthMonitoringIntensities: - {values: [{type: "dns", intensity: 'non-critical'}, {type: "ldap", intensity: 'off'}]} + {values: [{type: "dns", intensity: 'non-critical'}, {type: "test", intensity: 'off'}]} })); result = assert.commandWorked(st.s0.adminCommand({"getParameter": 1, healthMonitoringIntensities: 1})); diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp index 1e57fadd1ce..830e7e2834c 100644 --- a/src/mongo/db/process_health/fault_manager.cpp +++ b/src/mongo/db/process_health/fault_manager.cpp @@ -349,6 +349,7 @@ boost::optional<FaultState> FaultManager::handleTransientFault(const OptionalMes } boost::optional<FaultState> FaultManager::handleActiveFault(const OptionalMessageType& message) { + invariant(_fault); LOGV2_FATAL(5936509, "Halting Process due to ongoing fault", "fault"_attr = *_fault); return boost::none; } @@ -418,6 +419,15 @@ void FaultManager::schedulePeriodicHealthCheckThread() { listOfActiveObservers << observer->getType() << " "; auto token = _managerShuttingDownCancellationSource.token(); + if (!observer->isConfigured()) { + // Transition to an active fault if a health observer is not configured properly. + updateWithCheckStatus(HealthCheckStatus( + observer->getType(), + Severity::kFailure, + "Health observer failed to start because it was not configured properly."_sd)); + setState(FaultState::kActiveFault, HealthCheckStatus(observer->getType())); + return; + } healthCheck(observer, token); } LOGV2(5936804, "Health observers started", "detail"_attr = listOfActiveObservers); diff --git a/src/mongo/db/process_health/health_observer.h b/src/mongo/db/process_health/health_observer.h index cdfeeb81f08..a5a48306929 100644 --- a/src/mongo/db/process_health/health_observer.h +++ b/src/mongo/db/process_health/health_observer.h @@ -89,6 +89,13 @@ public: * Timeout value enforced on an individual health check. */ virtual Milliseconds getObserverTimeout() const = 0; + + /** + * Returns false if the health observer is missing some configuration it needs for its health + * checks. In the case of faulty configuration, make sure to log any helpful messages within + * this method. + */ + virtual bool isConfigured() const = 0; }; } // namespace process_health diff --git a/src/mongo/db/process_health/health_observer_mock.h b/src/mongo/db/process_health/health_observer_mock.h index 5ab2e65d7f1..644cfa3c247 100644 --- a/src/mongo/db/process_health/health_observer_mock.h +++ b/src/mongo/db/process_health/health_observer_mock.h @@ -53,6 +53,10 @@ public: virtual ~HealthObserverMock() = default; + bool isConfigured() const override { + return true; + } + protected: FaultFacetType getType() const override { return _mockType; diff --git a/src/mongo/db/process_health/test_health_observer.cpp b/src/mongo/db/process_health/test_health_observer.cpp index 6ab40e5e799..41b2087372c 100644 --- a/src/mongo/db/process_health/test_health_observer.cpp +++ b/src/mongo/db/process_health/test_health_observer.cpp @@ -38,6 +38,7 @@ namespace mongo { namespace process_health { MONGO_FAIL_POINT_DEFINE(hangTestHealthObserver); MONGO_FAIL_POINT_DEFINE(testHealthObserver); +MONGO_FAIL_POINT_DEFINE(badConfigTestHealthObserver); Future<HealthCheckStatus> TestHealthObserver::periodicCheckImpl( PeriodicHealthCheckContext&& periodicCheckContext) { LOGV2_DEBUG(5936801, 2, "Test health observer executing"); @@ -59,6 +60,13 @@ Future<HealthCheckStatus> TestHealthObserver::periodicCheckImpl( return result; } +bool TestHealthObserver::isConfigured() const { + if (badConfigTestHealthObserver.shouldFail()) { + return false; + } + return true; +} + namespace { MONGO_INITIALIZER(TestHealthObserver)(InitializerContext*) { // Failpoints can only be set when test commands are enabled, and so the test health observer diff --git a/src/mongo/db/process_health/test_health_observer.h b/src/mongo/db/process_health/test_health_observer.h index 85de8a65998..f325ac71534 100644 --- a/src/mongo/db/process_health/test_health_observer.h +++ b/src/mongo/db/process_health/test_health_observer.h @@ -51,6 +51,8 @@ protected: Future<HealthCheckStatus> periodicCheckImpl( PeriodicHealthCheckContext&& periodicCheckContext) override; + + bool isConfigured() const override; }; } // namespace process_health } // namespace mongo |