From 7dee5b3d7b240e1161860436a5eeb688ee8a1e6f Mon Sep 17 00:00:00 2001 From: Andrew Shuvalov Date: Wed, 22 Dec 2021 15:58:47 +0000 Subject: SERVER-62174 Refactored health check intervals (cherry picked from commit f0e9cd72975ceb2862de10a1cad5bfb436fc2d67) --- jstests/sharding/health_monitor/parameters.js | 63 +++++++++++++++++----- .../sharding/health_monitor/progress_monitor.js | 2 +- .../db/process_health/fault_manager_config.cpp | 19 +++++++ src/mongo/db/process_health/fault_manager_config.h | 34 ++++++++++-- .../health_monitoring_server_parameters.cpp | 38 +++++++------ .../health_monitoring_server_parameters.idl | 31 +++++------ .../db/process_health/health_observer_test.cpp | 14 +++++ 7 files changed, 153 insertions(+), 48 deletions(-) diff --git a/jstests/sharding/health_monitor/parameters.js b/jstests/sharding/health_monitor/parameters.js index 44af7744ce5..ee2c8c5d519 100644 --- a/jstests/sharding/health_monitor/parameters.js +++ b/jstests/sharding/health_monitor/parameters.js @@ -35,7 +35,8 @@ var st = new ShardingTest({ ] }), progressMonitor: tojson({interval: CUSTOM_INTERVAL, deadline: CUSTOM_DEADLINE}), - healthMonitoringIntervals: tojson({test: CUSTOM_INTERVAL}) + healthMonitoringIntervals: + tojson({values: [{type: "test", interval: CUSTOM_INTERVAL}]}) } } ], @@ -44,17 +45,21 @@ var st = new ShardingTest({ // Intensity parameter let result = st.s0.adminCommand({"getParameter": 1, "healthMonitoringIntensities": 1}); -let getIntensity = (param_value, type) => { +let getIntensity = (result, typeOfObserver) => { let intensities = result.healthMonitoringIntensities.values; - for (var i = 0; i < intensities.length; i++) { - if (intensities[i].type === type) - return intensities[i].intensity; + let foundPair = intensities.find(({type}) => type === typeOfObserver); + if (foundPair) { + return foundPair.intensity; } }; assert.eq(getIntensity(result, "dns"), "off"); assert.eq(getIntensity(result, "ldap"), "critical"); +assert.commandWorked(st.s0.adminCommand({ + "setParameter": 1, + healthMonitoringIntensities: {values: [{type: "dns", intensity: "critical"}]} +})); assert.commandFailed(st.s0.adminCommand({ "setParameter": 1, healthMonitoringIntensities: {values: [{type: "dns", intensity: "INVALID"}]} @@ -64,7 +69,12 @@ assert.commandFailed(st.s0.adminCommand({ healthMonitoringIntensities: {values: [{type: "invalid", intensity: "off"}]} })); -jsTestLog('Test setting 2 intensities'); +// Tests that ldap param is unchanged after dns was changed. +result = + assert.commandWorked(st.s0.adminCommand({"getParameter": 1, healthMonitoringIntensities: 1})); +assert.eq(getIntensity(result, "dns"), "critical"); +assert.eq(getIntensity(result, "ldap"), "critical"); + assert.commandWorked(st.s0.adminCommand({ "setParameter": 1, healthMonitoringIntensities: @@ -77,21 +87,48 @@ assert.eq(getIntensity(result, "dns"), "non-critical"); assert.eq(getIntensity(result, "ldap"), "off"); // Interval parameter +let getInterval = (commandResult, typeOfObserver) => { + let allValues = commandResult.healthMonitoringIntervals.values; + let foundPair = allValues.find(({type}) => type === typeOfObserver); + if (foundPair) { + return foundPair.interval; + } +}; + result = st.s1.adminCommand({"getParameter": 1, "healthMonitoringIntervals": 1}); -assert.eq(result.healthMonitoringIntervals.test, CUSTOM_INTERVAL); +assert.eq(getInterval(result, "test"), CUSTOM_INTERVAL); + +assert.commandWorked(st.s1.adminCommand({ + "setParameter": 1, + healthMonitoringIntervals: {values: [{type: "dns", interval: NumberInt(100)}]} +})); +assert.commandFailed(st.s1.adminCommand({ + "setParameter": 1, + healthMonitoringIntervals: {values: [{type: "dns", interval: NumberInt(0)}]} +})); +assert.commandFailed(st.s1.adminCommand({ + "setParameter": 1, + healthMonitoringIntervals: {values: [{type: "invalid", interval: NumberInt(100)}]} +})); -assert.commandFailed(st.s1.adminCommand({"setParameter": 1, healthMonitoringIntervals: {dns: 0}})); -assert.commandFailed( - st.s1.adminCommand({"setParameter": 1, healthMonitoringIntervals: {invalid: 1000}})); +// Tests that test param is unchanged, dns is set to 100. +result = + assert.commandWorked(st.s1.adminCommand({"getParameter": 1, healthMonitoringIntervals: 1})); +assert.eq(getInterval(result, "test"), CUSTOM_INTERVAL); +assert.eq(getInterval(result, "dns"), 100); assert.commandWorked(st.s1.adminCommand({ "setParameter": 1, - healthMonitoringIntervals: {dns: NumberInt(2000), ldap: NumberInt(600000)} + healthMonitoringIntervals: { + values: + [{type: "dns", interval: NumberInt(2000)}, {type: "ldap", interval: NumberInt(600000)}] + } })); + result = assert.commandWorked(st.s1.adminCommand({"getParameter": 1, healthMonitoringIntervals: 1})); -assert.eq(result.healthMonitoringIntervals.dns, 2000); -assert.eq(result.healthMonitoringIntervals.ldap, 600000); +assert.eq(getInterval(result, "dns"), 2000); +assert.eq(getInterval(result, "ldap"), 600000); // Check that custom liveness values were set properly. result = st.s1.adminCommand({"getParameter": 1, "progressMonitor": 1}); diff --git a/jstests/sharding/health_monitor/progress_monitor.js b/jstests/sharding/health_monitor/progress_monitor.js index 687cf3729d5..4ef70928685 100644 --- a/jstests/sharding/health_monitor/progress_monitor.js +++ b/jstests/sharding/health_monitor/progress_monitor.js @@ -15,7 +15,7 @@ const params = { {type: "dns", intensity: "off"} ] }), - healthMonitoringIntervals: tojson({test: 500}), + healthMonitoringIntervals: tojson({values: [{type: "test", interval: NumberInt(500)}]}), progressMonitor: tojson({interval: PROGRESS_TIMEOUT_SECONDS, deadline: PROGRESS_TIMEOUT_SECONDS}), featureFlagHealthMonitoring: true diff --git a/src/mongo/db/process_health/fault_manager_config.cpp b/src/mongo/db/process_health/fault_manager_config.cpp index c4a7442c67c..a071c063eea 100644 --- a/src/mongo/db/process_health/fault_manager_config.cpp +++ b/src/mongo/db/process_health/fault_manager_config.cpp @@ -36,6 +36,25 @@ namespace mongo { namespace process_health { +namespace { +constexpr auto inline kDefaultObserverInterval = Milliseconds{10000}; +constexpr auto inline kDefaultLdapObserverInterval = Milliseconds{30000}; +constexpr auto inline kDefaultTestObserverInterval = Milliseconds{1000}; +} // namespace + +Milliseconds FaultManagerConfig::_getDefaultObserverInterval(FaultFacetType type) { + switch (type) { + case FaultFacetType::kLdap: + return kDefaultLdapObserverInterval; + case FaultFacetType::kMock1: + case FaultFacetType::kMock2: + case FaultFacetType::kTestObserver: + return kDefaultTestObserverInterval; + default: + return kDefaultObserverInterval; + } +} + StringBuilder& operator<<(StringBuilder& s, const FaultState& state) { switch (state) { case FaultState::kOk: diff --git a/src/mongo/db/process_health/fault_manager_config.h b/src/mongo/db/process_health/fault_manager_config.h index cb92353798f..f6d47bdbf24 100644 --- a/src/mongo/db/process_health/fault_manager_config.h +++ b/src/mongo/db/process_health/fault_manager_config.h @@ -55,11 +55,9 @@ enum class FaultState { kActiveFault }; - StringBuilder& operator<<(StringBuilder& s, const FaultState& state); std::ostream& operator<<(std::ostream& os, const FaultState& state); - /** * Types of health observers available. */ @@ -90,6 +88,20 @@ public: /* Maximum possible jitter added to the time between health checks */ static auto inline constexpr kPeriodicHealthCheckMaxJitter{Milliseconds{100}}; + static constexpr auto toObserverType = + [](FaultFacetType type) -> boost::optional { + switch (type) { + case FaultFacetType::kLdap: + return HealthObserverTypeEnum::kLdap; + case FaultFacetType::kDns: + return HealthObserverTypeEnum::kDns; + case FaultFacetType::kTestObserver: + return HealthObserverTypeEnum::kTest; + default: + return boost::none; + } + }; + HealthObserverIntensityEnum getHealthObserverIntensity(FaultFacetType type) const { auto intensities = _getHealthObserverIntensities(); @@ -147,7 +159,21 @@ public: Milliseconds getPeriodicHealthCheckInterval(FaultFacetType type) const { auto intervals = _getHealthObserverIntervals(); - return Milliseconds(_getPropertyByType(type, &intervals->_data, 1000)); + // TODO(SERVER-62125): replace with unified type from IDL. + const auto convertedType = toObserverType(type); + if (convertedType) { + const auto values = intervals->_data->getValues(); + if (values) { + const auto intervalIt = + std::find_if(values->begin(), values->end(), [&](const auto& v) { + return v.getType() == *convertedType; + }); + if (intervalIt != values->end()) { + return Milliseconds(intervalIt->getInterval()); + } + } + } + return _getDefaultObserverInterval(type); } Milliseconds getPeriodicLivenessCheckInterval() const { @@ -185,6 +211,8 @@ private: "progressMonitor"); } + static Milliseconds _getDefaultObserverInterval(FaultFacetType type); + template R _getPropertyByType(FaultFacetType type, synchronized_value* data, R defaultValue) const { // TODO: update this function with additional fault facets when they are added diff --git a/src/mongo/db/process_health/health_monitoring_server_parameters.cpp b/src/mongo/db/process_health/health_monitoring_server_parameters.cpp index 8258d2da0e9..9007e57b464 100644 --- a/src/mongo/db/process_health/health_monitoring_server_parameters.cpp +++ b/src/mongo/db/process_health/health_monitoring_server_parameters.cpp @@ -37,16 +37,16 @@ namespace mongo { namespace { -// Replaces values in oldIntensities with values in newIntensities while preserving all values in -// oldIntensities not in newIntensities. -HealthObserverIntensities mergeIntensities(const HealthObserverIntensities& oldIntensities, - const HealthObserverIntensities& newIntensities) { +// Replaces values in oldIntensities/Intervals with values in newIntensities/Intervals while +// preserving all values present in old- that are not present in new-. +template +ConfigValues mergeConfigValues(const ConfigValues& oldValues, const ConfigValues& newValues) { using namespace std; - HealthObserverIntensities result = oldIntensities; + ConfigValues result = oldValues; auto optionalOldValues = result.getValues(); - auto optionalNewValues = newIntensities.getValues(); + auto optionalNewValues = newValues.getValues(); if (!optionalNewValues) { - return oldIntensities; + return oldValues; } if (!optionalOldValues) { result.setValues(*optionalNewValues); @@ -55,7 +55,7 @@ HealthObserverIntensities mergeIntensities(const HealthObserverIntensities& oldI for (const auto& setting : *optionalNewValues) { auto it = find_if(begin(*optionalOldValues), end(*optionalOldValues), - [&setting](const HealthObserverIntensitySetting& destSetting) { + [&setting](const auto& destSetting) { return (destSetting.getType() == setting.getType()) ? true : false; }); if (it != optionalOldValues->end()) { @@ -70,20 +70,20 @@ HealthObserverIntensities mergeIntensities(const HealthObserverIntensities& oldI } // namespace Status HealthMonitoringIntensitiesServerParameter::setFromString(const std::string& value) { - auto oldValue = **_data; + const auto oldValue = **_data; auto newValue = HealthObserverIntensities::parse( IDLParserErrorContext("health monitoring intensities"), fromjson(value)); - newValue = mergeIntensities(oldValue, newValue); + newValue = mergeConfigValues(oldValue, newValue); process_health::FaultManager::healthMonitoringIntensitiesUpdated(oldValue, newValue); **_data = newValue; return Status::OK(); } Status HealthMonitoringIntensitiesServerParameter::set(const BSONElement& newValueElement) { - auto oldValue = **_data; + const auto oldValue = **_data; auto newValue = HealthObserverIntensities::parse( IDLParserErrorContext("health monitoring intensities"), newValueElement.Obj()); - newValue = mergeIntensities(oldValue, newValue); + newValue = mergeConfigValues(oldValue, newValue); process_health::FaultManager::healthMonitoringIntensitiesUpdated(oldValue, newValue); **_data = newValue; return Status::OK(); @@ -118,14 +118,20 @@ void HealthMonitoringProgressMonitorServerParameter::append(OperationContext*, } Status PeriodicHealthCheckIntervalsServerParameter::setFromString(const std::string& value) { - *_data = HealthObserverIntervals::parse(IDLParserErrorContext("health monitoring liveness"), - fromjson(value)); + const auto oldValue = **_data; + auto newValue = HealthObserverIntervals::parse( + IDLParserErrorContext("health monitoring liveness"), fromjson(value)); + newValue = mergeConfigValues(oldValue, newValue); + **_data = newValue; return Status::OK(); } Status PeriodicHealthCheckIntervalsServerParameter::set(const BSONElement& newValueElement) { - *_data = HealthObserverIntervals::parse(IDLParserErrorContext("health monitoring liveness"), - newValueElement.Obj()); + const auto oldValue = **_data; + auto newValue = HealthObserverIntervals::parse( + IDLParserErrorContext("health monitoring liveness"), newValueElement.Obj()); + newValue = mergeConfigValues(oldValue, newValue); + **_data = newValue; return Status::OK(); } diff --git a/src/mongo/db/process_health/health_monitoring_server_parameters.idl b/src/mongo/db/process_health/health_monitoring_server_parameters.idl index c6b44e56701..33120033e35 100644 --- a/src/mongo/db/process_health/health_monitoring_server_parameters.idl +++ b/src/mongo/db/process_health/health_monitoring_server_parameters.idl @@ -72,25 +72,26 @@ structs: type: array optional: true - HealthObserverIntervals: - description: "A struct representing the interval in milliseconds for each health observer." + HealthObserverIntervalSetting: + description: "One health observer check interval setting, in milliseconds" strict: true fields: - dns: - description: "DNS health check interval." - type: int - default: 1000 - validator: { gt: 0 } - ldap: - description: "LDAP health check interval." - type: int - default: 10000 - validator: { gt: 0 } - test: - description: "Test health observer health check interval." + type: + type: HealthObserverType + optional: false + interval: type: int - default: 10 + optional: false validator: { gt: 0 } + + HealthObserverIntervals: + description: "A struct representing the interval in milliseconds for each health observer." + strict: true + fields: + values: + description: "Array of health observer intervals settings" + type: array + optional: true HealthObserverProgressMonitorConfig: description: "A struct representing configuration for health observer liveness checks." diff --git a/src/mongo/db/process_health/health_observer_test.cpp b/src/mongo/db/process_health/health_observer_test.cpp index afe75804e38..93ad0e90eb7 100644 --- a/src/mongo/db/process_health/health_observer_test.cpp +++ b/src/mongo/db/process_health/health_observer_test.cpp @@ -139,6 +139,13 @@ TEST_F(FaultManagerTest, ProgressMonitorCheck) { TEST_F(FaultManagerTest, HealthCheckRunsPeriodically) { resetManager(std::make_unique()); feature_flags::gFeatureFlagHealthMonitoring = true; + ASSERT_OK(ServerParameterSet::getGlobal() + ->getMap() + .find("healthMonitoringIntervals") + ->second->setFromString(BSON("values" << BSON_ARRAY(BSON("type" + << "test" + << "interval" << 1))) + .toString())); auto faultFacetType = FaultFacetType::kMock1; int severity = 0; registerMockHealthObserver(faultFacetType, [&severity] { return severity; }); @@ -177,6 +184,13 @@ TEST_F(FaultManagerTest, feature_flags::gFeatureFlagHealthMonitoring = true; const auto saveActiveFaultDuration = gActiveFaultDurationSecs.load(); gActiveFaultDurationSecs.store(5); + ASSERT_OK(ServerParameterSet::getGlobal() + ->getMap() + .find("healthMonitoringIntervals") + ->second->setFromString(BSON("values" << BSON_ARRAY(BSON("type" + << "test" + << "interval" << 1))) + .toString())); AtomicWord shouldBlock{true}; registerMockHealthObserver(FaultFacetType::kMock1, -- cgit v1.2.1