summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andrew Shuvalov <andrew.shuvalov@mongodb.com> 2021-12-22 15:58:47 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-12-22 16:25:27 +0000
commit: f0e9cd72975ceb2862de10a1cad5bfb436fc2d67 (patch)
tree: 6a471e79e48433c1cd0a87307c74b6160e70e465
parent: 1bb26e23c16f3f43ef477fc91cacabdf962d16f0 (diff)
download: mongo-f0e9cd72975ceb2862de10a1cad5bfb436fc2d67.tar.gz
SERVER-62174 Refactored health check intervals
-rw-r--r-- jstests/sharding/health_monitor/parameters.js 63
-rw-r--r-- jstests/sharding/health_monitor/progress_monitor.js 2
-rw-r--r-- src/mongo/db/process_health/fault_manager_config.cpp 19
-rw-r--r-- src/mongo/db/process_health/fault_manager_config.h 34
-rw-r--r-- src/mongo/db/process_health/health_monitoring_server_parameters.cpp 38
-rw-r--r-- src/mongo/db/process_health/health_monitoring_server_parameters.idl 31
-rw-r--r-- src/mongo/db/process_health/health_observer_test.cpp 10
7 files changed, 149 insertions, 48 deletions
diff --git a/jstests/sharding/health_monitor/parameters.js b/jstests/sharding/health_monitor/parameters.js
index 763d0ecf85e..a375ee89b7e 100644
--- a/jstests/sharding/health_monitor/parameters.js
+++ b/jstests/sharding/health_monitor/parameters.js
@@ -31,7 +31,8 @@ var st = new ShardingTest({
]
}),
progressMonitor: tojson({interval: CUSTOM_INTERVAL, deadline: CUSTOM_DEADLINE}),
- healthMonitoringIntervals: tojson({test: CUSTOM_INTERVAL})
+ healthMonitoringIntervals:
+ tojson({values: [{type: "test", interval: CUSTOM_INTERVAL}]})
}
}
],
@@ -40,17 +41,21 @@ var st = new ShardingTest({
// Intensity parameter
let result = st.s0.adminCommand({"getParameter": 1, "healthMonitoringIntensities": 1});
-let getIntensity = (param_value, type) => {
+let getIntensity = (result, typeOfObserver) => {
let intensities = result.healthMonitoringIntensities.values;
- for (var i = 0; i < intensities.length; i++) {
- if (intensities[i].type === type)
- return intensities[i].intensity;
+ let foundPair = intensities.find(({type}) => type === typeOfObserver);
+ if (foundPair) {
+ return foundPair.intensity;
}
};
assert.eq(getIntensity(result, "dns"), "off");
assert.eq(getIntensity(result, "ldap"), "critical");
+assert.commandWorked(st.s0.adminCommand({
+ "setParameter": 1,
+ healthMonitoringIntensities: {values: [{type: "dns", intensity: "critical"}]}
+}));
assert.commandFailed(st.s0.adminCommand({
"setParameter": 1,
healthMonitoringIntensities: {values: [{type: "dns", intensity: "INVALID"}]}
@@ -60,7 +65,12 @@ assert.commandFailed(st.s0.adminCommand({
healthMonitoringIntensities: {values: [{type: "invalid", intensity: "off"}]}
}));
-jsTestLog('Test setting 2 intensities');
+// Tests that ldap param is unchanged after dns was changed.
+result =
+ assert.commandWorked(st.s0.adminCommand({"getParameter": 1, healthMonitoringIntensities: 1}));
+assert.eq(getIntensity(result, "dns"), "critical");
+assert.eq(getIntensity(result, "ldap"), "critical");
+
assert.commandWorked(st.s0.adminCommand({
"setParameter": 1,
healthMonitoringIntensities:
@@ -73,21 +83,48 @@ assert.eq(getIntensity(result, "dns"), "non-critical");
assert.eq(getIntensity(result, "ldap"), "off");
// Interval parameter
+let getInterval = (commandResult, typeOfObserver) => {
+ let allValues = commandResult.healthMonitoringIntervals.values;
+ let foundPair = allValues.find(({type}) => type === typeOfObserver);
+ if (foundPair) {
+ return foundPair.interval;
+ }
+};
+
result = st.s1.adminCommand({"getParameter": 1, "healthMonitoringIntervals": 1});
-assert.eq(result.healthMonitoringIntervals.test, CUSTOM_INTERVAL);
+assert.eq(getInterval(result, "test"), CUSTOM_INTERVAL);
+
+assert.commandWorked(st.s1.adminCommand({
+ "setParameter": 1,
+ healthMonitoringIntervals: {values: [{type: "dns", interval: NumberInt(100)}]}
+}));
+assert.commandFailed(st.s1.adminCommand({
+ "setParameter": 1,
+ healthMonitoringIntervals: {values: [{type: "dns", interval: NumberInt(0)}]}
+}));
+assert.commandFailed(st.s1.adminCommand({
+ "setParameter": 1,
+ healthMonitoringIntervals: {values: [{type: "invalid", interval: NumberInt(100)}]}
+}));
-assert.commandFailed(st.s1.adminCommand({"setParameter": 1, healthMonitoringIntervals: {dns: 0}}));
-assert.commandFailed(
- st.s1.adminCommand({"setParameter": 1, healthMonitoringIntervals: {invalid: 1000}}));
+// Tests that test param is unchanged, dns is set to 100.
+result =
+ assert.commandWorked(st.s1.adminCommand({"getParameter": 1, healthMonitoringIntervals: 1}));
+assert.eq(getInterval(result, "test"), CUSTOM_INTERVAL);
+assert.eq(getInterval(result, "dns"), 100);
assert.commandWorked(st.s1.adminCommand({
"setParameter": 1,
- healthMonitoringIntervals: {dns: NumberInt(2000), ldap: NumberInt(600000)}
+ healthMonitoringIntervals: {
+ values:
+ [{type: "dns", interval: NumberInt(2000)}, {type: "ldap", interval: NumberInt(600000)}]
+ }
}));
+
result =
assert.commandWorked(st.s1.adminCommand({"getParameter": 1, healthMonitoringIntervals: 1}));
-assert.eq(result.healthMonitoringIntervals.dns, 2000);
-assert.eq(result.healthMonitoringIntervals.ldap, 600000);
+assert.eq(getInterval(result, "dns"), 2000);
+assert.eq(getInterval(result, "ldap"), 600000);
// Check that custom liveness values were set properly.
result = st.s1.adminCommand({"getParameter": 1, "progressMonitor": 1});
diff --git a/jstests/sharding/health_monitor/progress_monitor.js b/jstests/sharding/health_monitor/progress_monitor.js
index 687cf3729d5..4ef70928685 100644
--- a/jstests/sharding/health_monitor/progress_monitor.js
+++ b/jstests/sharding/health_monitor/progress_monitor.js
@@ -15,7 +15,7 @@ const params = {
{type: "dns", intensity: "off"}
]
}),
- healthMonitoringIntervals: tojson({test: 500}),
+ healthMonitoringIntervals: tojson({values: [{type: "test", interval: NumberInt(500)}]}),
progressMonitor:
tojson({interval: PROGRESS_TIMEOUT_SECONDS, deadline: PROGRESS_TIMEOUT_SECONDS}),
featureFlagHealthMonitoring: true
diff --git a/src/mongo/db/process_health/fault_manager_config.cpp b/src/mongo/db/process_health/fault_manager_config.cpp
index c4a7442c67c..a071c063eea 100644
--- a/src/mongo/db/process_health/fault_manager_config.cpp
+++ b/src/mongo/db/process_health/fault_manager_config.cpp
@@ -36,6 +36,25 @@
namespace mongo {
namespace process_health {
+namespace {
+constexpr auto inline kDefaultObserverInterval = Milliseconds{10000};
+constexpr auto inline kDefaultLdapObserverInterval = Milliseconds{30000};
+constexpr auto inline kDefaultTestObserverInterval = Milliseconds{1000};
+} // namespace
+
+Milliseconds FaultManagerConfig::_getDefaultObserverInterval(FaultFacetType type) {
+ switch (type) {
+ case FaultFacetType::kLdap:
+ return kDefaultLdapObserverInterval;
+ case FaultFacetType::kMock1:
+ case FaultFacetType::kMock2:
+ case FaultFacetType::kTestObserver:
+ return kDefaultTestObserverInterval;
+ default:
+ return kDefaultObserverInterval;
+ }
+}
+
StringBuilder& operator<<(StringBuilder& s, const FaultState& state) {
switch (state) {
case FaultState::kOk:
diff --git a/src/mongo/db/process_health/fault_manager_config.h b/src/mongo/db/process_health/fault_manager_config.h
index cb92353798f..f6d47bdbf24 100644
--- a/src/mongo/db/process_health/fault_manager_config.h
+++ b/src/mongo/db/process_health/fault_manager_config.h
@@ -55,11 +55,9 @@ enum class FaultState {
kActiveFault
};
-
StringBuilder& operator<<(StringBuilder& s, const FaultState& state);
std::ostream& operator<<(std::ostream& os, const FaultState& state);
-
/**
* Types of health observers available.
*/
@@ -90,6 +88,20 @@ public:
/* Maximum possible jitter added to the time between health checks */
static auto inline constexpr kPeriodicHealthCheckMaxJitter{Milliseconds{100}};
+ static constexpr auto toObserverType =
+ [](FaultFacetType type) -> boost::optional<HealthObserverTypeEnum> {
+ switch (type) {
+ case FaultFacetType::kLdap:
+ return HealthObserverTypeEnum::kLdap;
+ case FaultFacetType::kDns:
+ return HealthObserverTypeEnum::kDns;
+ case FaultFacetType::kTestObserver:
+ return HealthObserverTypeEnum::kTest;
+ default:
+ return boost::none;
+ }
+ };
+
HealthObserverIntensityEnum getHealthObserverIntensity(FaultFacetType type) const {
auto intensities = _getHealthObserverIntensities();
@@ -147,7 +159,21 @@ public:
Milliseconds getPeriodicHealthCheckInterval(FaultFacetType type) const {
auto intervals = _getHealthObserverIntervals();
- return Milliseconds(_getPropertyByType(type, &intervals->_data, 1000));
+ // TODO(SERVER-62125): replace with unified type from IDL.
+ const auto convertedType = toObserverType(type);
+ if (convertedType) {
+ const auto values = intervals->_data->getValues();
+ if (values) {
+ const auto intervalIt =
+ std::find_if(values->begin(), values->end(), [&](const auto& v) {
+ return v.getType() == *convertedType;
+ });
+ if (intervalIt != values->end()) {
+ return Milliseconds(intervalIt->getInterval());
+ }
+ }
+ }
+ return _getDefaultObserverInterval(type);
}
Milliseconds getPeriodicLivenessCheckInterval() const {
@@ -185,6 +211,8 @@ private:
"progressMonitor");
}
+ static Milliseconds _getDefaultObserverInterval(FaultFacetType type);
+
template <typename T, typename R>
R _getPropertyByType(FaultFacetType type, synchronized_value<T>* data, R defaultValue) const {
// TODO: update this function with additional fault facets when they are added
diff --git a/src/mongo/db/process_health/health_monitoring_server_parameters.cpp b/src/mongo/db/process_health/health_monitoring_server_parameters.cpp
index 8258d2da0e9..9007e57b464 100644
--- a/src/mongo/db/process_health/health_monitoring_server_parameters.cpp
+++ b/src/mongo/db/process_health/health_monitoring_server_parameters.cpp
@@ -37,16 +37,16 @@
namespace mongo {
namespace {
-// Replaces values in oldIntensities with values in newIntensities while preserving all values in
-// oldIntensities not in newIntensities.
-HealthObserverIntensities mergeIntensities(const HealthObserverIntensities& oldIntensities,
- const HealthObserverIntensities& newIntensities) {
+// Replaces values in oldIntensities/Intervals with values in newIntensities/Intervals while
+// preserving all values present in old- that are not present in new-.
+template <typename ConfigValues>
+ConfigValues mergeConfigValues(const ConfigValues& oldValues, const ConfigValues& newValues) {
using namespace std;
- HealthObserverIntensities result = oldIntensities;
+ ConfigValues result = oldValues;
auto optionalOldValues = result.getValues();
- auto optionalNewValues = newIntensities.getValues();
+ auto optionalNewValues = newValues.getValues();
if (!optionalNewValues) {
- return oldIntensities;
+ return oldValues;
}
if (!optionalOldValues) {
result.setValues(*optionalNewValues);
@@ -55,7 +55,7 @@ HealthObserverIntensities mergeIntensities(const HealthObserverIntensities& oldI
for (const auto& setting : *optionalNewValues) {
auto it = find_if(begin(*optionalOldValues),
end(*optionalOldValues),
- [&setting](const HealthObserverIntensitySetting& destSetting) {
+ [&setting](const auto& destSetting) {
return (destSetting.getType() == setting.getType()) ? true : false;
});
if (it != optionalOldValues->end()) {
@@ -70,20 +70,20 @@ HealthObserverIntensities mergeIntensities(const HealthObserverIntensities& oldI
} // namespace
Status HealthMonitoringIntensitiesServerParameter::setFromString(const std::string& value) {
- auto oldValue = **_data;
+ const auto oldValue = **_data;
auto newValue = HealthObserverIntensities::parse(
IDLParserErrorContext("health monitoring intensities"), fromjson(value));
- newValue = mergeIntensities(oldValue, newValue);
+ newValue = mergeConfigValues(oldValue, newValue);
process_health::FaultManager::healthMonitoringIntensitiesUpdated(oldValue, newValue);
**_data = newValue;
return Status::OK();
}
Status HealthMonitoringIntensitiesServerParameter::set(const BSONElement& newValueElement) {
- auto oldValue = **_data;
+ const auto oldValue = **_data;
auto newValue = HealthObserverIntensities::parse(
IDLParserErrorContext("health monitoring intensities"), newValueElement.Obj());
- newValue = mergeIntensities(oldValue, newValue);
+ newValue = mergeConfigValues(oldValue, newValue);
process_health::FaultManager::healthMonitoringIntensitiesUpdated(oldValue, newValue);
**_data = newValue;
return Status::OK();
@@ -118,14 +118,20 @@ void HealthMonitoringProgressMonitorServerParameter::append(OperationContext*,
}
Status PeriodicHealthCheckIntervalsServerParameter::setFromString(const std::string& value) {
- *_data = HealthObserverIntervals::parse(IDLParserErrorContext("health monitoring liveness"),
- fromjson(value));
+ const auto oldValue = **_data;
+ auto newValue = HealthObserverIntervals::parse(
+ IDLParserErrorContext("health monitoring liveness"), fromjson(value));
+ newValue = mergeConfigValues(oldValue, newValue);
+ **_data = newValue;
return Status::OK();
}
Status PeriodicHealthCheckIntervalsServerParameter::set(const BSONElement& newValueElement) {
- *_data = HealthObserverIntervals::parse(IDLParserErrorContext("health monitoring liveness"),
- newValueElement.Obj());
+ const auto oldValue = **_data;
+ auto newValue = HealthObserverIntervals::parse(
+ IDLParserErrorContext("health monitoring liveness"), newValueElement.Obj());
+ newValue = mergeConfigValues(oldValue, newValue);
+ **_data = newValue;
return Status::OK();
}
diff --git a/src/mongo/db/process_health/health_monitoring_server_parameters.idl b/src/mongo/db/process_health/health_monitoring_server_parameters.idl
index c6b44e56701..33120033e35 100644
--- a/src/mongo/db/process_health/health_monitoring_server_parameters.idl
+++ b/src/mongo/db/process_health/health_monitoring_server_parameters.idl
@@ -72,25 +72,26 @@ structs:
type: array<HealthObserverIntensitySetting>
optional: true
- HealthObserverIntervals:
- description: "A struct representing the interval in milliseconds for each health observer."
+ HealthObserverIntervalSetting:
+ description: "One health observer check interval setting, in milliseconds"
strict: true
fields:
- dns:
- description: "DNS health check interval."
- type: int
- default: 1000
- validator: { gt: 0 }
- ldap:
- description: "LDAP health check interval."
- type: int
- default: 10000
- validator: { gt: 0 }
- test:
- description: "Test health observer health check interval."
+ type:
+ type: HealthObserverType
+ optional: false
+ interval:
type: int
- default: 10
+ optional: false
validator: { gt: 0 }
+
+ HealthObserverIntervals:
+ description: "A struct representing the interval in milliseconds for each health observer."
+ strict: true
+ fields:
+ values:
+ description: "Array of health observer intervals settings"
+ type: array<HealthObserverIntervalSetting>
+ optional: true
HealthObserverProgressMonitorConfig:
description: "A struct representing configuration for health observer liveness checks."
diff --git a/src/mongo/db/process_health/health_observer_test.cpp b/src/mongo/db/process_health/health_observer_test.cpp
index b8dce4fb768..8e0808d4b57 100644
--- a/src/mongo/db/process_health/health_observer_test.cpp
+++ b/src/mongo/db/process_health/health_observer_test.cpp
@@ -137,6 +137,11 @@ TEST_F(FaultManagerTest, ProgressMonitorCheck) {
TEST_F(FaultManagerTest, HealthCheckRunsPeriodically) {
resetManager(std::make_unique<FaultManagerConfig>());
+ RAIIServerParameterControllerForTest _intervalController{
+ "healthMonitoringIntervals",
+ BSON("values" << BSON_ARRAY(BSON("type"
+ << "test"
+ << "interval" << 1)))};
RAIIServerParameterControllerForTest _controller{"featureFlagHealthMonitoring", true};
auto faultFacetType = FaultFacetType::kMock1;
int severity = 0;
@@ -173,6 +178,11 @@ TEST_F(FaultManagerTest, PeriodicHealthCheckOnErrorMakesBadHealthStatus) {
TEST_F(FaultManagerTest,
DeadlineFutureCausesTransientFaultWhenObserverBlocksAndGetsResolvedWhenObserverUnblocked) {
resetManager(std::make_unique<FaultManagerConfig>());
+ RAIIServerParameterControllerForTest _intervalController{
+ "healthMonitoringIntervals",
+ BSON("values" << BSON_ARRAY(BSON("type"
+ << "test"
+ << "interval" << 1)))};
RAIIServerParameterControllerForTest _flagController{"featureFlagHealthMonitoring", true};
RAIIServerParameterControllerForTest _serverParamController{"activeFaultDurationSecs", 5};