summary | refs | log | tree | commit | diff
path: root/src/mongo/db/process_health
diff options
context:
space:
mode:
author: Andrew Shuvalov <andrew.shuvalov@mongodb.com> 2021-12-17 20:42:57 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-12-17 21:25:46 +0000
commit: 39aa4089e095c2b12375108bbfb428b5fa38696c (patch)
tree: e4078f1cb98ca4d1d7c2e5825ad95f242e57535c /src/mongo/db/process_health
parent: 6e8beaab454ba83cf6123625de45bc0b22fb1079 (diff)
download: mongo-39aa4089e095c2b12375108bbfb428b5fa38696c.tar.gz
SERVER-59368 runtime change of intensities values
Diffstat (limited to 'src/mongo/db/process_health')
-rw-r--r-- src/mongo/db/process_health/fault_manager.cpp | 149
-rw-r--r-- src/mongo/db/process_health/fault_manager.h | 13
-rw-r--r-- src/mongo/db/process_health/fault_manager_config.cpp | 14
-rw-r--r-- src/mongo/db/process_health/fault_manager_config.h | 67
-rw-r--r-- src/mongo/db/process_health/fault_manager_test.cpp | 4
-rw-r--r-- src/mongo/db/process_health/fault_manager_test_suite.h | 4
-rw-r--r-- src/mongo/db/process_health/health_check_status.h | 4
-rw-r--r-- src/mongo/db/process_health/health_monitoring_server_parameters.cpp | 47
-rw-r--r-- src/mongo/db/process_health/health_monitoring_server_parameters.idl | 38
-rw-r--r-- src/mongo/db/process_health/test_health_observer.cpp | 6
10 files changed, 283 insertions, 63 deletions
diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp
index 6de76b304ad..4091546a8b9 100644
--- a/src/mongo/db/process_health/fault_manager.cpp
+++ b/src/mongo/db/process_health/fault_manager.cpp
@@ -88,6 +88,73 @@ void FaultManager::set(ServiceContext* svcCtx, std::unique_ptr<FaultManager> new
faultManager = std::move(newFaultManager);
}
+
+bool FaultManager::isInitialized() {
+ stdx::lock_guard lock(_stateMutex);
+ return _initialized;
+}
+
+
+// Starts health checks when an observer is turned on via setParameter, and cleans up when the
+// observer is turned off.
+void FaultManager::healthMonitoringIntensitiesUpdated(HealthObserverIntensities oldValue,
+ HealthObserverIntensities newValue) {
+ if (!hasGlobalServiceContext())
+ return;
+
+ auto manager = FaultManager::get(getGlobalServiceContext());
+ if (manager && manager->isInitialized()) {
+ auto cancellationToken = manager->_managerShuttingDownCancellationSource.token();
+ auto findByType =
+ [](const auto& values,
+ HealthObserverTypeEnum type) -> boost::optional<HealthObserverIntensitySetting> {
+ if (!values) {
+ return boost::none;
+ }
+ auto it = std::find_if(values->begin(),
+ values->end(),
+ [type](const HealthObserverIntensitySetting& setting) {
+ return setting.getType() == type;
+ });
+ if (it != values->end()) {
+ return *it;
+ }
+ return boost::none;
+ };
+
+ auto optionalNewValues = newValue.getValues();
+ if (!optionalNewValues) {
+ return; // Nothing was updated.
+ }
+ for (auto& setting : *optionalNewValues) {
+ auto oldSetting = findByType(oldValue.getValues(), setting.getType());
+ if (!oldSetting) {
+ continue;
+ }
+ if (cancellationToken.isCanceled()) {
+ break;
+ }
+ auto oldIntensity = oldSetting->getIntensity();
+ auto newIntensity = setting.getIntensity();
+ if (oldIntensity != newIntensity) {
+ if (oldIntensity == HealthObserverIntensityEnum::kOff) {
+ // off -> {critical, non-critical}
+ if (auto* observer =
+ manager->getHealthObserver(toFaultFacetType(setting.getType()));
+ observer != nullptr) {
+ manager->healthCheck(observer, cancellationToken);
+ }
+ } else if (newIntensity == HealthObserverIntensityEnum::kOff) {
+ // {critical, non-critical} -> off
+ // Resolve any faults for this observer with a synthetic health check result.
+ auto successfulHealthCheckResult = HealthCheckStatus(setting.getType());
+ manager->accept(successfulHealthCheckResult);
+ }
+ }
+ }
+ }
+}
+
FaultManager::TransientFaultDeadline::TransientFaultDeadline(
FaultManager* faultManager,
std::shared_ptr<executor::TaskExecutor> executor,
@@ -190,13 +257,15 @@ boost::optional<FaultState> FaultManager::handleStartupCheck(const OptionalMessa
});
- auto lk = stdx::lock_guard(_stateMutex);
- logMessageReceived(state(), status);
+ {
+ auto lk = stdx::lock_guard(_stateMutex);
+ logMessageReceived(state(), status);
- if (status.isActiveFault()) {
- _healthyObservations.erase(status.getType());
- } else {
- _healthyObservations.insert(status.getType());
+ if (status.isActiveFault()) {
+ _healthyObservations.erase(status.getType());
+ } else {
+ _healthyObservations.insert(status.getType());
+ }
}
updateWithCheckStatus(HealthCheckStatus(status));
@@ -210,8 +279,6 @@ boost::optional<FaultState> FaultManager::handleStartupCheck(const OptionalMessa
FaultState::kStartupCheck, FaultState::kStartupCheck, boost::none);
}
- // If the whole fault becomes resolved, garbage collect it
- // with proper locking.
std::shared_ptr<FaultInternal> faultToDelete;
{
auto lk = stdx::lock_guard(_mutex);
@@ -220,6 +287,7 @@ boost::optional<FaultState> FaultManager::handleStartupCheck(const OptionalMessa
}
}
+ auto lk = stdx::lock_guard(_stateMutex);
if (activeObserversTypes == _healthyObservations) {
return FaultState::kOk;
}
@@ -230,8 +298,10 @@ boost::optional<FaultState> FaultManager::handleOk(const OptionalMessageType& me
invariant(message);
HealthCheckStatus status = message.get();
- auto lk = stdx::lock_guard(_stateMutex);
- logMessageReceived(state(), status);
+ {
+ auto lk = stdx::lock_guard(_stateMutex);
+ logMessageReceived(state(), status);
+ }
if (_config->getHealthObserverIntensity(status.getType()) ==
HealthObserverIntensityEnum::kOff) {
@@ -253,8 +323,11 @@ boost::optional<FaultState> FaultManager::handleTransientFault(const OptionalMes
}
HealthCheckStatus status = message.get();
- auto lk = stdx::lock_guard(_stateMutex);
- logMessageReceived(state(), status);
+
+ {
+ auto lk = stdx::lock_guard(_stateMutex);
+ logMessageReceived(state(), status);
+ }
updateWithCheckStatus(HealthCheckStatus(status));
@@ -265,6 +338,7 @@ boost::optional<FaultState> FaultManager::handleTransientFault(const OptionalMes
// If the whole fault becomes resolved, garbage collect it
// with proper locking.
+ auto lk = stdx::lock_guard(_mutex);
if (_fault && _fault->getFacets().empty()) {
_fault.reset();
return FaultState::kOk;
@@ -325,21 +399,25 @@ void FaultManager::schedulePeriodicHealthCheckThread() {
return;
}
- if (getActiveHealthObservers().size() == 0) {
- LOGV2_DEBUG(5936511, 2, "No active health observers are configured.");
+ auto observers = getActiveHealthObservers();
+ if (observers.size() == 0) {
+ LOGV2(5936511, "No active health observers are configured.");
setState(FaultState::kOk, HealthCheckStatus(FaultFacetType::kSystem));
+ return;
}
- auto observers = getHealthObservers();
+ str::stream listOfActiveObservers;
for (auto observer : observers) {
- LOGV2_DEBUG(
- 59365, 1, "starting health observer", "observerType"_attr = observer->getType());
+ LOGV2_DEBUG(5936501,
+ 1,
+ "starting health observer",
+ "observerType"_attr = str::stream() << observer->getType());
+ listOfActiveObservers << observer->getType() << " ";
- // TODO (SERVER-59368): The system should properly handle a health checker being turned
- // on/off
auto token = _managerShuttingDownCancellationSource.token();
healthCheck(observer, token);
}
+ LOGV2(5936804, "Health observers started", "detail"_attr = listOfActiveObservers);
}
FaultManager::~FaultManager() {
@@ -454,8 +532,9 @@ void FaultManager::healthCheck(HealthObserver* observer, CancellationToken token
}
uassert(5936101,
- fmt::format("Failed to initialize periodic health check work. Reason: {}",
- periodicThreadCbHandleStatus.getStatus().codeString()),
+ str::stream() << "Failed to schedule periodic health check for "
+ << observer->getType() << ": "
+ << periodicThreadCbHandleStatus.getStatus().codeString(),
periodicThreadCbHandleStatus.isOK());
}
@@ -480,14 +559,12 @@ void FaultManager::healthCheck(HealthObserver* observer, CancellationToken token
// If health observer is disabled, then do nothing and schedule another run (health observer may
// become enabled).
- // TODO (SERVER-59368): The system should properly handle a health checker being turned on/off
if (!_config->isHealthObserverEnabled(observer->getType())) {
schedulerCb();
return;
}
- // Run asynchronous health check. When complete, check for state transition (and perform if
- // necessary). Then schedule the next run.
+ // Run asynchronous health check. Send output to the state machine. Schedule next run.
auto healthCheckFuture = observer->periodicCheck(*this, _taskExecutor, token)
.thenRunOn(_taskExecutor)
.onCompletion([this, acceptNotOKStatus, schedulerCb](
@@ -566,13 +643,15 @@ void FaultManager::_init() {
_progressMonitor = std::make_unique<ProgressMonitor>(this, _svcCtx, _crashCb);
auto lk2 = stdx::lock_guard(_stateMutex);
- LOGV2(5956701,
- "Instantiated health observers, periodic health checking starts",
- "managerState"_attr = state(),
- "observersCount"_attr = _observers.size());
+ _initialized = true;
+ LOGV2_DEBUG(5956701,
+ 1,
+ "Instantiated health observers",
+ "managerState"_attr = str::stream() << state(),
+ "observersCount"_attr = _observers.size());
}
-std::vector<HealthObserver*> FaultManager::getHealthObservers() {
+std::vector<HealthObserver*> FaultManager::getHealthObservers() const {
std::vector<HealthObserver*> result;
stdx::lock_guard<Latch> lk(_mutex);
result.reserve(_observers.size());
@@ -583,7 +662,7 @@ std::vector<HealthObserver*> FaultManager::getHealthObservers() {
return result;
}
-std::vector<HealthObserver*> FaultManager::getActiveHealthObservers() {
+std::vector<HealthObserver*> FaultManager::getActiveHealthObservers() const {
auto allObservers = getHealthObservers();
std::vector<HealthObserver*> result;
result.reserve(allObservers.size());
@@ -596,6 +675,16 @@ std::vector<HealthObserver*> FaultManager::getActiveHealthObservers() {
return result;
}
+HealthObserver* FaultManager::getHealthObserver(FaultFacetType type) const {
+ stdx::lock_guard<Latch> lk(_mutex);
+ auto observerIt = std::find_if(
+ _observers.begin(), _observers.end(), [type](auto& o) { return o->getType() == type; });
+ if (observerIt != _observers.end()) {
+ return (*observerIt).get();
+ }
+ return nullptr;
+}
+
void FaultManager::progressMonitorCheckForTests(std::function<void(std::string cause)> crashCb) {
_progressMonitor->progressMonitorCheck(crashCb);
}
diff --git a/src/mongo/db/process_health/fault_manager.h b/src/mongo/db/process_health/fault_manager.h
index 597cfb54112..c627c18ee1d 100644
--- a/src/mongo/db/process_health/fault_manager.h
+++ b/src/mongo/db/process_health/fault_manager.h
@@ -93,12 +93,19 @@ public:
// specific flags.
SharedSemiFuture<void> startPeriodicHealthChecks();
+ bool isInitialized();
+
+
static FaultManager* get(ServiceContext* svcCtx);
// Replace the FaultManager for the 'svcCtx'. This functionality
// is exposed for testing and initial bootstrap.
static void set(ServiceContext* svcCtx, std::unique_ptr<FaultManager> newFaultManager);
+ // Signals that the intensity for a health observer has been updated.
+ static void healthMonitoringIntensitiesUpdated(HealthObserverIntensities oldValue,
+ HealthObserverIntensities newValue);
+
// Returns the current fault state for the server.
FaultState getFaultState() const;
@@ -107,7 +114,7 @@ public:
// All observers remain valid for the manager lifetime, thus returning
// just pointers is safe, as long as they are used while manager exists.
- std::vector<HealthObserver*> getHealthObservers();
+ std::vector<HealthObserver*> getHealthObservers() const;
// Gets the aggregate configuration for all process health environment.
FaultManagerConfig getConfig() const;
@@ -117,7 +124,8 @@ public:
protected:
// Returns all health observers not configured as Off
- std::vector<HealthObserver*> getActiveHealthObservers();
+ std::vector<HealthObserver*> getActiveHealthObservers() const;
+ HealthObserver* getHealthObserver(FaultFacetType type) const;
// Runs a particular health observer. Then attempts to transition states. Then schedules next
// run.
@@ -164,6 +172,7 @@ private:
mutable Mutex _stateMutex =
MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "FaultManager::_stateMutex");
+ bool _initialized = false;
Date_t _lastTransitionTime;
// Responsible for transitioning the state of FaultManager to ActiveFault after a
diff --git a/src/mongo/db/process_health/fault_manager_config.cpp b/src/mongo/db/process_health/fault_manager_config.cpp
index 0243cee192a..c4a7442c67c 100644
--- a/src/mongo/db/process_health/fault_manager_config.cpp
+++ b/src/mongo/db/process_health/fault_manager_config.cpp
@@ -57,5 +57,19 @@ std::ostream& operator<<(std::ostream& os, const FaultState& state) {
return os << sb.stringData();
}
+// TODO(SERVER-62125): remove this conversion and use idl type everywhere
+FaultFacetType toFaultFacetType(HealthObserverTypeEnum type) {
+ switch (type) {
+ case HealthObserverTypeEnum::kLdap:
+ return FaultFacetType::kLdap;
+ case HealthObserverTypeEnum::kDns:
+ return FaultFacetType::kDns;
+ case HealthObserverTypeEnum::kTest:
+ return FaultFacetType::kTestObserver;
+ default:
+ MONGO_UNREACHABLE;
+ }
+}
+
} // namespace process_health
} // namespace mongo
diff --git a/src/mongo/db/process_health/fault_manager_config.h b/src/mongo/db/process_health/fault_manager_config.h
index d1d9007f29a..db218853da0 100644
--- a/src/mongo/db/process_health/fault_manager_config.h
+++ b/src/mongo/db/process_health/fault_manager_config.h
@@ -67,6 +67,9 @@ enum class FaultFacetType { kSystem, kMock1, kMock2, kTestObserver, kLdap, kDns
static const StringData FaultFacetTypeStrings[] = {
"kSystem", "kMock1", "kMock2", "kTestObserver", "kLdap", "kDns"};
+FaultFacetType toFaultFacetType(HealthObserverTypeEnum type);
+
+
static const StringData FaultFacetType_serializer(const FaultFacetType value) {
return FaultFacetTypeStrings[static_cast<int>(value)];
}
@@ -89,11 +92,43 @@ public:
HealthObserverIntensityEnum getHealthObserverIntensity(FaultFacetType type) {
auto intensities = _getHealthObserverIntensities();
- if (type == FaultFacetType::kMock1 && _facetToIntensityMapForTest.contains(type)) {
- return _facetToIntensityMapForTest.at(type);
- }
- return _getPropertyByType(
- type, &intensities->_data, HealthObserverIntensityEnum::kCritical);
+
+ auto toObserverType = [](FaultFacetType type) -> boost::optional<HealthObserverTypeEnum> {
+ switch (type) {
+ case FaultFacetType::kLdap:
+ return HealthObserverTypeEnum::kLdap;
+ case FaultFacetType::kDns:
+ return HealthObserverTypeEnum::kDns;
+ case FaultFacetType::kTestObserver:
+ return HealthObserverTypeEnum::kTest;
+ default:
+ return boost::none;
+ }
+ };
+
+ auto getIntensity = [this, intensities, &toObserverType](FaultFacetType type) {
+ auto observerType = toObserverType(type);
+ if (observerType) {
+ auto x = intensities->_data->getValues();
+ if (x) {
+ for (auto setting : *x) {
+ if (setting.getType() == observerType) {
+ return setting.getIntensity();
+ }
+ }
+ }
+ return HealthObserverIntensityEnum::kOff;
+ } else {
+ // TODO SERVER-61944: this is for kMock1 & kMock2. Remove this branch once mock
+ // types are deleted.
+ if (_facetToIntensityMapForTest.contains(type)) {
+ return _facetToIntensityMapForTest.at(type);
+ }
+ return HealthObserverIntensityEnum::kCritical;
+ }
+ };
+
+ return getIntensity(type);
}
bool isHealthObserverEnabled(FaultFacetType type) {
@@ -152,23 +187,31 @@ private:
template <typename T, typename R>
R _getPropertyByType(FaultFacetType type, synchronized_value<T>* data, R defaultValue) const {
+ // TODO: update this function with additional fault facets when they are added
+ boost::optional<R> result;
switch (type) {
case FaultFacetType::kLdap:
- return (*data)->getLdap();
+ result = (*data)->getLdap();
+ break;
case FaultFacetType::kDns:
- return (*data)->getDns();
+ result = (*data)->getDns();
+ break;
case FaultFacetType::kTestObserver:
- return (*data)->getTest();
+ result = (*data)->getTest();
+ break;
case FaultFacetType::kSystem:
- return defaultValue;
+ result = defaultValue;
+ break;
case FaultFacetType::kMock1:
- return defaultValue;
+ result = defaultValue;
+ break;
case FaultFacetType::kMock2:
- return defaultValue;
- // TODO: update this function with additional fault facets when they are added
+ result = defaultValue;
+ break;
default:
MONGO_UNREACHABLE;
}
+ return *result;
}
bool _periodicChecksDisabledForTests = false;
diff --git a/src/mongo/db/process_health/fault_manager_test.cpp b/src/mongo/db/process_health/fault_manager_test.cpp
index d216206c505..e3466699906 100644
--- a/src/mongo/db/process_health/fault_manager_test.cpp
+++ b/src/mongo/db/process_health/fault_manager_test.cpp
@@ -50,9 +50,9 @@ TEST(FaultManagerTest, Registration) {
TEST_F(FaultManagerTest, GetHealthObserverIntensity) {
auto config = manager().getConfig();
ASSERT(config.getHealthObserverIntensity(FaultFacetType::kLdap) ==
- HealthObserverIntensityEnum::kNonCritical);
+ HealthObserverIntensityEnum::kOff);
ASSERT(config.getHealthObserverIntensity(FaultFacetType::kDns) ==
- HealthObserverIntensityEnum::kNonCritical);
+ HealthObserverIntensityEnum::kOff);
}
} // namespace
diff --git a/src/mongo/db/process_health/fault_manager_test_suite.h b/src/mongo/db/process_health/fault_manager_test_suite.h
index fc567c5b0fb..59d937846c7 100644
--- a/src/mongo/db/process_health/fault_manager_test_suite.h
+++ b/src/mongo/db/process_health/fault_manager_test_suite.h
@@ -236,7 +236,7 @@ public:
tickSource().advance(d);
}
- static inline const Seconds kWaitTimeout{30};
+ static inline const Seconds kWaitTimeout{10};
static inline const Milliseconds kSleepTime{1};
static inline const int kActiveFaultDurationSecs = 1;
@@ -251,7 +251,7 @@ public:
return;
sleepFor(kSleepTime);
}
- invariant(false);
+ ASSERT(false);
}
static inline const Milliseconds kCheckTimeIncrement{100};
diff --git a/src/mongo/db/process_health/health_check_status.h b/src/mongo/db/process_health/health_check_status.h
index 7d20016331d..5e5e26ff97d 100644
--- a/src/mongo/db/process_health/health_check_status.h
+++ b/src/mongo/db/process_health/health_check_status.h
@@ -60,6 +60,9 @@ public:
explicit HealthCheckStatus(FaultFacetType type)
: _type(type), _severity(0), _description("resolved"_sd) {}
+ explicit HealthCheckStatus(HealthObserverTypeEnum type)
+ : _type(toFaultFacetType(type)), _severity(0), _description("resolved"_sd) {}
+
HealthCheckStatus(const HealthCheckStatus&) = default;
HealthCheckStatus& operator=(const HealthCheckStatus&) = default;
HealthCheckStatus(HealthCheckStatus&&) = default;
@@ -114,6 +117,7 @@ private:
friend std::ostream& operator<<(std::ostream&, const HealthCheckStatus&);
friend StringBuilder& operator<<(StringBuilder& s, const HealthCheckStatus& hcs);
+
FaultFacetType _type;
double _severity;
std::string _description;
diff --git a/src/mongo/db/process_health/health_monitoring_server_parameters.cpp b/src/mongo/db/process_health/health_monitoring_server_parameters.cpp
index 36d445cc386..8258d2da0e9 100644
--- a/src/mongo/db/process_health/health_monitoring_server_parameters.cpp
+++ b/src/mongo/db/process_health/health_monitoring_server_parameters.cpp
@@ -26,23 +26,66 @@
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
+#include <algorithm>
#include "mongo/bson/json.h"
+#include "mongo/db/process_health/fault_manager.h"
#include "mongo/db/process_health/health_monitoring_server_parameters_gen.h"
#include "mongo/db/process_health/health_observer.h"
namespace mongo {
+namespace {
+// Replaces values in oldIntensities with values in newIntensities while preserving all values in
+// oldIntensities not in newIntensities.
+HealthObserverIntensities mergeIntensities(const HealthObserverIntensities& oldIntensities,
+ const HealthObserverIntensities& newIntensities) {
+ using namespace std;
+ HealthObserverIntensities result = oldIntensities;
+ auto optionalOldValues = result.getValues();
+ auto optionalNewValues = newIntensities.getValues();
+ if (!optionalNewValues) {
+ return oldIntensities;
+ }
+ if (!optionalOldValues) {
+ result.setValues(*optionalNewValues);
+ return result;
+ }
+ for (const auto& setting : *optionalNewValues) {
+ auto it = find_if(begin(*optionalOldValues),
+ end(*optionalOldValues),
+ [&setting](const HealthObserverIntensitySetting& destSetting) {
+ return (destSetting.getType() == setting.getType()) ? true : false;
+ });
+ if (it != optionalOldValues->end()) {
+ *it = setting;
+ } else {
+ optionalOldValues->emplace_back(setting);
+ }
+ }
+ result.setValues(*optionalOldValues);
+ return result;
+}
+} // namespace
+
Status HealthMonitoringIntensitiesServerParameter::setFromString(const std::string& value) {
- *_data = HealthObserverIntensities::parse(
+ auto oldValue = **_data;
+ auto newValue = HealthObserverIntensities::parse(
IDLParserErrorContext("health monitoring intensities"), fromjson(value));
+ newValue = mergeIntensities(oldValue, newValue);
+ process_health::FaultManager::healthMonitoringIntensitiesUpdated(oldValue, newValue);
+ **_data = newValue;
return Status::OK();
}
Status HealthMonitoringIntensitiesServerParameter::set(const BSONElement& newValueElement) {
- *_data = HealthObserverIntensities::parse(
+ auto oldValue = **_data;
+ auto newValue = HealthObserverIntensities::parse(
IDLParserErrorContext("health monitoring intensities"), newValueElement.Obj());
+ newValue = mergeIntensities(oldValue, newValue);
+ process_health::FaultManager::healthMonitoringIntensitiesUpdated(oldValue, newValue);
+ **_data = newValue;
return Status::OK();
}
diff --git a/src/mongo/db/process_health/health_monitoring_server_parameters.idl b/src/mongo/db/process_health/health_monitoring_server_parameters.idl
index 66c17fd15df..c6b44e56701 100644
--- a/src/mongo/db/process_health/health_monitoring_server_parameters.idl
+++ b/src/mongo/db/process_health/health_monitoring_server_parameters.idl
@@ -42,24 +42,36 @@ enums:
kCritical: "critical"
kNonCritical: "non-critical"
+ HealthObserverType:
+ description: "Enum representing available health observer types"
+ type: string
+ values:
+ kLdap: "ldap"
+ kDns: "dns"
+ kTest: "test"
+
structs:
- HealthObserverIntensities:
- description: "A struct representing the health observer intensities."
+ HealthObserverIntensitySetting:
+ description: "One health observer intensity setting"
strict: true
fields:
- dns:
- description: "Intensity of DNS fault facet"
- type: HealthObserverIntensity
- default: kNonCritical
- ldap:
- description: "Intensity of LDAP fault facet"
- type: HealthObserverIntensity
- default: kNonCritical
- test:
- description: "Intensity of test fault facet"
+ type:
+ type: HealthObserverType
+ optional: false
+ intensity:
type: HealthObserverIntensity
+ optional: false
default: kOff
+ HealthObserverIntensities:
+ description: "A struct representing the health observer intensities."
+ strict: false
+ fields:
+ values:
+ description: "Array of health observer intensity settings"
+ type: array<HealthObserverIntensitySetting>
+ optional: true
+
HealthObserverIntervals:
description: "A struct representing the interval in milliseconds for each health observer."
strict: true
@@ -77,7 +89,7 @@ structs:
test:
description: "Test health observer health check interval."
type: int
- default: 1
+ default: 10
validator: { gt: 0 }
HealthObserverProgressMonitorConfig:
diff --git a/src/mongo/db/process_health/test_health_observer.cpp b/src/mongo/db/process_health/test_health_observer.cpp
index 254a70f217b..ae5747895ea 100644
--- a/src/mongo/db/process_health/test_health_observer.cpp
+++ b/src/mongo/db/process_health/test_health_observer.cpp
@@ -26,10 +26,13 @@
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kProcessHealth
#include "mongo/db/process_health/test_health_observer.h"
+
#include "mongo/db/commands/test_commands_enabled.h"
#include "mongo/db/process_health/health_observer_registration.h"
+#include "mongo/logv2/log.h"
namespace mongo {
namespace process_health {
@@ -37,6 +40,7 @@ MONGO_FAIL_POINT_DEFINE(hangTestHealthObserver);
MONGO_FAIL_POINT_DEFINE(testHealthObserver);
Future<HealthCheckStatus> TestHealthObserver::periodicCheckImpl(
PeriodicHealthCheckContext&& periodicCheckContext) {
+ LOGV2_DEBUG(5936801, 2, "Test health observer executing");
hangTestHealthObserver.pauseWhileSet();
auto result = Future<HealthCheckStatus>::makeReady(makeHealthyStatus());
@@ -50,6 +54,7 @@ Future<HealthCheckStatus> TestHealthObserver::periodicCheckImpl(
},
[&](const BSONObj& data) { return !data.isEmpty(); });
+ LOGV2_DEBUG(5936802, 2, "Test health observer returns", "result"_attr = result.get());
return result;
}
@@ -58,6 +63,7 @@ MONGO_INITIALIZER(TestHealthObserver)(InitializerContext*) {
// Failpoints can only be set when test commands are enabled, and so the test health observer
// is only useful in that case.
if (getTestCommandsEnabled()) {
+ LOGV2(5936803, "Test health observer instantiated");
HealthObserverRegistration::registerObserverFactory(
[](ServiceContext* svcCtx) { return std::make_unique<TestHealthObserver>(svcCtx); });
}