summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Andrew Shuvalov <andrew.shuvalov@mongodb.com> 2022-01-10 21:41:36 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-02-15 17:02:29 +0000
commit 4b4be42169630f44a9849ce14ed1769ef37fd31b (patch)
tree 15efe892634e6257e47e42b589def585264ab372 /src
parent 083698d4b61d49b1c0de9157856832381fbbb95c (diff)
download mongo-4b4be42169630f44a9849ce14ed1769ef37fd31b.tar.gz
SERVER-59375 SERVER-62373 additional serverStatus sections for health checks
(cherry picked from commit 386986a651b852c3c98b426ea60a023d99e4a5a4)
Diffstat (limited to 'src')
-rw-r--r-- src/mongo/db/process_health/fault_manager.cpp 47
-rw-r--r-- src/mongo/db/process_health/fault_manager.h 8
-rw-r--r-- src/mongo/db/process_health/fault_manager_config.h 2
-rw-r--r-- src/mongo/db/process_health/health_monitoring_server_status_section.cpp 19
4 files changed, 65 insertions, 11 deletions
diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp
index 818eee320f2..1e57fadd1ce 100644
--- a/src/mongo/db/process_health/fault_manager.cpp
+++ b/src/mongo/db/process_health/fault_manager.cpp
@@ -678,6 +678,53 @@ HealthObserver* FaultManager::getHealthObserver(FaultFacetType type) const {
return nullptr;
}
+void FaultManager::appendDescription(BSONObjBuilder* result, bool appendDetails) const {
+ static constexpr auto kDurationThreshold = Hours{24};
+ const auto now = _svcCtx->getFastClockSource()->now();
+ StringBuilder faultStateStr;
+ faultStateStr << getFaultState();
+
+ result->append("state", faultStateStr.str());
+ result->appendDate("enteredStateAtTime", getLastTransitionTime());
+
+ auto fault = currentFault();
+ if (fault) {
+ BSONObjBuilder sub_result;
+ fault->appendDescription(&sub_result);
+ result->append("faultInformation", sub_result.obj());
+ }
+
+ auto allObservers = getHealthObservers();
+ for (auto observer : allObservers) {
+ if (!appendDetails && !_config->isHealthObserverEnabled(observer->getType())) {
+ continue;
+ }
+ BSONObjBuilder sub_result;
+ sub_result.append("intensity",
+ HealthObserverIntensity_serializer(
+ _config->getHealthObserverIntensity(observer->getType())));
+
+ HealthObserverLivenessStats stats = observer->getStats();
+ sub_result.append("totalChecks", stats.completedChecksCount);
+ if (appendDetails) {
+ sub_result.append("totalChecksWithFailure", stats.completedChecksWithFaultCount);
+ if (now - stats.lastTimeCheckStarted < kDurationThreshold) {
+ sub_result.append("timeSinceLastCheckStartedMs",
+ durationCount<Milliseconds>(now - stats.lastTimeCheckStarted));
+ sub_result.append("timeSinceLastCheckCompletedMs",
+ durationCount<Milliseconds>(now - stats.lastTimeCheckCompleted));
+ }
+ }
+ // If a check is still running, report its elapsed time once it exceeds 10% of the liveness deadline.
+ if (stats.currentlyRunningHealthCheck &&
+ now - stats.lastTimeCheckStarted > getConfig().getPeriodicLivenessDeadline() / 10) {
+ sub_result.append("runningCheckForMs",
+ durationCount<Milliseconds>(now - stats.lastTimeCheckStarted));
+ }
+ result->append(FaultFacetType_serializer(observer->getType()), sub_result.obj());
+ }
+}
+
void FaultManager::progressMonitorCheckForTests(std::function<void(std::string cause)> crashCb) {
_progressMonitor->progressMonitorCheck(crashCb);
}
diff --git a/src/mongo/db/process_health/fault_manager.h b/src/mongo/db/process_health/fault_manager.h
index 4ef3a6c36e2..14234fc75d4 100644
--- a/src/mongo/db/process_health/fault_manager.h
+++ b/src/mongo/db/process_health/fault_manager.h
@@ -120,6 +120,14 @@ public:
// Gets the timestamp of the last transition
Date_t getLastTransitionTime() const;
+ /**
+ * Generates the `serverStatus` section for the fault manager into `builder`.
+ * @param builder receives the fields of the section.
+ * @param appendDetails is true when the section is requested with
+ * `health: {details: true}`, so it is OK to add verbose information here.
+ */
+ void appendDescription(BSONObjBuilder* builder, bool appendDetails) const;
+
protected:
// Returns all health observers not configured as Off
std::vector<HealthObserver*> getActiveHealthObservers() const;
diff --git a/src/mongo/db/process_health/fault_manager_config.h b/src/mongo/db/process_health/fault_manager_config.h
index 072c304376c..2ee6addfed9 100644
--- a/src/mongo/db/process_health/fault_manager_config.h
+++ b/src/mongo/db/process_health/fault_manager_config.h
@@ -63,7 +63,7 @@ std::ostream& operator<<(std::ostream& os, const FaultState& state);
*/
enum class FaultFacetType { kSystem, kMock1, kMock2, kTestObserver, kLdap, kDns };
static const StringData FaultFacetTypeStrings[] = {
- "kSystem", "kMock1", "kMock2", "kTestObserver", "kLdap", "kDns"};
+ "systemObserver", "mock1", "mock2", "testObserver", "LDAP", "DNS"};
FaultFacetType toFaultFacetType(HealthObserverTypeEnum type);
diff --git a/src/mongo/db/process_health/health_monitoring_server_status_section.cpp b/src/mongo/db/process_health/health_monitoring_server_status_section.cpp
index 1dae93a752c..3a7713af285 100644
--- a/src/mongo/db/process_health/health_monitoring_server_status_section.cpp
+++ b/src/mongo/db/process_health/health_monitoring_server_status_section.cpp
@@ -47,20 +47,19 @@ public:
BSONObj generateSection(OperationContext* opCtx,
const BSONElement& configElement) const override {
auto* fault_manager = process_health::FaultManager::get(getGlobalServiceContext());
- BSONObjBuilder result;
- StringBuilder os;
- os << fault_manager->getFaultState();
+ if (!fault_manager) {
+ return BSONObj();
+ }
- result.append("state", os.str());
- result.appendDate("enteredStateAtTime", fault_manager->getLastTransitionTime());
+ BSONObjBuilder result;
- auto fault = fault_manager->currentFault();
- if (fault) {
- BSONObjBuilder sub_result;
- fault->appendDescription(&sub_result);
- result.append("faultInformation", sub_result.obj());
+ bool appendDetails = false;
+ if (configElement.type() == BSONType::Object && configElement.Obj().hasElement("details")) {
+ appendDetails = configElement.Obj()["details"].trueValue();
}
+ fault_manager->appendDescription(&result, appendDetails);
+
return result.obj();
}