diff options
author | Andrew Shuvalov <andrew.shuvalov@mongodb.com> | 2022-01-10 21:41:36 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-02-15 17:02:29 +0000 |
commit | 4b4be42169630f44a9849ce14ed1769ef37fd31b (patch) | |
tree | 15efe892634e6257e47e42b589def585264ab372 /src | |
parent | 083698d4b61d49b1c0de9157856832381fbbb95c (diff) | |
download | mongo-4b4be42169630f44a9849ce14ed1769ef37fd31b.tar.gz |
SERVER-59375 SERVER-62373 additional serverStatus sections for health checks
(cherry picked from commit 386986a651b852c3c98b426ea60a023d99e4a5a4)
Diffstat (limited to 'src')
4 files changed, 65 insertions, 11 deletions
```diff
diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp
index 818eee320f2..1e57fadd1ce 100644
--- a/src/mongo/db/process_health/fault_manager.cpp
+++ b/src/mongo/db/process_health/fault_manager.cpp
@@ -678,6 +678,53 @@ HealthObserver* FaultManager::getHealthObserver(FaultFacetType type) const {
     return nullptr;
 }
 
+void FaultManager::appendDescription(BSONObjBuilder* result, bool appendDetails) const {
+    static constexpr auto kDurationThreshold = Hours{24};
+    const auto now = _svcCtx->getFastClockSource()->now();
+    StringBuilder faultStateStr;
+    faultStateStr << getFaultState();
+
+    result->append("state", faultStateStr.str());
+    result->appendDate("enteredStateAtTime", getLastTransitionTime());
+
+    auto fault = currentFault();
+    if (fault) {
+        BSONObjBuilder sub_result;
+        fault->appendDescription(&sub_result);
+        result->append("faultInformation", sub_result.obj());
+    }
+
+    auto allObservers = getHealthObservers();
+    for (auto observer : allObservers) {
+        if (!appendDetails && !_config->isHealthObserverEnabled(observer->getType())) {
+            continue;
+        }
+        BSONObjBuilder sub_result;
+        sub_result.append("intensity",
+                          HealthObserverIntensity_serializer(
+                              _config->getHealthObserverIntensity(observer->getType())));
+
+        HealthObserverLivenessStats stats = observer->getStats();
+        sub_result.append("totalChecks", stats.completedChecksCount);
+        if (appendDetails) {
+            sub_result.append("totalChecksWithFailure", stats.completedChecksWithFaultCount);
+            if (now - stats.lastTimeCheckStarted < kDurationThreshold) {
+                sub_result.append("timeSinceLastCheckStartedMs",
+                                  durationCount<Milliseconds>(now - stats.lastTimeCheckStarted));
+                sub_result.append("timeSinceLastCheckCompletedMs",
+                                  durationCount<Milliseconds>(now - stats.lastTimeCheckCompleted));
+            }
+        }
+        // Report how long the current check is running, if it's longer than 10% of deadline.
+        if (stats.currentlyRunningHealthCheck &&
+            now - stats.lastTimeCheckStarted > getConfig().getPeriodicLivenessDeadline() / 10) {
+            sub_result.append("runningCheckForMs",
+                              durationCount<Milliseconds>(now - stats.lastTimeCheckStarted));
+        }
+        result->append(FaultFacetType_serializer(observer->getType()), sub_result.obj());
+    }
+}
+
 void FaultManager::progressMonitorCheckForTests(std::function<void(std::string cause)> crashCb) {
     _progressMonitor->progressMonitorCheck(crashCb);
 }
diff --git a/src/mongo/db/process_health/fault_manager.h b/src/mongo/db/process_health/fault_manager.h
index 4ef3a6c36e2..14234fc75d4 100644
--- a/src/mongo/db/process_health/fault_manager.h
+++ b/src/mongo/db/process_health/fault_manager.h
@@ -120,6 +120,14 @@ public:
     // Gets the timestamp of the last transition
     Date_t getLastTransitionTime() const;
 
+    /**
+     * Generate the `serverStatus` section for the fault manager.
+     * @param appendDetails is true when the section is generated with:
+     *     health: {details: true}
+     * thus it is ok to add any verbose information here.
+     */
+    void appendDescription(BSONObjBuilder* builder, bool appendDetails) const;
+
 protected:
     // Returns all health observers not configured as Off
     std::vector<HealthObserver*> getActiveHealthObservers() const;
diff --git a/src/mongo/db/process_health/fault_manager_config.h b/src/mongo/db/process_health/fault_manager_config.h
index 072c304376c..2ee6addfed9 100644
--- a/src/mongo/db/process_health/fault_manager_config.h
+++ b/src/mongo/db/process_health/fault_manager_config.h
@@ -63,7 +63,7 @@ std::ostream& operator<<(std::ostream& os, const FaultState& state);
  */
 enum class FaultFacetType { kSystem, kMock1, kMock2, kTestObserver, kLdap, kDns };
 static const StringData FaultFacetTypeStrings[] = {
-    "kSystem", "kMock1", "kMock2", "kTestObserver", "kLdap", "kDns"};
+    "systemObserver", "mock1", "mock2", "testObserver", "LDAP", "DNS"};
 
 FaultFacetType toFaultFacetType(HealthObserverTypeEnum type);
 
diff --git a/src/mongo/db/process_health/health_monitoring_server_status_section.cpp b/src/mongo/db/process_health/health_monitoring_server_status_section.cpp
index 1dae93a752c..3a7713af285 100644
--- a/src/mongo/db/process_health/health_monitoring_server_status_section.cpp
+++ b/src/mongo/db/process_health/health_monitoring_server_status_section.cpp
@@ -47,20 +47,19 @@ public:
     BSONObj generateSection(OperationContext* opCtx,
                             const BSONElement& configElement) const override {
         auto* fault_manager = process_health::FaultManager::get(getGlobalServiceContext());
-        BSONObjBuilder result;
-        StringBuilder os;
-        os << fault_manager->getFaultState();
+        if (!fault_manager) {
+            return BSONObj();
+        }
 
-        result.append("state", os.str());
-        result.appendDate("enteredStateAtTime", fault_manager->getLastTransitionTime());
+        BSONObjBuilder result;
 
-        auto fault = fault_manager->currentFault();
-        if (fault) {
-            BSONObjBuilder sub_result;
-            fault->appendDescription(&sub_result);
-            result.append("faultInformation", sub_result.obj());
+        bool appendDetails = false;
+        if (configElement.type() == BSONType::Object && configElement.Obj().hasElement("details")) {
+            appendDetails = configElement.Obj()["details"].trueValue();
         }
 
+        fault_manager->appendDescription(&result, appendDetails);
+
         return result.obj();
     }
 
```