diff options
author | Kshitij Gupta <kshitij.gupta@mongodb.com> | 2021-12-09 14:29:39 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-02-15 17:02:28 +0000 |
commit | 000a9d5d06df3cf8a9135d88fb0301db1c083799 (patch) | |
tree | 4f9df4b2e2762ee589d217aa8d5423f7250e7be6 | |
parent | dcef9a469854574e7361f894c059b3d3e4b73884 (diff) | |
download | mongo-000a9d5d06df3cf8a9135d88fb0301db1c083799.tar.gz |
SERVER-61914: add fault facet details to FaultImpl::toBSON
SERVER-61958: Make a mocked health observer class that takes in failpoint data
(cherry picked from commit 15c71223b071133cd18a69a9bc4b47f65dcbca17)
-rw-r--r-- | jstests/sharding/health_monitor/server_status_health.js | 71 | ||||
-rw-r--r-- | src/mongo/db/process_health/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_facet.h | 10 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_facet_impl.cpp | 11 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_facet_impl.h | 4 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_facet_mock.h | 9 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_impl.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_manager.cpp | 20 | ||||
-rw-r--r-- | src/mongo/db/process_health/fault_manager_config.h | 9 | ||||
-rw-r--r-- | src/mongo/db/process_health/health_check_status.h | 2 | ||||
-rw-r--r-- | src/mongo/db/process_health/health_monitoring_server_parameters.idl | 4 | ||||
-rw-r--r-- | src/mongo/db/process_health/test_health_observer.cpp | 62 | ||||
-rw-r--r-- | src/mongo/db/process_health/test_health_observer.h | 52 |
13 files changed, 243 insertions, 21 deletions
diff --git a/jstests/sharding/health_monitor/server_status_health.js b/jstests/sharding/health_monitor/server_status_health.js index 5b973ebda82..b224420b2f5 100644 --- a/jstests/sharding/health_monitor/server_status_health.js +++ b/jstests/sharding/health_monitor/server_status_health.js @@ -1,15 +1,74 @@ - - +/** + * Tests server status has correct fault/facet information. + */ (function() { 'use strict'; -var st = new ShardingTest({ - mongos: 1, +const params = { + setParameter: { + healthMonitoring: tojson({test: "off", ldap: "off", dns: "off"}), + featureFlagHealthMonitoring: true + } +}; + +let st = new ShardingTest({ + mongos: [params], shards: 1, }); -const result = assert.commandWorked(st.s0.adminCommand({serverStatus: 1})).health; +// Check server status after initial health check is complete. +let result = assert.commandWorked(st.s0.adminCommand({serverStatus: 1})).health; +print("---RESULT 1---"); +print(tojson(result)); +assert.eq(result.state, "Ok"); +assert(result.enteredStateAtTime); + +assert.commandWorked(st.s0.adminCommand( + {"setParameter": 1, healthMonitoring: {test: "critical", dns: 'off', ldap: 'off'}})); + +// Check server status after test health observer enabled and failpoint returns fault. +assert.commandWorked(st.s0.adminCommand({ + "configureFailPoint": 'testHealthObserver', + "data": {"code": "InternalError", "msg": "test msg"}, + "mode": "alwaysOn" +})); + +assert.soon(() => { + result = assert.commandWorked(st.s0.adminCommand({serverStatus: 1})).health; + return result.state == "TransientFault"; +}); + +print("---RESULT 2---"); +print(tojson(result)); +assert(result.enteredStateAtTime); +assert(result.faultInformation); + +const faultInformation = result.faultInformation; +assert.eq(faultInformation.severity, 1); +assert(faultInformation.duration); +assert(faultInformation.facets); +assert.eq(faultInformation.numFacets, 1); +assert(faultInformation.facets.kTestObserver); + +const kTestObserverFacet = faultInformation.facets.kTestObserver; +assert.eq(kTestObserverFacet.severity, faultInformation.severity); +assert.eq(kTestObserverFacet.duration, faultInformation.duration); +assert(kTestObserverFacet.description.includes("InternalError: test msg")); + +// Check server status after test health observer enabled and failpoint returns success. +assert.commandWorked( + st.s0.adminCommand({"configureFailPoint": 'testHealthObserver', "mode": "alwaysOn"})); + +assert.soon(() => { + result = assert.commandWorked(st.s0.adminCommand({serverStatus: 1})).health; + return result.state == "Ok"; +}); + +result = assert.commandWorked(st.s0.adminCommand({serverStatus: 1})).health; +print("---RESULT 3---"); print(tojson(result)); +assert.eq(result.state, "Ok"); +assert(result.enteredStateAtTime); -assert(result.state == "StartupCheck" || result.state == "Ok"); +st.stop(); })(); diff --git a/src/mongo/db/process_health/SConscript b/src/mongo/db/process_health/SConscript index 6e3942a2068..c2f708db212 100644 --- a/src/mongo/db/process_health/SConscript +++ b/src/mongo/db/process_health/SConscript @@ -17,6 +17,7 @@ env.Library( 'health_observer_base.cpp', 'health_observer_registration.cpp', 'progress_monitor.cpp', + 'test_health_observer.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/base', diff --git a/src/mongo/db/process_health/fault_facet.h b/src/mongo/db/process_health/fault_facet.h index bab42ef4d5e..1927921b254 100644 --- a/src/mongo/db/process_health/fault_facet.h +++ b/src/mongo/db/process_health/fault_facet.h @@ -53,10 +53,20 @@ public: */ virtual HealthCheckStatus getStatus() const = 0; + virtual Milliseconds getDuration() const = 0; + /** * Change the state of this Facet with health check result. */ virtual void update(HealthCheckStatus status) = 0; + + virtual void appendDescription(BSONObjBuilder* builder) const = 0; + + BSONObj toBSON() const { + BSONObjBuilder builder; + appendDescription(&builder); + return builder.obj(); + } }; using FaultFacetPtr = std::shared_ptr<FaultFacet>; diff --git a/src/mongo/db/process_health/fault_facet_impl.cpp b/src/mongo/db/process_health/fault_facet_impl.cpp index f2975b6b559..62d6c4585ba 100644 --- a/src/mongo/db/process_health/fault_facet_impl.cpp +++ b/src/mongo/db/process_health/fault_facet_impl.cpp @@ -48,11 +48,22 @@ HealthCheckStatus FaultFacetImpl::getStatus() const { return HealthCheckStatus(getType(), _severity, _description); } +Milliseconds FaultFacetImpl::getDuration() const { + return std::max(Milliseconds(0), Milliseconds(_clockSource->now() - _startTime)); +} + void FaultFacetImpl::update(HealthCheckStatus status) { auto lk = stdx::lock_guard(_mutex); _severity = status.getSeverity(); _description = status.getShortDescription().toString(); } +void FaultFacetImpl::appendDescription(BSONObjBuilder* builder) const { + builder->append("type", FaultFacetType_serializer(getType())); + builder->append("severity", _severity); + builder->append("duration", getDuration().toBSON()); + builder->append("description", _description); +}; + } // namespace process_health } // namespace mongo diff --git a/src/mongo/db/process_health/fault_facet_impl.h b/src/mongo/db/process_health/fault_facet_impl.h index 76279052470..aa48b2343f5 100644 --- a/src/mongo/db/process_health/fault_facet_impl.h +++ b/src/mongo/db/process_health/fault_facet_impl.h @@ -50,8 +50,12 @@ public: HealthCheckStatus getStatus() const override; + Milliseconds getDuration() const override; + void update(HealthCheckStatus status) override; + void appendDescription(BSONObjBuilder* builder) const override; + private: const FaultFacetType _type; ClockSource* const _clockSource; diff --git a/src/mongo/db/process_health/fault_facet_mock.h b/src/mongo/db/process_health/fault_facet_mock.h index 6035a5c89df..29a3c3a4184 100644 --- a/src/mongo/db/process_health/fault_facet_mock.h +++ b/src/mongo/db/process_health/fault_facet_mock.h @@ -67,6 +67,15 @@ public: return healthCheckStatus; } + Milliseconds getDuration() const override { + return std::max(Milliseconds(0), Milliseconds(_clockSource->now() - _startTime)); + } + + void appendDescription(BSONObjBuilder* builder) const override { + builder->append("type", FaultFacetType_serializer(getType())); + builder->append("duration", getDuration().toBSON()); + }; + void update(HealthCheckStatus status) override { MONGO_UNREACHABLE; // Don't use this in mock. } diff --git a/src/mongo/db/process_health/fault_impl.cpp b/src/mongo/db/process_health/fault_impl.cpp index 77ca9a82cc5..0fe95df330b 100644 --- a/src/mongo/db/process_health/fault_impl.cpp +++ b/src/mongo/db/process_health/fault_impl.cpp @@ -120,8 +120,13 @@ void FaultImpl::appendDescription(BSONObjBuilder* builder) const { builder->append("id", getId().toBSON()); builder->append("severity", getSeverity()); builder->append("duration", getDuration().toBSON()); - // TODO (SERVER-61914): Add fault facet details - builder->append("facets", static_cast<int>(_facets.size())); + BSONObjBuilder facetsBuilder; + for (auto& facet : _facets) { + facetsBuilder.append(FaultFacetType_serializer(facet->getType()), facet->toBSON()); + } + + builder->append("facets", facetsBuilder.obj()); + builder->append("numFacets", static_cast<int>(_facets.size())); } } // namespace process_health diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp index 52a83911f5c..927acc06fe9 100644 --- a/src/mongo/db/process_health/fault_manager.cpp +++ b/src/mongo/db/process_health/fault_manager.cpp @@ -322,6 +322,11 @@ void FaultManager::schedulePeriodicHealthCheckThread() { return; } + if (getActiveHealthObservers().size() == 0) { + LOGV2_DEBUG(5936511, 2, "No active health observers are configured."); + setState(FaultState::kOk, HealthCheckStatus(FaultFacetType::kSystem)); + } + auto observers = getHealthObservers(); for (auto observer : observers) { LOGV2_DEBUG( @@ -330,9 +335,7 @@ void FaultManager::schedulePeriodicHealthCheckThread() { // TODO (SERVER-59368): The system should properly handle a health checker being turned // on/off auto token = _managerShuttingDownCancellationSource.token(); - if (_config->isHealthObserverEnabled(observer->getType())) { - healthCheck(observer, token); - } + healthCheck(observer, token); } } @@ -376,13 +379,7 @@ SharedSemiFuture<void> FaultManager::startPeriodicHealthChecks() { invariant(state() == FaultState::kStartupCheck); _init(); - - if (getActiveHealthObservers().size() == 0) { - LOGV2_DEBUG(5936511, 2, "No active health observers are configured."); - setState(FaultState::kOk, HealthCheckStatus(FaultFacetType::kSystem)); - } else { - schedulePeriodicHealthCheckThread(); - } + schedulePeriodicHealthCheckThread(); return _initialHealthCheckCompletedPromise.getFuture(); } @@ -466,6 +463,8 @@ void FaultManager::healthCheck(HealthObserver* observer, CancellationToken token return healthCheckStatus; }; + _healthCheckContexts.insert({observer->getType(), HealthCheckContext(nullptr, boost::none)}); + // If health observer is disabled, then do nothing and schedule another run (health observer may // become enabled). // TODO (SERVER-59368): The system should properly handle a health checker being turned on/off @@ -474,7 +473,6 @@ void FaultManager::healthCheck(HealthObserver* observer, CancellationToken token return; } - _healthCheckContexts.insert({observer->getType(), HealthCheckContext(nullptr, boost::none)}); // Run asynchronous health check. When complete, check for state transition (and perform if // necessary). Then schedule the next run. auto healthCheckFuture = observer->periodicCheck(*this, _taskExecutor, token) diff --git a/src/mongo/db/process_health/fault_manager_config.h b/src/mongo/db/process_health/fault_manager_config.h index ce17c74b7a8..92834acafad 100644 --- a/src/mongo/db/process_health/fault_manager_config.h +++ b/src/mongo/db/process_health/fault_manager_config.h @@ -63,8 +63,13 @@ std::ostream& operator<<(std::ostream& os, const FaultState& state); /** * Types of health observers available. */ -enum class FaultFacetType { kSystem, kMock1, kMock2, kLdap, kDns }; +enum class FaultFacetType { kSystem, kMock1, kMock2, kTestObserver, kLdap, kDns }; +static const StringData FaultFacetTypeStrings[] = { + "kSystem", "kMock1", "kMock2", "kTestObserver", "kLdap", "kDns"}; +static const StringData FaultFacetType_serializer(const FaultFacetType value) { + return FaultFacetTypeStrings[static_cast<int>(value)]; +} class FaultManagerConfig { public: @@ -84,6 +89,8 @@ public: case FaultFacetType::kDns: return intensities->_data->getDns(); // TODO: update this function with additional fault facets when they are added + case FaultFacetType::kTestObserver: + return intensities->_data->getTest(); case FaultFacetType::kSystem: return HealthObserverIntensityEnum::kCritical; case FaultFacetType::kMock1: diff --git a/src/mongo/db/process_health/health_check_status.h b/src/mongo/db/process_health/health_check_status.h index da5284db13c..957432284eb 100644 --- a/src/mongo/db/process_health/health_check_status.h +++ b/src/mongo/db/process_health/health_check_status.h @@ -126,7 +126,7 @@ inline StringBuilder& operator<<(StringBuilder& s, const FaultFacetType& type) { case FaultFacetType::kMock2: return s << "kMock2"_sd; default: - return s << "Uknown"_sd; + return s << "Unknown"_sd; } } diff --git a/src/mongo/db/process_health/health_monitoring_server_parameters.idl b/src/mongo/db/process_health/health_monitoring_server_parameters.idl index 1222242a812..42adb48c2d3 100644 --- a/src/mongo/db/process_health/health_monitoring_server_parameters.idl +++ b/src/mongo/db/process_health/health_monitoring_server_parameters.idl @@ -55,6 +55,10 @@ structs: description: "Intensity of LDAP fault facet" type: HealthObserverIntensity default: kNonCritical + test: + description: "Intensity of test fault facet" + type: HealthObserverIntensity + default: kOff server_parameters: healthMonitoring: diff --git a/src/mongo/db/process_health/test_health_observer.cpp b/src/mongo/db/process_health/test_health_observer.cpp new file mode 100644 index 00000000000..eeafc987012 --- /dev/null +++ b/src/mongo/db/process_health/test_health_observer.cpp @@ -0,0 +1,62 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/process_health/test_health_observer.h" +#include "mongo/db/process_health/health_observer_registration.h" + +namespace mongo { +namespace process_health { +MONGO_FAIL_POINT_DEFINE(hangTestHealthObserver); +MONGO_FAIL_POINT_DEFINE(testHealthObserver); +Future<HealthCheckStatus> TestHealthObserver::periodicCheckImpl( + PeriodicHealthCheckContext&& periodicCheckContext) { + hangTestHealthObserver.pauseWhileSet(); + + auto result = Future<HealthCheckStatus>::makeReady(makeHealthyStatus()); + + testHealthObserver.executeIf( + [this, &result](const BSONObj& data) { + auto code = data["code"].checkAndGetStringData(); + auto msg = data["msg"].checkAndGetStringData(); + result = Future<HealthCheckStatus>::makeReady(makeSimpleFailedStatus( + 1.0, {Status(ErrorCodes::fromString(code.toString()), msg.toString())})); + }, + [&](const BSONObj& data) { return !data.isEmpty(); }); + + return result; +} + +namespace { +MONGO_INITIALIZER(TestHealthObserver)(InitializerContext*) { + HealthObserverRegistration::registerObserverFactory( + [](ServiceContext* svcCtx) { return std::make_unique<TestHealthObserver>(svcCtx); }); +} +} // namespace +} // namespace process_health +} // namespace mongo diff --git a/src/mongo/db/process_health/test_health_observer.h b/src/mongo/db/process_health/test_health_observer.h new file mode 100644 index 00000000000..42670ee79a9 --- /dev/null +++ b/src/mongo/db/process_health/test_health_observer.h @@ -0,0 +1,52 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ +#pragma once + +#include "mongo/db/process_health/health_observer_base.h" + +namespace mongo { +namespace process_health { +class TestHealthObserver : public HealthObserverBase { +public: + FaultFacetType getType() const override { + return FaultFacetType::kTestObserver; + } + + Milliseconds healthCheckJitter() const override { + return Milliseconds(0); + } + + TestHealthObserver(ServiceContext* svcCtx) : HealthObserverBase(svcCtx){}; + +protected: + Future<HealthCheckStatus> periodicCheckImpl( + PeriodicHealthCheckContext&& periodicCheckContext) override; +}; +} // namespace process_health +} // namespace mongo |