summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKshitij Gupta <kshitij.gupta@mongodb.com>2021-12-09 14:29:39 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-12-28 23:18:25 +0000
commita00bb2e633ccf18be6e74c4345b6c7ad6f838c3d (patch)
tree8e4a2ed7c1bc5c95ace7811b07d8dfeb95481e50
parent7f2a575a1a1fb90197dac5c125bfc03348355b7f (diff)
downloadmongo-a00bb2e633ccf18be6e74c4345b6c7ad6f838c3d.tar.gz
SERVER-61914: add fault facet details to FaultImpl::toBSON
SERVER-61958: Make a mocked health observer class that takes in failpoint data
-rw-r--r--jstests/sharding/health_monitor/server_status_health.js69
-rw-r--r--src/mongo/db/process_health/SConscript1
-rw-r--r--src/mongo/db/process_health/fault_facet.h10
-rw-r--r--src/mongo/db/process_health/fault_facet_impl.cpp11
-rw-r--r--src/mongo/db/process_health/fault_facet_impl.h4
-rw-r--r--src/mongo/db/process_health/fault_facet_mock.h9
-rw-r--r--src/mongo/db/process_health/fault_impl.cpp9
-rw-r--r--src/mongo/db/process_health/fault_manager.cpp20
-rw-r--r--src/mongo/db/process_health/fault_manager_config.h9
-rw-r--r--src/mongo/db/process_health/health_check_status.h2
-rw-r--r--src/mongo/db/process_health/health_monitoring_server_parameters.idl4
-rw-r--r--src/mongo/db/process_health/test_health_observer.cpp65
-rw-r--r--src/mongo/db/process_health/test_health_observer.h52
13 files changed, 244 insertions, 21 deletions
diff --git a/jstests/sharding/health_monitor/server_status_health.js b/jstests/sharding/health_monitor/server_status_health.js
index a0d24c704fd..76244deddb7 100644
--- a/jstests/sharding/health_monitor/server_status_health.js
+++ b/jstests/sharding/health_monitor/server_status_health.js
@@ -1,18 +1,75 @@
-
-
+/**
+ * Tests that server status has the correct fault/facet information.
+ */
(function() {
'use strict';
-var st = new ShardingTest({
- mongos: 1,
+const params = {
+ setParameter: {
+ healthMonitoring: tojson({test: "off", ldap: "off", dns: "off"}),
+ featureFlagHealthMonitoring: true
+ }
+};
+
+let st = new ShardingTest({
+ mongos: [params],
shards: 1,
other: {mongosOptions: {setParameter: {featureFlagHealthMonitoring: true}}}
});
-const result = assert.commandWorked(st.s0.adminCommand({serverStatus: 1})).health;
+// Check server status after the initial health check is complete.
+let result = assert.commandWorked(st.s0.adminCommand({serverStatus: 1})).health;
+print("---RESULT 1---");
print(tojson(result));
+assert.eq(result.state, "Ok");
+assert(result.enteredStateAtTime);
+
+assert.commandWorked(st.s0.adminCommand(
+ {"setParameter": 1, healthMonitoring: {test: "critical", dns: 'off', ldap: 'off'}}));
-assert(result.state == "StartupCheck" || result.state == "Ok");
+// Check server status after the test health observer is enabled and the failpoint returns a fault.
+assert.commandWorked(st.s0.adminCommand({
+ "configureFailPoint": 'testHealthObserver',
+ "data": {"code": "InternalError", "msg": "test msg"},
+ "mode": "alwaysOn"
+}));
+
+assert.soon(() => {
+ result = assert.commandWorked(st.s0.adminCommand({serverStatus: 1})).health;
+ return result.state == "TransientFault";
+});
+
+print("---RESULT 2---");
+print(tojson(result));
+assert(result.enteredStateAtTime);
+assert(result.faultInformation);
+
+const faultInformation = result.faultInformation;
+assert.eq(faultInformation.severity, 1);
+assert(faultInformation.duration);
+assert(faultInformation.facets);
+assert.eq(faultInformation.numFacets, 1);
+assert(faultInformation.facets.kTestObserver);
+
+const kTestObserverFacet = faultInformation.facets.kTestObserver;
+assert.eq(kTestObserverFacet.severity, faultInformation.severity);
+assert.eq(kTestObserverFacet.duration, faultInformation.duration);
+assert(kTestObserverFacet.description.includes("InternalError: test msg"));
+
+// Check server status after the test health observer is enabled and the failpoint returns success.
+assert.commandWorked(
+ st.s0.adminCommand({"configureFailPoint": 'testHealthObserver', "mode": "alwaysOn"}));
+
+assert.soon(() => {
+ result = assert.commandWorked(st.s0.adminCommand({serverStatus: 1})).health;
+ return result.state == "Ok";
+});
+
+result = assert.commandWorked(st.s0.adminCommand({serverStatus: 1})).health;
+print("---RESULT 3---");
+print(tojson(result));
+assert.eq(result.state, "Ok");
+assert(result.enteredStateAtTime);
st.stop();
})();
diff --git a/src/mongo/db/process_health/SConscript b/src/mongo/db/process_health/SConscript
index 82bdded2855..ce02fc06253 100644
--- a/src/mongo/db/process_health/SConscript
+++ b/src/mongo/db/process_health/SConscript
@@ -17,6 +17,7 @@ env.Library(
'health_observer_base.cpp',
'health_observer_registration.cpp',
'progress_monitor.cpp',
+ 'test_health_observer.cpp',
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
diff --git a/src/mongo/db/process_health/fault_facet.h b/src/mongo/db/process_health/fault_facet.h
index bab42ef4d5e..1927921b254 100644
--- a/src/mongo/db/process_health/fault_facet.h
+++ b/src/mongo/db/process_health/fault_facet.h
@@ -53,10 +53,20 @@ public:
*/
virtual HealthCheckStatus getStatus() const = 0;
+ virtual Milliseconds getDuration() const = 0;
+
/**
* Change the state of this Facet with health check result.
*/
virtual void update(HealthCheckStatus status) = 0;
+
+ virtual void appendDescription(BSONObjBuilder* builder) const = 0;
+
+ BSONObj toBSON() const {
+ BSONObjBuilder builder;
+ appendDescription(&builder);
+ return builder.obj();
+ }
};
using FaultFacetPtr = std::shared_ptr<FaultFacet>;
diff --git a/src/mongo/db/process_health/fault_facet_impl.cpp b/src/mongo/db/process_health/fault_facet_impl.cpp
index f2975b6b559..62d6c4585ba 100644
--- a/src/mongo/db/process_health/fault_facet_impl.cpp
+++ b/src/mongo/db/process_health/fault_facet_impl.cpp
@@ -48,11 +48,22 @@ HealthCheckStatus FaultFacetImpl::getStatus() const {
return HealthCheckStatus(getType(), _severity, _description);
}
+Milliseconds FaultFacetImpl::getDuration() const {
+ return std::max(Milliseconds(0), Milliseconds(_clockSource->now() - _startTime));
+}
+
void FaultFacetImpl::update(HealthCheckStatus status) {
auto lk = stdx::lock_guard(_mutex);
_severity = status.getSeverity();
_description = status.getShortDescription().toString();
}
+void FaultFacetImpl::appendDescription(BSONObjBuilder* builder) const {
+ builder->append("type", FaultFacetType_serializer(getType()));
+ builder->append("severity", _severity);
+ builder->append("duration", getDuration().toBSON());
+ builder->append("description", _description);
+};
+
} // namespace process_health
} // namespace mongo
diff --git a/src/mongo/db/process_health/fault_facet_impl.h b/src/mongo/db/process_health/fault_facet_impl.h
index 76279052470..aa48b2343f5 100644
--- a/src/mongo/db/process_health/fault_facet_impl.h
+++ b/src/mongo/db/process_health/fault_facet_impl.h
@@ -50,8 +50,12 @@ public:
HealthCheckStatus getStatus() const override;
+ Milliseconds getDuration() const override;
+
void update(HealthCheckStatus status) override;
+ void appendDescription(BSONObjBuilder* builder) const override;
+
private:
const FaultFacetType _type;
ClockSource* const _clockSource;
diff --git a/src/mongo/db/process_health/fault_facet_mock.h b/src/mongo/db/process_health/fault_facet_mock.h
index 6035a5c89df..29a3c3a4184 100644
--- a/src/mongo/db/process_health/fault_facet_mock.h
+++ b/src/mongo/db/process_health/fault_facet_mock.h
@@ -67,6 +67,15 @@ public:
return healthCheckStatus;
}
+ Milliseconds getDuration() const override {
+ return std::max(Milliseconds(0), Milliseconds(_clockSource->now() - _startTime));
+ }
+
+ void appendDescription(BSONObjBuilder* builder) const override {
+ builder->append("type", FaultFacetType_serializer(getType()));
+ builder->append("duration", getDuration().toBSON());
+ };
+
void update(HealthCheckStatus status) override {
MONGO_UNREACHABLE; // Don't use this in mock.
}
diff --git a/src/mongo/db/process_health/fault_impl.cpp b/src/mongo/db/process_health/fault_impl.cpp
index 77ca9a82cc5..0fe95df330b 100644
--- a/src/mongo/db/process_health/fault_impl.cpp
+++ b/src/mongo/db/process_health/fault_impl.cpp
@@ -120,8 +120,13 @@ void FaultImpl::appendDescription(BSONObjBuilder* builder) const {
builder->append("id", getId().toBSON());
builder->append("severity", getSeverity());
builder->append("duration", getDuration().toBSON());
- // TODO (SERVER-61914): Add fault facet details
- builder->append("facets", static_cast<int>(_facets.size()));
+ BSONObjBuilder facetsBuilder;
+ for (auto& facet : _facets) {
+ facetsBuilder.append(FaultFacetType_serializer(facet->getType()), facet->toBSON());
+ }
+
+ builder->append("facets", facetsBuilder.obj());
+ builder->append("numFacets", static_cast<int>(_facets.size()));
}
} // namespace process_health
diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp
index 2dadbfdd2b1..10c43033689 100644
--- a/src/mongo/db/process_health/fault_manager.cpp
+++ b/src/mongo/db/process_health/fault_manager.cpp
@@ -327,6 +327,11 @@ void FaultManager::schedulePeriodicHealthCheckThread() {
return;
}
+ if (getActiveHealthObservers().size() == 0) {
+ LOGV2_DEBUG(5936511, 2, "No active health observers are configured.");
+ setState(FaultState::kOk, HealthCheckStatus(FaultFacetType::kSystem));
+ }
+
auto observers = getHealthObservers();
for (auto observer : observers) {
LOGV2_DEBUG(
@@ -334,9 +339,7 @@ void FaultManager::schedulePeriodicHealthCheckThread() {
// TODO (SERVER-59368): The system should properly handle a health checker being turned
// on/off
- if (_config->isHealthObserverEnabled(observer->getType())) {
- healthCheck(observer, _managerShuttingDownCancellation);
- }
+ healthCheck(observer, _managerShuttingDownCancellation);
}
}
@@ -379,13 +382,7 @@ SharedSemiFuture<void> FaultManager::startPeriodicHealthChecks() {
invariant(state() == FaultState::kStartupCheck);
_init();
-
- if (getActiveHealthObservers().size() == 0) {
- LOGV2_DEBUG(5936511, 2, "No active health observers are configured.");
- setState(FaultState::kOk, HealthCheckStatus(FaultFacetType::kSystem));
- } else {
- schedulePeriodicHealthCheckThread();
- }
+ schedulePeriodicHealthCheckThread();
return _initialHealthCheckCompletedPromise.getFuture();
}
@@ -472,6 +469,8 @@ void FaultManager::healthCheck(HealthObserver* observer, std::shared_ptr<AtomicW
return healthCheckStatus;
};
+ _healthCheckContexts.insert({observer->getType(), HealthCheckContext(nullptr, boost::none)});
+
// If health observer is disabled, then do nothing and schedule another run (health observer may
// become enabled).
// TODO (SERVER-59368): The system should properly handle a health checker being turned on/off
@@ -480,7 +479,6 @@ void FaultManager::healthCheck(HealthObserver* observer, std::shared_ptr<AtomicW
return;
}
- _healthCheckContexts.insert({observer->getType(), HealthCheckContext(nullptr, boost::none)});
// Run asynchronous health check. When complete, check for state transition (and perform if
// necessary). Then schedule the next run.
auto healthCheckFuture = observer->periodicCheck(*this, _taskExecutor, token)
diff --git a/src/mongo/db/process_health/fault_manager_config.h b/src/mongo/db/process_health/fault_manager_config.h
index ce17c74b7a8..92834acafad 100644
--- a/src/mongo/db/process_health/fault_manager_config.h
+++ b/src/mongo/db/process_health/fault_manager_config.h
@@ -63,8 +63,13 @@ std::ostream& operator<<(std::ostream& os, const FaultState& state);
/**
* Types of health observers available.
*/
-enum class FaultFacetType { kSystem, kMock1, kMock2, kLdap, kDns };
+enum class FaultFacetType { kSystem, kMock1, kMock2, kTestObserver, kLdap, kDns };
+static const StringData FaultFacetTypeStrings[] = {
+ "kSystem", "kMock1", "kMock2", "kTestObserver", "kLdap", "kDns"};
+static const StringData FaultFacetType_serializer(const FaultFacetType value) {
+ return FaultFacetTypeStrings[static_cast<int>(value)];
+}
class FaultManagerConfig {
public:
@@ -84,6 +89,8 @@ public:
case FaultFacetType::kDns:
return intensities->_data->getDns();
// TODO: update this function with additional fault facets when they are added
+ case FaultFacetType::kTestObserver:
+ return intensities->_data->getTest();
case FaultFacetType::kSystem:
return HealthObserverIntensityEnum::kCritical;
case FaultFacetType::kMock1:
diff --git a/src/mongo/db/process_health/health_check_status.h b/src/mongo/db/process_health/health_check_status.h
index da5284db13c..957432284eb 100644
--- a/src/mongo/db/process_health/health_check_status.h
+++ b/src/mongo/db/process_health/health_check_status.h
@@ -126,7 +126,7 @@ inline StringBuilder& operator<<(StringBuilder& s, const FaultFacetType& type) {
case FaultFacetType::kMock2:
return s << "kMock2"_sd;
default:
- return s << "Uknown"_sd;
+ return s << "Unknown"_sd;
}
}
diff --git a/src/mongo/db/process_health/health_monitoring_server_parameters.idl b/src/mongo/db/process_health/health_monitoring_server_parameters.idl
index 1222242a812..42adb48c2d3 100644
--- a/src/mongo/db/process_health/health_monitoring_server_parameters.idl
+++ b/src/mongo/db/process_health/health_monitoring_server_parameters.idl
@@ -55,6 +55,10 @@ structs:
description: "Intensity of LDAP fault facet"
type: HealthObserverIntensity
default: kNonCritical
+ test:
+ description: "Intensity of test fault facet"
+ type: HealthObserverIntensity
+ default: kOff
server_parameters:
healthMonitoring:
diff --git a/src/mongo/db/process_health/test_health_observer.cpp b/src/mongo/db/process_health/test_health_observer.cpp
new file mode 100644
index 00000000000..2251f9590d0
--- /dev/null
+++ b/src/mongo/db/process_health/test_health_observer.cpp
@@ -0,0 +1,65 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/base/init.h"
+#include "mongo/db/process_health/test_health_observer.h"
+#include "mongo/db/process_health/health_observer_registration.h"
+#include "mongo/util/fail_point.h"
+
+namespace mongo {
+namespace process_health {
+MONGO_FAIL_POINT_DEFINE(hangTestHealthObserver);
+MONGO_FAIL_POINT_DEFINE(testHealthObserver);
+Future<HealthCheckStatus> TestHealthObserver::periodicCheckImpl(
+ PeriodicHealthCheckContext&& periodicCheckContext) {
+ hangTestHealthObserver.pauseWhileSet();
+
+ auto result = Future<HealthCheckStatus>::makeReady(makeHealthyStatus());
+
+ testHealthObserver.executeIf(
+ [this, &result](const BSONObj& data) {
+ auto code = data["code"].checkAndGetStringData();
+ auto msg = data["msg"].checkAndGetStringData();
+ result = Future<HealthCheckStatus>::makeReady(makeSimpleFailedStatus(
+ 1.0, {Status(ErrorCodes::fromString(code.toString()), msg.toString())}));
+ },
+ [&](const BSONObj& data) { return !data.isEmpty(); });
+
+ return result;
+}
+
+namespace {
+MONGO_INITIALIZER(TestHealthObserver)(InitializerContext*) {
+ HealthObserverRegistration::registerObserverFactory(
+ [](ServiceContext* svcCtx) { return std::make_unique<TestHealthObserver>(svcCtx); });
+ return Status::OK();
+}
+} // namespace
+} // namespace process_health
+} // namespace mongo
diff --git a/src/mongo/db/process_health/test_health_observer.h b/src/mongo/db/process_health/test_health_observer.h
new file mode 100644
index 00000000000..42670ee79a9
--- /dev/null
+++ b/src/mongo/db/process_health/test_health_observer.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+#pragma once
+
+#include "mongo/db/process_health/health_observer_base.h"
+
+namespace mongo {
+namespace process_health {
+class TestHealthObserver : public HealthObserverBase {
+public:
+ FaultFacetType getType() const override {
+ return FaultFacetType::kTestObserver;
+ }
+
+ Milliseconds healthCheckJitter() const override {
+ return Milliseconds(0);
+ }
+
+ TestHealthObserver(ServiceContext* svcCtx) : HealthObserverBase(svcCtx){};
+
+protected:
+ Future<HealthCheckStatus> periodicCheckImpl(
+ PeriodicHealthCheckContext&& periodicCheckContext) override;
+};
+} // namespace process_health
+} // namespace mongo