summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Shuvalov <andrew.shuvalov@mongodb.com>2021-10-21 01:50:32 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-12-28 21:25:36 +0000
commit3029334d8ae2db30136bc933ba9bc6630ad7524c (patch)
tree0b1f67f0e17ec85e79df40d33db0b7a96a341968
parentc421bb34fdbd725dcea3eabc7cfd1693d72feeed (diff)
downloadmongo-3029334d8ae2db30136bc933ba9bc6630ad7524c.tar.gz
SERVER-60587 Implement FaultFacet and make necessary changes in HealthCheckStatus
-rw-r--r--src/mongo/db/process_health/SConscript1
-rw-r--r--src/mongo/db/process_health/fault_facet.h8
-rw-r--r--src/mongo/db/process_health/fault_facet_impl.cpp58
-rw-r--r--src/mongo/db/process_health/fault_facet_impl.h68
-rw-r--r--src/mongo/db/process_health/fault_facet_mock.h23
-rw-r--r--src/mongo/db/process_health/fault_facet_test.cpp58
-rw-r--r--src/mongo/db/process_health/fault_manager.cpp27
-rw-r--r--src/mongo/db/process_health/health_check_status.h55
-rw-r--r--src/mongo/db/process_health/health_observer_base.cpp22
-rw-r--r--src/mongo/db/process_health/health_observer_base.h6
-rw-r--r--src/mongo/db/process_health/health_observer_mock.h3
-rw-r--r--src/mongo/db/process_health/health_observer_test.cpp7
12 files changed, 247 insertions, 89 deletions
diff --git a/src/mongo/db/process_health/SConscript b/src/mongo/db/process_health/SConscript
index be2c784e769..600308e4aa8 100644
--- a/src/mongo/db/process_health/SConscript
+++ b/src/mongo/db/process_health/SConscript
@@ -7,6 +7,7 @@ env = env.Clone()
env.Library(
target='fault_manager',
source=[
+ 'fault_facet_impl.cpp',
'fault_impl.cpp',
'fault_manager.cpp',
'health_observer_base.cpp',
diff --git a/src/mongo/db/process_health/fault_facet.h b/src/mongo/db/process_health/fault_facet.h
index 1a46f2686bf..ad7fb6673d7 100644
--- a/src/mongo/db/process_health/fault_facet.h
+++ b/src/mongo/db/process_health/fault_facet.h
@@ -52,10 +52,10 @@ public:
*/
virtual HealthCheckStatus getStatus() const = 0;
- // This interface contains no methods to change the state of the Facet
- // because this update is handled by the particular implementation of
- // the HealthObserver interface, updating its matching Facet.
- // Please do not add any non-const methods here.
+ /**
+ * Change the state of this Facet with health check result.
+ */
+ virtual void update(HealthCheckStatus status) = 0;
};
using FaultFacetPtr = std::shared_ptr<FaultFacet>;
diff --git a/src/mongo/db/process_health/fault_facet_impl.cpp b/src/mongo/db/process_health/fault_facet_impl.cpp
new file mode 100644
index 00000000000..f2975b6b559
--- /dev/null
+++ b/src/mongo/db/process_health/fault_facet_impl.cpp
@@ -0,0 +1,58 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/process_health/fault_facet_impl.h"
+
+namespace mongo {
+namespace process_health {
+
+FaultFacetImpl::FaultFacetImpl(FaultFacetType type,
+ ClockSource* clockSource,
+ HealthCheckStatus status)
+ : _type(type), _clockSource(clockSource) {
+ update(status);
+}
+
+FaultFacetType FaultFacetImpl::getType() const {
+ return _type;
+}
+
+HealthCheckStatus FaultFacetImpl::getStatus() const {
+ auto lk = stdx::lock_guard(_mutex);
+ return HealthCheckStatus(getType(), _severity, _description);
+}
+
+void FaultFacetImpl::update(HealthCheckStatus status) {
+ auto lk = stdx::lock_guard(_mutex);
+ _severity = status.getSeverity();
+ _description = status.getShortDescription().toString();
+}
+
+} // namespace process_health
+} // namespace mongo
diff --git a/src/mongo/db/process_health/fault_facet_impl.h b/src/mongo/db/process_health/fault_facet_impl.h
new file mode 100644
index 00000000000..76279052470
--- /dev/null
+++ b/src/mongo/db/process_health/fault_facet_impl.h
@@ -0,0 +1,68 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+#pragma once
+
+#include "mongo/db/process_health/fault_facet.h"
+
+#include "mongo/db/process_health/health_check_status.h"
+#include "mongo/db/process_health/health_observer.h"
+#include "mongo/util/clock_source.h"
+
+namespace mongo {
+namespace process_health {
+
+// Internal representation of the Facet.
+class FaultFacetImpl : public FaultFacet {
+public:
+ FaultFacetImpl(FaultFacetType type, ClockSource* clockSource, HealthCheckStatus status);
+
+ ~FaultFacetImpl() override = default;
+
+ // Public interface methods.
+
+ FaultFacetType getType() const override;
+
+ HealthCheckStatus getStatus() const override;
+
+ void update(HealthCheckStatus status) override;
+
+private:
+ const FaultFacetType _type;
+ ClockSource* const _clockSource;
+
+ const Date_t _startTime = _clockSource->now();
+
+ mutable Mutex _mutex =
+ MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(1), "FaultFacetImpl::_mutex");
+ double _severity = 0;
+ std::string _description;
+};
+
+} // namespace process_health
+} // namespace mongo
diff --git a/src/mongo/db/process_health/fault_facet_mock.h b/src/mongo/db/process_health/fault_facet_mock.h
index c607f50939c..6035a5c89df 100644
--- a/src/mongo/db/process_health/fault_facet_mock.h
+++ b/src/mongo/db/process_health/fault_facet_mock.h
@@ -59,34 +59,23 @@ public:
}
HealthCheckStatus getStatus() const override {
- double severity = _callback();
+ const double severity = _callback();
- auto lk = stdx::lock_guard(_mutex);
- if (HealthCheckStatus::isActiveFault(severity)) {
- if (_activeFaultTime == Date_t::max()) {
- _activeFaultTime = _clockSource->now();
- }
- } else {
- _activeFaultTime = Date_t::max();
- }
-
- auto now = _clockSource->now();
- HealthCheckStatus healthCheckStatus(
- _mockType, severity, "Mock facet", now - _activeFaultTime, now - _startTime);
+ auto healthCheckStatus = HealthCheckStatus(_mockType, severity, "Mock facet");
LOGV2(5956702, "Mock fault facet status", "status"_attr = healthCheckStatus);
return healthCheckStatus;
}
+ void update(HealthCheckStatus status) override {
+ MONGO_UNREACHABLE; // Don't use this in mock.
+ }
+
private:
const FaultFacetType _mockType;
ClockSource* const _clockSource;
const Date_t _startTime = _clockSource->now();
const MockCallback _callback;
-
- mutable Mutex _mutex;
- // We also update the _activeFaultTime inside the const getStatus().
- mutable Date_t _activeFaultTime = Date_t::max();
};
} // namespace process_health
diff --git a/src/mongo/db/process_health/fault_facet_test.cpp b/src/mongo/db/process_health/fault_facet_test.cpp
index 8251848e2e6..537c7db37e7 100644
--- a/src/mongo/db/process_health/fault_facet_test.cpp
+++ b/src/mongo/db/process_health/fault_facet_test.cpp
@@ -28,6 +28,7 @@
*/
#include "mongo/db/process_health/fault_facet.h"
+#include "mongo/db/process_health/fault_facet_impl.h"
#include "mongo/db/process_health/fault_facet_mock.h"
#include "mongo/unittest/unittest.h"
@@ -36,7 +37,8 @@ namespace process_health {
namespace {
-class FaultFacetTest : public unittest::Test {
+// Using the Mock facet.
+class FaultFacetTestWithMock : public unittest::Test {
public:
void startMock(FaultFacetMock::MockCallback callback) {
_svcCtx = ServiceContext::make();
@@ -54,12 +56,64 @@ private:
std::unique_ptr<FaultFacetMock> _facetMock;
};
-TEST_F(FaultFacetTest, FacetWithFailure) {
+TEST_F(FaultFacetTestWithMock, FacetWithFailure) {
startMock([] { return 0.5; });
auto status = getStatus();
ASSERT_APPROX_EQUAL(0.5, status.getSeverity(), 0.001);
}
+// Using the FaultFacetImpl.
+class FaultFacetImplTest : public unittest::Test {
+public:
+ static inline const auto kNoFailures = HealthCheckStatus(FaultFacetType::kMock1, 0.0, "test");
+ static inline const auto kFailure = HealthCheckStatus(FaultFacetType::kMock1, 0.5, "test");
+
+ void setUp() {
+ _svcCtx = ServiceContext::make();
+ _svcCtx->setFastClockSource(std::make_unique<ClockSourceMock>());
+ }
+
+ void createWithStatus(HealthCheckStatus status) {
+ _facet = std::make_unique<FaultFacetImpl>(
+ FaultFacetType::kMock1, _svcCtx->getFastClockSource(), status);
+ }
+
+ void update(HealthCheckStatus status) {
+ _facet->update(status);
+ }
+
+ HealthCheckStatus getStatus() const {
+ return _facet->getStatus();
+ }
+
+ ClockSourceMock& clockSource() {
+ return *static_cast<ClockSourceMock*>(_svcCtx->getFastClockSource());
+ }
+
+ template <typename Duration>
+ void advanceTime(Duration d) {
+ clockSource().advance(d);
+ }
+
+private:
+ ServiceContext::UniqueServiceContext _svcCtx;
+ std::unique_ptr<FaultFacetImpl> _facet;
+};
+
+TEST_F(FaultFacetImplTest, Simple) {
+ createWithStatus(kFailure);
+ const auto status = getStatus();
+ ASSERT_GT(status.getSeverity(), 0);
+ ASSERT_EQ(status.getType(), FaultFacetType::kMock1);
+ ASSERT_EQ(status.getShortDescription(), "test"_sd);
+}
+
+TEST_F(FaultFacetImplTest, Update) {
+ createWithStatus(kFailure);
+ update(kNoFailures);
+ ASSERT_EQ(getStatus().getSeverity(), 0);
+}
+
} // namespace
} // namespace process_health
} // namespace mongo
diff --git a/src/mongo/db/process_health/fault_manager.cpp b/src/mongo/db/process_health/fault_manager.cpp
index c308f4f2f04..b1f7ca8edc8 100644
--- a/src/mongo/db/process_health/fault_manager.cpp
+++ b/src/mongo/db/process_health/fault_manager.cpp
@@ -33,6 +33,7 @@
#include "mongo/db/process_health/fault_manager.h"
+#include "mongo/db/process_health/fault_facet_impl.h"
#include "mongo/db/process_health/fault_impl.h"
#include "mongo/db/process_health/health_monitoring_feature_flag.h"
#include "mongo/db/process_health/health_monitoring_gen.h"
@@ -206,23 +207,6 @@ void FaultManager::updateWithCheckStatus(HealthCheckStatus&& checkStatus) {
return;
}
- // TODO(SERVER-60587): implement Facet properly.
- class Impl : public FaultFacet {
- public:
- Impl(HealthCheckStatus status) : _status(status) {}
-
- FaultFacetType getType() const override {
- return _status.getType();
- }
-
- HealthCheckStatus getStatus() const override {
- return _status;
- }
-
- private:
- HealthCheckStatus _status;
- };
-
if (!container) {
// Need to create container first.
container = getOrCreateFaultFacetsContainer();
@@ -230,10 +214,13 @@ void FaultManager::updateWithCheckStatus(HealthCheckStatus&& checkStatus) {
auto facet = container->getFaultFacet(checkStatus.getType());
if (!facet) {
- auto newFacet = FaultFacetPtr(new Impl(checkStatus));
- container->updateWithSuppliedFacet(checkStatus.getType(), newFacet);
+ const auto type = checkStatus.getType();
+ auto newFacet =
+ new FaultFacetImpl(type, _svcCtx->getFastClockSource(), std::move(checkStatus));
+ container->updateWithSuppliedFacet(type, FaultFacetPtr(newFacet));
+ } else {
+ facet->update(std::move(checkStatus));
}
- // TODO(SERVER-60587): update facet with new check status.
}
void FaultManager::checkForStateTransition() {
diff --git a/src/mongo/db/process_health/health_check_status.h b/src/mongo/db/process_health/health_check_status.h
index 9dffed1f10e..c05ae48d8bb 100644
--- a/src/mongo/db/process_health/health_check_status.h
+++ b/src/mongo/db/process_health/health_check_status.h
@@ -43,7 +43,7 @@ namespace process_health {
enum class FaultFacetType { kMock1 = 0, kMock2, kLdap };
/**
- * The class representing current status of an ongoing fault tracked by facet.
+ * Immutable class representing current status of an ongoing fault tracked by facet.
*/
class HealthCheckStatus {
public:
@@ -54,27 +54,17 @@ public:
// avoid rounding problems and be sure that severity of 1.0 is guaranteed to be an active fault.
static constexpr double kActiveFaultSeverityEpsilon = 0.000001;
- HealthCheckStatus(FaultFacetType type,
- double severity,
- StringData description,
- Milliseconds activeFaultDuration,
- Milliseconds duration)
- : _type(type),
- _severity(severity),
- _description(description),
- _activeFaultDuration(activeFaultDuration),
- _duration(duration) {
- uassert(5949601,
- str::stream() << "Active fault duration " << _activeFaultDuration
- << " cannot be longer than duration " << _duration,
- _duration >= _activeFaultDuration);
- }
+ using Severity = double;
+
+ HealthCheckStatus(FaultFacetType type, Severity severity, StringData description)
+ : _type(type), _severity(severity), _description(description) {}
// Constructs a resolved status (no fault detected).
- HealthCheckStatus(FaultFacetType type) : _type(type), _severity(0), _description("resolved") {}
+ explicit HealthCheckStatus(FaultFacetType type)
+ : _type(type), _severity(0), _description("resolved"_sd) {}
HealthCheckStatus(const HealthCheckStatus&) = default;
- HealthCheckStatus& operator=(const HealthCheckStatus&) = default;
+ HealthCheckStatus& operator=(const HealthCheckStatus&) = delete;
/**
* @return FaultFacetType of this status.
@@ -86,31 +76,14 @@ public:
/**
* The fault severity value if any.
*
- * @return Current fault severity. The expected values:
- * 0: Ok
- * (0, 1.0): Transient fault condition
- * [1.0, Inf): Active fault condition
+ * @return Current fault severity
*/
- double getSeverity() const {
+ Severity getSeverity() const {
return _severity;
}
- /**
- * Gets the duration of an active fault, if any.
- * This is the time from the moment the severity reached the 1.0 value
- * and stayed on or above 1.0.
- *
- * Note: each time the severity drops below 1.0 the duration is reset.
- */
- Milliseconds getActiveFaultDuration() const {
- return _activeFaultDuration;
- }
-
- /**
- * @return duration of the fault facet or fault from the moment it was created.
- */
- Milliseconds getDuration() const {
- return _duration;
+ StringData getShortDescription() const {
+ return _description;
}
void appendDescription(BSONObjBuilder* builder) const;
@@ -141,8 +114,6 @@ private:
const FaultFacetType _type;
const double _severity;
const std::string _description;
- const Milliseconds _activeFaultDuration;
- const Milliseconds _duration;
};
inline StringBuilder& operator<<(StringBuilder& s, const FaultFacetType& type) {
@@ -167,8 +138,6 @@ inline void HealthCheckStatus::appendDescription(BSONObjBuilder* builder) const
builder->append("type", _type);
builder->append("description", _description);
builder->append("severity", _severity);
- builder->append("activeFaultDuration", _activeFaultDuration.toString());
- builder->append("duration", _duration.toString());
}
inline StringBuilder& operator<<(StringBuilder& s, const HealthCheckStatus& hcs) {
diff --git a/src/mongo/db/process_health/health_observer_base.cpp b/src/mongo/db/process_health/health_observer_base.cpp
index 097f5dd5aab..918093b34b4 100644
--- a/src/mongo/db/process_health/health_observer_base.cpp
+++ b/src/mongo/db/process_health/health_observer_base.cpp
@@ -66,7 +66,7 @@ void HealthObserverBase::periodicCheck(FaultFacetsContainerFactory& factory,
// Do the health check.
taskExecutor->schedule([this, &factory](Status status) {
periodicCheckImpl({})
- .then([this, &factory](HealthCheckStatus checkStatus) {
+ .then([this, &factory](HealthCheckStatus&& checkStatus) mutable {
factory.updateWithCheckStatus(std::move(checkStatus));
})
.onCompletion([this](Status status) {
@@ -87,5 +87,25 @@ HealthObserverIntensity HealthObserverBase::getIntensity() {
return _intensity;
}
+HealthCheckStatus HealthObserverBase::makeHealthyStatus() const {
+ return HealthCheckStatus(getType());
+}
+
+HealthCheckStatus HealthObserverBase::makeSimpleFailedStatus(double severity,
+ std::vector<Status>&& failures) const {
+ if (severity <= 0) {
+ LOGV2_WARNING(6007903,
+ "Creating faulty health check status requires positive severity",
+ "observerType"_attr = getType());
+ }
+ StringBuilder sb;
+ for (const auto& s : failures) {
+ sb.append(s.toString());
+ sb.append(" ");
+ }
+
+ return HealthCheckStatus(getType(), severity, sb.stringData());
+}
+
} // namespace process_health
} // namespace mongo
diff --git a/src/mongo/db/process_health/health_observer_base.h b/src/mongo/db/process_health/health_observer_base.h
index d8f13737bc7..5c227e6355c 100644
--- a/src/mongo/db/process_health/health_observer_base.h
+++ b/src/mongo/db/process_health/health_observer_base.h
@@ -79,6 +79,12 @@ protected:
HealthObserverIntensity getIntensity() override;
+ // Helper method to create a status without errors.
+ HealthCheckStatus makeHealthyStatus() const;
+
+ // Make a generic error status.
+ HealthCheckStatus makeSimpleFailedStatus(double severity, std::vector<Status>&& failures) const;
+
ClockSource* const _clockSource;
TickSource* const _tickSource;
diff --git a/src/mongo/db/process_health/health_observer_mock.h b/src/mongo/db/process_health/health_observer_mock.h
index dafbbc3608d..96c984960ae 100644
--- a/src/mongo/db/process_health/health_observer_mock.h
+++ b/src/mongo/db/process_health/health_observer_mock.h
@@ -66,8 +66,7 @@ protected:
if (HealthCheckStatus::isResolved(severity)) {
completionPf.promise.emplaceValue(HealthCheckStatus(getType()));
} else {
- completionPf.promise.emplaceValue(
- HealthCheckStatus(getType(), severity, "failed", Milliseconds(0), Milliseconds(0)));
+ completionPf.promise.emplaceValue(HealthCheckStatus(getType(), severity, "failed"));
}
return std::move(completionPf.future);
}
diff --git a/src/mongo/db/process_health/health_observer_test.cpp b/src/mongo/db/process_health/health_observer_test.cpp
index 2084cb138cf..01683bb055f 100644
--- a/src/mongo/db/process_health/health_observer_test.cpp
+++ b/src/mongo/db/process_health/health_observer_test.cpp
@@ -100,6 +100,8 @@ TEST_F(FaultManagerTest, SeverityIsMaxFromAllFacetsSeverity) {
manager().healthCheckTest();
do {
waitForFaultBeingCreated();
+ advanceTime(Milliseconds(100));
+ manager().healthCheckTest();
} while (manager().getFault().getFacets().size() != 2); // Race between two facets.
auto currentFault = manager().currentFault();
@@ -121,6 +123,7 @@ TEST_F(FaultManagerTest, HealthCheckCreatesFacetThenIsGarbageCollectedAndStateTr
ASSERT_FALSE(manager().currentFault());
// State is transitioned.
ASSERT_EQ(FaultState::kOk, manager().getFaultState());
+ resetManager(); // Before atomic fields above go out of scope.
}
TEST_F(FaultManagerTest, HealthCheckCreates2FacetsThenIsGarbageCollected) {
@@ -133,6 +136,8 @@ TEST_F(FaultManagerTest, HealthCheckCreates2FacetsThenIsGarbageCollected) {
while (manager().getFault().getFacets().size() != 2) {
sleepFor(Milliseconds(1));
+ advanceTime(Milliseconds(100));
+ manager().healthCheckTest();
}
// Resolve one facet and it should be garbage collected.
@@ -149,6 +154,7 @@ TEST_F(FaultManagerTest, HealthCheckCreates2FacetsThenIsGarbageCollected) {
}
ASSERT_FALSE(internalFault.getFaultFacet(FaultFacetType::kMock1));
ASSERT_TRUE(internalFault.getFaultFacet(FaultFacetType::kMock2));
+ resetManager(); // Before atomic fields above go out of scope.
}
TEST_F(FaultManagerTest, HealthCheckWithOffFacetCreatesNoFault) {
@@ -178,6 +184,7 @@ TEST_F(FaultManagerTest, DoesNotRestartCheckBeforeIntervalExpired) {
waitForFaultBeingCreated();
currentFault = manager().currentFault();
ASSERT_TRUE(currentFault); // The fault was created.
+ resetManager(); // Before atomic fields above go out of scope.
}
TEST_F(FaultManagerTest, HealthCheckWithCriticalFacetCreatesFault) {