summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorSara Golemon <sara.golemon@mongodb.com>2018-04-26 14:57:17 -0400
committerSara Golemon <sara.golemon@mongodb.com>2018-05-01 12:53:05 -0400
commit1c5629c05fa9b08752688c021f7a6beef3a20dca (patch)
treedbaacdd92639b5be5b7a6f6884e1496c7af0b803 /src/mongo
parente5f4331b710b0c933699e26e8c65f702231038cc (diff)
downloadmongo-1c5629c05fa9b08752688c021f7a6beef3a20dca.tar.gz
SERVER-34229 Add free monitoring to serverStatus
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/db/free_mon/SConscript1
-rw-r--r--src/mongo/db/free_mon/free_mon_controller.cpp7
-rw-r--r--src/mongo/db/free_mon/free_mon_controller.h8
-rw-r--r--src/mongo/db/free_mon/free_mon_processor.cpp165
-rw-r--r--src/mongo/db/free_mon/free_mon_processor.h37
-rw-r--r--src/mongo/db/free_mon/free_mon_status.cpp64
6 files changed, 212 insertions, 70 deletions
diff --git a/src/mongo/db/free_mon/SConscript b/src/mongo/db/free_mon/SConscript
index 3bfa66d6f76..8b665d00027 100644
--- a/src/mongo/db/free_mon/SConscript
+++ b/src/mongo/db/free_mon/SConscript
@@ -35,6 +35,7 @@ if free_monitoring == "on":
'free_mon_commands.cpp',
'free_mon_mongod.cpp',
'free_mon_options.cpp',
+ 'free_mon_status.cpp',
'http_client_curl.cpp' if not env.TargetOSIs('windows') else 'http_client_winhttp.cpp',
],
LIBDEPS=[
diff --git a/src/mongo/db/free_mon/free_mon_controller.cpp b/src/mongo/db/free_mon/free_mon_controller.cpp
index 3c50d193d07..f5e35217166 100644
--- a/src/mongo/db/free_mon/free_mon_controller.cpp
+++ b/src/mongo/db/free_mon/free_mon_controller.cpp
@@ -162,4 +162,11 @@ void FreeMonController::turnCrankForTest(size_t countMessagesToIgnore) {
_processor->turnCrankForTest(countMessagesToIgnore);
}
+void FreeMonController::getServerStatus(OperationContext* opCtx, BSONObjBuilder* status) {
+ if (!_processor) {
+ status->append("state", "disabled");
+ }
+ _processor->getServerStatus(opCtx, status);
+}
+
} // namespace mongo
diff --git a/src/mongo/db/free_mon/free_mon_controller.h b/src/mongo/db/free_mon/free_mon_controller.h
index 0e3aece0694..edb9bec3657 100644
--- a/src/mongo/db/free_mon/free_mon_controller.h
+++ b/src/mongo/db/free_mon/free_mon_controller.h
@@ -35,6 +35,7 @@
#include <vector>
#include "mongo/base/status.h"
+#include "mongo/db/client.h"
#include "mongo/db/free_mon/free_mon_message.h"
#include "mongo/db/free_mon/free_mon_network.h"
#include "mongo/db/free_mon/free_mon_processor.h"
@@ -111,9 +112,12 @@ public:
*/
boost::optional<Status> unregisterServerCommand(Milliseconds timeout);
- // TODO - add these methods
- // void getServerStatus(BSONObjBuilder* builder);
+ /**
+ * Populates an info blob for use by {serverStatus: 1}
+ */
+ void getServerStatus(OperationContext* opCtx, BSONObjBuilder* status);
+ // TODO
// void notifyObserver(const BSONObj& doc);
private:
void _enqueue(std::shared_ptr<FreeMonMessage> msg);
diff --git a/src/mongo/db/free_mon/free_mon_processor.cpp b/src/mongo/db/free_mon/free_mon_processor.cpp
index 53e60ed316d..e17e506d673 100644
--- a/src/mongo/db/free_mon/free_mon_processor.cpp
+++ b/src/mongo/db/free_mon/free_mon_processor.cpp
@@ -125,12 +125,12 @@ FreeMonProcessor::FreeMonProcessor(FreeMonCollectorCollection& registration,
_metrics(metrics),
_network(network),
_random(Date_t::now().asInt64()),
- _registrationRetry(_random),
- _metricsRetry(_random),
+ _registrationRetry(RegistrationRetryCounter(_random)),
+ _metricsRetry(MetricsRetryCounter(_random)),
_metricsGatherInterval(kDefaultMetricsGatherInterval),
_queue(useCrankForTest) {
- _registrationRetry.reset();
- _metricsRetry.reset();
+ _registrationRetry->reset();
+ _metricsRetry->reset();
}
void FreeMonProcessor::enqueue(std::shared_ptr<FreeMonMessage> msg) {
@@ -241,11 +241,8 @@ void FreeMonProcessor::run() {
}
}
-void FreeMonProcessor::readState(Client* client) {
-
- auto optCtx = client->makeOperationContext();
-
- auto state = FreeMonStorage::read(optCtx.get());
+void FreeMonProcessor::readState(OperationContext* opCtx) {
+ auto state = FreeMonStorage::read(opCtx);
_lastReadState = state;
@@ -255,15 +252,21 @@ void FreeMonProcessor::readState(Client* client) {
_state = state.get();
} else if (!state.is_initialized()) {
// Default the state
- _state.setVersion(kProtocolVersion);
- _state.setState(StorageStateEnum::enabled);
- _state.setRegistrationId("");
- _state.setInformationalURL("");
- _state.setMessage("");
- _state.setUserReminder("");
+ auto state = _state.synchronize();
+ state->setVersion(kProtocolVersion);
+ state->setState(StorageStateEnum::enabled);
+ state->setRegistrationId("");
+ state->setInformationalURL("");
+ state->setMessage("");
+ state->setUserReminder("");
}
}
+void FreeMonProcessor::readState(Client* client) {
+ auto opCtx = client->makeOperationContext();
+ readState(opCtx.get());
+}
+
void FreeMonProcessor::writeState(Client* client) {
// Do a compare and swap
@@ -271,7 +274,7 @@ void FreeMonProcessor::writeState(Client* client) {
// If the local document is different, then oh-well we do nothing, and wait until the next round
// Has our in-memory state changed, if so consider writing
- if (_lastReadState != _state) {
+ if (_lastReadState != _state.get()) {
// The read and write are bound the same operation context
{
@@ -281,9 +284,9 @@ void FreeMonProcessor::writeState(Client* client) {
// If our in-memory copy matches the last read, then write it to disk
if (state == _lastReadState) {
- FreeMonStorage::replace(optCtx.get(), _state);
+ FreeMonStorage::replace(optCtx.get(), _state.get());
- _lastReadState = _state;
+ _lastReadState = boost::make_optional(_state.get());
}
}
}
@@ -380,8 +383,9 @@ void FreeMonProcessor::doCommandRegister(Client* client,
FreeMonRegistrationRequest req;
- if (!_state.getRegistrationId().empty()) {
- req.setId(_state.getRegistrationId());
+ auto regid = _state->getRegistrationId();
+ if (!regid.empty()) {
+ req.setId(regid);
}
req.setVersion(kProtocolVersion);
@@ -401,7 +405,7 @@ void FreeMonProcessor::doCommandRegister(Client* client,
req.setPayload(std::get<0>(collect));
// Record that the registration is pending
- _state.setState(StorageStateEnum::pending);
+ _state->setState(StorageStateEnum::pending);
writeState(client);
@@ -569,7 +573,7 @@ void FreeMonProcessor::doAsyncRegisterComplete(
// Our request is no longer in-progress so delete it
_futureRegistrationResponse.reset();
- if (_state.getState() != StorageStateEnum::pending) {
+ if (_state->getState() != StorageStateEnum::pending) {
notifyPendingRegisters(Status(ErrorCodes::BadValue, "Registration was canceled"));
return;
@@ -582,7 +586,7 @@ void FreeMonProcessor::doAsyncRegisterComplete(
warning() << "Free Monitoring registration halted due to " << s;
// Disable on any error
- _state.setState(StorageStateEnum::disabled);
+ _state->setState(StorageStateEnum::disabled);
// Persist state
writeState(client);
@@ -594,26 +598,29 @@ void FreeMonProcessor::doAsyncRegisterComplete(
}
// Update in-memory state
- _registrationRetry.setMin(Seconds(resp.getReportingInterval()));
+ _registrationRetry->setMin(Seconds(resp.getReportingInterval()));
- _state.setRegistrationId(resp.getId());
+ {
+ auto state = _state.synchronize();
+ state->setRegistrationId(resp.getId());
- if (resp.getUserReminder().is_initialized()) {
- _state.setUserReminder(resp.getUserReminder().get());
- } else {
- _state.setUserReminder("");
- }
+ if (resp.getUserReminder().is_initialized()) {
+ state->setUserReminder(resp.getUserReminder().get());
+ } else {
+ state->setUserReminder("");
+ }
- _state.setMessage(resp.getMessage());
- _state.setInformationalURL(resp.getInformationalURL());
+ state->setMessage(resp.getMessage());
+ state->setInformationalURL(resp.getInformationalURL());
- _state.setState(StorageStateEnum::enabled);
+ state->setState(StorageStateEnum::enabled);
+ }
// Persist state
writeState(client);
// Reset retry counter
- _registrationRetry.reset();
+ _registrationRetry->reset();
// Notify waiters
notifyPendingRegisters(Status::OK());
@@ -622,7 +629,7 @@ void FreeMonProcessor::doAsyncRegisterComplete(
// Enqueue next metrics upload
enqueue(FreeMonMessage::createWithDeadline(FreeMonMessageType::MetricsSend,
- _registrationRetry.getNextDeadline(client)));
+ _registrationRetry->getNextDeadline(client)));
}
void FreeMonProcessor::doAsyncRegisterFail(
@@ -631,32 +638,32 @@ void FreeMonProcessor::doAsyncRegisterFail(
// Our request is no longer in-progress so delete it
_futureRegistrationResponse.reset();
- if (_state.getState() != StorageStateEnum::pending) {
+ if (_state->getState() != StorageStateEnum::pending) {
notifyPendingRegisters(Status(ErrorCodes::BadValue, "Registration was canceled"));
return;
}
- if (!_registrationRetry.incrementError()) {
+ if (!_registrationRetry->incrementError()) {
// We have exceeded our retry
warning() << "Free Monitoring is abandoning registration after excess retries";
return;
}
LOG(1) << "Free Monitoring Registration Failed with status '" << msg->getPayload()
- << "', retrying in " << _registrationRetry.getNextDuration();
+ << "', retrying in " << _registrationRetry->getNextDuration();
// Enqueue a register retry
enqueue(FreeMonRegisterCommandMessage::createWithDeadline(
- _tags, _registrationRetry.getNextDeadline(client)));
+ _tags, _registrationRetry->getNextDeadline(client)));
}
void FreeMonProcessor::doCommandUnregister(
Client* client, FreeMonWaitableMessageWithPayload<FreeMonMessageType::UnregisterCommand>* msg) {
// Treat this request as idempotent
- if (_state.getState() != StorageStateEnum::disabled) {
+ if (_state->getState() != StorageStateEnum::disabled) {
- _state.setState(StorageStateEnum::disabled);
+ _state->setState(StorageStateEnum::disabled);
writeState(client);
@@ -705,24 +712,26 @@ std::string compressMetrics(MetricsBuffer& buffer) {
void FreeMonProcessor::doMetricsSend(Client* client) {
readState(client);
- if (_state.getState() != StorageStateEnum::enabled) {
+ if (_state->getState() != StorageStateEnum::enabled) {
// If we are recently disabled, then stop sending metrics
return;
}
// Build outbound request
FreeMonMetricsRequest req;
- invariant(!_state.getRegistrationId().empty());
+ invariant(!_state->getRegistrationId().empty());
req.setVersion(kProtocolVersion);
req.setEncoding(MetricsEncodingEnum::snappy);
- req.setId(_state.getRegistrationId());
+ req.setId(_state->getRegistrationId());
// Get the buffered metrics
auto metrics = compressMetrics(_metricsBuffer);
req.setMetrics(ConstDataRange(metrics.data(), metrics.size()));
+ _lastMetricsSend = Date_t::now();
+
// Send the async request
doAsyncCallback<FreeMonMetricsResponse>(
this,
@@ -749,7 +758,7 @@ void FreeMonProcessor::doAsyncMetricsComplete(
warning() << "Free Monitoring metrics uploading halted due to " << s;
// Disable free monitoring on validation errors
- _state.setState(StorageStateEnum::disabled);
+ _state->setState(StorageStateEnum::disabled);
writeState(client);
// If validation fails, we do not retry
@@ -768,50 +777,82 @@ void FreeMonProcessor::doAsyncMetricsComplete(
// TODO: do we reset only the metrics we send or all pending on success?
_metricsBuffer.reset();
- _metricsRetry.setMin(Seconds(resp.getReportingInterval()));
+ _metricsRetry->setMin(Seconds(resp.getReportingInterval()));
- if (resp.getId().is_initialized()) {
- _state.setRegistrationId(resp.getId().get());
- }
+ {
+ auto state = _state.synchronize();
- if (resp.getUserReminder().is_initialized()) {
- _state.setUserReminder(resp.getUserReminder().get());
- }
+ if (resp.getId().is_initialized()) {
+ state->setRegistrationId(resp.getId().get());
+ }
- if (resp.getInformationalURL().is_initialized()) {
- _state.setInformationalURL(resp.getInformationalURL().get());
- }
+ if (resp.getUserReminder().is_initialized()) {
+ state->setUserReminder(resp.getUserReminder().get());
+ }
+
+ if (resp.getInformationalURL().is_initialized()) {
+ state->setInformationalURL(resp.getInformationalURL().get());
+ }
- if (resp.getMessage().is_initialized()) {
- _state.setMessage(resp.getMessage().get());
+ if (resp.getMessage().is_initialized()) {
+ state->setMessage(resp.getMessage().get());
+ }
}
// Persist state
writeState(client);
// Reset retry counter
- _metricsRetry.reset();
+ _metricsRetry->reset();
// Enqueue next metrics upload
enqueue(FreeMonMessage::createWithDeadline(FreeMonMessageType::MetricsSend,
- _registrationRetry.getNextDeadline(client)));
+ _registrationRetry->getNextDeadline(client)));
}
void FreeMonProcessor::doAsyncMetricsFail(
Client* client, const FreeMonMessageWithPayload<FreeMonMessageType::AsyncMetricsFail>* msg) {
- if (!_metricsRetry.incrementError()) {
+ if (!_metricsRetry->incrementError()) {
// We have exceeded our retry
warning() << "Free Monitoring is abandoning metrics upload after excess retries";
return;
}
LOG(1) << "Free Monitoring Metrics upload failed with status " << msg->getPayload()
- << ", retrying in " << _metricsRetry.getNextDuration();
+ << ", retrying in " << _metricsRetry->getNextDuration();
// Enqueue next metrics upload
enqueue(FreeMonMessage::createWithDeadline(FreeMonMessageType::MetricsSend,
- _metricsRetry.getNextDeadline(client)));
+ _metricsRetry->getNextDeadline(client)));
+}
+
+void FreeMonProcessor::getServerStatus(OperationContext* opCtx, BSONObjBuilder* status) {
+ try {
+ readState(opCtx);
+ } catch (const DBException&) {
+ // readState() may throw if invoked during shutdown (as in ReplSetTest cleanup).
+ // If we have a lastReadState, go ahead and use that, otherwise just give up already.
+ if (!_lastReadState.get()) {
+ return;
+ }
+ }
+
+ if (!_lastReadState.get()) {
+ // _state gets initialized by readState() regardless,
+ // use _lastReadState to differential "undecided" from default.
+ status->append("state", "undecided");
+ return;
+ }
+
+ status->append("state", StorageState_serializer(_state->getState()));
+ status->append("retryIntervalSecs", durationCount<Seconds>(_metricsRetry->getNextDuration()));
+ auto lastMetricsSend = _lastMetricsSend.get();
+ if (lastMetricsSend) {
+ status->append("lastRunTime", lastMetricsSend->toString());
+ }
+ status->append("registerErrors", static_cast<long long>(_registrationRetry->getCount()));
+ status->append("metricsErrors", static_cast<long long>(_metricsRetry->getCount()));
}
} // namespace mongo
diff --git a/src/mongo/db/free_mon/free_mon_processor.h b/src/mongo/db/free_mon/free_mon_processor.h
index ade74fc09e6..43028553144 100644
--- a/src/mongo/db/free_mon/free_mon_processor.h
+++ b/src/mongo/db/free_mon/free_mon_processor.h
@@ -28,6 +28,7 @@
#pragma once
#include <boost/optional.hpp>
+#include <boost/thread/synchronized_value.hpp>
#include <cstdint>
#include <deque>
#include <memory>
@@ -84,7 +85,7 @@ public:
/**
* Get the next retry duration.
*/
- Seconds getNextDuration() {
+ Seconds getNextDuration() const {
dassert(_current != Seconds(0));
return _current;
}
@@ -92,7 +93,7 @@ public:
/**
* Get the next retry deadline
*/
- Date_t getNextDeadline(Client* client) {
+ Date_t getNextDeadline(Client* client) const {
return client->getServiceContext()->getPreciseClockSource()->now() + _current;
}
@@ -118,6 +119,10 @@ public:
bool incrementError() final;
+ size_t getCount() const {
+ return _retryCount;
+ }
+
private:
// Random number generator for jitter
PseudoRandom& _random;
@@ -154,6 +159,10 @@ public:
bool incrementError() final;
+ size_t getCount() const {
+ return _retryCount;
+ }
+
private:
// Random number generator for jitter
PseudoRandom& _random;
@@ -308,6 +317,11 @@ private:
/**
* Read the state from the database.
*/
+ void readState(OperationContext* opCtx);
+
+ /**
+ * Create a short-lived opCtx and read the state from the database.
+ */
void readState(Client* client);
/**
@@ -375,6 +389,14 @@ private:
void doAsyncMetricsFail(
Client* client, const FreeMonMessageWithPayload<FreeMonMessageType::AsyncMetricsFail>* msg);
+protected:
+ friend class FreeMonController;
+
+ /**
+ * Server status section with state for active processor.
+ */
+ void getServerStatus(OperationContext* opCtx, BSONObjBuilder* status);
+
private:
// Collection of collectors to send on registration
FreeMonCollectorCollection& _registration;
@@ -389,10 +411,10 @@ private:
PseudoRandom _random;
// Registration Retry logic
- RegistrationRetryCounter _registrationRetry;
+ boost::synchronized_value<RegistrationRetryCounter> _registrationRetry;
// Metrics Retry logic
- MetricsRetryCounter _metricsRetry;
+ boost::synchronized_value<MetricsRetryCounter> _metricsRetry;
// Interval for gathering metrics
Seconds _metricsGatherInterval;
@@ -400,6 +422,9 @@ private:
// Buffer of metrics to upload
MetricsBuffer _metricsBuffer;
+ // When did we last send a metrics batch?
+ boost::synchronized_value<boost::optional<Date_t>> _lastMetricsSend;
+
// List of tags from server configuration registration
std::vector<std::string> _tags;
@@ -410,10 +435,10 @@ private:
std::vector<std::shared_ptr<FreeMonMessage>> _pendingRegisters;
// Last read storage state
- boost::optional<FreeMonStorageState> _lastReadState;
+ boost::synchronized_value<boost::optional<FreeMonStorageState>> _lastReadState;
// Pending update to disk
- FreeMonStorageState _state;
+ boost::synchronized_value<FreeMonStorageState> _state;
// Countdown launch to support manual cranking
FreeMonCountdownLatch _countdown;
diff --git a/src/mongo/db/free_mon/free_mon_status.cpp b/src/mongo/db/free_mon/free_mon_status.cpp
new file mode 100644
index 00000000000..73ff7165052
--- /dev/null
+++ b/src/mongo/db/free_mon/free_mon_status.cpp
@@ -0,0 +1,64 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/commands/server_status.h"
+#include "mongo/db/free_mon/free_mon_controller.h"
+
+namespace mongo {
+namespace {
+
+class FreeMonServerStatus : public ServerStatusSection {
+public:
+ FreeMonServerStatus() : ServerStatusSection("freeMonitoring") {}
+
+ bool includeByDefault() const final {
+ return true;
+ }
+
+ void addRequiredPrivileges(std::vector<Privilege>* out) final {
+ out->push_back(Privilege(ResourcePattern::forClusterResource(),
+ ActionType::checkFreeMonitoringStatus));
+ }
+
+ BSONObj generateSection(OperationContext* opCtx, const BSONElement& configElement) const final {
+ auto* controller = FreeMonController::get(opCtx->getServiceContext());
+ if (!controller) {
+ return BSON("state"
+ << "disabled");
+ }
+
+ BSONObjBuilder builder;
+ controller->getServerStatus(opCtx, &builder);
+ return builder.obj();
+ }
+} freeMonServerStatus;
+
+} // namespace
+} // namespace mongo