summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Scott Hernandez <scotthernandez@gmail.com> 2015-11-24 11:14:57 -0500
committer: Scott Hernandez <scotthernandez@gmail.com> 2015-11-24 13:54:54 -0500
commit: eb23ba1f57b17cfbe44896a735ab74a0bdee2f55 (patch)
tree: 4e949e981799547d94fef1668f569d1d2ee4b90c
parent: cc8f75d1cfc9b2b4b0c0f8a213c960e928174ce7 (diff)
download: mongo-eb23ba1f57b17cfbe44896a735ab74a0bdee2f55.tar.gz
SERVER-21643: Add server metrics for the Replication Executor
-rw-r--r-- src/mongo/db/repl/replication_coordinator.h 8
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.h 4
-rw-r--r-- src/mongo/db/repl/replication_coordinator_mock.h 4
-rw-r--r-- src/mongo/db/repl/replication_executor.cpp 50
-rw-r--r-- src/mongo/db/repl/replication_executor.h 31
-rw-r--r-- src/mongo/db/repl/replication_executor_test.cpp 8
-rw-r--r-- src/mongo/db/repl/replset_commands.cpp 18
7 files changed, 96 insertions, 27 deletions
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h
index 34abd56ab5a..05c5d3c212e 100644
--- a/src/mongo/db/repl/replication_coordinator.h
+++ b/src/mongo/db/repl/replication_coordinator.h
@@ -67,6 +67,8 @@ class OplogReader;
class OpTime;
class ReadConcernArgs;
class ReadConcernResponse;
+class ReplicaSetConfig;
+class ReplicationExecutor;
class ReplSetDeclareElectionWinnerArgs;
class ReplSetDeclareElectionWinnerResponse;
class ReplSetHeartbeatArgs;
@@ -75,7 +77,6 @@ class ReplSetHeartbeatResponse;
class ReplSetHtmlSummary;
class ReplSetRequestVotesArgs;
class ReplSetRequestVotesResponse;
-class ReplicaSetConfig;
class UpdatePositionArgs;
/**
@@ -130,6 +131,11 @@ public:
virtual void shutdown() = 0;
/**
+ * Returns a pointer to the ReplicationExecutor.
+ *
+ * The result may be nullptr: ReplicationCoordinatorMock::getExecutor() returns nullptr,
+ * so callers must check the pointer before dereferencing it.
+ */
+ virtual ReplicationExecutor* getExecutor() = 0;
+
+ /**
* Returns a reference to the parsed command line arguments that are related to replication.
*/
virtual const ReplSettings& getSettings() const = 0;
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 1b16cddfacc..d1cca5c96e3 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -109,6 +109,10 @@ public:
virtual void shutdown() override;
+ // Returns the address of this coordinator's _replExecutor; no ownership is transferred.
+ virtual ReplicationExecutor* getExecutor() override {
+ return &_replExecutor;
+ }
+
virtual const ReplSettings& getSettings() const override;
virtual Mode getReplicationMode() const override;
diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h
index 142e4289620..6fb18b19007 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_mock.h
@@ -51,6 +51,10 @@ public:
virtual void shutdown();
+ // The mock has no executor, so this always returns nullptr; callers must handle a null
+ // result rather than dereferencing it.
+ virtual ReplicationExecutor* getExecutor() override {
+ return nullptr;
+ }
+
virtual const ReplSettings& getSettings() const;
virtual bool isReplEnabled() const;
diff --git a/src/mongo/db/repl/replication_executor.cpp b/src/mongo/db/repl/replication_executor.cpp
index 6dc2067f9ea..c72f3c16ac7 100644
--- a/src/mongo/db/repl/replication_executor.cpp
+++ b/src/mongo/db/repl/replication_executor.cpp
@@ -58,14 +58,13 @@ ReplicationExecutor::ReplicationExecutor(NetworkInterface* netInterface,
: _random(prngSeed),
_networkInterface(netInterface),
_storageInterface(storageInterface),
- _totalEventWaiters(0),
_inShutdown(false),
_dblockWorkers(OldThreadPool::DoNotStartThreadsTag(), 3, "replExecDBWorker-"),
_dblockTaskRunner(&_dblockWorkers,
stdx::bind(&StorageInterface::createOperationContext, storageInterface)),
_dblockExclusiveLockTaskRunner(
- &_dblockWorkers, stdx::bind(&StorageInterface::createOperationContext, storageInterface)),
- _nextId(0) {}
+ &_dblockWorkers,
+ stdx::bind(&StorageInterface::createOperationContext, storageInterface)) {}
ReplicationExecutor::~ReplicationExecutor() {
// join must have been called
@@ -75,12 +74,31 @@ ReplicationExecutor::~ReplicationExecutor() {
BSONObj ReplicationExecutor::getDiagnosticBSON() {
stdx::lock_guard<stdx::mutex> lk(_mutex);
BSONObjBuilder builder;
- builder.appendIntOrLL("networkInProgress", _networkInProgressQueue.size());
- builder.appendIntOrLL("dbWorkInProgress", _dbWorkInProgressQueue.size());
- builder.appendIntOrLL("exclusiveInProgress", _exclusiveLockInProgressQueue.size());
- builder.appendIntOrLL("sleeperQueue", _sleepersQueue.size());
- builder.appendIntOrLL("ready", _readyQueue.size());
- builder.appendIntOrLL("free", _freeQueue.size());
+
+ // Counters: totals accumulated over the life of this executor instance, reported in the
+ // "counters" sub-object. Read here while _mutex is held by the enclosing function.
+ // NOTE(review): no increment of _counterSchedulingFailures is visible in this patch, so
+ // "schedulingFailures" may always report 0 -- confirm it is updated elsewhere.
+ BSONObjBuilder counters(builder.subobjStart("counters"));
+ counters.appendIntOrLL("eventCreated", _counterCreatedEvents);
+ // "eventWait" must report the waitForEvent() counter; it previously re-reported
+ // _counterCreatedEvents, so _counterWaitEvents was never exported.
+ counters.appendIntOrLL("eventWait", _counterWaitEvents);
+ counters.appendIntOrLL("cancels", _counterCancels);
+ counters.appendIntOrLL("waits", _counterWaits);
+ counters.appendIntOrLL("scheduledNetCmd", _counterScheduledCommands);
+ counters.appendIntOrLL("scheduledDBWork", _counterScheduledDBWorks);
+ counters.appendIntOrLL("scheduledXclWork", _counterScheduledExclusiveWorks);
+ counters.appendIntOrLL("scheduledWorkAt", _counterScheduledWorkAts);
+ counters.appendIntOrLL("scheduledWork", _counterScheduledWorks);
+ counters.appendIntOrLL("schedulingFailures", _counterSchedulingFailures);
+ counters.done();
+
+ // Queues
+ BSONObjBuilder queues(builder.subobjStart("queues"));
+ queues.appendIntOrLL("networkInProgress", _networkInProgressQueue.size());
+ queues.appendIntOrLL("dbWorkInProgress", _dbWorkInProgressQueue.size());
+ queues.appendIntOrLL("exclusiveInProgress", _exclusiveLockInProgressQueue.size());
+ queues.appendIntOrLL("sleepers", _sleepersQueue.size());
+ queues.appendIntOrLL("ready", _readyQueue.size());
+ queues.appendIntOrLL("free", _freeQueue.size());
+ queues.done();
+
builder.appendIntOrLL("unsignaledEvents", _unsignaledEvents.size());
builder.appendIntOrLL("eventWaiters", _totalEventWaiters);
builder.append("shuttingDown", _inShutdown);
@@ -207,6 +225,7 @@ void ReplicationExecutor::maybeNotifyShutdownComplete_inlock() {
+// Creates an event via makeEvent_inlock() while holding _mutex, and counts the call in
+// _counterCreatedEvents.
StatusWith<ReplicationExecutor::EventHandle> ReplicationExecutor::makeEvent() {
stdx::lock_guard<stdx::mutex> lk(_mutex);
+ // Incremented before the attempt, so the metric also counts calls for which
+ // makeEvent_inlock() returns a non-OK status.
+ ++_counterCreatedEvents;
return makeEvent_inlock();
}
@@ -232,14 +251,17 @@ void ReplicationExecutor::signalEvent_inlock(const EventHandle& eventHandle) {
}
+// Blocks until the given event has been signaled, and counts the call in
+// _counterWaitEvents.
void ReplicationExecutor::waitForEvent(const EventHandle& event) {
+ {
+ // The metric counters are documented as protected by _mutex, so take it for the
+ // increment only; the scope ends before the blocking wait below.
+ // NOTE(review): assumes callers never hold _mutex when calling this -- confirm.
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ ++_counterWaitEvents;
+ }
_getEventFromHandle(event)->waitUntilSignaled();
}
+// Cancels the callback identified by cbHandle, and counts the call in _counterCancels.
void ReplicationExecutor::cancel(const CallbackHandle& cbHandle) {
+ {
+ // The metric counters are documented as protected by _mutex, so take it for the
+ // increment only; the scope ends before the callback's cancel() runs.
+ // NOTE(review): assumes callers never hold _mutex when calling this -- confirm.
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ ++_counterCancels;
+ }
_getCallbackFromHandle(cbHandle)->cancel();
};
+// Blocks until the callback identified by cbHandle has completed, and counts the call in
+// _counterWaits.
void ReplicationExecutor::wait(const CallbackHandle& cbHandle) {
+ {
+ // The metric counters are documented as protected by _mutex, so take it for the
+ // increment only; the scope ends before the blocking wait below.
+ // NOTE(review): assumes callers never hold _mutex when calling this -- confirm.
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ ++_counterWaits;
+ }
_getCallbackFromHandle(cbHandle)->waitForCompletion();
};
@@ -330,6 +352,7 @@ StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleRem
_getCallbackFromHandle(handle.getValue())->_iter->generation,
cb));
}
+ ++_counterScheduledCommands;
return handle;
}
@@ -337,7 +360,11 @@ StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleWor
const CallbackFn& work) {
stdx::lock_guard<stdx::mutex> lk(_mutex);
_networkInterface->signalWorkAvailable();
- return enqueueWork_inlock(&_readyQueue, work);
+ const auto status = enqueueWork_inlock(&_readyQueue, work);
+ if (status.isOK()) {
+ ++_counterScheduledWorks;
+ }
+ return status;
}
StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleWorkAt(
@@ -354,6 +381,7 @@ StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleWor
while (insertBefore != _sleepersQueue.end() && insertBefore->readyDate <= when)
++insertBefore;
_sleepersQueue.splice(insertBefore, temp, temp.begin());
+ ++_counterScheduledWorkAts;
return cbHandle;
}
@@ -384,6 +412,7 @@ StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleDBW
_dblockTaskRunner.schedule(DatabaseTask::makeCollectionLockTask(task, nss, mode));
}
}
+ ++_counterScheduledDBWorks;
return handle;
}
@@ -435,6 +464,7 @@ ReplicationExecutor::scheduleWorkWithGlobalExclusiveLock(const CallbackFn& work)
return TaskRunner::NextAction::kDisposeOperationContext;
}));
}
+ ++_counterScheduledExclusiveWorks;
return handle;
}
diff --git a/src/mongo/db/repl/replication_executor.h b/src/mongo/db/repl/replication_executor.h
index 4f220b06d56..5b130f87de4 100644
--- a/src/mongo/db/repl/replication_executor.h
+++ b/src/mongo/db/repl/replication_executor.h
@@ -313,19 +313,32 @@ private:
stdx::mutex _mutex;
stdx::mutex _terribleExLockSyncMutex;
stdx::condition_variable _noMoreWaitingThreads;
- WorkQueue _freeQueue;
- WorkQueue _readyQueue;
- WorkQueue _dbWorkInProgressQueue;
- WorkQueue _exclusiveLockInProgressQueue;
- WorkQueue _networkInProgressQueue;
- WorkQueue _sleepersQueue;
- EventList _unsignaledEvents;
- int64_t _totalEventWaiters;
+ WorkQueue _freeQueue{};
+ WorkQueue _readyQueue{};
+ WorkQueue _dbWorkInProgressQueue{};
+ WorkQueue _exclusiveLockInProgressQueue{};
+ WorkQueue _networkInProgressQueue{};
+ WorkQueue _sleepersQueue{};
+ EventList _unsignaledEvents{};
+ int64_t _totalEventWaiters = 0;
+
+ // Counters for metrics, for the whole life of this instance, protected by _mutex.
+ int64_t _counterWaitEvents = 0;
+ int64_t _counterCreatedEvents = 0;
+ int64_t _counterScheduledCommands = 0;
+ int64_t _counterScheduledExclusiveWorks = 0;
+ int64_t _counterScheduledDBWorks = 0;
+ int64_t _counterScheduledWorks = 0;
+ int64_t _counterScheduledWorkAts = 0;
+ int64_t _counterSchedulingFailures = 0;
+ int64_t _counterCancels = 0;
+ int64_t _counterWaits = 0;
+
bool _inShutdown;
OldThreadPool _dblockWorkers;
TaskRunner _dblockTaskRunner;
TaskRunner _dblockExclusiveLockTaskRunner;
- uint64_t _nextId;
+ uint64_t _nextId = 0;
};
class ReplicationExecutor::Callback : public executor::TaskExecutor::CallbackState {
diff --git a/src/mongo/db/repl/replication_executor_test.cpp b/src/mongo/db/repl/replication_executor_test.cpp
index cd8dab36018..998a80a549e 100644
--- a/src/mongo/db/repl/replication_executor_test.cpp
+++ b/src/mongo/db/repl/replication_executor_test.cpp
@@ -163,12 +163,12 @@ TEST_F(ReplicationExecutorTest, CancelBeforeRunningFutureWork) {
});
ASSERT_OK(cbhWithStatus.getStatus());
- ASSERT_EQUALS(1, executor.getDiagnosticBSON()["sleeperQueue"].Int());
- ASSERT_EQUALS(0, executor.getDiagnosticBSON()["ready"].Int());
+ ASSERT_EQUALS(1, executor.getDiagnosticBSON().getFieldDotted("queues.sleepers").Int());
+ ASSERT_EQUALS(0, executor.getDiagnosticBSON().getFieldDotted("queues.ready").Int());
executor.cancel(cbhWithStatus.getValue());
- ASSERT_EQUALS(0, executor.getDiagnosticBSON()["sleeperQueue"].Int());
- ASSERT_EQUALS(1, executor.getDiagnosticBSON()["ready"].Int());
+ ASSERT_EQUALS(0, executor.getDiagnosticBSON().getFieldDotted("queues.sleepers").Int());
+ ASSERT_EQUALS(1, executor.getDiagnosticBSON().getFieldDotted("queues.ready").Int());
}
} // namespace
diff --git a/src/mongo/db/repl/replset_commands.cpp b/src/mongo/db/repl/replset_commands.cpp
index 002353c690d..ea9fba690ad 100644
--- a/src/mongo/db/repl/replset_commands.cpp
+++ b/src/mongo/db/repl/replset_commands.cpp
@@ -40,19 +40,21 @@
#include "mongo/db/auth/authorization_manager.h"
#include "mongo/db/auth/authorization_session.h"
#include "mongo/db/commands.h"
+#include "mongo/db/commands/server_status_metric.h"
#include "mongo/db/dbhelpers.h"
#include "mongo/db/lasterror.h"
-#include "mongo/db/service_context.h"
#include "mongo/db/op_observer.h"
#include "mongo/db/repl/initial_sync.h"
#include "mongo/db/repl/oplog.h"
-#include "mongo/db/repl/repl_set_heartbeat_args.h"
#include "mongo/db/repl/repl_set_heartbeat_args_v1.h"
+#include "mongo/db/repl/repl_set_heartbeat_args.h"
#include "mongo/db/repl/repl_set_heartbeat_response.h"
-#include "mongo/db/repl/replication_coordinator_global.h"
#include "mongo/db/repl/replication_coordinator_external_state_impl.h"
+#include "mongo/db/repl/replication_coordinator_global.h"
+#include "mongo/db/repl/replication_coordinator_global.h"
#include "mongo/db/repl/replication_executor.h"
#include "mongo/db/repl/update_position_args.h"
+#include "mongo/db/service_context.h"
#include "mongo/db/storage/storage_engine.h"
#include "mongo/executor/network_interface.h"
#include "mongo/util/fail_point_service.h"
@@ -65,6 +67,16 @@ namespace repl {
using std::string;
using std::stringstream;
+
+// Server status metric registered as "repl.executor". It reports the replication
+// executor's diagnostic document (getDiagnosticBSON()) in an "executor" field.
+class ReplExecutorSSM : public ServerStatusMetric {
+public:
+ ReplExecutorSSM() : ServerStatusMetric("repl.executor") {}
+ virtual void appendAtLeaf(BSONObjBuilder& b) const {
+ ReplicationExecutor* exec = getGlobalReplicationCoordinator()->getExecutor();
+ if (!exec) {
+ // getExecutor() may return nullptr (ReplicationCoordinatorMock does); in that
+ // case there is nothing to report, so append nothing instead of dereferencing.
+ return;
+ }
+ b.append("executor", exec->getDiagnosticBSON());
+ }
+} replExecutorSSM;
+
// Testing only, enabled via command-line.
class CmdReplSetTest : public ReplSetCommand {
public: