diff options
author | Scott Hernandez <scotthernandez@gmail.com> | 2015-11-24 11:14:57 -0500 |
---|---|---|
committer | Scott Hernandez <scotthernandez@gmail.com> | 2015-11-24 13:54:54 -0500 |
commit | eb23ba1f57b17cfbe44896a735ab74a0bdee2f55 (patch) | |
tree | 4e949e981799547d94fef1668f569d1d2ee4b90c | |
parent | cc8f75d1cfc9b2b4b0c0f8a213c960e928174ce7 (diff) | |
download | mongo-eb23ba1f57b17cfbe44896a735ab74a0bdee2f55.tar.gz |
SERVER-21643: Add server metrics for the Replication Executor
-rw-r--r-- | src/mongo/db/repl/replication_coordinator.h | 8 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.h | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_mock.h | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_executor.cpp | 50 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_executor.h | 31 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_executor_test.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/repl/replset_commands.cpp | 18 |
7 files changed, 96 insertions, 27 deletions
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h index 34abd56ab5a..05c5d3c212e 100644 --- a/src/mongo/db/repl/replication_coordinator.h +++ b/src/mongo/db/repl/replication_coordinator.h @@ -67,6 +67,8 @@ class OplogReader; class OpTime; class ReadConcernArgs; class ReadConcernResponse; +class ReplicaSetConfig; +class ReplicationExecutor; class ReplSetDeclareElectionWinnerArgs; class ReplSetDeclareElectionWinnerResponse; class ReplSetHeartbeatArgs; @@ -75,7 +77,6 @@ class ReplSetHeartbeatResponse; class ReplSetHtmlSummary; class ReplSetRequestVotesArgs; class ReplSetRequestVotesResponse; -class ReplicaSetConfig; class UpdatePositionArgs; /** @@ -130,6 +131,11 @@ public: virtual void shutdown() = 0; /** + * Returns a pointer to the ReplicationExecutor. + */ + virtual ReplicationExecutor* getExecutor() = 0; + + /** * Returns a reference to the parsed command line arguments that are related to replication. */ virtual const ReplSettings& getSettings() const = 0; diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index 1b16cddfacc..d1cca5c96e3 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -109,6 +109,10 @@ public: virtual void shutdown() override; + virtual ReplicationExecutor* getExecutor() override { + return &_replExecutor; + } + virtual const ReplSettings& getSettings() const override; virtual Mode getReplicationMode() const override; diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h index 142e4289620..6fb18b19007 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.h +++ b/src/mongo/db/repl/replication_coordinator_mock.h @@ -51,6 +51,10 @@ public: virtual void shutdown(); + virtual ReplicationExecutor* getExecutor() override { + return nullptr; + }; + virtual const ReplSettings& getSettings() const; virtual 
bool isReplEnabled() const; diff --git a/src/mongo/db/repl/replication_executor.cpp b/src/mongo/db/repl/replication_executor.cpp index 6dc2067f9ea..c72f3c16ac7 100644 --- a/src/mongo/db/repl/replication_executor.cpp +++ b/src/mongo/db/repl/replication_executor.cpp @@ -58,14 +58,13 @@ ReplicationExecutor::ReplicationExecutor(NetworkInterface* netInterface, : _random(prngSeed), _networkInterface(netInterface), _storageInterface(storageInterface), - _totalEventWaiters(0), _inShutdown(false), _dblockWorkers(OldThreadPool::DoNotStartThreadsTag(), 3, "replExecDBWorker-"), _dblockTaskRunner(&_dblockWorkers, stdx::bind(&StorageInterface::createOperationContext, storageInterface)), _dblockExclusiveLockTaskRunner( - &_dblockWorkers, stdx::bind(&StorageInterface::createOperationContext, storageInterface)), - _nextId(0) {} + &_dblockWorkers, + stdx::bind(&StorageInterface::createOperationContext, storageInterface)) {} ReplicationExecutor::~ReplicationExecutor() { // join must have been called @@ -75,12 +74,31 @@ ReplicationExecutor::~ReplicationExecutor() { BSONObj ReplicationExecutor::getDiagnosticBSON() { stdx::lock_guard<stdx::mutex> lk(_mutex); BSONObjBuilder builder; - builder.appendIntOrLL("networkInProgress", _networkInProgressQueue.size()); - builder.appendIntOrLL("dbWorkInProgress", _dbWorkInProgressQueue.size()); - builder.appendIntOrLL("exclusiveInProgress", _exclusiveLockInProgressQueue.size()); - builder.appendIntOrLL("sleeperQueue", _sleepersQueue.size()); - builder.appendIntOrLL("ready", _readyQueue.size()); - builder.appendIntOrLL("free", _freeQueue.size()); + + // Counters + BSONObjBuilder counters(builder.subobjStart("counters")); + counters.appendIntOrLL("eventCreated", _counterCreatedEvents); + counters.appendIntOrLL("eventWait", _counterWaitEvents); + counters.appendIntOrLL("cancels", _counterCancels); + counters.appendIntOrLL("waits", _counterWaits); + counters.appendIntOrLL("scheduledNetCmd", _counterScheduledCommands); + 
counters.appendIntOrLL("scheduledDBWork", _counterScheduledDBWorks); + counters.appendIntOrLL("scheduledXclWork", _counterScheduledExclusiveWorks); + counters.appendIntOrLL("scheduledWorkAt", _counterScheduledWorkAts); + counters.appendIntOrLL("scheduledWork", _counterScheduledWorks); + counters.appendIntOrLL("schedulingFailures", _counterSchedulingFailures); + counters.done(); + + // Queues + BSONObjBuilder queues(builder.subobjStart("queues")); + queues.appendIntOrLL("networkInProgress", _networkInProgressQueue.size()); + queues.appendIntOrLL("dbWorkInProgress", _dbWorkInProgressQueue.size()); + queues.appendIntOrLL("exclusiveInProgress", _exclusiveLockInProgressQueue.size()); + queues.appendIntOrLL("sleepers", _sleepersQueue.size()); + queues.appendIntOrLL("ready", _readyQueue.size()); + queues.appendIntOrLL("free", _freeQueue.size()); + queues.done(); + builder.appendIntOrLL("unsignaledEvents", _unsignaledEvents.size()); builder.appendIntOrLL("eventWaiters", _totalEventWaiters); builder.append("shuttingDown", _inShutdown); @@ -207,6 +225,7 @@ void ReplicationExecutor::maybeNotifyShutdownComplete_inlock() { StatusWith<ReplicationExecutor::EventHandle> ReplicationExecutor::makeEvent() { stdx::lock_guard<stdx::mutex> lk(_mutex); + ++_counterCreatedEvents; return makeEvent_inlock(); } @@ -232,14 +251,17 @@ void ReplicationExecutor::signalEvent_inlock(const EventHandle& eventHandle) { } void ReplicationExecutor::waitForEvent(const EventHandle& event) { + ++_counterWaitEvents; _getEventFromHandle(event)->waitUntilSignaled(); } void ReplicationExecutor::cancel(const CallbackHandle& cbHandle) { + ++_counterCancels; _getCallbackFromHandle(cbHandle)->cancel(); }; void ReplicationExecutor::wait(const CallbackHandle& cbHandle) { + ++_counterWaits; _getCallbackFromHandle(cbHandle)->waitForCompletion(); }; @@ -330,6 +352,7 @@ StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleRem _getCallbackFromHandle(handle.getValue())->_iter->generation, cb)); } 
+ ++_counterScheduledCommands; return handle; } @@ -337,7 +360,11 @@ StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleWor const CallbackFn& work) { stdx::lock_guard<stdx::mutex> lk(_mutex); _networkInterface->signalWorkAvailable(); - return enqueueWork_inlock(&_readyQueue, work); + const auto status = enqueueWork_inlock(&_readyQueue, work); + if (status.isOK()) { + ++_counterScheduledWorks; + } + return status; } StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleWorkAt( @@ -354,6 +381,7 @@ StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleWor while (insertBefore != _sleepersQueue.end() && insertBefore->readyDate <= when) ++insertBefore; _sleepersQueue.splice(insertBefore, temp, temp.begin()); + ++_counterScheduledWorkAts; return cbHandle; } @@ -384,6 +412,7 @@ StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleDBW _dblockTaskRunner.schedule(DatabaseTask::makeCollectionLockTask(task, nss, mode)); } } + ++_counterScheduledDBWorks; return handle; } @@ -435,6 +464,7 @@ ReplicationExecutor::scheduleWorkWithGlobalExclusiveLock(const CallbackFn& work) return TaskRunner::NextAction::kDisposeOperationContext; })); } + ++_counterScheduledExclusiveWorks; return handle; } diff --git a/src/mongo/db/repl/replication_executor.h b/src/mongo/db/repl/replication_executor.h index 4f220b06d56..5b130f87de4 100644 --- a/src/mongo/db/repl/replication_executor.h +++ b/src/mongo/db/repl/replication_executor.h @@ -313,19 +313,32 @@ private: stdx::mutex _mutex; stdx::mutex _terribleExLockSyncMutex; stdx::condition_variable _noMoreWaitingThreads; - WorkQueue _freeQueue; - WorkQueue _readyQueue; - WorkQueue _dbWorkInProgressQueue; - WorkQueue _exclusiveLockInProgressQueue; - WorkQueue _networkInProgressQueue; - WorkQueue _sleepersQueue; - EventList _unsignaledEvents; - int64_t _totalEventWaiters; + WorkQueue _freeQueue{}; + WorkQueue _readyQueue{}; + WorkQueue _dbWorkInProgressQueue{}; + 
WorkQueue _exclusiveLockInProgressQueue{}; + WorkQueue _networkInProgressQueue{}; + WorkQueue _sleepersQueue{}; + EventList _unsignaledEvents{}; + int64_t _totalEventWaiters = 0; + + // Counters for metrics, for the whole life of this instance, protected by _mutex. + int64_t _counterWaitEvents = 0; + int64_t _counterCreatedEvents = 0; + int64_t _counterScheduledCommands = 0; + int64_t _counterScheduledExclusiveWorks = 0; + int64_t _counterScheduledDBWorks = 0; + int64_t _counterScheduledWorks = 0; + int64_t _counterScheduledWorkAts = 0; + int64_t _counterSchedulingFailures = 0; + int64_t _counterCancels = 0; + int64_t _counterWaits = 0; + bool _inShutdown; OldThreadPool _dblockWorkers; TaskRunner _dblockTaskRunner; TaskRunner _dblockExclusiveLockTaskRunner; - uint64_t _nextId; + uint64_t _nextId = 0; }; class ReplicationExecutor::Callback : public executor::TaskExecutor::CallbackState { diff --git a/src/mongo/db/repl/replication_executor_test.cpp b/src/mongo/db/repl/replication_executor_test.cpp index cd8dab36018..998a80a549e 100644 --- a/src/mongo/db/repl/replication_executor_test.cpp +++ b/src/mongo/db/repl/replication_executor_test.cpp @@ -163,12 +163,12 @@ TEST_F(ReplicationExecutorTest, CancelBeforeRunningFutureWork) { }); ASSERT_OK(cbhWithStatus.getStatus()); - ASSERT_EQUALS(1, executor.getDiagnosticBSON()["sleeperQueue"].Int()); - ASSERT_EQUALS(0, executor.getDiagnosticBSON()["ready"].Int()); + ASSERT_EQUALS(1, executor.getDiagnosticBSON().getFieldDotted("queues.sleepers").Int()); + ASSERT_EQUALS(0, executor.getDiagnosticBSON().getFieldDotted("queues.ready").Int()); executor.cancel(cbhWithStatus.getValue()); - ASSERT_EQUALS(0, executor.getDiagnosticBSON()["sleeperQueue"].Int()); - ASSERT_EQUALS(1, executor.getDiagnosticBSON()["ready"].Int()); + ASSERT_EQUALS(0, executor.getDiagnosticBSON().getFieldDotted("queues.sleepers").Int()); + ASSERT_EQUALS(1, executor.getDiagnosticBSON().getFieldDotted("queues.ready").Int()); } } // namespace diff --git 
a/src/mongo/db/repl/replset_commands.cpp b/src/mongo/db/repl/replset_commands.cpp index 002353c690d..ea9fba690ad 100644 --- a/src/mongo/db/repl/replset_commands.cpp +++ b/src/mongo/db/repl/replset_commands.cpp @@ -40,19 +40,21 @@ #include "mongo/db/auth/authorization_manager.h" #include "mongo/db/auth/authorization_session.h" #include "mongo/db/commands.h" +#include "mongo/db/commands/server_status_metric.h" #include "mongo/db/dbhelpers.h" #include "mongo/db/lasterror.h" -#include "mongo/db/service_context.h" #include "mongo/db/op_observer.h" #include "mongo/db/repl/initial_sync.h" #include "mongo/db/repl/oplog.h" -#include "mongo/db/repl/repl_set_heartbeat_args.h" #include "mongo/db/repl/repl_set_heartbeat_args_v1.h" +#include "mongo/db/repl/repl_set_heartbeat_args.h" #include "mongo/db/repl/repl_set_heartbeat_response.h" -#include "mongo/db/repl/replication_coordinator_global.h" #include "mongo/db/repl/replication_coordinator_external_state_impl.h" +#include "mongo/db/repl/replication_coordinator_global.h" +#include "mongo/db/repl/replication_coordinator_global.h" #include "mongo/db/repl/replication_executor.h" #include "mongo/db/repl/update_position_args.h" +#include "mongo/db/service_context.h" #include "mongo/db/storage/storage_engine.h" #include "mongo/executor/network_interface.h" #include "mongo/util/fail_point_service.h" @@ -65,6 +67,16 @@ namespace repl { using std::string; using std::stringstream; + +class ReplExecutorSSM : public ServerStatusMetric { +public: + ReplExecutorSSM() : ServerStatusMetric("repl.executor") {} + virtual void appendAtLeaf(BSONObjBuilder& b) const { + ReplicationExecutor* exec = getGlobalReplicationCoordinator()->getExecutor(); + b.append("executor", exec->getDiagnosticBSON()); + } +} replExecutorSSM; + // Testing only, enabled via command-line. class CmdReplSetTest : public ReplSetCommand { public: |