summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Shuvalov <andrew.shuvalov@mongodb.com>2021-05-28 23:20:40 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-05-28 23:41:03 +0000
commite4faa4d08720df067a62d8e48f66944a2f2b76cc (patch)
tree7743242ca6a058ebbe7c0c98a87abef0973669ee
parent8ddcaf878b5600fdac322929cd337c6f4563bf19 (diff)
downloadmongo-e4faa4d08720df067a62d8e48f66944a2f2b76cc.tar.gz
SERVER-56361: Better FTDC diagnostics of RSM
-rw-r--r--jstests/noPassthrough/ftdc_connection_pool.js6
-rw-r--r--src/mongo/client/SConscript3
-rw-r--r--src/mongo/client/replica_set_monitor.cpp48
-rw-r--r--src/mongo/client/replica_set_monitor.h19
-rw-r--r--src/mongo/client/replica_set_monitor_manager.cpp25
-rw-r--r--src/mongo/client/replica_set_monitor_manager.h4
-rw-r--r--src/mongo/client/replica_set_monitor_stats.cpp146
-rw-r--r--src/mongo/client/replica_set_monitor_stats.h127
-rw-r--r--src/mongo/client/replica_set_monitor_test.cpp86
-rw-r--r--src/mongo/client/replica_set_monitor_transport.cpp24
-rw-r--r--src/mongo/client/replica_set_monitor_transport.h10
11 files changed, 429 insertions, 69 deletions
diff --git a/jstests/noPassthrough/ftdc_connection_pool.js b/jstests/noPassthrough/ftdc_connection_pool.js
index f1fb7336aa0..351713c5a8c 100644
--- a/jstests/noPassthrough/ftdc_connection_pool.js
+++ b/jstests/noPassthrough/ftdc_connection_pool.js
@@ -24,6 +24,12 @@ load('jstests/libs/ftdc.js');
assert(stats.hasOwnProperty('totalAvailable'));
assert(stats.hasOwnProperty('totalCreated'));
assert(stats.hasOwnProperty('totalRefreshing'));
+ assert("getHostAndRefresh" in stats["replicaSetMonitor"]);
+ const getHostStats = stats["replicaSetMonitor"]["getHostAndRefresh"];
+ assert(getHostStats.hasOwnProperty('currentlyActive'));
+ assert("hello" in stats["replicaSetMonitor"]);
+ const helloStats = stats["replicaSetMonitor"]["hello"];
+ assert(helloStats.hasOwnProperty('currentlyActive'));
// The connPoolStats command reply has "hosts", but FTDC's stats do not.
assert(!stats.hasOwnProperty('hosts'));
diff --git a/src/mongo/client/SConscript b/src/mongo/client/SConscript
index 0f8b225c8ab..5a930faa1f5 100644
--- a/src/mongo/client/SConscript
+++ b/src/mongo/client/SConscript
@@ -186,7 +186,8 @@ clientDriverEnv.Library(
'global_conn_pool.cpp',
'replica_set_monitor.cpp',
'replica_set_monitor_manager.cpp',
- 'replica_set_monitor_transport.cpp',
+ 'replica_set_monitor_stats.cpp',
+ 'replica_set_monitor_transport.cpp',
],
LIBDEPS=[
'$BUILD_DIR/mongo/db/write_concern_options',
diff --git a/src/mongo/client/replica_set_monitor.cpp b/src/mongo/client/replica_set_monitor.cpp
index 7d01c32b2c5..a274aaa3e4f 100644
--- a/src/mongo/client/replica_set_monitor.cpp
+++ b/src/mongo/client/replica_set_monitor.cpp
@@ -99,7 +99,7 @@ const int64_t unknownLatency = numeric_limits<int64_t>::max();
// After this period of time, RSM logging becomes more verbose.
static constexpr int kRsmVerbosityThresholdTimeoutSec = 20;
// When 'is master' reply latency is over 2 sec, it will be logged.
-static constexpr int kSlowIsMasterThresholdMicros = 2L * 1000 * 1000;
+static constexpr Milliseconds kSlowIsMasterThreshold{500};
static Seconds kHelloTimeout = Seconds{10};
@@ -132,6 +132,11 @@ bool hostsEqual(const Node& lhs, const HostAndPort& rhs) {
return lhs.host == rhs;
}
+ReplicaSetMonitorManagerStats* testOnlyManagerStats() {
+ static ReplicaSetMonitorManagerStats* stats = new ReplicaSetMonitorManagerStats();
+ return stats;
+}
+
// Allows comparing two Nodes, or a HostAndPort and a Node.
// NOTE: the two HostAndPort overload is only needed to support extra checks in some STL
// implementations. For simplicity, no comparator should be used with collections of just
@@ -225,21 +230,30 @@ Milliseconds ReplicaSetMonitor::getHelloTimeout() {
ReplicaSetMonitor::ReplicaSetMonitor(StringData name,
const std::set<HostAndPort>& seeds,
- ReplicaSetMonitorTransportPtr transport)
+ ReplicaSetMonitorTransportPtr transport,
+ ReplicaSetMonitorManagerStats* managerStats)
: _state(std::make_shared<SetState>(name, seeds)),
_executor(globalRSMonitorManager.getExecutor()),
- _rsmTransport(std::move(transport)) {}
+ _rsmTransport(std::move(transport)),
+ _stats(managerStats) {}
ReplicaSetMonitor::ReplicaSetMonitor(const MongoURI& uri,
- std::unique_ptr<ReplicaSetMonitorTransport> transport)
+ std::unique_ptr<ReplicaSetMonitorTransport> transport,
+ ReplicaSetMonitorManagerStats* managerStats)
: _state(std::make_shared<SetState>(uri)),
_executor(globalRSMonitorManager.getExecutor()),
- _rsmTransport(std::move(transport)) {}
+ _rsmTransport(std::move(transport)),
+ _stats(managerStats) {}
void ReplicaSetMonitor::init() {
_scheduleRefresh(_executor->now());
}
+// Test only constructor.
+ReplicaSetMonitor::ReplicaSetMonitor(const SetStatePtr& initialState,
+ ReplicaSetMonitorManagerStats* managerStats)
+ : _state(initialState), _stats(managerStats ? managerStats : testOnlyManagerStats()) {}
+
ReplicaSetMonitor::~ReplicaSetMonitor() {
// need this lock because otherwise can get race with _scheduleRefresh()
stdx::lock_guard<stdx::mutex> lk(_mutex);
@@ -312,7 +326,8 @@ StatusWith<HostAndPort> ReplicaSetMonitor::getHostOrRefresh(const ReadPreference
return {std::move(out)};
}
- const auto startTimeMs = Date_t::now();
+ const Timer startTimer;
+ const auto statsCollector = _stats.collectGetHostAndRefreshStats();
while (true) {
// We might not have found any matching hosts due to the scan, which just completed may have
@@ -329,7 +344,7 @@ StatusWith<HostAndPort> ReplicaSetMonitor::getHostOrRefresh(const ReadPreference
return {ErrorCodes::ShutdownInProgress, str::stream() << "Server is shutting down"};
}
- const Milliseconds remaining = maxWait - (Date_t::now() - startTimeMs);
+ const Milliseconds remaining = maxWait - Milliseconds(startTimer.millis());
if (remaining < kFindHostMaxBackOffTime || areRefreshRetriesDisabledForTest.load()) {
break;
@@ -352,7 +367,7 @@ HostAndPort ReplicaSetMonitor::getMasterOrUassert() {
Refresher ReplicaSetMonitor::startOrContinueRefresh() {
stdx::lock_guard<stdx::mutex> lk(_state->mutex);
- Refresher out(_state, _executor, _rsmTransport.get());
+ Refresher out(_state, _executor, _rsmTransport.get(), &_stats);
DEV _state->checkInvariants();
return out;
}
@@ -521,8 +536,13 @@ void ReplicaSetMonitor::markAsRemoved() {
Refresher::Refresher(const SetStatePtr& setState,
executor::TaskExecutor* executor,
- ReplicaSetMonitorTransport* transport)
- : _set(setState), _scan(setState->currentScan), _executor(executor), _rsmTransport(transport) {
+ ReplicaSetMonitorTransport* transport,
+ ReplicaSetMonitorStats* stats)
+ : _set(setState),
+ _scan(setState->currentScan),
+ _executor(executor),
+ _rsmTransport(transport),
+ _stats(stats) {
if (_scan)
return; // participate in in-progress scan
@@ -906,7 +926,7 @@ HostAndPort Refresher::_refreshUntilMatches(const ReadPreferenceSetting* criteri
{
Timer timer;
auto helloFuture = _rsmTransport->sayHello(
- ns.host, _set->name, _set->setUri, getHelloTimeout());
+ ns.host, _set->name, _set->setUri, getHelloTimeout(), _stats);
helloReplyStatus = helloFuture.getNoThrow();
pingMicros = timer.micros();
}
@@ -1065,9 +1085,9 @@ void Node::update(const IsMasterReply& reply, bool verbose) {
// update latency with smoothed moving average (1/4th the delta)
latencyMicros += (reply.latencyMicros - latencyMicros) / 4;
}
- if (reply.latencyMicros > kSlowIsMasterThresholdMicros) { // > 2 seconds.
- log() << "ReplicaSet Monitor received reply with latency above 2 seconds: "
- << reply.raw;
+ if (Milliseconds(reply.latencyMicros / 1000) > kSlowIsMasterThreshold) {
+ log() << "Slow ReplicaSet Monitor reply for " << reply.setName << " from " << host
+ << ": " << reply.raw << " (" << (reply.latencyMicros / 1000) << " ms)";
}
}
diff --git a/src/mongo/client/replica_set_monitor.h b/src/mongo/client/replica_set_monitor.h
index 115696c04ca..2ca2fd89df1 100644
--- a/src/mongo/client/replica_set_monitor.h
+++ b/src/mongo/client/replica_set_monitor.h
@@ -32,13 +32,13 @@
#include <atomic>
#include <memory>
-#include <memory>
#include <set>
#include <string>
#include "mongo/base/disallow_copying.h"
#include "mongo/base/string_data.h"
#include "mongo/client/mongo_uri.h"
+#include "mongo/client/replica_set_monitor_stats.h"
#include "mongo/client/replica_set_monitor_transport.h"
#include "mongo/executor/task_executor.h"
#include "mongo/platform/atomic_word.h"
@@ -74,9 +74,12 @@ public:
*/
ReplicaSetMonitor(StringData name,
const std::set<HostAndPort>& seeds,
- ReplicaSetMonitorTransportPtr transport);
+ ReplicaSetMonitorTransportPtr transport,
+ ReplicaSetMonitorManagerStats* managerStats);
- ReplicaSetMonitor(const MongoURI& uri, ReplicaSetMonitorTransportPtr transport);
+ ReplicaSetMonitor(const MongoURI& uri,
+ ReplicaSetMonitorTransportPtr transport,
+ ReplicaSetMonitorManagerStats* managerStats);
/**
* Schedules the initial refresh task into task executor.
@@ -275,8 +278,10 @@ public:
/**
* Allows tests to set initial conditions and introspect the current state.
+ * This constructor is used only in tests.
*/
- explicit ReplicaSetMonitor(const SetStatePtr& initialState) : _state(initialState) {}
+ explicit ReplicaSetMonitor(const SetStatePtr& initialState,
+ ReplicaSetMonitorManagerStats* managerStats = nullptr);
~ReplicaSetMonitor();
/**
@@ -313,6 +318,8 @@ private:
executor::TaskExecutor* _executor;
AtomicBool _isRemovedFromManager{false};
ReplicaSetMonitorTransportPtr _rsmTransport;
+
+ ReplicaSetMonitorStats _stats;
};
@@ -357,7 +364,8 @@ public:
*/
explicit Refresher(const SetStatePtr& setState,
executor::TaskExecutor* executor,
- ReplicaSetMonitorTransport* transport);
+ ReplicaSetMonitorTransport* transport,
+ ReplicaSetMonitorStats* stats);
struct NextStep {
enum StepKind {
@@ -436,6 +444,7 @@ private:
executor::TaskExecutor* _executor;
ReplicaSetMonitorTransport* _rsmTransport;
+ ReplicaSetMonitorStats* const _stats;
};
} // namespace mongo
diff --git a/src/mongo/client/replica_set_monitor_manager.cpp b/src/mongo/client/replica_set_monitor_manager.cpp
index 7345fd95fc8..d305f790d55 100644
--- a/src/mongo/client/replica_set_monitor_manager.cpp
+++ b/src/mongo/client/replica_set_monitor_manager.cpp
@@ -159,7 +159,8 @@ shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorManager::getOrCreateMonitor(
if (!transport) {
transport = makeRsmTransport();
}
- auto newMonitor = std::make_shared<ReplicaSetMonitor>(setName, servers, std::move(transport));
+ auto newMonitor =
+ std::make_shared<ReplicaSetMonitor>(setName, servers, std::move(transport), &_stats);
_monitors[setName] = newMonitor;
newMonitor->init();
return newMonitor;
@@ -180,7 +181,7 @@ shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorManager::getOrCreateMonitor(
log() << "Starting new replica set monitor for " << uri.toString();
- auto newMonitor = std::make_shared<ReplicaSetMonitor>(uri, std::move(transport));
+ auto newMonitor = std::make_shared<ReplicaSetMonitor>(uri, std::move(transport), &_stats);
_monitors[setName] = newMonitor;
newMonitor->init();
return newMonitor;
@@ -253,16 +254,20 @@ void ReplicaSetMonitorManager::report(BSONObjBuilder* builder, bool forFTDC) {
// calling ShardRegistry::updateConfigServerConnectionString.
auto setNames = getAllSetNames();
- BSONObjBuilder setStats(
- builder->subobjStart(forFTDC ? "replicaSetPingTimesMillis" : "replicaSets"));
-
- for (const auto& setName : setNames) {
- auto monitor = getMonitor(setName);
- if (!monitor) {
- continue;
+ {
+ BSONObjBuilder setStats(
+ builder->subobjStart(forFTDC ? "replicaSetPingTimesMillis" : "replicaSets"));
+
+ for (const auto& setName : setNames) {
+ auto monitor = getMonitor(setName);
+ if (!monitor) {
+ continue;
+ }
+ monitor->appendInfo(setStats, forFTDC);
}
- monitor->appendInfo(setStats, forFTDC);
}
+
+ _stats.report(builder, forFTDC);
}
TaskExecutor* ReplicaSetMonitorManager::getExecutor() {
diff --git a/src/mongo/client/replica_set_monitor_manager.h b/src/mongo/client/replica_set_monitor_manager.h
index 89bee98cfc8..181ae80481c 100644
--- a/src/mongo/client/replica_set_monitor_manager.h
+++ b/src/mongo/client/replica_set_monitor_manager.h
@@ -34,6 +34,7 @@
#include <vector>
#include "mongo/base/disallow_copying.h"
+#include "mongo/client/replica_set_monitor_stats.h"
#include "mongo/client/replica_set_monitor_transport.h"
#include "mongo/executor/task_executor.h"
#include "mongo/stdx/mutex.h"
@@ -120,6 +121,9 @@ private:
// set to true when shutdown has been called.
bool _isShutdown{false};
+
+ // Internally synchronized.
+ ReplicaSetMonitorManagerStats _stats;
};
} // namespace mongo
diff --git a/src/mongo/client/replica_set_monitor_stats.cpp b/src/mongo/client/replica_set_monitor_stats.cpp
new file mode 100644
index 00000000000..1eb1e183de5
--- /dev/null
+++ b/src/mongo/client/replica_set_monitor_stats.cpp
@@ -0,0 +1,146 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/client/replica_set_monitor_stats.h"
+#include "mongo/bson/bsonobjbuilder.h"
+
+namespace mongo {
+
+constexpr Microseconds ReplicaSetMonitorManagerStats::kMaxLatencyDefaultResetTimeout;
+
+static constexpr char kRSMPrefix[] = "replicaSetMonitor";
+static constexpr char kGetHostPrefix[] = "getHostAndRefresh";
+static constexpr char kHelloPrefix[] = "hello";
+
+ReplicaSetMonitorManagerStats::ReplicaSetMonitorManagerStats(Microseconds resetTimeout)
+ : _resetTimeout(resetTimeout) {}
+
+void ReplicaSetMonitorManagerStats::report(BSONObjBuilder* builder, bool forFTDC) {
+ if (!forFTDC) {
+ return;
+ }
+ BSONObjBuilder rsmStats(builder->subobjStart(kRSMPrefix));
+ {
+ BSONObjBuilder getHostStats(rsmStats.subobjStart(kGetHostPrefix));
+ getHostStats.appendNumber("totalCalls", _getHostAndRefreshTotal);
+ getHostStats.appendNumber("currentlyActive", _getHostAndRefreshCurrent);
+ getHostStats.appendNumber("totalLatencyMicros", _getHostAndRefreshAggregateLatency);
+
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ getHostStats.appendNumber("maxLatencyMicros",
+ durationCount<Microseconds>(_getHostAndRefreshMaxLatency));
+ }
+ {
+ BSONObjBuilder helloStats(rsmStats.subobjStart(kHelloPrefix));
+ helloStats.appendNumber("totalCalls", _helloTotal);
+ helloStats.appendNumber("currentlyActive", _helloCurrent);
+ helloStats.appendNumber("totalLatencyMicros", _helloAggregateLatency);
+
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ helloStats.appendNumber("maxLatencyMicros", durationCount<Microseconds>(_helloMaxLatency));
+ }
+}
+
+void ReplicaSetMonitorManagerStats::enterGetHostAndRefresh() {
+ _getHostAndRefreshTotal.increment(1);
+ _getHostAndRefreshCurrent.increment(1);
+}
+
+static auto kProcessLatency = [](Microseconds& currentMaxLatency,
+ Microseconds& newLatency,
+ Timer& resetTimer,
+ Microseconds resetTimeout) {
+ bool replace = false;
+ if (newLatency > currentMaxLatency) {
+ replace = true;
+ }
+ if (Microseconds(resetTimer.micros()) > resetTimeout) {
+ replace = true;
+ resetTimer.reset();
+ }
+ if (replace) {
+ currentMaxLatency = newLatency;
+ }
+};
+
+void ReplicaSetMonitorManagerStats::leaveGetHostAndRefresh(Microseconds latency) {
+ _getHostAndRefreshCurrent.decrement(1);
+ _getHostAndRefreshAggregateLatency.increment(latency.count());
+
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ kProcessLatency(_getHostAndRefreshMaxLatency,
+ latency,
+ _lastTimeGetHostAndRefreshMaxLatencyUpdated,
+ _resetTimeout);
+}
+
+void ReplicaSetMonitorManagerStats::enterHello() {
+ _helloTotal.increment(1);
+ _helloCurrent.increment(1);
+}
+
+void ReplicaSetMonitorManagerStats::leaveHello(Microseconds latency) {
+ _helloCurrent.decrement(1);
+ _helloAggregateLatency.increment(latency.count());
+
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ kProcessLatency(_helloMaxLatency, latency, _lastTimeHelloMaxLatencyUpdated, _resetTimeout);
+}
+
+
+ReplicaSetMonitorStats::ReplicaSetMonitorStats(ReplicaSetMonitorManagerStats* managerStats)
+ : _managerStats(managerStats) {}
+
+void ReplicaSetMonitorStats::_enterGetHostAndRefresh() {
+ _getHostAndRefreshTotal.increment(1);
+ _getHostAndRefreshCurrent.increment(1);
+ _managerStats->enterGetHostAndRefresh();
+}
+
+void ReplicaSetMonitorStats::_leaveGetHostAndRefresh(const Timer& suppliedTimer) {
+ const Microseconds latency = Microseconds(suppliedTimer.micros());
+ _getHostAndRefreshCurrent.decrement(1);
+ _getHostAndRefreshAggregateLatency.increment(latency.count());
+ _managerStats->leaveGetHostAndRefresh(latency);
+}
+
+void ReplicaSetMonitorStats::_enterHello() {
+ _helloTotal.increment(1);
+ _helloCurrent.increment(1);
+ _managerStats->enterHello();
+}
+
+void ReplicaSetMonitorStats::_leaveHello(const Timer& suppliedTimer) {
+ const Microseconds latency = Microseconds(suppliedTimer.micros());
+ _helloCurrent.decrement(1);
+ _helloLatency.increment(latency.count());
+ _managerStats->leaveHello(latency);
+}
+
+} // namespace mongo
diff --git a/src/mongo/client/replica_set_monitor_stats.h b/src/mongo/client/replica_set_monitor_stats.h
new file mode 100644
index 00000000000..caaf49e6e28
--- /dev/null
+++ b/src/mongo/client/replica_set_monitor_stats.h
@@ -0,0 +1,127 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <functional>
+#include <memory>
+
+#include "mongo/base/counter.h"
+#include "mongo/util/duration.h"
+#include "mongo/util/scopeguard.h"
+#include "mongo/util/timer.h"
+
+namespace mongo {
+
+// Common and aggregate stats for all RSMs.
+class ReplicaSetMonitorManagerStats {
+public:
+ static constexpr Microseconds kMaxLatencyDefaultResetTimeout{1 * 1000 * 1000};
+
+ explicit ReplicaSetMonitorManagerStats(
+ Microseconds resetTimeout = kMaxLatencyDefaultResetTimeout);
+
+ /**
+ * Reports information about the replica sets tracked by us, for diagnostic purposes.
+ */
+ void report(BSONObjBuilder* builder, bool forFTDC);
+
+ void enterGetHostAndRefresh();
+ void leaveGetHostAndRefresh(Microseconds latency);
+
+ void enterHello();
+ void leaveHello(Microseconds latency);
+
+private:
+ const Microseconds _resetTimeout;
+
+ mutable stdx::mutex _mutex; // NOLINT.
+
+ // Stats for the outer loop of getHostAndRefresh().
+
+ Counter64 _getHostAndRefreshTotal;
+ Counter64 _getHostAndRefreshCurrent;
+ Counter64 _getHostAndRefreshAggregateLatency;
+
+ Timer _lastTimeGetHostAndRefreshMaxLatencyUpdated;
+ // Keeps track of largest individual RSM latency registered during last time period.
+ Microseconds _getHostAndRefreshMaxLatency;
+
+ Counter64 _helloTotal;
+ Counter64 _helloCurrent;
+ Counter64 _helloAggregateLatency;
+
+ Timer _lastTimeHelloMaxLatencyUpdated;
+ Microseconds _helloMaxLatency;
+};
+
+// Stats for an RSM.
+class ReplicaSetMonitorStats {
+public:
+ explicit ReplicaSetMonitorStats(ReplicaSetMonitorManagerStats* managerStats);
+
+ /**
+ * Returns a scope guard class instance to collect statistics.
+ * Invokes 'completionCallback' on leaving the scope.
+ * Callbacks arg: calculated latency.
+ */
+ auto collectGetHostAndRefreshStats() {
+ _enterGetHostAndRefresh();
+ return MakeGuard(
+ [ this, timer = std::make_shared<Timer>() ] { _leaveGetHostAndRefresh(*timer); });
+ }
+
+ auto collectHelloStats() {
+ _enterHello();
+ return MakeGuard([ this, timer = std::make_shared<Timer>() ] { _leaveHello(*timer); });
+ }
+
+private:
+ void _enterGetHostAndRefresh();
+ void _leaveGetHostAndRefresh(const Timer& suppliedTimer);
+
+ void _enterHello();
+ void _leaveHello(const Timer& suppliedTimer);
+
+ // Not owned. Instance of ReplicaSetMonitorManagerStats is owned by
+ // global ReplicaSetMonitorManager, which is never destroyed.
+ ReplicaSetMonitorManagerStats* _managerStats;
+
+ // Stats for the outer loop of getHostAndRefresh().
+
+ Counter64 _getHostAndRefreshTotal;
+ Counter64 _getHostAndRefreshCurrent;
+ Counter64 _getHostAndRefreshAggregateLatency;
+
+ Counter64 _helloTotal;
+ Counter64 _helloCurrent;
+ Counter64 _helloLatency;
+};
+
+} // namespace mongo
diff --git a/src/mongo/client/replica_set_monitor_test.cpp b/src/mongo/client/replica_set_monitor_test.cpp
index 7540854d9ce..471d1ae27a7 100644
--- a/src/mongo/client/replica_set_monitor_test.cpp
+++ b/src/mongo/client/replica_set_monitor_test.cpp
@@ -59,6 +59,11 @@ std::vector<HostAndPort> basicSeedsBuilder() {
const std::vector<HostAndPort> basicSeeds = basicSeedsBuilder();
const std::set<HostAndPort> basicSeedsSet(basicSeeds.begin(), basicSeeds.end());
+struct StatsForTest {
+ ReplicaSetMonitorManagerStats managerStats;
+ ReplicaSetMonitorStats stats{&managerStats};
+};
+
// NOTE: Unless stated otherwise, all tests assume exclusive access to state belongs to the
// current (only) thread, so they do not lock SetState::mutex before examining state. This is
// NOT something that non-test code should do.
@@ -347,7 +352,8 @@ TEST(ReplicaSetMonitor, IsMasterSecondaryWithTags) {
TEST(ReplicaSetMonitor, CheckAllSeedsSerial) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
set<HostAndPort> seen;
@@ -395,7 +401,8 @@ TEST(ReplicaSetMonitor, CheckAllSeedsSerial) {
TEST(ReplicaSetMonitor, CheckAllSeedsParallel) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
set<HostAndPort> seen;
@@ -453,7 +460,8 @@ TEST(ReplicaSetMonitor, CheckAllSeedsParallel) {
TEST(ReplicaSetMonitor, NoMasterInitAllUp) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
set<HostAndPort> seen;
@@ -500,7 +508,8 @@ TEST(ReplicaSetMonitor, NoMasterInitAllUp) {
TEST(ReplicaSetMonitor, MasterNotInSeeds_NoPrimaryInIsMaster) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
set<HostAndPort> seen;
@@ -577,7 +586,8 @@ TEST(ReplicaSetMonitor, MasterNotInSeeds_NoPrimaryInIsMaster) {
TEST(ReplicaSetMonitor, MasterNotInSeeds_PrimaryInIsMaster) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
set<HostAndPort> seen;
@@ -643,7 +653,8 @@ TEST(ReplicaSetMonitor, SlavesUsableEvenIfNoMaster) {
std::set<HostAndPort> seeds;
seeds.insert(HostAndPort("a"));
SetStatePtr state = std::make_shared<SetState>("name", seeds);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
const ReadPreferenceSetting secondary(ReadPreference::SecondaryOnly, TagSet());
@@ -685,7 +696,8 @@ TEST(ReplicaSetMonitor, SlavesUsableEvenIfNoMaster) {
// Test multiple nodes that claim to be master (we use a last-wins policy)
TEST(ReplicaSetMonitor, MultipleMasterLastNodeWins) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
set<HostAndPort> seen;
@@ -748,7 +760,8 @@ TEST(ReplicaSetMonitor, MultipleMasterLastNodeWins) {
// Test nodes disagree about who is in the set, master is source of truth
TEST(ReplicaSetMonitor, MasterIsSourceOfTruth) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
BSONArray primaryHosts = BSON_ARRAY("a"
<< "b"
@@ -789,7 +802,8 @@ TEST(ReplicaSetMonitor, MasterIsSourceOfTruth) {
// Test multiple master nodes that disagree about set membership
TEST(ReplicaSetMonitor, MultipleMastersDisagree) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
BSONArray hostsForSeed[3];
hostsForSeed[0] = BSON_ARRAY("a"
@@ -887,7 +901,8 @@ TEST(ReplicaSetMonitor, MultipleMastersDisagree) {
// Ensure getMatchingHost returns hosts even if scan is ongoing
TEST(ReplicaSetMonitor, GetMatchingDuringScan) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
const ReadPreferenceSetting primaryOnly(ReadPreference::PrimaryOnly, TagSet());
const ReadPreferenceSetting secondaryOnly(ReadPreference::SecondaryOnly, TagSet());
@@ -990,7 +1005,8 @@ TEST(ReplicaSetMonitor, OutOfBandFailedHost) {
// Newly elected primary with electionId >= maximum electionId seen by the Refresher
TEST(ReplicaSetMonitorTests, NewPrimaryWithMaxElectionId) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
set<HostAndPort> seen;
@@ -1056,7 +1072,8 @@ TEST(ReplicaSetMonitorTests, NewPrimaryWithMaxElectionId) {
// Ignore electionId of secondaries
TEST(ReplicaSetMonitorTests, IgnoreElectionIdFromSecondaries) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
set<HostAndPort> seen;
@@ -1103,7 +1120,8 @@ TEST(ReplicaSetMonitorTests, IgnoreElectionIdFromSecondaries) {
// Stale Primary with obsolete electionId
TEST(ReplicaSetMonitorTests, StalePrimaryWithObsoleteElectionId) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
const OID firstElectionId = OID::gen();
const OID secondElectionId = OID::gen();
@@ -1233,7 +1251,8 @@ TEST(ReplicaSetMonitor, PrimaryIsUpCheck) {
*/
TEST(ReplicaSetMonitorTests, TwoPrimaries2ndHasNewerConfigVersion) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
auto ns = refresher.getNextStep();
ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST);
@@ -1295,7 +1314,8 @@ TEST(ReplicaSetMonitorTests, TwoPrimaries2ndHasNewerConfigVersion) {
*/
TEST(ReplicaSetMonitorTests, TwoPrimaries2ndHasOlderConfigVersion) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
auto ns = refresher.getNextStep();
ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST);
@@ -1355,7 +1375,8 @@ TEST(ReplicaSetMonitorTests, TwoPrimaries2ndHasOlderConfigVersion) {
*/
TEST(ReplicaSetMonitor, MaxStalenessMSMatch) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
repl::OpTime opTime{Timestamp{10, 10}, 10};
const ReadPreferenceSetting secondary(ReadPreference::SecondaryOnly, TagSet(), Seconds(100));
@@ -1408,7 +1429,8 @@ TEST(ReplicaSetMonitor, MaxStalenessMSMatch) {
*/
TEST(ReplicaSetMonitor, MaxStalenessMSNoMatch) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
repl::OpTime opTime{Timestamp{10, 10}, 10};
const ReadPreferenceSetting secondary(ReadPreference::SecondaryOnly, TagSet(), Seconds(200));
@@ -1460,7 +1482,8 @@ TEST(ReplicaSetMonitor, MaxStalenessMSNoMatch) {
*/
TEST(ReplicaSetMonitor, MaxStalenessMSNoPrimaryMatch) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
repl::OpTime opTime{Timestamp{10, 10}, 10};
const ReadPreferenceSetting secondary(ReadPreference::SecondaryOnly, TagSet(), Seconds(200));
@@ -1515,7 +1538,8 @@ TEST(ReplicaSetMonitor, MaxStalenessMSNoPrimaryMatch) {
*/
TEST(ReplicaSetMonitor, MaxStalenessMSAllFailed) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
repl::OpTime opTime{Timestamp{10, 10}, 10};
const ReadPreferenceSetting secondary(ReadPreference::SecondaryOnly, TagSet(), Seconds(200));
@@ -1569,7 +1593,8 @@ TEST(ReplicaSetMonitor, MaxStalenessMSAllFailed) {
*/
TEST(ReplicaSetMonitor, MaxStalenessMSAllButPrimaryFailed) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
repl::OpTime opTime{Timestamp{10, 10}, 10};
const ReadPreferenceSetting secondary(ReadPreference::SecondaryOnly, TagSet(), Seconds(200));
@@ -1622,7 +1647,8 @@ TEST(ReplicaSetMonitor, MaxStalenessMSAllButPrimaryFailed) {
*/
TEST(ReplicaSetMonitor, MaxStalenessMSOneSecondaryFailed) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
repl::OpTime opTime{Timestamp{10, 10}, 10};
const ReadPreferenceSetting secondary(ReadPreference::SecondaryOnly, TagSet(), Seconds(200));
@@ -1674,7 +1700,8 @@ TEST(ReplicaSetMonitor, MaxStalenessMSOneSecondaryFailed) {
*/
TEST(ReplicaSetMonitor, MaxStalenessMSNonStaleSecondaryMatched) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
repl::OpTime opTime{Timestamp{10, 10}, 10};
const ReadPreferenceSetting secondary(ReadPreference::SecondaryOnly, TagSet(), Seconds(200));
@@ -1727,7 +1754,8 @@ TEST(ReplicaSetMonitor, MaxStalenessMSNonStaleSecondaryMatched) {
*/
TEST(ReplicaSetMonitor, MaxStalenessMSNoLastWrite) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
const ReadPreferenceSetting secondary(ReadPreference::SecondaryOnly, TagSet(), Seconds(200));
BSONArray hosts = BSON_ARRAY("a"
@@ -1769,7 +1797,8 @@ TEST(ReplicaSetMonitor, MaxStalenessMSNoLastWrite) {
*/
TEST(ReplicaSetMonitor, MaxStalenessMSZeroNoLastWrite) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
const ReadPreferenceSetting secondary(ReadPreference::SecondaryOnly, TagSet(), Seconds(0));
BSONArray hosts = BSON_ARRAY("a"
@@ -1811,7 +1840,8 @@ TEST(ReplicaSetMonitor, MaxStalenessMSZeroNoLastWrite) {
*/
TEST(ReplicaSetMonitor, MinOpTimeMatched) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
repl::OpTime minOpTimeSetting{Timestamp{10, 10}, 10};
repl::OpTime opTimeNonStale{Timestamp{10, 10}, 11};
@@ -1856,7 +1886,8 @@ TEST(ReplicaSetMonitor, MinOpTimeMatched) {
*/
TEST(ReplicaSetMonitor, MinOpTimeNotMatched) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
repl::OpTime minOpTimeSetting{Timestamp{10, 10}, 10};
repl::OpTime opTimeNonStale{Timestamp{10, 10}, 11};
@@ -1901,7 +1932,8 @@ TEST(ReplicaSetMonitor, MinOpTimeNotMatched) {
*/
TEST(ReplicaSetMonitor, MinOpTimeIgnored) {
SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet);
- Refresher refresher(state, nullptr, nullptr);
+ StatsForTest stats;
+ Refresher refresher(state, nullptr, nullptr, &stats.stats);
repl::OpTime minOpTimeSetting{Timestamp{10, 10}, 10};
repl::OpTime opTimeStale{Timestamp{10, 10}, 9};
diff --git a/src/mongo/client/replica_set_monitor_transport.cpp b/src/mongo/client/replica_set_monitor_transport.cpp
index 58cc7fa5671..7b3a6e74ef1 100644
--- a/src/mongo/client/replica_set_monitor_transport.cpp
+++ b/src/mongo/client/replica_set_monitor_transport.cpp
@@ -44,10 +44,12 @@ const int kLogLevel = 2;
ReplicaSetMonitorTransport::~ReplicaSetMonitorTransport() {}
-Future<BSONObj> ReplicaSetMonitorDbClientTransport::sayHello(HostAndPort host,
- const std::string& setName,
- const MongoURI& setUri,
- Milliseconds timeout) noexcept {
+Future<BSONObj> ReplicaSetMonitorDbClientTransport::sayHello(
+ HostAndPort host,
+ const std::string& setName,
+ const MongoURI& setUri,
+ Milliseconds timeout,
+ ReplicaSetMonitorStats* stats) noexcept {
MongoURI targetURI;
const auto& hostStr = host.toString();
Timer timer;
@@ -62,6 +64,7 @@ Future<BSONObj> ReplicaSetMonitorDbClientTransport::sayHello(HostAndPort host,
LOG(kLogLevel) << "ReplicaSetMonitor " << setName << " sending hello request to "
<< hostStr;
+ const auto statsCollector = stats->collectHelloStats();
ScopedDbConnection conn(targetURI, durationCount<Seconds>(timeout));
bool ignoredOutParam = false;
BSONObj reply;
@@ -84,10 +87,12 @@ ReplicaSetMonitorExecutorTransport::ReplicaSetMonitorExecutorTransport(
executor::TaskExecutor* executor)
: _executor(executor) {}
-Future<BSONObj> ReplicaSetMonitorExecutorTransport::sayHello(HostAndPort host,
- const std::string& setName,
- const MongoURI& setUri,
- Milliseconds timeout) noexcept {
+Future<BSONObj> ReplicaSetMonitorExecutorTransport::sayHello(
+ HostAndPort host,
+ const std::string& setName,
+ const MongoURI& setUri,
+ Milliseconds timeout,
+ ReplicaSetMonitorStats* stats) noexcept {
try {
auto pf = makePromiseFuture<BSONObj>();
BSONObjBuilder bob;
@@ -105,7 +110,8 @@ Future<BSONObj> ReplicaSetMonitorExecutorTransport::sayHello(HostAndPort host,
auto swCbHandle = _executor->scheduleRemoteCommand(std::move(request), [
this,
setName,
- requestState = std::make_shared<HelloRequest>(host, std::move(pf.promise))
+ requestState = std::make_shared<HelloRequest>(host, std::move(pf.promise)),
+ statsCollector = stats->collectHelloStats()
](const executor::TaskExecutor::RemoteCommandCallbackArgs& result) mutable {
LOG(kLogLevel) << "Replica set monitor " << setName << " received reply from "
<< requestState->host.toString() << ": "
diff --git a/src/mongo/client/replica_set_monitor_transport.h b/src/mongo/client/replica_set_monitor_transport.h
index cf6d3607dc2..660d5d4041a 100644
--- a/src/mongo/client/replica_set_monitor_transport.h
+++ b/src/mongo/client/replica_set_monitor_transport.h
@@ -31,6 +31,7 @@
#include "mongo/platform/basic.h"
#include "mongo/client/mongo_uri.h"
+#include "mongo/client/replica_set_monitor_stats.h"
#include "mongo/executor/task_executor.h"
#include "mongo/s/is_mongos.h"
#include "mongo/transport/transport_layer.h"
@@ -54,7 +55,8 @@ public:
virtual Future<BSONObj> sayHello(HostAndPort host,
const std::string& setName,
const MongoURI& setUri,
- Milliseconds timeout) noexcept = 0;
+ Milliseconds timeout,
+ ReplicaSetMonitorStats* stats) noexcept = 0;
virtual ~ReplicaSetMonitorTransport();
};
using ReplicaSetMonitorTransportPtr = std::unique_ptr<ReplicaSetMonitorTransport>;
@@ -68,7 +70,8 @@ public:
Future<BSONObj> sayHello(HostAndPort host,
const std::string& setName,
const MongoURI& setUri,
- Milliseconds timeout) noexcept override;
+ Milliseconds timeout,
+ ReplicaSetMonitorStats* stats) noexcept override;
};
/**
@@ -83,7 +86,8 @@ public:
Future<BSONObj> sayHello(HostAndPort host,
const std::string& setName,
const MongoURI& setUri,
- Milliseconds timeout) noexcept override;
+ Milliseconds timeout,
+ ReplicaSetMonitorStats* stats) noexcept override;
private:
void _haltIfIncompatibleServer(Status status);