summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorShaileja Jain <shaileja.jain@gmail.com>2019-06-13 11:22:10 -0400
committerShaileja Jain <shaileja.jain@gmail.com>2019-06-26 15:52:11 -0400
commitdac50f778efc784c316b79d1d936865f8cca59fe (patch)
tree860455c08f51d8d89f3666d1e11401fdde6bb36d /src
parent7a02a2f84da35e671736427fea01a7b08a3b8f3f (diff)
downloadmongo-dac50f778efc784c316b79d1d936865f8cca59fe.tar.gz
SERVER-40490 RSM should check replicas at most twice per second
Diffstat (limited to 'src')
-rw-r--r--src/mongo/client/replica_set_monitor.cpp29
-rw-r--r--src/mongo/client/replica_set_monitor_internal.h4
-rw-r--r--src/mongo/client/replica_set_monitor_test_concurrent.cpp62
3 files changed, 92 insertions, 3 deletions
diff --git a/src/mongo/client/replica_set_monitor.cpp b/src/mongo/client/replica_set_monitor.cpp
index d134f935070..58640f16596 100644
--- a/src/mongo/client/replica_set_monitor.cpp
+++ b/src/mongo/client/replica_set_monitor.cpp
@@ -511,7 +511,25 @@ void Refresher::scheduleNetworkRequests(WithLock withLock) {
if (ns.step != Refresher::NextStep::CONTACT_HOST)
break;
- scheduleIsMaster(ns.host, withLock);
+ // cancel any scheduled isMaster calls that haven't yet been called
+ Node* node = _set->findOrCreateNode(ns.host);
+ if (node->scheduledIsMasterHandle) {
+ _set->executor->cancel(node->scheduledIsMasterHandle);
+ }
+
+ // ensure that the call to isMaster is scheduled at most every 500ms
+ if (_set->executor && (_set->executor->now() < node->nextPossibleIsMasterCall) &&
+ !_set->isMocked) {
+ // schedule a new call
+ node->scheduledIsMasterHandle = uassertStatusOK(_set->executor->scheduleWorkAt(
+ node->nextPossibleIsMasterCall,
+ [ *this, host = ns.host ](const CallbackArgs& cbArgs) mutable {
+ stdx::lock_guard lk(_set->mutex);
+ scheduleIsMaster(host, lk);
+ }));
+ } else {
+ scheduleIsMaster(ns.host, withLock);
+ }
}
DEV _set->checkInvariants();
@@ -544,7 +562,6 @@ void Refresher::scheduleIsMaster(const HostAndPort& host, WithLock withLock) {
nullptr,
Milliseconds(int64_t(socketTimeoutSecs * 1000)));
request.sslMode = _set->setUri.getSSLMode();
-
auto status =
_set->executor
->scheduleRemoteCommand(
@@ -671,6 +688,14 @@ void Refresher::receivedIsMaster(const HostAndPort& from,
return;
}
+ // ensure that isMaster calls occur at most 500ms after the previous call ended
+ if (_set->executor) {
+ Node* node = _set->findNode(from);
+ if (node) {
+ node->nextPossibleIsMasterCall = _set->executor->now() + Milliseconds(500);
+ }
+ }
+
if (reply.setName != _set->name) {
if (reply.raw["isreplicaset"].trueValue()) {
// The reply came from a node in the state referred to as RSGhost in the SDAM
diff --git a/src/mongo/client/replica_set_monitor_internal.h b/src/mongo/client/replica_set_monitor_internal.h
index e7a57cc6543..76b67e975e0 100644
--- a/src/mongo/client/replica_set_monitor_internal.h
+++ b/src/mongo/client/replica_set_monitor_internal.h
@@ -134,7 +134,9 @@ public:
Date_t lastWriteDate{}; // from isMasterReply
Date_t lastWriteDateUpdateTime{}; // set to the local system's time at the time of updating
// lastWriteDate
- repl::OpTime opTime{}; // from isMasterReply
+ Date_t nextPossibleIsMasterCall{}; // time that previous isMaster check ended
+ executor::TaskExecutor::CallbackHandle scheduledIsMasterHandle; //
+ repl::OpTime opTime{}; // from isMasterReply
};
using Nodes = std::vector<Node>;
diff --git a/src/mongo/client/replica_set_monitor_test_concurrent.cpp b/src/mongo/client/replica_set_monitor_test_concurrent.cpp
index 106dba50beb..d1506ed1bc7 100644
--- a/src/mongo/client/replica_set_monitor_test_concurrent.cpp
+++ b/src/mongo/client/replica_set_monitor_test_concurrent.cpp
@@ -298,5 +298,67 @@ TEST_F(ReplicaSetMonitorConcurrentTest, StepdownAndElection) {
advanceTime(Milliseconds(100));
}
}
+
+// Check that isMaster is being called at most every 500ms.
+//
+// 1. Create a replica set with two secondaries, Node 0 and Node 1
+// 2. Begin a ReplicaSetMonitor::getHostOrRefresh call with primaryOnly
+// 3. Node 0 responds but Node 1 does not
+// 4. After 0.5s call ReplicaSetMonitor::getHostOrRefresh again
+TEST_F(ReplicaSetMonitorConcurrentTest, IsMasterFrequency) {
+ MockReplicaSet replSet("test", 2, /* hasPrimary = */ false, /* dollarPrefixHosts = */ false);
+ const auto node0 = HostAndPort(replSet.getSecondaries()[0]);
+ const auto node1 = HostAndPort(replSet.getSecondaries()[1]);
+
+ auto state = std::make_shared<ReplicaSetMonitor::SetState>(
+ replSet.getURI(), &getNotifier(), &getExecutor());
+ auto monitor = std::make_shared<ReplicaSetMonitor>(state);
+
+ // Node 1 is unresponsive.
+ replSet.kill(replSet.getSecondaries()[1]);
+ monitor->init();
+
+ std::vector<SemiFuture<HostAndPort>> primaryFutures;
+ primaryFutures.push_back(monitor->getHostOrRefresh(primaryOnly, Seconds(4)));
+
+ // Because Node 1 is unresponsive, the monitor rechecks Node 0 every 500ms
+ // until Node 1 times out after 5 seconds.
+ auto checkUntil = [&](auto timeToStop, auto func) {
+ while (elapsed() < timeToStop) {
+ processReadyRequests(replSet);
+ func();
+ // all primaryFutures are not ready, because the monitor cannot find the primary
+ for (SemiFuture<HostAndPort>& future : primaryFutures) {
+ ASSERT(!future.isReady());
+ }
+ advanceTime(Milliseconds(100));
+ }
+ };
+
+ // Up until 500ms there is only one isMaster call.
+ // At 500ms, the monitor rechecks node 0.
+ checkUntil(Milliseconds(500), [&]() {
+ ASSERT_EQ(getNumChecks(node0), 1);
+ ASSERT_EQ(getNumChecks(node1), 1);
+ });
+
+ // Triggers isMaster calls that will be delayed until 1000ms
+ checkUntil(Milliseconds(1000), [&]() {
+ // this should schedule a new isMaster call at 1000ms and cancel the
+ // previous job scheduled for the same time
+ primaryFutures.push_back(monitor->getHostOrRefresh(primaryOnly, Seconds(4)));
+ ASSERT_EQ(getNumChecks(node0), 2);
+ ASSERT_EQ(getNumChecks(node1), 1);
+ });
+
+ // At 1000ms, only one scheduled isMaster call runs, because all others have been canceled
+ checkUntil(Milliseconds(1500), [&]() {
+ ASSERT_EQ(getNumChecks(node0), 3);
+ ASSERT_EQ(getNumChecks(node1), 1);
+ });
+
+ // TODO: advanceTime(Seconds(5)) and getNoThrow() each entry in primaryFutures once
+ // race conditions in RSM expedited scans are fixed
+}
} // namespace
} // namespace mongo