diff options
author | Haley Connelly <haley.connelly@mongodb.com> | 2020-03-05 18:33:08 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-04-06 15:55:38 +0000 |
commit | e10fd6abcb28c5b12b98ff0c19066191226fdf8f (patch) | |
tree | c7b941371db16377f31a51482902cccd40ae5393 | |
parent | b6cac93c3a10a85039f63b16f981020d555f20be (diff) | |
download | mongo-e10fd6abcb28c5b12b98ff0c19066191226fdf8f.tar.gz |
SERVER-46289 Make SingleServerPingMonitor retry after failed ping
(cherry picked from commit ba72823366d4a4ed4bea78eeeb456970285ea64b)
-rw-r--r-- | src/mongo/client/server_ping_monitor.cpp | 11 | ||||
-rw-r--r-- | src/mongo/client/server_ping_monitor_test.cpp | 113 |
2 files changed, 47 insertions, 77 deletions
diff --git a/src/mongo/client/server_ping_monitor.cpp b/src/mongo/client/server_ping_monitor.cpp index 17b33ec270c..3cb7f793bf2 100644 --- a/src/mongo/client/server_ping_monitor.cpp +++ b/src/mongo/client/server_ping_monitor.cpp @@ -131,7 +131,8 @@ void SingleServerPingMonitor::_doServerPing() { [anchor = shared_from_this(), timer = Timer()](const executor::TaskExecutor::RemoteCommandCallbackArgs& result) mutable { if (ErrorCodes::isCancelationError(result.response.status)) { - // Do no more work if we're removed or canceled + // Do no more work if the SingleServerPingMonitor is removed or the request is + // canceled. return; } { @@ -143,12 +144,10 @@ void SingleServerPingMonitor::_doServerPing() { if (!result.response.isOK()) { anchor->_rttListener->onServerPingFailedEvent(anchor->_hostAndPort, result.response.status); - // Don't schedule any more pings to the server once an error is encountered. - return; + } else { + auto rtt = sdam::IsMasterRTT(timer.micros()); + anchor->_rttListener->onServerPingSucceededEvent(rtt, anchor->_hostAndPort); } - - auto rtt = sdam::IsMasterRTT(timer.micros()); - anchor->_rttListener->onServerPingSucceededEvent(rtt, anchor->_hostAndPort); } anchor->_scheduleServerPing(); }); diff --git a/src/mongo/client/server_ping_monitor_test.cpp b/src/mongo/client/server_ping_monitor_test.cpp index f9abd8050de..05a7f0603d0 100644 --- a/src/mongo/client/server_ping_monitor_test.cpp +++ b/src/mongo/client/server_ping_monitor_test.cpp @@ -146,14 +146,15 @@ protected: } /** - * Checks that exactly one successful ping occurs at the time this method is called and no - * additional pings are made before deadline. + * Checks that exactly one successful ping occurs at the time the method is called and ensures + * no additional pings are issued for at least pingFrequency. */ - void checkSinglePing(Seconds deadline, + void checkSinglePing(Milliseconds pingFrequency, const sdam::ServerAddress& hostAndPort, MockReplicaSet* replSet) { processPingRequest(hostAndPort, replSet); + auto deadline = elapsed() + pingFrequency; while (elapsed() < deadline && !_topologyListener->hasPingResponse(hostAndPort)) { advanceTime(Milliseconds(100)); } @@ -169,7 +170,7 @@ protected: * Confirms no more ping requests are sent between elapsed() and deadline. Confirms no more ping * responses are received between elapsed() and deadline. */ - void checkNoActivityBefore(Seconds deadline, const sdam::ServerAddress& hostAndPort) { + void checkNoActivityBefore(Milliseconds deadline, const sdam::ServerAddress& hostAndPort) { while (elapsed() < deadline) { ASSERT_FALSE(hasReadyRequests()); ASSERT_FALSE(_topologyListener->hasPingResponse(hostAndPort)); @@ -224,11 +225,11 @@ protected: return ssPingMonitor; } - void checkSinglePing(Seconds deadline) { - ServerPingMonitorTestFixture::checkSinglePing(deadline, _hostAndPort, getReplSet()); + void checkSinglePing(Milliseconds pingFrequency) { + ServerPingMonitorTestFixture::checkSinglePing(pingFrequency, _hostAndPort, getReplSet()); } - void checkNoActivityBefore(Seconds deadline) { + void checkNoActivityBefore(Milliseconds deadline) { ServerPingMonitorTestFixture::checkNoActivityBefore(deadline, _hostAndPort); } @@ -245,12 +246,16 @@ TEST_F(SingleServerPingMonitorTest, pingFrequencyCheck) { auto pingFrequency = Seconds(10); auto ssPingMonitor = initSingleServerPingMonitor(pingFrequency); - checkSinglePing(pingFrequency * 1); - checkSinglePing(pingFrequency * 2); - checkSinglePing(pingFrequency * 3); - checkSinglePing(pingFrequency * 4); + checkSinglePing(pingFrequency); + checkSinglePing(pingFrequency); + checkSinglePing(pingFrequency); + checkSinglePing(pingFrequency); } +/** + * Confirms that the SingleServerPingMonitor continues to try and ping a dead server at + * pingFrequency and successfully does so once the server is restored. + */ TEST_F(SingleServerPingMonitorTest, pingDeadServer) { // Kill the server before starting up the SingleServerPingMonitor. auto hostAndPort = getHostAndPort(); @@ -262,25 +267,36 @@ TEST_F(SingleServerPingMonitorTest, pingDeadServer) { auto pingFrequency = Seconds(10); auto ssPingMonitor = initSingleServerPingMonitor(pingFrequency); auto topologyListener = getTopologyListener(); - processPingRequest(); + auto checkSinglePingDeadServer = [&]() { + Milliseconds deadline = elapsed() + pingFrequency; + processPingRequest(); - while (elapsed() < pingFrequency && !topologyListener->hasPingResponse(hostAndPort)) { - advanceTime(Milliseconds(100)); - } + while (elapsed() < deadline && !topologyListener->hasPingResponse(hostAndPort)) { + advanceTime(Milliseconds(100)); + } - ASSERT_TRUE(topologyListener->hasPingResponse(hostAndPort)); - auto pingResponse = topologyListener->getPingResponse(hostAndPort); - ASSERT_EQ(ErrorCodes::HostUnreachable, pingResponse.getStatus()); + ASSERT_TRUE(topologyListener->hasPingResponse(hostAndPort)); + auto pingResponse = topologyListener->getPingResponse(hostAndPort); + ASSERT_EQ(ErrorCodes::HostUnreachable, pingResponse.getStatus()); - // Confirm no other ping requests are sent after receiving an error. - checkNoActivityBefore(pingFrequency * 2); + checkNoActivityBefore(deadline); + }; + checkSinglePingDeadServer(); + checkSinglePingDeadServer(); + { + + NetworkInterfaceMock::InNetworkGuard ing(getNet()); + getReplSet()->restore(hostAndPort); + } + checkSinglePing(pingFrequency); + checkSinglePing(pingFrequency); } /** - * After a SingleServerPingMonitor is dropped, there should be no more events published to the - * TopologyListener and no more pings to the server. + * Checks that no more events are published to the TopologyListener and no more pings are issued to + * the server after the SingleServerPingMonitor is closed. */ -TEST_F(SingleServerPingMonitorTest, noPingAfterServerDrop) { +TEST_F(SingleServerPingMonitorTest, noPingAfterSingleServerPingMonitorClosed) { auto pingFrequency = Seconds(10); auto ssPingMonitor = initSingleServerPingMonitor(pingFrequency); @@ -316,12 +332,12 @@ TEST_F(ServerPingMonitorTest, singleNodeServerPingMonitorCycle) { // at pingFrequency. serverPingMonitor->onServerHandshakeCompleteEvent(initialRTT, hostAndPort); checkSinglePing(pingFrequency, hostAndPort, replSet.get()); - checkSinglePing(pingFrequency * 2 - Seconds(2), hostAndPort, replSet.get()); + checkSinglePing(pingFrequency - Seconds(2), hostAndPort, replSet.get()); // Close the SingleServerMonitor before the third ping and confirm ping activity to the server // is stopped. serverPingMonitor->onServerClosedEvent(hostAndPort, oid); - checkNoActivityBefore(pingFrequency * 4, hostAndPort); + checkNoActivityBefore(elapsed() + pingFrequency * 2, hostAndPort); } /** @@ -352,52 +368,7 @@ TEST_F(ServerPingMonitorTest, twoNodeServerPingMonitorOneClosed) { checkNoActivityBefore(pingFrequency + host1Delay, host0); // Confirm that host1's SingleServerPingMonitor continues ping activity. - checkSinglePing(pingFrequency * 2, host1, replSet.get()); -} - -/** - * Adds two SingleServerPingMonitors to the ServerPingMonitor. One server is dead, the other is not. - */ -TEST_F(ServerPingMonitorTest, twoNodeServerPingMonitorOneDead) { - auto pingFrequency = Seconds(10); - auto serverPingMonitor = makeServerPingMonitor(pingFrequency); - auto replSet = std::make_unique<MockReplicaSet>( - "test", 2, /* hasPrimary = */ false, /* dollarPrefixHosts = */ false); - - auto hosts = replSet->getHosts(); - auto host0 = hosts[0].toString(); - auto host1 = hosts[1].toString(); - - { - NetworkInterfaceMock::InNetworkGuard ing(getNet()); - replSet->kill(host1); - } - - auto host1Delay = Seconds(2); - serverPingMonitor->onServerHandshakeCompleteEvent(initialRTT, host0); - checkSinglePing(host1Delay, host0, replSet.get()); - - // Add host1 host1Delay after host2. - ASSERT_EQ(elapsed(), host1Delay); - serverPingMonitor->onServerHandshakeCompleteEvent(initialRTT, host1); - - // Confirm host1 reported HostUnreachable to the TopologyLisener. - processPingRequest(host1, replSet.get()); - auto topologyListener = getTopologyListener(); - while (elapsed() < pingFrequency && !topologyListener->hasPingResponse(host1)) { - advanceTime(Milliseconds(100)); - } - ASSERT_LT(elapsed(), pingFrequency); - ASSERT_TRUE(topologyListener->hasPingResponse(host1)); - auto pingResponse = topologyListener->getPingResponse(host1); - ASSERT_EQ(ErrorCodes::HostUnreachable, pingResponse.getStatus()); - - // Confirm host0 is still pinged at pingFrequency and host1 no longer has ping activity. - while (elapsed() < pingFrequency) { - advanceTime(Milliseconds(100)); - } - checkSinglePing(pingFrequency + host1Delay, host0, replSet.get()); - checkNoActivityBefore(pingFrequency * 2, host1); + checkSinglePing(pingFrequency, host1, replSet.get()); } /** |