diff options
author | Cheahuychou Mao <cheahuychou.mao@mongodb.com> | 2020-04-02 16:48:07 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-04-07 20:48:40 +0000 |
commit | e03fe6ff798f50382122edd469743677d401e001 (patch) | |
tree | eb8380e5b99982137bea56ddf0c7b4e40feed256 /src/mongo/executor | |
parent | 950522e1e4a478673efd5da46469635b9a15bb6a (diff) | |
download | mongo-e03fe6ff798f50382122edd469743677d401e001.tar.gz |
SERVER-47343 Add/update failpoints to make cancelCommand integration tests more robust
Diffstat (limited to 'src/mongo/executor')
-rw-r--r-- | src/mongo/executor/network_interface.cpp | 2 | ||||
-rw-r--r-- | src/mongo/executor/network_interface.h | 2 | ||||
-rw-r--r-- | src/mongo/executor/network_interface_integration_test.cpp | 59 | ||||
-rw-r--r-- | src/mongo/executor/network_interface_tl.cpp | 27 |
4 files changed, 65 insertions, 25 deletions
diff --git a/src/mongo/executor/network_interface.cpp b/src/mongo/executor/network_interface.cpp index 9df8b70af9f..90d2a248a0a 100644 --- a/src/mongo/executor/network_interface.cpp +++ b/src/mongo/executor/network_interface.cpp @@ -41,7 +41,7 @@ NetworkInterface::~NetworkInterface() {} MONGO_FAIL_POINT_DEFINE(networkInterfaceSendRequestsToTargetHostsInAlphabeticalOrder); MONGO_FAIL_POINT_DEFINE(networkInterfaceDiscardCommandsBeforeAcquireConn); MONGO_FAIL_POINT_DEFINE(networkInterfaceHangCommandsAfterAcquireConn); -MONGO_FAIL_POINT_DEFINE(networkInterfaceAfterAcquireConn); +MONGO_FAIL_POINT_DEFINE(networkInterfaceCommandsFailedWithErrorCode); } // namespace executor } // namespace mongo diff --git a/src/mongo/executor/network_interface.h b/src/mongo/executor/network_interface.h index 1a5c35aa594..9a1e0488dab 100644 --- a/src/mongo/executor/network_interface.h +++ b/src/mongo/executor/network_interface.h @@ -45,7 +45,7 @@ namespace executor { extern FailPoint networkInterfaceSendRequestsToTargetHostsInAlphabeticalOrder; extern FailPoint networkInterfaceDiscardCommandsBeforeAcquireConn; extern FailPoint networkInterfaceHangCommandsAfterAcquireConn; -extern FailPoint networkInterfaceAfterAcquireConn; +extern FailPoint networkInterfaceCommandsFailedWithErrorCode; /** * Interface to networking for use by TaskExecutor implementations. diff --git a/src/mongo/executor/network_interface_integration_test.cpp b/src/mongo/executor/network_interface_integration_test.cpp index 46f55d87ea8..42a81961876 100644 --- a/src/mongo/executor/network_interface_integration_test.cpp +++ b/src/mongo/executor/network_interface_integration_test.cpp @@ -216,10 +216,27 @@ public: /** * Repeatedly runs currentOp to check if the given command is running, and blocks until - * the command finishes running or the wait timeout is reached, and returns the number - * times of currentOp is run. + * the command starts running or the wait timeout is reached. Asserts that the command + * is running after the wait and returns the number times of currentOp is run. */ - uint64_t waitForCommand(const std::string command, Milliseconds timeout) { + uint64_t waitForCommandToStart(const std::string command, Milliseconds timeout) { + ClockSource::StopWatch stopwatch; + uint64_t numCurrentOpRan = 0; + do { + sleepmillis(100); + numCurrentOpRan++; + } while (!isCommandRunning(command) && stopwatch.elapsed() < timeout); + + ASSERT_TRUE(isCommandRunning(command)); + return ++numCurrentOpRan; + } + + /** + * Repeatedly runs currentOp to check if the given command is running, and blocks until + * the command finishes running or the wait timeout is reached. Asserts that the command + * is no longer running after the wait and returns the number times of currentOp is run. + */ + uint64_t waitForCommandToStop(const std::string command, Milliseconds timeout) { ClockSource::StopWatch stopwatch; uint64_t numCurrentOpRan = 0; do { @@ -227,7 +244,8 @@ public: numCurrentOpRan++; } while (isCommandRunning(command) && stopwatch.elapsed() < timeout); - return numCurrentOpRan; + ASSERT_FALSE(isCommandRunning(command)); + return ++numCurrentOpRan; } struct IsMasterData { @@ -283,7 +301,7 @@ TEST_F(NetworkInterfaceTest, CancelMissingOperation) { assertNumOps(0u, 0u, 0u, 0u); } -TEST_F(NetworkInterfaceTest, CancelOperation) { +TEST_F(NetworkInterfaceTest, CancelLocally) { auto cbh = makeCallbackHandle(); auto deferred = [&] { @@ -331,19 +349,20 @@ TEST_F(NetworkInterfaceTest, CancelRemotely) { kNoTimeout); }); + int numCurrentOpRan = 0; + auto cbh = makeCallbackHandle(); auto deferred = [&] { // Kick off an "echo" operation, which should block until cancelCommand causes // the operation to be killed. - FailPointEnableBlock fpb("networkInterfaceAfterAcquireConn"); - auto deferred = runCommand(cbh, makeTestCommand(kNoTimeout, makeEchoCmdObj(), nullptr /* opCtx */, RemoteCommandRequest::HedgeOptions())); - fpb->waitForTimesEntered(fpb.initialTimesEntered() + 1); + // Wait for the "echo" operation to start. + numCurrentOpRan += waitForCommandToStart("echo", kMaxWait); // Run cancelCommand to kill the above operation. net().cancelCommand(cbh); @@ -357,7 +376,7 @@ TEST_F(NetworkInterfaceTest, CancelRemotely) { ASSERT(result.elapsedMillis); // Wait for the operation to be killed on the remote host. - auto numCurrentOpRan = waitForCommand("echo", kMaxWait); + numCurrentOpRan += waitForCommandToStop("echo", kMaxWait); // We have one canceled operation (echo), and two other succeeded operations // on top of the currentOp operations (configureFailPoint and _killOperations). @@ -388,23 +407,32 @@ TEST_F(NetworkInterfaceTest, CancelRemotelyTimedOut) { kNoTimeout); }); + int numCurrentOpRan = 0; + auto cbh = makeCallbackHandle(); auto deferred = [&] { // Kick off a blocking "echo" operation. - FailPointEnableBlock fpb("networkInterfaceAfterAcquireConn"); - auto deferred = runCommand(cbh, makeTestCommand(kNoTimeout, makeEchoCmdObj(), nullptr /* opCtx */, RemoteCommandRequest::HedgeOptions())); - fpb->waitForTimesEntered(fpb.initialTimesEntered() + 1); + // Wait for the "echo" operation to start. + numCurrentOpRan += waitForCommandToStart("echo", kMaxWait); // Run cancelCommand to kill the above operation. _killOperations is expected to block and - // time out, and the cancel timer is expected to cancel the operations. + // time out, and to be canceled by the command timer. + FailPointEnableBlock cmdFailedFpb("networkInterfaceCommandsFailedWithErrorCode", + BSON("cmdNames" + << BSON_ARRAY("_killOperations") << "errorCode" + << ErrorCodes::NetworkInterfaceExceededTimeLimit)); + net().cancelCommand(cbh); + // Wait for _killOperations for 'echo' to time out. + cmdFailedFpb->waitForTimesEntered(cmdFailedFpb.initialTimesEntered() + 1); + return deferred; }(); @@ -413,11 +441,8 @@ TEST_F(NetworkInterfaceTest, CancelRemotelyTimedOut) { ASSERT_EQ(ErrorCodes::CallbackCanceled, result.status); ASSERT(result.elapsedMillis); - // Wait for _killOperations for 'echo' to time out. - auto numCurrentOpRan = waitForCommand("_killOperations", kMaxWait); - // We have one canceled operation (echo), one timedout operation (_killOperations), - // and one other succeeded operation on top of the currentOp operations (configureFailPoint) + // and one succeeded operation on top of the currentOp operations (configureFailPoint). assertNumOps(1u, 1u, 0u, 1u + numCurrentOpRan); } diff --git a/src/mongo/executor/network_interface_tl.cpp b/src/mongo/executor/network_interface_tl.cpp index a21a568726c..5eb469da81b 100644 --- a/src/mongo/executor/network_interface_tl.cpp +++ b/src/mongo/executor/network_interface_tl.cpp @@ -380,6 +380,22 @@ void NetworkInterfaceTL::CommandStateBase::tryFinish(Status status) noexcept { // Kill operations for requests that we didn't use to fulfill the promise. requestManager->killOperationsForPendingRequests(); } + + networkInterfaceCommandsFailedWithErrorCode.shouldFail([&](const BSONObj& data) { + const auto errorCode = data.getIntField("errorCode"); + if (errorCode != status.code()) { + return false; + } + + const std::string requestCmdName = requestOnAny.cmdObj.firstElement().fieldName(); + for (auto&& cmdName : data.getObjectField("cmdNames")) { + if (cmdName.type() == String && cmdName.valueStringData() == requestCmdName) { + return true; + } + } + + return false; + }); } void NetworkInterfaceTL::RequestState::cancel() noexcept { @@ -772,12 +788,11 @@ void NetworkInterfaceTL::RequestState::send(StatusWith<ConnectionPool::Connectio networkInterfaceHangCommandsAfterAcquireConn.pauseWhileSet(); - if (networkInterfaceAfterAcquireConn.shouldFail()) { - LOGV2(4630601, - "Request acquired a connection", - "requestId"_attr = request->id, - "target"_attr = request->target); - } + LOGV2_DEBUG(4630601, + 2, + "Request acquired a connection", + "requestId"_attr = request->id, + "target"_attr = request->target); if (auto counters = interface()->_counters) { counters->recordSent(); |