summary refs log tree commit diff
diff options
context:
space:
mode:
author Cheahuychou Mao <cheahuychou.mao@mongodb.com> 2020-04-02 16:48:07 -0400
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-04-07 20:48:40 +0000
commit e03fe6ff798f50382122edd469743677d401e001 (patch)
tree eb8380e5b99982137bea56ddf0c7b4e40feed256
parent 950522e1e4a478673efd5da46469635b9a15bb6a (diff)
download mongo-e03fe6ff798f50382122edd469743677d401e001.tar.gz
SERVER-47343 Add/update failpoints to make cancelCommand integration tests more robust
-rw-r--r-- src/mongo/executor/network_interface.cpp 2
-rw-r--r-- src/mongo/executor/network_interface.h 2
-rw-r--r-- src/mongo/executor/network_interface_integration_test.cpp 59
-rw-r--r-- src/mongo/executor/network_interface_tl.cpp 27
-rw-r--r-- src/mongo/util/fail_point.h 2
5 files changed, 66 insertions, 26 deletions
diff --git a/src/mongo/executor/network_interface.cpp b/src/mongo/executor/network_interface.cpp
index 9df8b70af9f..90d2a248a0a 100644
--- a/src/mongo/executor/network_interface.cpp
+++ b/src/mongo/executor/network_interface.cpp
@@ -41,7 +41,7 @@ NetworkInterface::~NetworkInterface() {}
MONGO_FAIL_POINT_DEFINE(networkInterfaceSendRequestsToTargetHostsInAlphabeticalOrder);
MONGO_FAIL_POINT_DEFINE(networkInterfaceDiscardCommandsBeforeAcquireConn);
MONGO_FAIL_POINT_DEFINE(networkInterfaceHangCommandsAfterAcquireConn);
-MONGO_FAIL_POINT_DEFINE(networkInterfaceAfterAcquireConn);
+MONGO_FAIL_POINT_DEFINE(networkInterfaceCommandsFailedWithErrorCode);
} // namespace executor
} // namespace mongo
diff --git a/src/mongo/executor/network_interface.h b/src/mongo/executor/network_interface.h
index 1a5c35aa594..9a1e0488dab 100644
--- a/src/mongo/executor/network_interface.h
+++ b/src/mongo/executor/network_interface.h
@@ -45,7 +45,7 @@ namespace executor {
extern FailPoint networkInterfaceSendRequestsToTargetHostsInAlphabeticalOrder;
extern FailPoint networkInterfaceDiscardCommandsBeforeAcquireConn;
extern FailPoint networkInterfaceHangCommandsAfterAcquireConn;
-extern FailPoint networkInterfaceAfterAcquireConn;
+extern FailPoint networkInterfaceCommandsFailedWithErrorCode;
/**
* Interface to networking for use by TaskExecutor implementations.
diff --git a/src/mongo/executor/network_interface_integration_test.cpp b/src/mongo/executor/network_interface_integration_test.cpp
index 46f55d87ea8..42a81961876 100644
--- a/src/mongo/executor/network_interface_integration_test.cpp
+++ b/src/mongo/executor/network_interface_integration_test.cpp
@@ -216,10 +216,27 @@ public:
/**
* Repeatedly runs currentOp to check if the given command is running, and blocks until
- * the command finishes running or the wait timeout is reached, and returns the number
- * times of currentOp is run.
+ * the command starts running or the wait timeout is reached. Asserts that the command
+ * is running after the wait and returns the number of times currentOp is run.
*/
- uint64_t waitForCommand(const std::string command, Milliseconds timeout) {
+ uint64_t waitForCommandToStart(const std::string command, Milliseconds timeout) {
+ ClockSource::StopWatch stopwatch;
+ uint64_t numCurrentOpRan = 0;
+ do {
+ sleepmillis(100);
+ numCurrentOpRan++;
+ } while (!isCommandRunning(command) && stopwatch.elapsed() < timeout);
+
+ ASSERT_TRUE(isCommandRunning(command));
+ return ++numCurrentOpRan;
+ }
+
+ /**
+ * Repeatedly runs currentOp to check if the given command is running, and blocks until
+ * the command finishes running or the wait timeout is reached. Asserts that the command
+ * is no longer running after the wait and returns the number of times currentOp is run.
+ */
+ uint64_t waitForCommandToStop(const std::string command, Milliseconds timeout) {
ClockSource::StopWatch stopwatch;
uint64_t numCurrentOpRan = 0;
do {
@@ -227,7 +244,8 @@ public:
numCurrentOpRan++;
} while (isCommandRunning(command) && stopwatch.elapsed() < timeout);
- return numCurrentOpRan;
+ ASSERT_FALSE(isCommandRunning(command));
+ return ++numCurrentOpRan;
}
struct IsMasterData {
@@ -283,7 +301,7 @@ TEST_F(NetworkInterfaceTest, CancelMissingOperation) {
assertNumOps(0u, 0u, 0u, 0u);
}
-TEST_F(NetworkInterfaceTest, CancelOperation) {
+TEST_F(NetworkInterfaceTest, CancelLocally) {
auto cbh = makeCallbackHandle();
auto deferred = [&] {
@@ -331,19 +349,20 @@ TEST_F(NetworkInterfaceTest, CancelRemotely) {
kNoTimeout);
});
+ int numCurrentOpRan = 0;
+
auto cbh = makeCallbackHandle();
auto deferred = [&] {
// Kick off an "echo" operation, which should block until cancelCommand causes
// the operation to be killed.
- FailPointEnableBlock fpb("networkInterfaceAfterAcquireConn");
-
auto deferred = runCommand(cbh,
makeTestCommand(kNoTimeout,
makeEchoCmdObj(),
nullptr /* opCtx */,
RemoteCommandRequest::HedgeOptions()));
- fpb->waitForTimesEntered(fpb.initialTimesEntered() + 1);
+ // Wait for the "echo" operation to start.
+ numCurrentOpRan += waitForCommandToStart("echo", kMaxWait);
// Run cancelCommand to kill the above operation.
net().cancelCommand(cbh);
@@ -357,7 +376,7 @@ TEST_F(NetworkInterfaceTest, CancelRemotely) {
ASSERT(result.elapsedMillis);
// Wait for the operation to be killed on the remote host.
- auto numCurrentOpRan = waitForCommand("echo", kMaxWait);
+ numCurrentOpRan += waitForCommandToStop("echo", kMaxWait);
// We have one canceled operation (echo), and two other succeeded operations
// on top of the currentOp operations (configureFailPoint and _killOperations).
@@ -388,23 +407,32 @@ TEST_F(NetworkInterfaceTest, CancelRemotelyTimedOut) {
kNoTimeout);
});
+ int numCurrentOpRan = 0;
+
auto cbh = makeCallbackHandle();
auto deferred = [&] {
// Kick off a blocking "echo" operation.
- FailPointEnableBlock fpb("networkInterfaceAfterAcquireConn");
-
auto deferred = runCommand(cbh,
makeTestCommand(kNoTimeout,
makeEchoCmdObj(),
nullptr /* opCtx */,
RemoteCommandRequest::HedgeOptions()));
- fpb->waitForTimesEntered(fpb.initialTimesEntered() + 1);
+ // Wait for the "echo" operation to start.
+ numCurrentOpRan += waitForCommandToStart("echo", kMaxWait);
// Run cancelCommand to kill the above operation. _killOperations is expected to block and
- // time out, and the cancel timer is expected to cancel the operations.
+ // time out, and to be canceled by the command timer.
+ FailPointEnableBlock cmdFailedFpb("networkInterfaceCommandsFailedWithErrorCode",
+ BSON("cmdNames"
+ << BSON_ARRAY("_killOperations") << "errorCode"
+ << ErrorCodes::NetworkInterfaceExceededTimeLimit));
+
net().cancelCommand(cbh);
+ // Wait for _killOperations for 'echo' to time out.
+ cmdFailedFpb->waitForTimesEntered(cmdFailedFpb.initialTimesEntered() + 1);
+
return deferred;
}();
@@ -413,11 +441,8 @@ TEST_F(NetworkInterfaceTest, CancelRemotelyTimedOut) {
ASSERT_EQ(ErrorCodes::CallbackCanceled, result.status);
ASSERT(result.elapsedMillis);
- // Wait for _killOperations for 'echo' to time out.
- auto numCurrentOpRan = waitForCommand("_killOperations", kMaxWait);
-
// We have one canceled operation (echo), one timedout operation (_killOperations),
- // and one other succeeded operation on top of the currentOp operations (configureFailPoint)
+ // and one succeeded operation on top of the currentOp operations (configureFailPoint).
assertNumOps(1u, 1u, 0u, 1u + numCurrentOpRan);
}
diff --git a/src/mongo/executor/network_interface_tl.cpp b/src/mongo/executor/network_interface_tl.cpp
index a21a568726c..5eb469da81b 100644
--- a/src/mongo/executor/network_interface_tl.cpp
+++ b/src/mongo/executor/network_interface_tl.cpp
@@ -380,6 +380,22 @@ void NetworkInterfaceTL::CommandStateBase::tryFinish(Status status) noexcept {
// Kill operations for requests that we didn't use to fulfill the promise.
requestManager->killOperationsForPendingRequests();
}
+
+ networkInterfaceCommandsFailedWithErrorCode.shouldFail([&](const BSONObj& data) {
+ const auto errorCode = data.getIntField("errorCode");
+ if (errorCode != status.code()) {
+ return false;
+ }
+
+ const std::string requestCmdName = requestOnAny.cmdObj.firstElement().fieldName();
+ for (auto&& cmdName : data.getObjectField("cmdNames")) {
+ if (cmdName.type() == String && cmdName.valueStringData() == requestCmdName) {
+ return true;
+ }
+ }
+
+ return false;
+ });
}
void NetworkInterfaceTL::RequestState::cancel() noexcept {
@@ -772,12 +788,11 @@ void NetworkInterfaceTL::RequestState::send(StatusWith<ConnectionPool::Connectio
networkInterfaceHangCommandsAfterAcquireConn.pauseWhileSet();
- if (networkInterfaceAfterAcquireConn.shouldFail()) {
- LOGV2(4630601,
- "Request acquired a connection",
- "requestId"_attr = request->id,
- "target"_attr = request->target);
- }
+ LOGV2_DEBUG(4630601,
+ 2,
+ "Request acquired a connection",
+ "requestId"_attr = request->id,
+ "target"_attr = request->target);
if (auto counters = interface()->_counters) {
counters->recordSent();
diff --git a/src/mongo/util/fail_point.h b/src/mongo/util/fail_point.h
index 1ed365be95a..c7a5f547b35 100644
--- a/src/mongo/util/fail_point.h
+++ b/src/mongo/util/fail_point.h
@@ -473,7 +473,7 @@ private:
class FailPointEnableBlock {
public:
explicit FailPointEnableBlock(std::string failPointName);
- FailPointEnableBlock(std::string failPointName, BSONObj cmdObj);
+ FailPointEnableBlock(std::string failPointName, BSONObj data);
~FailPointEnableBlock();
// Const access to the underlying FailPoint