summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Zolnierz <nicholas.zolnierz@mongodb.com>2017-11-01 14:25:09 -0400
committerNick Zolnierz <nicholas.zolnierz@mongodb.com>2017-11-07 11:56:23 -0500
commit1d290c584c45a6065c8beeb67d18076753a2d587 (patch)
tree874e9eac691bb5905d0f92099f40a4ee7d8e2ece
parentd20d01aedf7fac426fe60ba18c1bc59ce97ca060 (diff)
downloadmongo-1d290c584c45a6065c8beeb67d18076753a2d587.tar.gz
SERVER-31787: Certain aggregation commands with merging on mongos can leak cursors on mongod
-rw-r--r--src/mongo/s/query/async_results_merger.cpp10
-rw-r--r--src/mongo/s/query/async_results_merger_test.cpp50
2 files changed, 50 insertions, 10 deletions
diff --git a/src/mongo/s/query/async_results_merger.cpp b/src/mongo/s/query/async_results_merger.cpp
index 5c52b9588c8..45f4cbf350d 100644
--- a/src/mongo/s/query/async_results_merger.cpp
+++ b/src/mongo/s/query/async_results_merger.cpp
@@ -651,18 +651,12 @@ executor::TaskExecutor::EventHandle AsyncResultsMerger::kill(OperationContext* o
_killCursorsScheduledEvent = statusWithEvent.getValue();
// If we're not waiting for responses from remotes, we can schedule killCursors commands on the
- // remotes now. Otherwise, we have to wait until all responses are back, and then we can kill
- // the remote cursors.
+ // remotes now. Otherwise, we have to wait until all responses are back because a cursor that
+ // is active (pinned) on a remote cannot be killed through killCursors.
if (!_haveOutstandingBatchRequests(lk)) {
_scheduleKillCursors(lk, opCtx);
_lifecycleState = kKillComplete;
_executor->signalEvent(_killCursorsScheduledEvent);
- } else {
- for (const auto& remote : _remotes) {
- if (remote.cbHandle.isValid()) {
- _executor->cancel(remote.cbHandle);
- }
- }
}
return _killCursorsScheduledEvent;
diff --git a/src/mongo/s/query/async_results_merger_test.cpp b/src/mongo/s/query/async_results_merger_test.cpp
index 1a9a2337095..a68c483979e 100644
--- a/src/mongo/s/query/async_results_merger_test.cpp
+++ b/src/mongo/s/query/async_results_merger_test.cpp
@@ -856,6 +856,7 @@ TEST_F(AsyncResultsMergerTest, BadResponseReceivedFromShard) {
BSONObj response3 = CursorResponse(_nss, CursorId(789), batch3)
.toBSON(CursorResponse::ResponseType::SubsequentResponse);
scheduleNetworkResponseObjs({response1, response2, response3});
+ runReadyCallbacks();
executor()->waitForEvent(readyEvent);
ASSERT_TRUE(arm->ready());
auto statusWithNext = arm->nextReady();
@@ -1021,8 +1022,17 @@ TEST_F(AsyncResultsMergerTest, KillTwoOutstandingBatches) {
// Kill event will only be signalled once the callbacks for the pending batches have run.
auto killedEvent = arm->kill(operationContext());
- // The pending requests have been canceled, so run their callbacks.
- runReadyCallbacks();
+ // Schedule the remaining batches.
+ responses.clear();
+ std::vector<BSONObj> batch2 = {fromjson("{_id: 3}"), fromjson("{_id: 4}")};
+ responses.emplace_back(_nss, CursorId(2), batch2);
+ scheduleNetworkResponses(std::move(responses),
+ CursorResponse::ResponseType::SubsequentResponse);
+ responses.clear();
+ std::vector<BSONObj> batch3 = {fromjson("{_id: 5}"), fromjson("{_id: 6}")};
+ responses.emplace_back(_nss, CursorId(3), batch3);
+ scheduleNetworkResponses(std::move(responses),
+ CursorResponse::ResponseType::SubsequentResponse);
// Ensure that we properly signal those waiting for more results to be ready.
executor()->waitForEvent(readyEvent);
@@ -1647,6 +1657,42 @@ TEST_F(AsyncResultsMergerTest, ShardCanErrorInBetweenReadyAndNextEvent) {
executor()->waitForEvent(killEvent);
}
+TEST_F(AsyncResultsMergerTest, KillShouldWaitForRemoteCommandsBeforeSchedulingKillCursors) {
+ std::vector<ClusterClientCursorParams::RemoteCursor> cursors;
+ cursors.emplace_back(kTestShardIds[0], kTestShardHosts[0], CursorResponse(_nss, 1, {}));
+ makeCursorFromExistingCursors(std::move(cursors));
+
+ // Before any requests are scheduled, ARM is not ready to return results.
+ ASSERT_FALSE(arm->ready());
+ ASSERT_FALSE(arm->remotesExhausted());
+
+ // Schedule requests.
+ auto readyEvent = unittest::assertGet(arm->nextEvent());
+
+ // Before any responses are delivered, ARM is not ready to return results.
+ ASSERT_FALSE(arm->ready());
+ ASSERT_FALSE(arm->remotesExhausted());
+
+ // Kill the ARM while a batch is still outstanding.
+ auto killEvent = arm->kill(operationContext());
+
+ // Since the cursor has not returned any results and still has a pending remote
+ // request, the ARM should not attempt to kill the cursor.
+ ASSERT_FALSE(arm->remotesExhausted());
+
+ // Schedule the batch response, this should trigger cleanup of the batch and schedule the
+ // killCursors command.
+ std::vector<CursorResponse> responses;
+ std::vector<BSONObj> batch = {fromjson("{_id: 1}")};
+ responses.emplace_back(_nss, CursorId(1), batch);
+ scheduleNetworkResponses(std::move(responses),
+ CursorResponse::ResponseType::SubsequentResponse);
+ executor()->waitForEvent(readyEvent);
+
+ // Now the kill cursors command should be scheduled.
+ executor()->waitForEvent(killEvent);
+}
+
} // namespace
} // namespace mongo