diff options
author | Nick Zolnierz <nicholas.zolnierz@mongodb.com> | 2017-11-01 14:25:09 -0400 |
---|---|---|
committer | Nick Zolnierz <nicholas.zolnierz@mongodb.com> | 2017-11-07 11:56:23 -0500 |
commit | 1d290c584c45a6065c8beeb67d18076753a2d587 (patch) | |
tree | 874e9eac691bb5905d0f92099f40a4ee7d8e2ece | |
parent | d20d01aedf7fac426fe60ba18c1bc59ce97ca060 (diff) | |
download | mongo-1d290c584c45a6065c8beeb67d18076753a2d587.tar.gz |
SERVER-31787: Certain aggregation commands with merging on mongos can leak cursors on mongod
-rw-r--r-- | src/mongo/s/query/async_results_merger.cpp | 10 | ||||
-rw-r--r-- | src/mongo/s/query/async_results_merger_test.cpp | 50 |
2 files changed, 50 insertions, 10 deletions
diff --git a/src/mongo/s/query/async_results_merger.cpp b/src/mongo/s/query/async_results_merger.cpp index 5c52b9588c8..45f4cbf350d 100644 --- a/src/mongo/s/query/async_results_merger.cpp +++ b/src/mongo/s/query/async_results_merger.cpp @@ -651,18 +651,12 @@ executor::TaskExecutor::EventHandle AsyncResultsMerger::kill(OperationContext* o _killCursorsScheduledEvent = statusWithEvent.getValue(); // If we're not waiting for responses from remotes, we can schedule killCursors commands on the - // remotes now. Otherwise, we have to wait until all responses are back, and then we can kill - // the remote cursors. + // remotes now. Otherwise, we have to wait until all responses are back because a cursor that + // is active (pinned) on a remote cannot be killed through killCursors. if (!_haveOutstandingBatchRequests(lk)) { _scheduleKillCursors(lk, opCtx); _lifecycleState = kKillComplete; _executor->signalEvent(_killCursorsScheduledEvent); - } else { - for (const auto& remote : _remotes) { - if (remote.cbHandle.isValid()) { - _executor->cancel(remote.cbHandle); - } - } } return _killCursorsScheduledEvent; diff --git a/src/mongo/s/query/async_results_merger_test.cpp b/src/mongo/s/query/async_results_merger_test.cpp index 1a9a2337095..a68c483979e 100644 --- a/src/mongo/s/query/async_results_merger_test.cpp +++ b/src/mongo/s/query/async_results_merger_test.cpp @@ -856,6 +856,7 @@ TEST_F(AsyncResultsMergerTest, BadResponseReceivedFromShard) { BSONObj response3 = CursorResponse(_nss, CursorId(789), batch3) .toBSON(CursorResponse::ResponseType::SubsequentResponse); scheduleNetworkResponseObjs({response1, response2, response3}); + runReadyCallbacks(); executor()->waitForEvent(readyEvent); ASSERT_TRUE(arm->ready()); auto statusWithNext = arm->nextReady(); @@ -1021,8 +1022,17 @@ TEST_F(AsyncResultsMergerTest, KillTwoOutstandingBatches) { // Kill event will only be signalled once the callbacks for the pending batches have run. auto killedEvent = arm->kill(operationContext()); - // The pending requests have been canceled, so run their callbacks. - runReadyCallbacks(); + // Schedule the remaining batches. + responses.clear(); + std::vector<BSONObj> batch2 = {fromjson("{_id: 3}"), fromjson("{_id: 4}")}; + responses.emplace_back(_nss, CursorId(2), batch2); + scheduleNetworkResponses(std::move(responses), + CursorResponse::ResponseType::SubsequentResponse); + responses.clear(); + std::vector<BSONObj> batch3 = {fromjson("{_id: 5}"), fromjson("{_id: 6}")}; + responses.emplace_back(_nss, CursorId(3), batch3); + scheduleNetworkResponses(std::move(responses), + CursorResponse::ResponseType::SubsequentResponse); // Ensure that we properly signal those waiting for more results to be ready. executor()->waitForEvent(readyEvent); @@ -1647,6 +1657,42 @@ TEST_F(AsyncResultsMergerTest, ShardCanErrorInBetweenReadyAndNextEvent) { executor()->waitForEvent(killEvent); } +TEST_F(AsyncResultsMergerTest, KillShouldWaitForRemoteCommandsBeforeSchedulingKillCursors) { + std::vector<ClusterClientCursorParams::RemoteCursor> cursors; + cursors.emplace_back(kTestShardIds[0], kTestShardHosts[0], CursorResponse(_nss, 1, {})); + makeCursorFromExistingCursors(std::move(cursors)); + + // Before any requests are scheduled, ARM is not ready to return results. + ASSERT_FALSE(arm->ready()); + ASSERT_FALSE(arm->remotesExhausted()); + + // Schedule requests. + auto readyEvent = unittest::assertGet(arm->nextEvent()); + + // Before any responses are delivered, ARM is not ready to return results. + ASSERT_FALSE(arm->ready()); + ASSERT_FALSE(arm->remotesExhausted()); + + // Kill the ARM while a batch is still outstanding. + auto killEvent = arm->kill(operationContext()); + + // Since the cursor has not returned any results and still has a pending remote + // request, the ARM should not attempt to kill the cursor. + ASSERT_FALSE(arm->remotesExhausted()); + + // Schedule the batch response, this should trigger cleanup of the batch and schedule the + // killCursors command. + std::vector<CursorResponse> responses; + std::vector<BSONObj> batch = {fromjson("{_id: 1}")}; + responses.emplace_back(_nss, CursorId(1), batch); + scheduleNetworkResponses(std::move(responses), + CursorResponse::ResponseType::SubsequentResponse); + executor()->waitForEvent(readyEvent); + + // Now the kill cursors command should be scheduled. + executor()->waitForEvent(killEvent); +} + } // namespace } // namespace mongo |