From 40d309be939eea757c09d4895b22c24905aa8b05 Mon Sep 17 00:00:00 2001 From: David Storch Date: Fri, 3 Feb 2017 18:06:05 -0500 Subject: SERVER-27701 fix race in AsyncResultsMerger error handling (cherry picked from commit 5a35a6c425b815ecf7795d8e2706b9f3e82c7ce6) --- src/mongo/s/query/async_results_merger.cpp | 5 +-- src/mongo/s/query/async_results_merger_test.cpp | 42 +++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/mongo/s/query/async_results_merger.cpp b/src/mongo/s/query/async_results_merger.cpp index cc158026af7..18f5b3a6c07 100644 --- a/src/mongo/s/query/async_results_merger.cpp +++ b/src/mongo/s/query/async_results_merger.cpp @@ -332,8 +332,9 @@ StatusWith AsyncResultsMerger::nextEvent() for (size_t i = 0; i < _remotes.size(); ++i) { auto& remote = _remotes[i]; - // It is illegal to call this method if there is an error received from any shard. - invariant(remote.status.isOK()); + if (!remote.status.isOK()) { + return remote.status; + } if (!remote.hasNext() && !remote.exhausted() && !remote.cbHandle.isValid()) { // If we already have established a cursor with this remote, and there is no outstanding diff --git a/src/mongo/s/query/async_results_merger_test.cpp b/src/mongo/s/query/async_results_merger_test.cpp index ec9f155b20e..2dd95b9708b 100644 --- a/src/mongo/s/query/async_results_merger_test.cpp +++ b/src/mongo/s/query/async_results_merger_test.cpp @@ -1437,6 +1437,48 @@ TEST_F(AsyncResultsMergerTest, GetMoreRequestWithoutAwaitDataCantHaveMaxTime) { executor->waitForEvent(killEvent); } +TEST_F(AsyncResultsMergerTest, ShardCanErrorInBetweenReadyAndNextEvent) { + BSONObj findCmd = fromjson("{find: 'testcoll', tailable: true}"); + makeCursorFromFindCmd(findCmd, {kTestShardIds[0]}); + + ASSERT_FALSE(arm->ready()); + auto readyEvent = unittest::assertGet(arm->nextEvent()); + scheduleErrorResponse({ErrorCodes::BadValue, "bad thing happened"}); + + ASSERT_EQ(ErrorCodes::BadValue, arm->nextEvent().getStatus()); + + // Required to kill the 'arm' on error before destruction. + auto killEvent = arm->kill(); + executor->waitForEvent(killEvent); +} + +TEST_F(AsyncResultsMergerTest, RetryWhenShardHasRetriableErrorInBetweenReadyAndNextEvent) { + BSONObj findCmd = fromjson("{find: 'testcoll'}"); + makeCursorFromFindCmd(findCmd, {kTestShardIds[0]}); + + ASSERT_FALSE(arm->ready()); + + // First attempt returns a retriable error. + auto readyEvent = unittest::assertGet(arm->nextEvent()); + scheduleErrorResponse({ErrorCodes::NotMasterNoSlaveOk, "not master and not slave"}); + + // We expect to be able to retrieve another event, and be waiting on the retry to succeed. + readyEvent = unittest::assertGet(arm->nextEvent()); + std::vector responses; + std::vector batch = {fromjson("{_id: 1}"), fromjson("{_id: 2}")}; + responses.emplace_back(_nss, CursorId(0), batch); + scheduleNetworkResponses(std::move(responses), CursorResponse::ResponseType::InitialResponse); + + executor->waitForEvent(readyEvent); + ASSERT_TRUE(arm->ready()); + ASSERT_EQ(fromjson("{_id: 1}"), *unittest::assertGet(arm->nextReady())); + ASSERT_TRUE(arm->ready()); + ASSERT_EQ(fromjson("{_id: 2}"), *unittest::assertGet(arm->nextReady())); + ASSERT_TRUE(arm->ready()); + ASSERT_TRUE(arm->remotesExhausted()); + ASSERT(!unittest::assertGet(arm->nextReady())); +} + } // namespace } // namespace mongo -- cgit v1.2.1