1 files changed, 22 insertions, 9 deletions
diff --git a/src/mongo/s/query/async_results_merger.cpp b/src/mongo/s/query/async_results_merger.cpp
index c82de6a3bbe..984c2f85184 100644
--- a/src/mongo/s/query/async_results_merger.cpp
+++ b/src/mongo/s/query/async_results_merger.cpp
@@ -304,6 +304,14 @@ Status AsyncResultsMerger::askForNextBatch_inlock(size_t remoteIndex) {
     return Status::OK();
 }
 
+/*
+ * Note: When nextEvent() is called to do retries, only the remotes with retriable errors will
+ * be rescheduled because:
+ *
+ * 1. Other pending remotes still have callback assigned to them.
+ * 2. Remotes that already has some result will have a non-empty buffer.
+ * 3. Remotes that reached maximum retries will be in 'exhausted' state.
+ */
 StatusWith<executor::TaskExecutor::EventHandle> AsyncResultsMerger::nextEvent() {
     stdx::lock_guard<stdx::mutex> lk(_mutex);
 
@@ -441,24 +449,29 @@ void AsyncResultsMerger::handleBatchResponse(
             }
         }
 
-        // If the error is retriable, schedule another request.
+        // If we can still retry the initial cursor establishment, reset the state so it can be
+        // retried the next time nextEvent is called. Never retry getMores to avoid
+        // accidentally skipping results.
         if (!remote.cursorId && remote.retryCount < kMaxNumFailedHostRetryAttempts &&
             ShardRegistry::kAllRetriableErrors.count(cursorResponseStatus.getStatus().code())) {
+            invariant(remote.docBuffer.empty());
+
             LOG(1) << "Initial cursor establishment failed with retriable error and will be retried"
                    << causedBy(cursorResponseStatus.getStatus());
 
             ++remote.retryCount;
 
-            // Since we potentially updated the targeter that the last host it chose might be
-            // faulty, the call below may end up getting a different host.
-            remote.status = askForNextBatch_inlock(remoteIndex);
-            if (remote.status.isOK()) {
-                return;
+            remote.status = Status::OK();  // Reset status so it can be retried.
+
+            // Signal the merger thread to make it retry this remote again.
+            if (_currentEvent.isValid()) {
+                // To prevent ourselves from signalling the event twice,
+                // we set '_currentEvent' as invalid after signalling it.
+                _executor->signalEvent(_currentEvent);
+                _currentEvent = executor::TaskExecutor::EventHandle();
             }
 
-            // If we end up here, it means we failed to schedule the retry request, which is a more
-            // severe error that should not be retried. Just pass through to the error handling
-            // logic below.
+            return;
         } else {
             remote.status = cursorResponseStatus.getStatus();
         }