SERVER-22027 Sharding should not retry killed operations

This change introduces a distinct interruption code (11602), which is used to kill operations during a replication primary stepdown, so that the config server retry logic can differentiate them from user-killed operations.
author: Kaloian Manassiev <kaloian.manassiev@mongodb.com> 2015-12-30 17:01:13 -0500
committer: Kaloian Manassiev <kaloian.manassiev@mongodb.com> 2015-12-30 17:09:28 -0500
commit: e49a2a16fb4b337d7ed1b0ec8d766f281741d8bf (patch)
tree: 7f0cac8cb50e56819bfe2546a525076976b1e193 /src/mongo/s/query/async_results_merger.cpp
parent: 715e9e1cdc618dad480a7a1a73458daf6ea9ce0f (diff)
download: mongo-e49a2a16fb4b337d7ed1b0ec8d766f281741d8bf.tar.gz
1 file changed, 7 insertions, 14 deletions
diff --git a/src/mongo/s/query/async_results_merger.cpp b/src/mongo/s/query/async_results_merger.cpp
index 8b23528a04a..c82de6a3bbe 100644
--- a/src/mongo/s/query/async_results_merger.cpp
+++ b/src/mongo/s/query/async_results_merger.cpp
@@ -51,15 +51,6 @@ namespace {
 // Maximum number of retries for network and replication notMaster errors (per host).
 const int kMaxNumFailedHostRetryAttempts = 3;
 
-/**
- * Returns whether a particular error code returned from the initial cursor establishment should
- * be retried.
- */
-bool isPerShardRetriableError(ErrorCodes::Error err) {
-    return (ShardRegistry::kAllRetriableErrors.count(err) ||
-            err == ErrorCodes::NotMasterOrSecondary);
-}
-
 }  // namespace
 
 AsyncResultsMerger::AsyncResultsMerger(executor::TaskExecutor* executor,
@@ -438,8 +429,7 @@ void AsyncResultsMerger::handleBatchResponse(
     if (!cursorResponseStatus.isOK()) {
         // Notify the shard registry of the failure.
         if (remote.shardId) {
-            // TODO: Pass down an OperationContext* to use here.
-            auto shard = grid.shardRegistry()->getShard(nullptr, *remote.shardId);
+            auto shard = grid.shardRegistry()->getShardNoReload(*remote.shardId);
             if (!shard) {
                 remote.status = Status(cursorResponseStatus.getStatus().code(),
                                        str::stream() << "Could not find shard " << *remote.shardId
@@ -453,7 +443,10 @@ void AsyncResultsMerger::handleBatchResponse(
 
         // If the error is retriable, schedule another request.
         if (!remote.cursorId && remote.retryCount < kMaxNumFailedHostRetryAttempts &&
-            isPerShardRetriableError(cursorResponseStatus.getStatus().code())) {
+            ShardRegistry::kAllRetriableErrors.count(cursorResponseStatus.getStatus().code())) {
+            LOG(1) << "Initial cursor establishment failed with retriable error and will be retried"
+                   << causedBy(cursorResponseStatus.getStatus());
+
             ++remote.retryCount;
 
             // Since we potentially updated the targeter that the last host it chose might be
@@ -641,13 +634,13 @@ Status AsyncResultsMerger::RemoteCursorData::resolveShardIdToHostAndPort(
     invariant(shardId);
     invariant(!cursorId);
 
-    // TODO: Pass down an OperationContext* to use here.
-    const auto shard = grid.shardRegistry()->getShard(nullptr, *shardId);
+    const auto shard = grid.shardRegistry()->getShardNoReload(*shardId);
     if (!shard) {
         return Status(ErrorCodes::ShardNotFound,
                       str::stream() << "Could not find shard " << *shardId);
     }
 
+    // TODO: Pass down an OperationContext* to use here.
     auto findHostStatus = shard->getTargeter()->findHost(
         readPref, RemoteCommandTargeter::selectFindHostMaxWaitTime(nullptr));
     if (!findHostStatus.isOK()) {
author	Kaloian Manassiev <kaloian.manassiev@mongodb.com>	2015-12-30 17:01:13 -0500
committer	Kaloian Manassiev <kaloian.manassiev@mongodb.com>	2015-12-30 17:09:28 -0500
commit	e49a2a16fb4b337d7ed1b0ec8d766f281741d8bf (patch)
tree	7f0cac8cb50e56819bfe2546a525076976b1e193 /src/mongo/s/query/async_results_merger.cpp
parent	715e9e1cdc618dad480a7a1a73458daf6ea9ce0f (diff)
download	mongo-e49a2a16fb4b337d7ed1b0ec8d766f281741d8bf.tar.gz