author     Kyle Suarez <ksuarz@gmail.com>    2016-05-03 00:01:19 -0400
committer  Kyle Suarez <ksuarz@gmail.com>    2016-05-03 00:01:19 -0400
commit     f294d1dcd4884857d693b0f8a33cf1a6be434409 (patch)
tree       8ba380d2b2b461f3b5459282e1ac7b51da56475b /src
parent     8abd5682f551ad79dec3557f721eebd45b21be0f (diff)
download   mongo-f294d1dcd4884857d693b0f8a33cf1a6be434409.tar.gz
Revert "SERVER-23213 Remove all users of ShardRegistry::kAllRetriableErrors"
This reverts commit 21cfba12a005431b08a8c69dcdb1f262622ea780.
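
The practical effect of the revert is to switch retry checks back from the per-shard Shard::isRetriableError() helper to direct membership tests against the shared ShardRegistry::kAllRetriableErrors error-code set, as the hunks below show. A minimal sketch contrasting the two styles; the header paths are assumed from the tree layout in the diff, and the two free functions are illustrative wrappers, not code from the patch:

    #include <memory>

    #include "mongo/base/status.h"
    #include "mongo/s/client/shard.h"
    #include "mongo/s/client/shard_registry.h"

    namespace mongo {

    // Pre-revert style: ask the Shard object, naming an explicit retry policy.
    bool isRetriableViaShard(const std::shared_ptr<Shard>& shard, const Status& status) {
        return shard->isRetriableError(status.code(), Shard::RetryPolicy::kIdempotent);
    }

    // Post-revert style (what this patch restores): consult the shared error-code set.
    bool isRetriableViaRegistry(const Status& status) {
        return ShardRegistry::kAllRetriableErrors.count(status.code()) > 0;
    }

    }  // namespace mongo
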
Diffstat (limited to 'src')
-rw-r--r--  src/mongo/s/catalog/replset/SConscript                           1
-rw-r--r--  src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp     14
-rw-r--r--  src/mongo/s/catalog/replset/replset_dist_lock_manager.cpp        4
-rw-r--r--  src/mongo/s/catalog/replset/replset_dist_lock_manager_test.cpp  27
-rw-r--r--  src/mongo/s/query/async_results_merger.cpp                      75
-rw-r--r--  src/mongo/s/query/async_results_merger.h                         6
6 files changed, 39 insertions(+), 88 deletions(-)
diff --git a/src/mongo/s/catalog/replset/SConscript b/src/mongo/s/catalog/replset/SConscript
index 21a973a201a..9d05a072dca 100644
--- a/src/mongo/s/catalog/replset/SConscript
+++ b/src/mongo/s/catalog/replset/SConscript
@@ -13,7 +13,6 @@ env.Library(
'$BUILD_DIR/mongo/s/catalog/dist_lock_catalog_interface',
'$BUILD_DIR/mongo/s/catalog/dist_lock_manager',
'$BUILD_DIR/mongo/s/client/sharding_client',
- '$BUILD_DIR/mongo/s/coreshard',
'$BUILD_DIR/mongo/util/fail_point'
],
)
diff --git a/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp b/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp
index d29a40fc749..18c06b9139c 100644
--- a/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp
+++ b/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp
@@ -1651,20 +1651,12 @@ Status CatalogManagerReplicaSet::insertConfigDocument(OperationContext* txn,
request.setNS(nss);
request.setWriteConcern(kMajorityWriteConcern.toBSON());
- auto configShard = Grid::get(txn)->shardRegistry()->getConfigShard();
for (int retry = 1; retry <= kMaxWriteRetry; retry++) {
BatchedCommandResponse response;
- _runBatchWriteCommand(txn, request, &response, Shard::RetryPolicy::kNoRetry);
+ _runBatchWriteCommand(txn, request, &response, Shard::RetryPolicy::kNotIdempotent);
Status status = response.toStatus();
- if (retry < kMaxWriteRetry &&
- configShard->isRetriableError(status.code(), Shard::RetryPolicy::kIdempotent)) {
- // Pretend like the operation is idempotent because we're handling DuplicateKey errors
- // specially
- continue;
- }
-
// If we get DuplicateKey error on the first attempt to insert, this definitively means that
// we are trying to insert the same entry a second time, so error out. If it happens on a
// retry attempt though, it is not clear whether we are actually inserting a duplicate key
@@ -1702,6 +1694,10 @@ Status CatalogManagerReplicaSet::insertConfigDocument(OperationContext* txn,
}
}
+ if (ShardRegistry::kAllRetriableErrors.count(status.code()) && (retry < kMaxWriteRetry)) {
+ continue;
+ }
+
return status;
}
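
Put back together, the restored insertConfigDocument() loop runs the batch write with RetryPolicy::kNotIdempotent, gives DuplicateKey its special treatment (elided here), and only then decides whether to go around again. A condensed sketch of that loop shape, assuming the mongo headers from the earlier sketch; attemptOnce stands in for the real _runBatchWriteCommand() call and the retry limit is illustrative:

    #include <functional>

    // Condensed shape of the retry loop this hunk restores. `attemptOnce` stands in
    // for _runBatchWriteCommand(..., Shard::RetryPolicy::kNotIdempotent); the
    // DuplicateKey special-casing from the hunk is elided.
    mongo::Status insertWithRetry(const std::function<mongo::Status()>& attemptOnce) {
        const int kMaxWriteRetry = 3;  // illustrative; the real constant lives in the class

        mongo::Status status = mongo::Status::OK();
        for (int retry = 1; retry <= kMaxWriteRetry; retry++) {
            status = attemptOnce();

            if (mongo::ShardRegistry::kAllRetriableErrors.count(status.code()) &&
                retry < kMaxWriteRetry) {
                continue;  // transient error with attempts left: run the whole write again
            }
            return status;  // success, non-retriable error, or out of attempts
        }
        return status;  // not reached: the final iteration always returns above
    }
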
diff --git a/src/mongo/s/catalog/replset/replset_dist_lock_manager.cpp b/src/mongo/s/catalog/replset/replset_dist_lock_manager.cpp
index 31fb999cdbf..4f4457c793b 100644
--- a/src/mongo/s/catalog/replset/replset_dist_lock_manager.cpp
+++ b/src/mongo/s/catalog/replset/replset_dist_lock_manager.cpp
@@ -40,7 +40,6 @@
#include "mongo/s/catalog/type_lockpings.h"
#include "mongo/s/catalog/type_locks.h"
#include "mongo/s/client/shard_registry.h"
-#include "mongo/s/grid.h"
#include "mongo/stdx/chrono.h"
#include "mongo/stdx/memory.h"
#include "mongo/util/concurrency/thread_name.h"
@@ -286,7 +285,6 @@ StatusWith<DistLockManager::ScopedDistLock> ReplSetDistLockManager::lockWithSess
// independent write operations.
int networkErrorRetries = 0;
- auto configShard = Grid::get(txn)->shardRegistry()->getConfigShard();
// Distributed lock acquisition works by trying to update the state of the lock to 'taken'. If
// the lock is currently taken, we will back off and try the acquisition again, repeating this
// until the lockTryInterval has been reached. If a network error occurs at each lock
@@ -320,7 +318,7 @@ StatusWith<DistLockManager::ScopedDistLock> ReplSetDistLockManager::lockWithSess
}
// If a network error occurred, unlock the lock synchronously and try again
- if (configShard->isRetriableError(status.code(), Shard::RetryPolicy::kIdempotent) &&
+ if (ShardRegistry::kAllRetriableErrors.count(status.code()) &&
networkErrorRetries < kMaxNumLockAcquireRetries) {
LOG(1) << "Failed to acquire distributed lock because of retriable error. Retrying "
"acquisition by first unlocking the stale entry, which possibly exists now"
diff --git a/src/mongo/s/catalog/replset/replset_dist_lock_manager_test.cpp b/src/mongo/s/catalog/replset/replset_dist_lock_manager_test.cpp
index 397796e0c36..999386de0ff 100644
--- a/src/mongo/s/catalog/replset/replset_dist_lock_manager_test.cpp
+++ b/src/mongo/s/catalog/replset/replset_dist_lock_manager_test.cpp
@@ -41,21 +41,12 @@
#include "mongo/base/status_with.h"
#include "mongo/bson/json.h"
#include "mongo/bson/util/builder.h"
-#include "mongo/client/remote_command_targeter_factory_mock.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/operation_context_noop.h"
#include "mongo/db/service_context_noop.h"
-#include "mongo/executor/task_executor.h"
-#include "mongo/executor/task_executor_pool.h"
-#include "mongo/s/balancer/balancer_configuration.h"
-#include "mongo/s/catalog/catalog_cache.h"
#include "mongo/s/catalog/dist_lock_catalog_mock.h"
#include "mongo/s/catalog/type_lockpings.h"
#include "mongo/s/catalog/type_locks.h"
-#include "mongo/s/client/shard_factory.h"
-#include "mongo/s/client/shard_registry.h"
-#include "mongo/s/grid.h"
-#include "mongo/s/query/cluster_cursor_manager.h"
#include "mongo/stdx/condition_variable.h"
#include "mongo/stdx/memory.h"
#include "mongo/stdx/mutex.h"
@@ -128,22 +119,6 @@ public:
protected:
void setUp() override {
getGlobalServiceContext()->setTickSource(stdx::make_unique<SystemTickSource>());
-
- // Set up grid just so that the config shard is accessible via the ShardRegistry.
- auto targeterFactory = stdx::make_unique<RemoteCommandTargeterFactoryMock>();
- auto shardFactory = stdx::make_unique<ShardFactory>(std::move(targeterFactory));
- ConnectionString configCS = ConnectionString::forReplicaSet(
- "configReplSet", std::vector<HostAndPort>{HostAndPort{"config"}});
- auto shardRegistry = stdx::make_unique<ShardRegistry>(std::move(shardFactory), configCS);
- grid.init(nullptr,
- std::unique_ptr<CatalogCache>(),
- std::move(shardRegistry),
- std::unique_ptr<ClusterCursorManager>(),
- stdx::make_unique<BalancerConfiguration>(
- ChunkSizeSettingsType::kDefaultMaxChunkSizeBytes),
- std::unique_ptr<executor::TaskExecutorPool>(),
- nullptr);
-
_mgr->startUp();
}
@@ -151,7 +126,6 @@ protected:
// Don't care about what shutDown passes to stopPing here.
_mockCatalog->expectStopPing([](StringData) {}, Status::OK());
_mgr->shutDown(txn());
- grid.clearForUnitTests();
}
std::unique_ptr<DistLockCatalogMock> _dummyDoNotUse; // dummy placeholder
@@ -172,7 +146,6 @@ public:
protected:
void setUp() override {
- ReplSetDistLockManagerFixture::setUp();
getGlobalServiceContext()->setTickSource(stdx::make_unique<TickSourceMock>());
_mgr->startUp();
}
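
After the revert the test fixture no longer bootstraps a Grid at all; setUp() only installs a tick source and starts the manager. A sketch of the resulting fixture shape, assuming the members visible in the hunk (_mgr, _mockCatalog, txn()); the tearDown() name is inferred from the surrounding context, not shown in the hunk:

    void setUp() override {
        getGlobalServiceContext()->setTickSource(stdx::make_unique<SystemTickSource>());
        _mgr->startUp();
    }

    void tearDown() override {
        // Don't care about what shutDown passes to stopPing here.
        _mockCatalog->expectStopPing([](StringData) {}, Status::OK());
        _mgr->shutDown(txn());
    }
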
diff --git a/src/mongo/s/query/async_results_merger.cpp b/src/mongo/s/query/async_results_merger.cpp
index 40ab9d8e427..0167ac5683d 100644
--- a/src/mongo/s/query/async_results_merger.cpp
+++ b/src/mongo/s/query/async_results_merger.cpp
@@ -427,42 +427,42 @@ void AsyncResultsMerger::handleBatchResponse(
: cbData.response.getStatus());
if (!cursorResponseStatus.isOK()) {
- auto shard = remote.getShard();
- if (!shard) {
- remote.status =
- Status(cursorResponseStatus.getStatus().code(),
- str::stream() << "Could not find shard " << *remote.shardId
- << " containing host " << remote.getTargetHost().toString());
- } else {
- shard->updateReplSetMonitor(remote.getTargetHost(), cursorResponseStatus.getStatus());
-
- // Retry initial cursor establishment if possible. Never retry getMores to avoid
- // accidentally skipping results.
- if (!remote.cursorId && remote.retryCount < kMaxNumFailedHostRetryAttempts &&
- shard->isRetriableError(cursorResponseStatus.getStatus().code(),
- Shard::RetryPolicy::kIdempotent)) {
- invariant(remote.shardId);
- LOG(1) << "Initial cursor establishment failed with retriable error and will be "
- "retried" << causedBy(cursorResponseStatus.getStatus());
-
- ++remote.retryCount;
-
- // Since we potentially updated the targeter that the last host it chose might be
- // faulty, the call below may end up getting a different host.
- remote.status = askForNextBatch_inlock(remoteIndex);
- if (remote.status.isOK()) {
- return;
- }
-
- // If we end up here, it means we failed to schedule the retry request, which is a
- // more
- // severe error that should not be retried. Just pass through to the error handling
- // logic below.
+ // Notify the shard registry of the failure.
+ if (remote.shardId) {
+ auto shard = grid.shardRegistry()->getShardNoReload(*remote.shardId);
+ if (!shard) {
+ remote.status = Status(cursorResponseStatus.getStatus().code(),
+ str::stream() << "Could not find shard " << *remote.shardId
+ << " containing host "
+ << remote.getTargetHost().toString());
} else {
- remote.status = cursorResponseStatus.getStatus();
+ shard->updateReplSetMonitor(remote.getTargetHost(),
+ cursorResponseStatus.getStatus());
}
}
+ // If the error is retriable, schedule another request.
+ if (!remote.cursorId && remote.retryCount < kMaxNumFailedHostRetryAttempts &&
+ ShardRegistry::kAllRetriableErrors.count(cursorResponseStatus.getStatus().code())) {
+ LOG(1) << "Initial cursor establishment failed with retriable error and will be retried"
+ << causedBy(cursorResponseStatus.getStatus());
+
+ ++remote.retryCount;
+
+ // Since we potentially updated the targeter that the last host it chose might be
+ // faulty, the call below may end up getting a different host.
+ remote.status = askForNextBatch_inlock(remoteIndex);
+ if (remote.status.isOK()) {
+ return;
+ }
+
+ // If we end up here, it means we failed to schedule the retry request, which is a more
+ // severe error that should not be retried. Just pass through to the error handling
+ // logic below.
+ } else {
+ remote.status = cursorResponseStatus.getStatus();
+ }
+
// Unreachable host errors are swallowed if the 'allowPartialResults' option is set. We
// remove the unreachable host entirely from consideration by marking it as exhausted.
if (_params.isAllowPartialResults) {
@@ -634,7 +634,7 @@ Status AsyncResultsMerger::RemoteCursorData::resolveShardIdToHostAndPort(
invariant(shardId);
invariant(!cursorId);
- const auto shard = getShard();
+ const auto shard = grid.shardRegistry()->getShardNoReload(*shardId);
if (!shard) {
return Status(ErrorCodes::ShardNotFound,
str::stream() << "Could not find shard " << *shardId);
@@ -652,15 +652,6 @@ Status AsyncResultsMerger::RemoteCursorData::resolveShardIdToHostAndPort(
return Status::OK();
}
-std::shared_ptr<Shard> AsyncResultsMerger::RemoteCursorData::getShard() {
- invariant(shardId || _shardHostAndPort);
- if (shardId) {
- return grid.shardRegistry()->getShardNoReload(*shardId);
- } else {
- return grid.shardRegistry()->getShardNoReload(_shardHostAndPort->toString());
- }
-}
-
//
// AsyncResultsMerger::MergingComparator
//
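
For the cluster query path, the restored behaviour is: on a failed response, look the shard up by id and feed the failure to its replica-set monitor, then retry only the initial cursor establishment, never a getMore (to avoid silently skipping results), while attempts remain and the error code is in the shared retriable set. A sketch of just that retry decision; RemoteCursorData here is an illustrative stand-in for the merger's private struct of the same name, and the retry limit value is assumed:

    #include <cstdint>

    #include <boost/optional.hpp>

    #include "mongo/base/status.h"
    #include "mongo/s/client/shard_registry.h"

    // Illustrative stand-in for AsyncResultsMerger's private per-remote state.
    struct RemoteCursorData {
        boost::optional<std::int64_t> cursorId;  // unset until the cursor is established
        int retryCount = 0;
    };

    const int kMaxNumFailedHostRetryAttempts = 3;  // illustrative value

    // Mirrors the restored check in handleBatchResponse(): retry only while no cursor
    // has been established yet, attempts remain, and the error is a retriable code.
    bool shouldRetryCursorEstablishment(const RemoteCursorData& remote,
                                        const mongo::Status& error) {
        return !remote.cursorId && remote.retryCount < kMaxNumFailedHostRetryAttempts &&
            mongo::ShardRegistry::kAllRetriableErrors.count(error.code());
    }
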
diff --git a/src/mongo/s/query/async_results_merger.h b/src/mongo/s/query/async_results_merger.h
index b4d04a9c7ad..d30281aa8f6 100644
--- a/src/mongo/s/query/async_results_merger.h
+++ b/src/mongo/s/query/async_results_merger.h
@@ -207,13 +207,7 @@ private:
*/
Status resolveShardIdToHostAndPort(const ReadPreferenceSetting& readPref);
- /**
- * Returns the Shard object associated with this remote cursor.
- */
- std::shared_ptr<Shard> getShard();
-
// ShardId on which a cursor will be created.
- // TODO: This should always be set.
const boost::optional<ShardId> shardId;
// The command object for sending to the remote to establish the cursor. If a remote cursor