author     Kyle Suarez <ksuarz@gmail.com>    2016-05-03 00:01:19 -0400
committer  Kyle Suarez <ksuarz@gmail.com>    2016-05-03 00:01:19 -0400
commit     f294d1dcd4884857d693b0f8a33cf1a6be434409 (patch)
tree       8ba380d2b2b461f3b5459282e1ac7b51da56475b /src/mongo/s
parent     8abd5682f551ad79dec3557f721eebd45b21be0f (diff)
download   mongo-f294d1dcd4884857d693b0f8a33cf1a6be434409.tar.gz
Revert "SERVER-23213 Remove all users of ShardRegistry::kAllRetriableErrors"
This reverts commit 21cfba12a005431b08a8c69dcdb1f262622ea780.
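For reference, the change being reverted had replaced direct lookups in the static set ShardRegistry::kAllRetriableErrors with calls to Shard::isRetriableError() parameterized by a retry policy; this revert restores the set-based call sites shown in the diff below. The following standalone sketch contrasts the two styles. The enum values, set contents, and function bodies are simplified placeholders for illustration, not the actual MongoDB implementation.

// Standalone sketch (not MongoDB source): the two retry-classification styles
// this revert switches between. All identifiers below are simplified stand-ins.
#include <iostream>
#include <set>

enum class ErrorCode { HostUnreachable, NotMaster, WriteConcernFailed, DuplicateKey };

// Style restored by the revert: one shared set of retriable codes that every
// call site consults directly.
static const std::set<ErrorCode> kAllRetriableErrors{ErrorCode::HostUnreachable,
                                                     ErrorCode::NotMaster};

// Style removed by the revert: the shard decides, taking the caller's retry
// policy into account.
enum class RetryPolicy { kIdempotent, kNotIdempotent, kNoRetry };

bool isRetriableError(ErrorCode code, RetryPolicy policy) {
    if (policy == RetryPolicy::kNoRetry)
        return false;
    // The real method also distinguishes idempotent from non-idempotent writes;
    // this sketch only consults the shared set.
    return kAllRetriableErrors.count(code) > 0;
}

int main() {
    bool viaSet = kAllRetriableErrors.count(ErrorCode::NotMaster) > 0;                  // set-based
    bool viaPolicy = isRetriableError(ErrorCode::NotMaster, RetryPolicy::kIdempotent);  // policy-aware
    std::cout << std::boolalpha << viaSet << ' ' << viaPolicy << '\n';  // prints: true true
}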
Diffstat (limited to 'src/mongo/s')
6 files changed, 39 insertions, 88 deletions
diff --git a/src/mongo/s/catalog/replset/SConscript b/src/mongo/s/catalog/replset/SConscript
index 21a973a201a..9d05a072dca 100644
--- a/src/mongo/s/catalog/replset/SConscript
+++ b/src/mongo/s/catalog/replset/SConscript
@@ -13,7 +13,6 @@ env.Library(
         '$BUILD_DIR/mongo/s/catalog/dist_lock_catalog_interface',
         '$BUILD_DIR/mongo/s/catalog/dist_lock_manager',
         '$BUILD_DIR/mongo/s/client/sharding_client',
-        '$BUILD_DIR/mongo/s/coreshard',
         '$BUILD_DIR/mongo/util/fail_point'
     ],
 )
diff --git a/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp b/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp
index d29a40fc749..18c06b9139c 100644
--- a/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp
+++ b/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp
@@ -1651,20 +1651,12 @@ Status CatalogManagerReplicaSet::insertConfigDocument(OperationContext* txn,
     request.setNS(nss);
     request.setWriteConcern(kMajorityWriteConcern.toBSON());
 
-    auto configShard = Grid::get(txn)->shardRegistry()->getConfigShard();
     for (int retry = 1; retry <= kMaxWriteRetry; retry++) {
         BatchedCommandResponse response;
-        _runBatchWriteCommand(txn, request, &response, Shard::RetryPolicy::kNoRetry);
+        _runBatchWriteCommand(txn, request, &response, Shard::RetryPolicy::kNotIdempotent);
 
         Status status = response.toStatus();
 
-        if (retry < kMaxWriteRetry &&
-            configShard->isRetriableError(status.code(), Shard::RetryPolicy::kIdempotent)) {
-            // Pretend like the operation is idempotent because we're handling DuplicateKey errors
-            // specially
-            continue;
-        }
-
         // If we get DuplicateKey error on the first attempt to insert, this definitively means that
         // we are trying to insert the same entry a second time, so error out. If it happens on a
         // retry attempt though, it is not clear whether we are actually inserting a duplicate key
@@ -1702,6 +1694,10 @@ Status CatalogManagerReplicaSet::insertConfigDocument(OperationContext* txn,
             }
         }
 
+        if (ShardRegistry::kAllRetriableErrors.count(status.code()) && (retry < kMaxWriteRetry)) {
+            continue;
+        }
+
         return status;
     }
 
diff --git a/src/mongo/s/catalog/replset/replset_dist_lock_manager.cpp b/src/mongo/s/catalog/replset/replset_dist_lock_manager.cpp
index 31fb999cdbf..4f4457c793b 100644
--- a/src/mongo/s/catalog/replset/replset_dist_lock_manager.cpp
+++ b/src/mongo/s/catalog/replset/replset_dist_lock_manager.cpp
@@ -40,7 +40,6 @@
 #include "mongo/s/catalog/type_lockpings.h"
 #include "mongo/s/catalog/type_locks.h"
 #include "mongo/s/client/shard_registry.h"
-#include "mongo/s/grid.h"
 #include "mongo/stdx/chrono.h"
 #include "mongo/stdx/memory.h"
 #include "mongo/util/concurrency/thread_name.h"
@@ -286,7 +285,6 @@ StatusWith<DistLockManager::ScopedDistLock> ReplSetDistLockManager::lockWithSess
     // independent write operations.
     int networkErrorRetries = 0;
 
-    auto configShard = Grid::get(txn)->shardRegistry()->getConfigShard();
     // Distributed lock acquisition works by tring to update the state of the lock to 'taken'. If
     // the lock is currently taken, we will back off and try the acquisition again, repeating this
     // until the lockTryInterval has been reached. If a network error occurs at each lock
@@ -320,7 +318,7 @@ StatusWith<DistLockManager::ScopedDistLock> ReplSetDistLockManager::lockWithSess
         }
 
         // If a network error occurred, unlock the lock synchronously and try again
-        if (configShard->isRetriableError(status.code(), Shard::RetryPolicy::kIdempotent) &&
+        if (ShardRegistry::kAllRetriableErrors.count(status.code()) &&
             networkErrorRetries < kMaxNumLockAcquireRetries) {
             LOG(1) << "Failed to acquire distributed lock because of retriable error. Retrying "
                       "acquisition by first unlocking the stale entry, which possibly exists now"
diff --git a/src/mongo/s/catalog/replset/replset_dist_lock_manager_test.cpp b/src/mongo/s/catalog/replset/replset_dist_lock_manager_test.cpp
index 397796e0c36..999386de0ff 100644
--- a/src/mongo/s/catalog/replset/replset_dist_lock_manager_test.cpp
+++ b/src/mongo/s/catalog/replset/replset_dist_lock_manager_test.cpp
@@ -41,21 +41,12 @@
 #include "mongo/base/status_with.h"
 #include "mongo/bson/json.h"
 #include "mongo/bson/util/builder.h"
-#include "mongo/client/remote_command_targeter_factory_mock.h"
 #include "mongo/db/jsobj.h"
 #include "mongo/db/operation_context_noop.h"
 #include "mongo/db/service_context_noop.h"
-#include "mongo/executor/task_executor.h"
-#include "mongo/executor/task_executor_pool.h"
-#include "mongo/s/balancer/balancer_configuration.h"
-#include "mongo/s/catalog/catalog_cache.h"
 #include "mongo/s/catalog/dist_lock_catalog_mock.h"
 #include "mongo/s/catalog/type_lockpings.h"
 #include "mongo/s/catalog/type_locks.h"
-#include "mongo/s/client/shard_factory.h"
-#include "mongo/s/client/shard_registry.h"
-#include "mongo/s/grid.h"
-#include "mongo/s/query/cluster_cursor_manager.h"
 #include "mongo/stdx/condition_variable.h"
 #include "mongo/stdx/memory.h"
 #include "mongo/stdx/mutex.h"
@@ -128,22 +119,6 @@ public:
 protected:
     void setUp() override {
         getGlobalServiceContext()->setTickSource(stdx::make_unique<SystemTickSource>());
-
-        // Set up grid just so that the config shard is accessible via the ShardRegistry.
-        auto targeterFactory = stdx::make_unique<RemoteCommandTargeterFactoryMock>();
-        auto shardFactory = stdx::make_unique<ShardFactory>(std::move(targeterFactory));
-        ConnectionString configCS = ConnectionString::forReplicaSet(
-            "configReplSet", std::vector<HostAndPort>{HostAndPort{"config"}});
-        auto shardRegistry = stdx::make_unique<ShardRegistry>(std::move(shardFactory), configCS);
-        grid.init(nullptr,
-                  std::unique_ptr<CatalogCache>(),
-                  std::move(shardRegistry),
-                  std::unique_ptr<ClusterCursorManager>(),
-                  stdx::make_unique<BalancerConfiguration>(
-                      ChunkSizeSettingsType::kDefaultMaxChunkSizeBytes),
-                  std::unique_ptr<executor::TaskExecutorPool>(),
-                  nullptr);
-
         _mgr->startUp();
     }
 
@@ -151,7 +126,6 @@ protected:
         // Don't care about what shutDown passes to stopPing here.
         _mockCatalog->expectStopPing([](StringData) {}, Status::OK());
         _mgr->shutDown(txn());
-        grid.clearForUnitTests();
     }
 
     std::unique_ptr<DistLockCatalogMock> _dummyDoNotUse;  // dummy placeholder
@@ -172,7 +146,6 @@ public:
 
 protected:
     void setUp() override {
-        ReplSetDistLockManagerFixture::setUp();
         getGlobalServiceContext()->setTickSource(stdx::make_unique<TickSourceMock>());
         _mgr->startUp();
     }
diff --git a/src/mongo/s/query/async_results_merger.cpp b/src/mongo/s/query/async_results_merger.cpp
index 40ab9d8e427..0167ac5683d 100644
--- a/src/mongo/s/query/async_results_merger.cpp
+++ b/src/mongo/s/query/async_results_merger.cpp
@@ -427,42 +427,42 @@ void AsyncResultsMerger::handleBatchResponse(
                                          : cbData.response.getStatus());
 
     if (!cursorResponseStatus.isOK()) {
-        auto shard = remote.getShard();
-        if (!shard) {
-            remote.status =
-                Status(cursorResponseStatus.getStatus().code(),
-                       str::stream() << "Could not find shard " << *remote.shardId
-                                     << " containing host " << remote.getTargetHost().toString());
-        } else {
-            shard->updateReplSetMonitor(remote.getTargetHost(), cursorResponseStatus.getStatus());
-
-            // Retry initial cursor establishment if possible. Never retry getMores to avoid
-            // accidentally skipping results.
-            if (!remote.cursorId && remote.retryCount < kMaxNumFailedHostRetryAttempts &&
-                shard->isRetriableError(cursorResponseStatus.getStatus().code(),
-                                        Shard::RetryPolicy::kIdempotent)) {
-                invariant(remote.shardId);
-                LOG(1) << "Initial cursor establishment failed with retriable error and will be "
-                          "retried"
-                       << causedBy(cursorResponseStatus.getStatus());
-
-                ++remote.retryCount;
-
-                // Since we potentially updated the targeter that the last host it chose might be
-                // faulty, the call below may end up getting a different host.
-                remote.status = askForNextBatch_inlock(remoteIndex);
-                if (remote.status.isOK()) {
-                    return;
-                }
-
-                // If we end up here, it means we failed to schedule the retry request, which is a
-                // more
-                // severe error that should not be retried. Just pass through to the error handling
-                // logic below.
+        // Notify the shard registry of the failure.
+        if (remote.shardId) {
+            auto shard = grid.shardRegistry()->getShardNoReload(*remote.shardId);
+            if (!shard) {
+                remote.status = Status(cursorResponseStatus.getStatus().code(),
+                                       str::stream() << "Could not find shard " << *remote.shardId
+                                                     << " containing host "
+                                                     << remote.getTargetHost().toString());
             } else {
-                remote.status = cursorResponseStatus.getStatus();
+                shard->updateReplSetMonitor(remote.getTargetHost(),
+                                            cursorResponseStatus.getStatus());
             }
         }
+
+        // If the error is retriable, schedule another request.
+        if (!remote.cursorId && remote.retryCount < kMaxNumFailedHostRetryAttempts &&
+            ShardRegistry::kAllRetriableErrors.count(cursorResponseStatus.getStatus().code())) {
+            LOG(1) << "Initial cursor establishment failed with retriable error and will be retried"
+                   << causedBy(cursorResponseStatus.getStatus());
+
+            ++remote.retryCount;
+
+            // Since we potentially updated the targeter that the last host it chose might be
+            // faulty, the call below may end up getting a different host.
+            remote.status = askForNextBatch_inlock(remoteIndex);
+            if (remote.status.isOK()) {
+                return;
+            }
+
+            // If we end up here, it means we failed to schedule the retry request, which is a more
+            // severe error that should not be retried. Just pass through to the error handling
+            // logic below.
+        } else {
+            remote.status = cursorResponseStatus.getStatus();
+        }
+
         // Unreachable host errors are swallowed if the 'allowPartialResults' option is set. We
         // remove the unreachable host entirely from consideration by marking it as exhausted.
         if (_params.isAllowPartialResults) {
@@ -634,7 +634,7 @@ Status AsyncResultsMerger::RemoteCursorData::resolveShardIdToHostAndPort(
     invariant(shardId);
     invariant(!cursorId);
 
-    const auto shard = getShard();
+    const auto shard = grid.shardRegistry()->getShardNoReload(*shardId);
     if (!shard) {
         return Status(ErrorCodes::ShardNotFound,
                       str::stream() << "Could not find shard " << *shardId);
@@ -652,15 +652,6 @@ Status AsyncResultsMerger::RemoteCursorData::resolveShardIdToHostAndPort(
     return Status::OK();
 }
 
-std::shared_ptr<Shard> AsyncResultsMerger::RemoteCursorData::getShard() {
-    invariant(shardId || _shardHostAndPort);
-    if (shardId) {
-        return grid.shardRegistry()->getShardNoReload(*shardId);
-    } else {
-        return grid.shardRegistry()->getShardNoReload(_shardHostAndPort->toString());
-    }
-}
-
 //
 // AsyncResultsMerger::MergingComparator
 //
diff --git a/src/mongo/s/query/async_results_merger.h b/src/mongo/s/query/async_results_merger.h
index b4d04a9c7ad..d30281aa8f6 100644
--- a/src/mongo/s/query/async_results_merger.h
+++ b/src/mongo/s/query/async_results_merger.h
@@ -207,13 +207,7 @@ private:
          */
         Status resolveShardIdToHostAndPort(const ReadPreferenceSetting& readPref);
 
-        /**
-         * Returns the Shard object associated with this remote cursor.
-         */
-        std::shared_ptr<Shard> getShard();
-
         // ShardId on which a cursor will be created.
-        // TODO: This should always be set.
         const boost::optional<ShardId> shardId;
 
         // The command object for sending to the remote to establish the cursor. If a remote cursor
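The insertConfigDocument hunk above restores a loop that retries on errors in the shared retriable set and treats DuplicateKey leniently on retry attempts, since a retry can collide with a write that actually succeeded. A standalone sketch of that loop shape follows; runWrite, the error codes, kMaxWriteRetry, and the DuplicateKey handling are simplified placeholders rather than the real CatalogManagerReplicaSet logic.

// Standalone sketch (not MongoDB source): retry loop in the style restored by the
// revert. Names mirror the diff but the types and behavior are simplified.
#include <functional>
#include <iostream>
#include <set>

enum class ErrorCode { OK, HostUnreachable, NotMaster, DuplicateKey };

static const std::set<ErrorCode> kAllRetriableErrors{ErrorCode::HostUnreachable,
                                                     ErrorCode::NotMaster};
constexpr int kMaxWriteRetry = 3;

ErrorCode insertWithRetry(const std::function<ErrorCode()>& runWrite) {
    for (int retry = 1; retry <= kMaxWriteRetry; retry++) {
        ErrorCode status = runWrite();

        // DuplicateKey on the first attempt is a genuine error; on a retry it may only
        // mean the previous attempt succeeded before its response was lost. (The real
        // code re-reads the document to disambiguate; that check is omitted here.)
        if (status == ErrorCode::DuplicateKey && retry > 1)
            return ErrorCode::OK;

        // Keep retrying only for errors in the shared retriable set.
        if (kAllRetriableErrors.count(status) && retry < kMaxWriteRetry)
            continue;

        return status;
    }
    return ErrorCode::OK;  // defensive; the loop always returns on its final iteration
}

int main() {
    int calls = 0;
    // First attempt fails with a retriable error, the second succeeds.
    ErrorCode status = insertWithRetry(
        [&calls] { return ++calls == 1 ? ErrorCode::HostUnreachable : ErrorCode::OK; });
    std::cout << "attempts=" << calls << " ok=" << std::boolalpha
              << (status == ErrorCode::OK) << '\n';  // prints: attempts=2 ok=true
}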