author     Kyle Suarez <ksuarz@gmail.com>    2016-05-03 00:01:19 -0400
committer  Kyle Suarez <ksuarz@gmail.com>    2016-05-03 00:01:19 -0400
commit     f294d1dcd4884857d693b0f8a33cf1a6be434409 (patch)
tree       8ba380d2b2b461f3b5459282e1ac7b51da56475b /src/mongo/s
parent     8abd5682f551ad79dec3557f721eebd45b21be0f (diff)
download   mongo-f294d1dcd4884857d693b0f8a33cf1a6be434409.tar.gz
Revert "SERVER-23213 Remove all users of ShardRegistry::kAllRetriableErrors"
This reverts commit 21cfba12a005431b08a8c69dcdb1f262622ea780.
Diffstat (limited to 'src/mongo/s')
-rw-r--r--  src/mongo/s/catalog/replset/SConscript                           1
-rw-r--r--  src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp     14
-rw-r--r--  src/mongo/s/catalog/replset/replset_dist_lock_manager.cpp        4
-rw-r--r--  src/mongo/s/catalog/replset/replset_dist_lock_manager_test.cpp  27
-rw-r--r--  src/mongo/s/query/async_results_merger.cpp                      75
-rw-r--r--  src/mongo/s/query/async_results_merger.h                         6
6 files changed, 39 insertions, 88 deletions
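
Note on the net effect of the revert, before the per-file hunks: call sites that had asked a Shard object whether an error is retriable via isRetriableError(code, RetryPolicy) go back to consulting the static ShardRegistry::kAllRetriableErrors set. The following is a standalone sketch of the two shapes using simplified stand-in types (ErrorCode, RetryPolicy, Shard here are illustrative, not the real mongo/s classes):

// Standalone sketch with stand-in types, not MongoDB code: the two
// retriability checks that this revert swaps.
#include <iostream>
#include <set>

enum class ErrorCode { HostUnreachable, NotMaster, DuplicateKey };

// Style restored by this revert: a static set of retriable codes that callers
// consult directly (stand-in for ShardRegistry::kAllRetriableErrors).
static const std::set<ErrorCode> kAllRetriableErrors = {ErrorCode::HostUnreachable,
                                                        ErrorCode::NotMaster};

// Style removed by this revert: the caller asks a Shard object and supplies a
// retry policy, so idempotent and non-idempotent operations can differ.
enum class RetryPolicy { kIdempotent, kNotIdempotent, kNoRetry };

struct Shard {
    bool isRetriableError(ErrorCode code, RetryPolicy policy) const {
        if (policy == RetryPolicy::kNoRetry)
            return false;
        return kAllRetriableErrors.count(code) > 0;
    }
};

int main() {
    Shard shard;
    std::cout << std::boolalpha
              << shard.isRetriableError(ErrorCode::NotMaster, RetryPolicy::kIdempotent) << "\n"
              << (kAllRetriableErrors.count(ErrorCode::DuplicateKey) > 0) << "\n";
    return 0;
}

The per-shard form lets non-idempotent operations opt out of retries; the static set applies one list everywhere, which is why the hunks below re-add manual retry checks at each call site.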
diff --git a/src/mongo/s/catalog/replset/SConscript b/src/mongo/s/catalog/replset/SConscript
index 21a973a201a..9d05a072dca 100644
--- a/src/mongo/s/catalog/replset/SConscript
+++ b/src/mongo/s/catalog/replset/SConscript
@@ -13,7 +13,6 @@ env.Library(
'$BUILD_DIR/mongo/s/catalog/dist_lock_catalog_interface',
'$BUILD_DIR/mongo/s/catalog/dist_lock_manager',
'$BUILD_DIR/mongo/s/client/sharding_client',
- '$BUILD_DIR/mongo/s/coreshard',
'$BUILD_DIR/mongo/util/fail_point'
],
)
diff --git a/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp b/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp
index d29a40fc749..18c06b9139c 100644
--- a/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp
+++ b/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp
@@ -1651,20 +1651,12 @@ Status CatalogManagerReplicaSet::insertConfigDocument(OperationContext* txn,
request.setNS(nss);
request.setWriteConcern(kMajorityWriteConcern.toBSON());
- auto configShard = Grid::get(txn)->shardRegistry()->getConfigShard();
for (int retry = 1; retry <= kMaxWriteRetry; retry++) {
BatchedCommandResponse response;
- _runBatchWriteCommand(txn, request, &response, Shard::RetryPolicy::kNoRetry);
+ _runBatchWriteCommand(txn, request, &response, Shard::RetryPolicy::kNotIdempotent);
Status status = response.toStatus();
- if (retry < kMaxWriteRetry &&
- configShard->isRetriableError(status.code(), Shard::RetryPolicy::kIdempotent)) {
- // Pretend like the operation is idempotent because we're handling DuplicateKey errors
- // specially
- continue;
- }
-
// If we get DuplicateKey error on the first attempt to insert, this definitively means that
// we are trying to insert the same entry a second time, so error out. If it happens on a
// retry attempt though, it is not clear whether we are actually inserting a duplicate key
@@ -1702,6 +1694,10 @@ Status CatalogManagerReplicaSet::insertConfigDocument(OperationContext* txn,
}
}
+ if (ShardRegistry::kAllRetriableErrors.count(status.code()) && (retry < kMaxWriteRetry)) {
+ continue;
+ }
+
return status;
}
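
For reference, the retry shape this hunk restores in insertConfigDocument: run the batch write with no built-in retry, give DuplicateKey its special handling, then loop only while the status code is in kAllRetriableErrors and attempts remain. A standalone sketch with stand-in types (the function and types below are simplified illustrations, and the DuplicateKey re-read that the real code performs on retry attempts is elided):

// Standalone sketch, not the real CatalogManagerReplicaSet.
#include <functional>
#include <iostream>
#include <set>
#include <string>

enum class Code { OK, DuplicateKey, HostUnreachable, Other };

struct Status {
    Code code;
    std::string reason;
    bool isOK() const { return code == Code::OK; }
};

static const std::set<Code> kAllRetriableErrors = {Code::HostUnreachable};
constexpr int kMaxWriteRetry = 3;

Status insertConfigDocument(const std::function<Status()>& runBatchWrite) {
    for (int retry = 1; retry <= kMaxWriteRetry; retry++) {
        Status status = runBatchWrite();

        if (status.code == Code::DuplicateKey) {
            // On the first attempt this is a genuine duplicate; on a retry the
            // previous attempt may actually have succeeded, so the real code
            // re-reads the document before deciding (elided here).
            return status;
        }

        // Retry only retriable codes, and only while attempts remain.
        if (kAllRetriableErrors.count(status.code) && retry < kMaxWriteRetry) {
            continue;
        }
        return status;
    }
    return Status{Code::Other, "unreachable"};
}

int main() {
    int calls = 0;
    auto flakyWrite = [&calls]() {
        return ++calls < 3 ? Status{Code::HostUnreachable, "network"} : Status{Code::OK, ""};
    };
    Status result = insertConfigDocument(flakyWrite);
    std::cout << (result.isOK() ? "inserted" : "failed") << " after " << calls << " attempts\n";
    return 0;
}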
diff --git a/src/mongo/s/catalog/replset/replset_dist_lock_manager.cpp b/src/mongo/s/catalog/replset/replset_dist_lock_manager.cpp
index 31fb999cdbf..4f4457c793b 100644
--- a/src/mongo/s/catalog/replset/replset_dist_lock_manager.cpp
+++ b/src/mongo/s/catalog/replset/replset_dist_lock_manager.cpp
@@ -40,7 +40,6 @@
#include "mongo/s/catalog/type_lockpings.h"
#include "mongo/s/catalog/type_locks.h"
#include "mongo/s/client/shard_registry.h"
-#include "mongo/s/grid.h"
#include "mongo/stdx/chrono.h"
#include "mongo/stdx/memory.h"
#include "mongo/util/concurrency/thread_name.h"
@@ -286,7 +285,6 @@ StatusWith<DistLockManager::ScopedDistLock> ReplSetDistLockManager::lockWithSess
// independent write operations.
int networkErrorRetries = 0;
- auto configShard = Grid::get(txn)->shardRegistry()->getConfigShard();
// Distributed lock acquisition works by trying to update the state of the lock to 'taken'. If
// the lock is currently taken, we will back off and try the acquisition again, repeating this
// until the lockTryInterval has been reached. If a network error occurs at each lock
@@ -320,7 +318,7 @@ StatusWith<DistLockManager::ScopedDistLock> ReplSetDistLockManager::lockWithSess
}
// If a network error occurred, unlock the lock synchronously and try again
- if (configShard->isRetriableError(status.code(), Shard::RetryPolicy::kIdempotent) &&
+ if (ShardRegistry::kAllRetriableErrors.count(status.code()) &&
networkErrorRetries < kMaxNumLockAcquireRetries) {
LOG(1) << "Failed to acquire distributed lock because of retriable error. Retrying "
"acquisition by first unlocking the stale entry, which possibly exists now"
diff --git a/src/mongo/s/catalog/replset/replset_dist_lock_manager_test.cpp b/src/mongo/s/catalog/replset/replset_dist_lock_manager_test.cpp
index 397796e0c36..999386de0ff 100644
--- a/src/mongo/s/catalog/replset/replset_dist_lock_manager_test.cpp
+++ b/src/mongo/s/catalog/replset/replset_dist_lock_manager_test.cpp
@@ -41,21 +41,12 @@
#include "mongo/base/status_with.h"
#include "mongo/bson/json.h"
#include "mongo/bson/util/builder.h"
-#include "mongo/client/remote_command_targeter_factory_mock.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/operation_context_noop.h"
#include "mongo/db/service_context_noop.h"
-#include "mongo/executor/task_executor.h"
-#include "mongo/executor/task_executor_pool.h"
-#include "mongo/s/balancer/balancer_configuration.h"
-#include "mongo/s/catalog/catalog_cache.h"
#include "mongo/s/catalog/dist_lock_catalog_mock.h"
#include "mongo/s/catalog/type_lockpings.h"
#include "mongo/s/catalog/type_locks.h"
-#include "mongo/s/client/shard_factory.h"
-#include "mongo/s/client/shard_registry.h"
-#include "mongo/s/grid.h"
-#include "mongo/s/query/cluster_cursor_manager.h"
#include "mongo/stdx/condition_variable.h"
#include "mongo/stdx/memory.h"
#include "mongo/stdx/mutex.h"
@@ -128,22 +119,6 @@ public:
protected:
void setUp() override {
getGlobalServiceContext()->setTickSource(stdx::make_unique<SystemTickSource>());
-
- // Set up grid just so that the config shard is accessible via the ShardRegistry.
- auto targeterFactory = stdx::make_unique<RemoteCommandTargeterFactoryMock>();
- auto shardFactory = stdx::make_unique<ShardFactory>(std::move(targeterFactory));
- ConnectionString configCS = ConnectionString::forReplicaSet(
- "configReplSet", std::vector<HostAndPort>{HostAndPort{"config"}});
- auto shardRegistry = stdx::make_unique<ShardRegistry>(std::move(shardFactory), configCS);
- grid.init(nullptr,
- std::unique_ptr<CatalogCache>(),
- std::move(shardRegistry),
- std::unique_ptr<ClusterCursorManager>(),
- stdx::make_unique<BalancerConfiguration>(
- ChunkSizeSettingsType::kDefaultMaxChunkSizeBytes),
- std::unique_ptr<executor::TaskExecutorPool>(),
- nullptr);
-
_mgr->startUp();
}
@@ -151,7 +126,6 @@ protected:
// Don't care about what shutDown passes to stopPing here.
_mockCatalog->expectStopPing([](StringData) {}, Status::OK());
_mgr->shutDown(txn());
- grid.clearForUnitTests();
}
std::unique_ptr<DistLockCatalogMock> _dummyDoNotUse; // dummy placeholder
@@ -172,7 +146,6 @@ public:
protected:
void setUp() override {
- ReplSetDistLockManagerFixture::setUp();
getGlobalServiceContext()->setTickSource(stdx::make_unique<TickSourceMock>());
_mgr->startUp();
}
diff --git a/src/mongo/s/query/async_results_merger.cpp b/src/mongo/s/query/async_results_merger.cpp
index 40ab9d8e427..0167ac5683d 100644
--- a/src/mongo/s/query/async_results_merger.cpp
+++ b/src/mongo/s/query/async_results_merger.cpp
@@ -427,42 +427,42 @@ void AsyncResultsMerger::handleBatchResponse(
: cbData.response.getStatus());
if (!cursorResponseStatus.isOK()) {
- auto shard = remote.getShard();
- if (!shard) {
- remote.status =
- Status(cursorResponseStatus.getStatus().code(),
- str::stream() << "Could not find shard " << *remote.shardId
- << " containing host " << remote.getTargetHost().toString());
- } else {
- shard->updateReplSetMonitor(remote.getTargetHost(), cursorResponseStatus.getStatus());
-
- // Retry initial cursor establishment if possible. Never retry getMores to avoid
- // accidentally skipping results.
- if (!remote.cursorId && remote.retryCount < kMaxNumFailedHostRetryAttempts &&
- shard->isRetriableError(cursorResponseStatus.getStatus().code(),
- Shard::RetryPolicy::kIdempotent)) {
- invariant(remote.shardId);
- LOG(1) << "Initial cursor establishment failed with retriable error and will be "
- "retried" << causedBy(cursorResponseStatus.getStatus());
-
- ++remote.retryCount;
-
- // Since we potentially updated the targeter that the last host it chose might be
- // faulty, the call below may end up getting a different host.
- remote.status = askForNextBatch_inlock(remoteIndex);
- if (remote.status.isOK()) {
- return;
- }
-
- // If we end up here, it means we failed to schedule the retry request, which is a
- // more
- // severe error that should not be retried. Just pass through to the error handling
- // logic below.
+ // Notify the shard registry of the failure.
+ if (remote.shardId) {
+ auto shard = grid.shardRegistry()->getShardNoReload(*remote.shardId);
+ if (!shard) {
+ remote.status = Status(cursorResponseStatus.getStatus().code(),
+ str::stream() << "Could not find shard " << *remote.shardId
+ << " containing host "
+ << remote.getTargetHost().toString());
} else {
- remote.status = cursorResponseStatus.getStatus();
+ shard->updateReplSetMonitor(remote.getTargetHost(),
+ cursorResponseStatus.getStatus());
}
}
+ // If the error is retriable, schedule another request.
+ if (!remote.cursorId && remote.retryCount < kMaxNumFailedHostRetryAttempts &&
+ ShardRegistry::kAllRetriableErrors.count(cursorResponseStatus.getStatus().code())) {
+ LOG(1) << "Initial cursor establishment failed with retriable error and will be retried"
+ << causedBy(cursorResponseStatus.getStatus());
+
+ ++remote.retryCount;
+
+ // Since we potentially updated the targeter that the last host it chose might be
+ // faulty, the call below may end up getting a different host.
+ remote.status = askForNextBatch_inlock(remoteIndex);
+ if (remote.status.isOK()) {
+ return;
+ }
+
+ // If we end up here, it means we failed to schedule the retry request, which is a more
+ // severe error that should not be retried. Just pass through to the error handling
+ // logic below.
+ } else {
+ remote.status = cursorResponseStatus.getStatus();
+ }
+
// Unreachable host errors are swallowed if the 'allowPartialResults' option is set. We
// remove the unreachable host entirely from consideration by marking it as exhausted.
if (_params.isAllowPartialResults) {
@@ -634,7 +634,7 @@ Status AsyncResultsMerger::RemoteCursorData::resolveShardIdToHostAndPort(
invariant(shardId);
invariant(!cursorId);
- const auto shard = getShard();
+ const auto shard = grid.shardRegistry()->getShardNoReload(*shardId);
if (!shard) {
return Status(ErrorCodes::ShardNotFound,
str::stream() << "Could not find shard " << *shardId);
@@ -652,15 +652,6 @@ Status AsyncResultsMerger::RemoteCursorData::resolveShardIdToHostAndPort(
return Status::OK();
}
-std::shared_ptr<Shard> AsyncResultsMerger::RemoteCursorData::getShard() {
- invariant(shardId || _shardHostAndPort);
- if (shardId) {
- return grid.shardRegistry()->getShardNoReload(*shardId);
- } else {
- return grid.shardRegistry()->getShardNoReload(_shardHostAndPort->toString());
- }
-}
-
//
// AsyncResultsMerger::MergingComparator
//
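
The handleBatchResponse hunk above restores the pre-SERVER-23213 error path: report the failure to the shard's replica-set monitor, retry only initial cursor establishment (never a getMore) when the code is in kAllRetriableErrors, and, if allowPartialResults is set, swallow the error by treating the remote as exhausted. A standalone sketch of the retry decision with stand-in types (simplified illustration, not AsyncResultsMerger; the replica-set monitor update is elided):

// Standalone sketch of the failed-response decision.
#include <iostream>
#include <set>

enum class Code { OK, HostUnreachable, Other };

static const std::set<Code> kAllRetriableErrors = {Code::HostUnreachable};
constexpr int kMaxNumFailedHostRetryAttempts = 3;

struct Remote {
    long long cursorId = 0;  // 0 means the cursor has not been established yet
    int retryCount = 0;
    Code status = Code::OK;
};

// Returns true if another request was scheduled, false if the error sticks.
bool handleFailedResponse(Remote& remote, Code error,
                          bool scheduleSucceeds, bool allowPartialResults) {
    // Retry only initial cursor establishment, never a getMore, and only for
    // retriable codes with attempts remaining.
    if (!remote.cursorId && remote.retryCount < kMaxNumFailedHostRetryAttempts &&
        kAllRetriableErrors.count(error)) {
        ++remote.retryCount;
        if (scheduleSucceeds) {
            return true;  // another attempt, possibly against a different host
        }
        // Failed to schedule the retry: fall through to the error handling
        // below (the real code records the scheduling error; simplified here).
    }
    remote.status = error;
    if (allowPartialResults) {
        // Swallow the error and treat the remote as exhausted (the real code
        // limits this to unreachable-host errors).
        remote.status = Code::OK;
    }
    return false;
}

int main() {
    Remote r;
    bool scheduled = handleFailedResponse(r, Code::HostUnreachable, true, false);
    std::cout << std::boolalpha << scheduled << " after " << r.retryCount << " retry\n";
    return 0;
}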
diff --git a/src/mongo/s/query/async_results_merger.h b/src/mongo/s/query/async_results_merger.h
index b4d04a9c7ad..d30281aa8f6 100644
--- a/src/mongo/s/query/async_results_merger.h
+++ b/src/mongo/s/query/async_results_merger.h
@@ -207,13 +207,7 @@ private:
*/
Status resolveShardIdToHostAndPort(const ReadPreferenceSetting& readPref);
- /**
- * Returns the Shard object associated with this remote cursor.
- */
- std::shared_ptr<Shard> getShard();
-
// ShardId on which a cursor will be created.
- // TODO: This should always be set.
const boost::optional<ShardId> shardId;
// The command object for sending to the remote to establish the cursor. If a remote cursor