diff options
-rw-r--r-- | src/mongo/base/error_codes.err | 15 | ||||
-rw-r--r-- | src/mongo/client/fetcher_test.cpp | 10 | ||||
-rw-r--r-- | src/mongo/client/remote_command_retry_scheduler.cpp | 89 | ||||
-rw-r--r-- | src/mongo/client/remote_command_retry_scheduler.h | 90 | ||||
-rw-r--r-- | src/mongo/client/remote_command_retry_scheduler_test.cpp | 49 | ||||
-rw-r--r-- | src/mongo/db/repl/collection_cloner.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/repl/database_cloner.cpp | 32 | ||||
-rw-r--r-- | src/mongo/db/repl/databases_cloner.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/repl/initial_syncer.cpp | 36 | ||||
-rw-r--r-- | src/mongo/s/client/shard.cpp | 6 | ||||
-rw-r--r-- | src/mongo/s/client/shard_remote.cpp | 19 | ||||
-rw-r--r-- | src/mongo/s/query/establish_cursors.cpp | 8 | ||||
-rw-r--r-- | src/mongo/s/transaction_router.cpp | 9 | ||||
-rw-r--r-- | src/mongo/util/assert_util_test.cpp | 14 |
14 files changed, 179 insertions, 213 deletions
diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err index 377aa7649b7..a74d38125a6 100644 --- a/src/mongo/base/error_codes.err +++ b/src/mongo/base/error_codes.err @@ -364,3 +364,18 @@ error_class("SnapshotError", ["SnapshotTooOld", "SnapshotUnavailable", "StaleChu error_class("VoteAbortError", ["NoSuchTransaction", "ReadConcernMajorityNotEnabled", "TransactionTooOld"]) error_class("NonResumableChangeStreamError", ["ChangeStreamFatalError", "ChangeStreamHistoryLost"]) +error_class("RetriableError", [ + "NotMaster", + "NotMasterNoSlaveOk", + "NotMasterOrSecondary", + "InterruptedDueToReplStateChange", + "PrimarySteppedDown", + "WriteConcernFailed", + "HostUnreachable", + "HostNotFound", + "NetworkTimeout", + "SocketException", + "ShutdownInProgress", + "InterruptedAtShutdown", + "BalancerInterrupted", +]) diff --git a/src/mongo/client/fetcher_test.cpp b/src/mongo/client/fetcher_test.cpp index de38843b248..96a581aed7c 100644 --- a/src/mongo/client/fetcher_test.cpp +++ b/src/mongo/client/fetcher_test.cpp @@ -986,10 +986,8 @@ TEST_F(FetcherTest, ShutdownDuringSecondBatch) { } TEST_F(FetcherTest, FetcherAppliesRetryPolicyToFirstCommandButNotToGetMoreRequests) { - auto policy = RemoteCommandRetryScheduler::makeRetryPolicy( - 3U, - executor::RemoteCommandRequest::kNoTimeout, - {ErrorCodes::BadValue, ErrorCodes::InternalError}); + auto policy = RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( + 3U, executor::RemoteCommandRequest::kNoTimeout); fetcher = stdx::make_unique<Fetcher>(&getExecutor(), source, @@ -1007,9 +1005,9 @@ TEST_F(FetcherTest, FetcherAppliesRetryPolicyToFirstCommandButNotToGetMoreReques // Retry policy is applied to find command. const BSONObj doc = BSON("_id" << 1); - auto rs = ResponseStatus(ErrorCodes::BadValue, "first", Milliseconds(0)); + auto rs = ResponseStatus(ErrorCodes::HostUnreachable, "first", Milliseconds(0)); processNetworkResponse(rs, ReadyQueueState::kHasReadyRequests, FetcherState::kActive); - rs = ResponseStatus(ErrorCodes::InternalError, "second", Milliseconds(0)); + rs = ResponseStatus(ErrorCodes::SocketException, "second", Milliseconds(0)); processNetworkResponse(rs, ReadyQueueState::kHasReadyRequests, FetcherState::kActive); processNetworkResponse(BSON("cursor" << BSON("id" << 1LL << "ns" << "db.coll" diff --git a/src/mongo/client/remote_command_retry_scheduler.cpp b/src/mongo/client/remote_command_retry_scheduler.cpp index 01e2a43f6b0..9aefd9fc902 100644 --- a/src/mongo/client/remote_command_retry_scheduler.cpp +++ b/src/mongo/client/remote_command_retry_scheduler.cpp @@ -42,95 +42,6 @@ namespace mongo { -namespace { - -class RetryPolicyImpl : public RemoteCommandRetryScheduler::RetryPolicy { -public: - RetryPolicyImpl(std::size_t maximumAttempts, - Milliseconds maximumResponseElapsedTotal, - const std::initializer_list<ErrorCodes::Error>& retryableErrors); - std::size_t getMaximumAttempts() const override; - Milliseconds getMaximumResponseElapsedTotal() const override; - bool shouldRetryOnError(ErrorCodes::Error error) const override; - std::string toString() const override; - -private: - std::size_t _maximumAttempts; - Milliseconds _maximumResponseElapsedTotal; - std::vector<ErrorCodes::Error> _retryableErrors; -}; - -RetryPolicyImpl::RetryPolicyImpl(std::size_t maximumAttempts, - Milliseconds maximumResponseElapsedTotal, - const std::initializer_list<ErrorCodes::Error>& retryableErrors) - : _maximumAttempts(maximumAttempts), - _maximumResponseElapsedTotal(maximumResponseElapsedTotal), - _retryableErrors(retryableErrors) { - std::sort(_retryableErrors.begin(), _retryableErrors.end()); -} - -std::string RetryPolicyImpl::toString() const { - str::stream output; - output << "RetryPolicyImpl"; - output << " maxAttempts: " << _maximumAttempts; - output << " maxTimeMillis: " << _maximumResponseElapsedTotal; - - if (_retryableErrors.size() > 0) { - output << "Retryable Errors: "; - for (auto error : _retryableErrors) { - output << error; - } - } - return output; -} - -std::size_t RetryPolicyImpl::getMaximumAttempts() const { - return _maximumAttempts; -} - -Milliseconds RetryPolicyImpl::getMaximumResponseElapsedTotal() const { - return _maximumResponseElapsedTotal; -} - -bool RetryPolicyImpl::shouldRetryOnError(ErrorCodes::Error error) const { - return std::binary_search(_retryableErrors.cbegin(), _retryableErrors.cend(), error); -} - -} // namespace - -const std::initializer_list<ErrorCodes::Error> RemoteCommandRetryScheduler::kNotMasterErrors{ - ErrorCodes::NotMaster, ErrorCodes::NotMasterNoSlaveOk, ErrorCodes::NotMasterOrSecondary}; - -const std::initializer_list<ErrorCodes::Error> RemoteCommandRetryScheduler::kAllRetriableErrors{ - ErrorCodes::NotMaster, - ErrorCodes::NotMasterNoSlaveOk, - ErrorCodes::NotMasterOrSecondary, - // If write concern failed to be satisfied on the remote server, this most probably means that - // some of the secondary nodes were unreachable or otherwise unresponsive, so the call is safe - // to be retried if idempotency can be guaranteed. - ErrorCodes::WriteConcernFailed, - ErrorCodes::HostUnreachable, - ErrorCodes::HostNotFound, - ErrorCodes::NetworkTimeout, - ErrorCodes::PrimarySteppedDown, - ErrorCodes::InterruptedDueToReplStateChange, - ErrorCodes::BalancerInterrupted}; - -std::unique_ptr<RemoteCommandRetryScheduler::RetryPolicy> -RemoteCommandRetryScheduler::makeNoRetryPolicy() { - return makeRetryPolicy(1U, executor::RemoteCommandRequest::kNoTimeout, {}); -} - -std::unique_ptr<RemoteCommandRetryScheduler::RetryPolicy> -RemoteCommandRetryScheduler::makeRetryPolicy( - std::size_t maxAttempts, - Milliseconds maxResponseElapsedTotal, - const std::initializer_list<ErrorCodes::Error>& retryableErrors) { - std::unique_ptr<RetryPolicy> policy = - stdx::make_unique<RetryPolicyImpl>(maxAttempts, maxResponseElapsedTotal, retryableErrors); - return policy; -} - RemoteCommandRetryScheduler::RemoteCommandRetryScheduler( executor::TaskExecutor* executor, const executor::RemoteCommandRequest& request, diff --git a/src/mongo/client/remote_command_retry_scheduler.h b/src/mongo/client/remote_command_retry_scheduler.h index 82766f7ca18..b4cfe52ef88 100644 --- a/src/mongo/client/remote_command_retry_scheduler.h +++ b/src/mongo/client/remote_command_retry_scheduler.h @@ -33,6 +33,8 @@ #include <initializer_list> #include <memory> +#include <fmt/format.h> + #include "mongo/base/error_codes.h" #include "mongo/executor/task_executor.h" #include "mongo/stdx/condition_variable.h" @@ -64,16 +66,6 @@ public: class RetryPolicy; /** - * List of not master error codes. - */ - static const std::initializer_list<ErrorCodes::Error> kNotMasterErrors; - - /** - * List of retriable error codes. - */ - static const std::initializer_list<ErrorCodes::Error> kAllRetriableErrors; - - /** * Generates a retry policy that will send the remote command request to the source at most * once. */ @@ -81,15 +73,12 @@ public: /** * Creates a retry policy that will send the remote command request at most "maxAttempts". - * This policy will also direct the scheduler to stop retrying if it encounters any of the - * errors in "nonRetryableErrors". * (Requires SERVER-24067) The scheduler will also stop retrying if the total elapsed time * of all failed requests exceeds "maxResponseElapsedTotal". */ - static std::unique_ptr<RetryPolicy> makeRetryPolicy( - std::size_t maxAttempts, - Milliseconds maxResponseElapsedTotal, - const std::initializer_list<ErrorCodes::Error>& retryableErrors); + template <ErrorCategory kCategory> + static std::unique_ptr<RetryPolicy> makeRetryPolicy(std::size_t maxAttempts, + Milliseconds maxResponseElapsedTotal); /** * Creates scheduler but does not schedule any remote command request. @@ -128,6 +117,10 @@ public: std::string toString() const; private: + class NoRetryPolicy; + template <ErrorCategory kCategory> + class RetryPolicyForCategory; + /** * Schedules remote command to be run by the executor. * "requestCount" is number of requests scheduled before calling this function. @@ -207,4 +200,69 @@ public: virtual std::string toString() const = 0; }; +class RemoteCommandRetryScheduler::NoRetryPolicy final + : public RemoteCommandRetryScheduler::RetryPolicy { +public: + std::size_t getMaximumAttempts() const override { + return 1U; + } + + Milliseconds getMaximumResponseElapsedTotal() const override { + return executor::RemoteCommandRequest::kNoTimeout; + } + + bool shouldRetryOnError(ErrorCodes::Error error) const override { + return false; + } + + std::string toString() const override { + return R"!({type: "NoRetryPolicy"})!"; + } +}; + +inline auto RemoteCommandRetryScheduler::makeNoRetryPolicy() -> std::unique_ptr<RetryPolicy> { + return std::make_unique<NoRetryPolicy>(); +} + +template <ErrorCategory kCategory> +class RemoteCommandRetryScheduler::RetryPolicyForCategory final + : public RemoteCommandRetryScheduler::RetryPolicy { +public: + RetryPolicyForCategory(std::size_t maximumAttempts, Milliseconds maximumResponseElapsedTotal) + : _maximumAttempts(maximumAttempts), + _maximumResponseElapsedTotal(maximumResponseElapsedTotal){}; + + std::size_t getMaximumAttempts() const override { + return _maximumAttempts; + } + + Milliseconds getMaximumResponseElapsedTotal() const override { + return _maximumResponseElapsedTotal; + } + + bool shouldRetryOnError(ErrorCodes::Error error) const override { + return ErrorCodes::isA<kCategory>(error); + } + + std::string toString() const override { + using namespace fmt::literals; + return R"!({{type: "RetryPolicyForCategory",categoryIndex: {}, maxAttempts: {}, maxTimeMS: {}}})!"_format( + static_cast<std::underlying_type_t<ErrorCategory>>(kCategory), + _maximumAttempts, + _maximumResponseElapsedTotal.count()); + } + +private: + std::size_t _maximumAttempts; + Milliseconds _maximumResponseElapsedTotal; +}; + +template <ErrorCategory kCategory> +auto RemoteCommandRetryScheduler::makeRetryPolicy(std::size_t maxAttempts, + Milliseconds maxResponseElapsedTotal) + -> std::unique_ptr<RetryPolicy> { + return std::make_unique<RetryPolicyForCategory<kCategory>>(maxAttempts, + maxResponseElapsedTotal); +} + } // namespace mongo diff --git a/src/mongo/client/remote_command_retry_scheduler_test.cpp b/src/mongo/client/remote_command_retry_scheduler_test.cpp index 211802e589b..ddc3574de1e 100644 --- a/src/mongo/client/remote_command_retry_scheduler_test.cpp +++ b/src/mongo/client/remote_command_retry_scheduler_test.cpp @@ -185,16 +185,13 @@ TEST_F(RemoteCommandRetrySchedulerTest, MakeSingleShotRetryPolicy) { } TEST_F(RemoteCommandRetrySchedulerTest, MakeRetryPolicy) { - auto policy = RemoteCommandRetryScheduler::makeRetryPolicy( - 5U, - Milliseconds(100), - {ErrorCodes::FailedToParse, ErrorCodes::InvalidNamespace, ErrorCodes::InternalError}); + auto policy = RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::Interruption>( + 5U, Milliseconds(100)); ASSERT_EQUALS(5U, policy->getMaximumAttempts()); ASSERT_EQUALS(Milliseconds(100), policy->getMaximumResponseElapsedTotal()); for (int i = 0; i < int(ErrorCodes::MaxError); ++i) { auto error = ErrorCodes::Error(i); - if (error == ErrorCodes::InternalError || error == ErrorCodes::FailedToParse || - error == ErrorCodes::InvalidNamespace) { + if (ErrorCodes::isA<ErrorCategory::Interruption>(error)) { ASSERT_TRUE(policy->shouldRetryOnError(error)); continue; } @@ -272,7 +269,8 @@ TEST_F(RemoteCommandRetrySchedulerTest, InvalidConstruction) { &getExecutor(), request, callback, - RemoteCommandRetryScheduler::makeRetryPolicy(0, Milliseconds(100), {})), + RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( + 0, Milliseconds(100))), AssertionException, ErrorCodes::BadValue, "policy max attempts cannot be zero"); @@ -283,7 +281,8 @@ TEST_F(RemoteCommandRetrySchedulerTest, InvalidConstruction) { &getExecutor(), request, callback, - RemoteCommandRetryScheduler::makeRetryPolicy(1U, Milliseconds(-100), {})), + RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( + 1U, Milliseconds(-100))), AssertionException, ErrorCodes::BadValue, "policy max response elapsed total cannot be negative"); @@ -318,8 +317,8 @@ TEST_F(RemoteCommandRetrySchedulerTest, StartupFailsWhenSchedulerIsShutDown) { TEST_F(RemoteCommandRetrySchedulerTest, ShuttingDownExecutorAfterSchedulerStartupInvokesCallbackWithCallbackCanceledError) { CallbackResponseSaver callback; - auto policy = RemoteCommandRetryScheduler::makeRetryPolicy( - 10U, Milliseconds(1), {ErrorCodes::HostUnreachable}); + auto policy = RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( + 10U, Milliseconds(1)); RemoteCommandRetryScheduler scheduler( &getExecutor(), request, stdx::ref(callback), std::move(policy)); start(&scheduler); @@ -340,8 +339,8 @@ TEST_F(RemoteCommandRetrySchedulerTest, TEST_F(RemoteCommandRetrySchedulerTest, ShuttingDownSchedulerAfterSchedulerStartupInvokesCallbackWithCallbackCanceledError) { CallbackResponseSaver callback; - auto policy = RemoteCommandRetryScheduler::makeRetryPolicy( - 10U, Milliseconds(1), {ErrorCodes::HostUnreachable}); + auto policy = RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( + 10U, Milliseconds(1)); RemoteCommandRetryScheduler scheduler( &getExecutor(), request, stdx::ref(callback), std::move(policy)); start(&scheduler); @@ -355,8 +354,8 @@ TEST_F(RemoteCommandRetrySchedulerTest, TEST_F(RemoteCommandRetrySchedulerTest, SchedulerInvokesCallbackOnNonRetryableErrorInResponse) { CallbackResponseSaver callback; - auto policy = RemoteCommandRetryScheduler::makeRetryPolicy( - 10U, Milliseconds(1), RemoteCommandRetryScheduler::kNotMasterErrors); + auto policy = RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::NotMasterError>( + 10U, Milliseconds(1)); RemoteCommandRetryScheduler scheduler( &getExecutor(), request, stdx::ref(callback), std::move(policy)); start(&scheduler); @@ -372,8 +371,8 @@ TEST_F(RemoteCommandRetrySchedulerTest, SchedulerInvokesCallbackOnNonRetryableEr TEST_F(RemoteCommandRetrySchedulerTest, SchedulerInvokesCallbackOnFirstSuccessfulResponse) { CallbackResponseSaver callback; - auto policy = RemoteCommandRetryScheduler::makeRetryPolicy( - 10U, Milliseconds(1), {ErrorCodes::HostUnreachable}); + auto policy = RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( + 10U, Milliseconds(1)); RemoteCommandRetryScheduler scheduler( &getExecutor(), request, stdx::ref(callback), std::move(policy)); start(&scheduler); @@ -391,8 +390,8 @@ TEST_F(RemoteCommandRetrySchedulerTest, SchedulerInvokesCallbackOnFirstSuccessfu TEST_F(RemoteCommandRetrySchedulerTest, SchedulerIgnoresEmbeddedErrorInSuccessfulResponse) { CallbackResponseSaver callback; - auto policy = RemoteCommandRetryScheduler::makeRetryPolicy( - 10U, Milliseconds(1), {ErrorCodes::HostUnreachable}); + auto policy = RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( + 10U, Milliseconds(1)); RemoteCommandRetryScheduler scheduler( &getExecutor(), request, stdx::ref(callback), std::move(policy)); start(&scheduler); @@ -412,8 +411,8 @@ TEST_F(RemoteCommandRetrySchedulerTest, SchedulerIgnoresEmbeddedErrorInSuccessfu TEST_F(RemoteCommandRetrySchedulerTest, SchedulerInvokesCallbackWithErrorFromExecutorIfScheduleRemoteCommandFailsOnRetry) { CallbackResponseSaver callback; - auto policy = RemoteCommandRetryScheduler::makeRetryPolicy( - 3U, executor::RemoteCommandRequest::kNoTimeout, {ErrorCodes::HostNotFound}); + auto policy = RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( + 3U, executor::RemoteCommandRequest::kNoTimeout); TaskExecutorWithFailureInScheduleRemoteCommand badExecutor(&getExecutor()); RemoteCommandRetryScheduler scheduler( &badExecutor, request, stdx::ref(callback), std::move(policy)); @@ -433,10 +432,8 @@ TEST_F(RemoteCommandRetrySchedulerTest, TEST_F(RemoteCommandRetrySchedulerTest, SchedulerEnforcesPolicyMaximumAttemptsAndReturnsErrorOfLastFailedRequest) { CallbackResponseSaver callback; - auto policy = RemoteCommandRetryScheduler::makeRetryPolicy( - 3U, - executor::RemoteCommandRequest::kNoTimeout, - RemoteCommandRetryScheduler::kAllRetriableErrors); + auto policy = RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( + 3U, executor::RemoteCommandRequest::kNoTimeout); RemoteCommandRetryScheduler scheduler( &getExecutor(), request, stdx::ref(callback), std::move(policy)); start(&scheduler); @@ -451,8 +448,8 @@ TEST_F(RemoteCommandRetrySchedulerTest, TEST_F(RemoteCommandRetrySchedulerTest, SchedulerShouldRetryUntilSuccessfulResponseIsReceived) { CallbackResponseSaver callback; - auto policy = RemoteCommandRetryScheduler::makeRetryPolicy( - 3U, executor::RemoteCommandRequest::kNoTimeout, {ErrorCodes::HostNotFound}); + auto policy = RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( + 3U, executor::RemoteCommandRequest::kNoTimeout); RemoteCommandRetryScheduler scheduler( &getExecutor(), request, stdx::ref(callback), std::move(policy)); start(&scheduler); diff --git a/src/mongo/db/repl/collection_cloner.cpp b/src/mongo/db/repl/collection_cloner.cpp index cf3d0b63bde..a974144673a 100644 --- a/src/mongo/db/repl/collection_cloner.cpp +++ b/src/mongo/db/repl/collection_cloner.cpp @@ -122,10 +122,9 @@ CollectionCloner::CollectionCloner(executor::TaskExecutor* executor, [this](const executor::TaskExecutor::RemoteCommandCallbackArgs& args) { return _countCallback(args); }, - RemoteCommandRetryScheduler::makeRetryPolicy( + RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( numInitialSyncCollectionCountAttempts.load(), - executor::RemoteCommandRequest::kNoTimeout, - RemoteCommandRetryScheduler::kAllRetriableErrors)), + executor::RemoteCommandRequest::kNoTimeout)), _listIndexesFetcher( _executor, _source, @@ -139,10 +138,9 @@ CollectionCloner::CollectionCloner(executor::TaskExecutor* executor, ReadPreferenceSetting::secondaryPreferredMetadata(), RemoteCommandRequest::kNoTimeout /* find network timeout */, RemoteCommandRequest::kNoTimeout /* getMore network timeout */, - RemoteCommandRetryScheduler::makeRetryPolicy( + RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( numInitialSyncListIndexesAttempts.load(), - executor::RemoteCommandRequest::kNoTimeout, - RemoteCommandRetryScheduler::kAllRetriableErrors)), + executor::RemoteCommandRequest::kNoTimeout)), _indexSpecs(), _documentsToInsert(), _dbWorkTaskRunner(_dbWorkThreadPool), diff --git a/src/mongo/db/repl/database_cloner.cpp b/src/mongo/db/repl/database_cloner.cpp index 1d1f3dda338..1466b7b2dc3 100644 --- a/src/mongo/db/repl/database_cloner.cpp +++ b/src/mongo/db/repl/database_cloner.cpp @@ -112,22 +112,22 @@ DatabaseCloner::DatabaseCloner(executor::TaskExecutor* executor, _storageInterface(si), _collectionWork(collWork), _onCompletion(std::move(onCompletion)), - _listCollectionsFetcher(_executor, - _source, - _dbname, - createListCollectionsCommandObject(_listCollectionsFilter), - [=](const StatusWith<Fetcher::QueryResponse>& result, - Fetcher::NextAction* nextAction, - BSONObjBuilder* getMoreBob) { - _listCollectionsCallback(result, nextAction, getMoreBob); - }, - ReadPreferenceSetting::secondaryPreferredMetadata(), - RemoteCommandRequest::kNoTimeout /* find network timeout */, - RemoteCommandRequest::kNoTimeout /* getMore network timeout */, - RemoteCommandRetryScheduler::makeRetryPolicy( - numInitialSyncListCollectionsAttempts.load(), - executor::RemoteCommandRequest::kNoTimeout, - RemoteCommandRetryScheduler::kAllRetriableErrors)), + _listCollectionsFetcher( + _executor, + _source, + _dbname, + createListCollectionsCommandObject(_listCollectionsFilter), + [=](const StatusWith<Fetcher::QueryResponse>& result, + Fetcher::NextAction* nextAction, + BSONObjBuilder* getMoreBob) { + _listCollectionsCallback(result, nextAction, getMoreBob); + }, + ReadPreferenceSetting::secondaryPreferredMetadata(), + RemoteCommandRequest::kNoTimeout /* find network timeout */, + RemoteCommandRequest::kNoTimeout /* getMore network timeout */, + RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( + numInitialSyncListCollectionsAttempts.load(), + executor::RemoteCommandRequest::kNoTimeout)), _startCollectionCloner([](CollectionCloner& cloner) { return cloner.startup(); }) { // Fetcher throws an exception on null executor. invariant(executor); diff --git a/src/mongo/db/repl/databases_cloner.cpp b/src/mongo/db/repl/databases_cloner.cpp index 9639734e645..54c1b3fdfc5 100644 --- a/src/mongo/db/repl/databases_cloner.cpp +++ b/src/mongo/db/repl/databases_cloner.cpp @@ -209,10 +209,9 @@ Status DatabasesCloner::startup() noexcept { _exec, listDBsReq, [this](const auto& x) { this->_onListDatabaseFinish(x); }, - RemoteCommandRetryScheduler::makeRetryPolicy( + RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( numInitialSyncListDatabasesAttempts.load(), - executor::RemoteCommandRequest::kNoTimeout, - RemoteCommandRetryScheduler::kAllRetriableErrors)); + executor::RemoteCommandRequest::kNoTimeout)); _status = _listDBsScheduler->startup(); if (!_status.isOK()) { diff --git a/src/mongo/db/repl/initial_syncer.cpp b/src/mongo/db/repl/initial_syncer.cpp index 3ed877a9acc..b489e842b4c 100644 --- a/src/mongo/db/repl/initial_syncer.cpp +++ b/src/mongo/db/repl/initial_syncer.cpp @@ -713,10 +713,8 @@ Status InitialSyncer::_scheduleGetBeginFetchingOpTime_inlock( ReadPreferenceSetting::secondaryPreferredMetadata(), RemoteCommandRequest::kNoTimeout /* find network timeout */, RemoteCommandRequest::kNoTimeout /* getMore network timeout */, - RemoteCommandRetryScheduler::makeRetryPolicy( - numInitialSyncOplogFindAttempts.load(), - executor::RemoteCommandRequest::kNoTimeout, - RemoteCommandRetryScheduler::kAllRetriableErrors)); + RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( + numInitialSyncOplogFindAttempts.load(), executor::RemoteCommandRequest::kNoTimeout)); Status scheduleStatus = _beginFetchingOpTimeFetcher->schedule(); if (!scheduleStatus.isOK()) { _beginFetchingOpTimeFetcher.reset(); @@ -837,10 +835,8 @@ void InitialSyncer::_lastOplogEntryFetcherCallbackForBeginApplyingTimestamp( ReadPreferenceSetting::secondaryPreferredMetadata(), RemoteCommandRequest::kNoTimeout /* find network timeout */, RemoteCommandRequest::kNoTimeout /* getMore network timeout */, - RemoteCommandRetryScheduler::makeRetryPolicy( - numInitialSyncOplogFindAttempts.load(), - executor::RemoteCommandRequest::kNoTimeout, - RemoteCommandRetryScheduler::kAllRetriableErrors)); + RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( + numInitialSyncOplogFindAttempts.load(), executor::RemoteCommandRequest::kNoTimeout)); Status scheduleStatus = _fCVFetcher->schedule(); if (!scheduleStatus.isOK()) { _fCVFetcher.reset(); @@ -1542,19 +1538,17 @@ Status InitialSyncer::_scheduleLastOplogEntryFetcher_inlock(Fetcher::CallbackFn BSONObj query = BSON("find" << _opts.remoteOplogNS.coll() << "sort" << BSON("$natural" << -1) << "limit" << 1); - _lastOplogEntryFetcher = - stdx::make_unique<Fetcher>(_exec, - _syncSource, - _opts.remoteOplogNS.db().toString(), - query, - callback, - ReadPreferenceSetting::secondaryPreferredMetadata(), - RemoteCommandRequest::kNoTimeout /* find network timeout */, - RemoteCommandRequest::kNoTimeout /* getMore network timeout */, - RemoteCommandRetryScheduler::makeRetryPolicy( - numInitialSyncOplogFindAttempts.load(), - executor::RemoteCommandRequest::kNoTimeout, - RemoteCommandRetryScheduler::kAllRetriableErrors)); + _lastOplogEntryFetcher = stdx::make_unique<Fetcher>( + _exec, + _syncSource, + _opts.remoteOplogNS.db().toString(), + query, + callback, + ReadPreferenceSetting::secondaryPreferredMetadata(), + RemoteCommandRequest::kNoTimeout /* find network timeout */, + RemoteCommandRequest::kNoTimeout /* getMore network timeout */, + RemoteCommandRetryScheduler::makeRetryPolicy<ErrorCategory::RetriableError>( + numInitialSyncOplogFindAttempts.load(), executor::RemoteCommandRequest::kNoTimeout)); Status scheduleStatus = _lastOplogEntryFetcher->schedule(); if (!scheduleStatus.isOK()) { _lastOplogEntryFetcher.reset(); diff --git a/src/mongo/s/client/shard.cpp b/src/mongo/s/client/shard.cpp index e2c751529e2..d2b007a501a 100644 --- a/src/mongo/s/client/shard.cpp +++ b/src/mongo/s/client/shard.cpp @@ -93,10 +93,8 @@ Status Shard::CommandResponse::processBatchWriteResponse( const Milliseconds Shard::kDefaultConfigCommandTimeout = Seconds{30}; bool Shard::shouldErrorBePropagated(ErrorCodes::Error code) { - return std::find(RemoteCommandRetryScheduler::kAllRetriableErrors.begin(), - RemoteCommandRetryScheduler::kAllRetriableErrors.end(), - code) == RemoteCommandRetryScheduler::kAllRetriableErrors.end() && - code != ErrorCodes::NetworkInterfaceExceededTimeLimit; + return !ErrorCodes::isRetriableError(code) && + (code != ErrorCodes::NetworkInterfaceExceededTimeLimit); } Shard::Shard(const ShardId& id) : _id(id) {} diff --git a/src/mongo/s/client/shard_remote.cpp b/src/mongo/s/client/shard_remote.cpp index 8602c3d31d2..5e5f2617d47 100644 --- a/src/mongo/s/client/shard_remote.cpp +++ b/src/mongo/s/client/shard_remote.cpp @@ -106,14 +106,21 @@ bool ShardRemote::isRetriableError(ErrorCodes::Error code, RetryPolicy options) return false; } - if (options == RetryPolicy::kNoRetry) { - return false; + switch (options) { + case RetryPolicy::kNoRetry: { + return false; + } break; + + case RetryPolicy::kIdempotent: { + return ErrorCodes::isRetriableError(code); + } break; + + case RetryPolicy::kNotIdempotent: { + return ErrorCodes::isNotMasterError(code); + } break; } - const auto& retriableErrors = options == RetryPolicy::kIdempotent - ? RemoteCommandRetryScheduler::kAllRetriableErrors - : RemoteCommandRetryScheduler::kNotMasterErrors; - return std::find(retriableErrors.begin(), retriableErrors.end(), code) != retriableErrors.end(); + MONGO_UNREACHABLE; } const ConnectionString ShardRemote::getConnString() const { diff --git a/src/mongo/s/query/establish_cursors.cpp b/src/mongo/s/query/establish_cursors.cpp index b97f9026d03..a0ff0f6cb07 100644 --- a/src/mongo/s/query/establish_cursors.cpp +++ b/src/mongo/s/query/establish_cursors.cpp @@ -95,13 +95,9 @@ std::vector<RemoteCursor> establishCursors(OperationContext* opCtx, uassertStatusOK(cursor.getStatus()); } - } catch (const DBException& ex) { + } catch (const ExceptionForCat<ErrorCategory::RetriableError>&) { // Retriable errors are swallowed if 'allowPartialResults' is true. - if (allowPartialResults && - std::find(RemoteCommandRetryScheduler::kAllRetriableErrors.begin(), - RemoteCommandRetryScheduler::kAllRetriableErrors.end(), - ex.code()) != - RemoteCommandRetryScheduler::kAllRetriableErrors.end()) { + if (allowPartialResults) { continue; } throw; // Fail this loop. diff --git a/src/mongo/s/transaction_router.cpp b/src/mongo/s/transaction_router.cpp index 5b791ab0270..5e73c898bfd 100644 --- a/src/mongo/s/transaction_router.cpp +++ b/src/mongo/s/transaction_router.cpp @@ -158,13 +158,6 @@ bool isReadConcernLevelAllowedInTransaction(repl::ReadConcernLevel readConcernLe readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern; } -// Returns if the error code would be considered a retryable error for a retryable write. -bool isRetryableWritesError(ErrorCodes::Error code) { - return std::find(RemoteCommandRetryScheduler::kAllRetriableErrors.begin(), - RemoteCommandRetryScheduler::kAllRetriableErrors.end(), - code) != RemoteCommandRetryScheduler::kAllRetriableErrors.end(); -} - // Returns if a transaction's commit result is unknown based on the given statuses. A result is // considered unknown if it would be given the "UnknownTransactionCommitResult" as defined by the // driver transactions specification or fails with one of the errors for invalid write concern that @@ -176,7 +169,7 @@ bool isRetryableWritesError(ErrorCodes::Error code) { // https://github.com/mongodb/specifications/blob/master/source/transactions/transactions.rst#unknowntransactioncommitresult. bool isCommitResultUnknown(const Status& commitStatus, const Status& commitWCStatus) { if (!commitStatus.isOK()) { - return isRetryableWritesError(commitStatus.code()) || + return ErrorCodes::isRetriableError(commitStatus.code()) || ErrorCodes::isExceededTimeLimitError(commitStatus.code()) || commitStatus.code() == ErrorCodes::TransactionTooOld; } diff --git a/src/mongo/util/assert_util_test.cpp b/src/mongo/util/assert_util_test.cpp index 05b33ea3e4b..754d747b5da 100644 --- a/src/mongo/util/assert_util_test.cpp +++ b/src/mongo/util/assert_util_test.cpp @@ -87,9 +87,10 @@ TEST(AssertUtils, UassertNamedCodeWithoutCategories) { ASSERT_NOT_CATCHES(ErrorCodes::BadValue, ExceptionForCat<ErrorCategory::Interruption>); } -// NotMaster - just NotMasterError +// NotMaster - NotMasterError, RetriableError MONGO_STATIC_ASSERT(std::is_same<error_details::ErrorCategoriesFor<ErrorCodes::NotMaster>, - error_details::CategoryList<ErrorCategory::NotMasterError>>()); + error_details::CategoryList<ErrorCategory::NotMasterError, + ErrorCategory::RetriableError>>()); MONGO_STATIC_ASSERT(std::is_base_of<AssertionException, ExceptionFor<ErrorCodes::NotMaster>>()); MONGO_STATIC_ASSERT(!std::is_base_of<ExceptionForCat<ErrorCategory::NetworkError>, ExceptionFor<ErrorCodes::NotMaster>>()); @@ -108,11 +109,12 @@ TEST(AssertUtils, UassertNamedCodeWithOneCategory) { ASSERT_NOT_CATCHES(ErrorCodes::NotMaster, ExceptionForCat<ErrorCategory::Interruption>); } -// InterruptedDueToReplStateChange - NotMasterError and Interruption +// InterruptedDueToReplStateChange - NotMasterError, Interruption, RetriableError MONGO_STATIC_ASSERT( - std::is_same< - error_details::ErrorCategoriesFor<ErrorCodes::InterruptedDueToReplStateChange>, - error_details::CategoryList<ErrorCategory::Interruption, ErrorCategory::NotMasterError>>()); + std::is_same<error_details::ErrorCategoriesFor<ErrorCodes::InterruptedDueToReplStateChange>, + error_details::CategoryList<ErrorCategory::Interruption, + ErrorCategory::NotMasterError, + ErrorCategory::RetriableError>>()); MONGO_STATIC_ASSERT(std::is_base_of<AssertionException, ExceptionFor<ErrorCodes::InterruptedDueToReplStateChange>>()); MONGO_STATIC_ASSERT(!std::is_base_of<ExceptionForCat<ErrorCategory::NetworkError>, |