diff options
author | Pierlauro Sciarelli <pierlauro.sciarelli@mongodb.com> | 2021-12-17 16:53:55 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-12-24 15:40:43 +0000 |
commit | 43d15218d9b05dfac3120428aea9394f8487557e (patch) | |
tree | 44b1ecef675ceb01636ac3ee896f8e867e72f306 | |
parent | c37974e1a2f7e3762f4b8c10446869c833643859 (diff) | |
download | mongo-43d15218d9b05dfac3120428aea9394f8487557e.tar.gz |
SERVER-61003 ReadConcernMajorityNotAvailableYet errors from ShardRegistry must be retried
-rw-r--r-- | src/mongo/s/client/shard_registry.cpp | 30 | ||||
-rw-r--r-- | src/mongo/s/client/shard_registry.h | 2 |
2 files changed, 29 insertions, 3 deletions
diff --git a/src/mongo/s/client/shard_registry.cpp b/src/mongo/s/client/shard_registry.cpp
index 9e7ae13859d..1969f35719c 100644
--- a/src/mongo/s/client/shard_registry.cpp
+++ b/src/mongo/s/client/shard_registry.cpp
@@ -46,7 +46,9 @@
 #include "mongo/s/catalog/sharding_catalog_client.h"
 #include "mongo/s/catalog/type_shard.h"
 #include "mongo/s/grid.h"
+#include "mongo/util/future_util.h"
 #include "mongo/util/str.h"
+#include "mongo/util/testing_proctor.h"
 
 namespace mongo {
 
@@ -54,6 +56,8 @@ namespace {
 
 const Seconds kRefreshPeriod(30);
 
+const Backoff kExponentialBackoff(Seconds(1), Milliseconds::max());
+
 } // namespace
 
 using CallbackArgs = executor::TaskExecutor::CallbackArgs;
@@ -268,9 +272,6 @@ void ShardRegistry::_periodicReload(const CallbackArgs& cbArgs) {
     try {
         reload(opCtx.get());
     } catch (const DBException& e) {
-        if (e.code() == ErrorCodes::ReadConcernMajorityNotAvailableYet) {
-            refreshPeriod = Seconds(1);
-        }
         LOGV2(22727,
               "Error running periodic reload of shard registry caused by {error}; will retry after "
               "{shardRegistryReloadInterval}",
@@ -437,6 +438,29 @@ void ShardRegistry::toBSON(BSONObjBuilder* result) const {
 }
 
 void ShardRegistry::reload(OperationContext* opCtx) {
+    if (MONGO_unlikely(TestingProctor::instance().isEnabled())) {
+        // TODO SERVER-62163 investigate hang on reload in unit tests
+        // Some unit tests don't support running the reload's AsyncTry on the fixed executor.
+        _reloadInternal(opCtx);
+    } else {
+        AsyncTry([=]() mutable {
+            ThreadClient tc("ShardRegistry::reload", getGlobalServiceContext());
+            auto opCtx = tc->makeOperationContext();
+
+            _reloadInternal(opCtx.get());
+        })
+            .until([](Status status) mutable {
+                return status != ErrorCodes::ReadConcernMajorityNotAvailableYet;
+            })
+            .withBackoffBetweenIterations(kExponentialBackoff)
+            .on(Grid::get(opCtx)->getExecutorPool()->getFixedExecutor(),
+                CancellationToken::uncancelable())
+            .semi()
+            .get(opCtx);
+    }
+}
+
+void ShardRegistry::_reloadInternal(OperationContext* opCtx) {
     // Make the next acquire do a lookup.
     auto value = _forceReloadIncrement.addAndFetch(1);
     LOGV2_DEBUG(4620253, 2, "Forcing ShardRegistry reload", "newForceReloadIncrement"_attr = value);
diff --git a/src/mongo/s/client/shard_registry.h b/src/mongo/s/client/shard_registry.h
index d4f98846d4a..fa241007731 100644
--- a/src/mongo/s/client/shard_registry.h
+++ b/src/mongo/s/client/shard_registry.h
@@ -440,6 +440,8 @@ private:
 
     void _periodicReload(const executor::TaskExecutor::CallbackArgs& cbArgs);
 
+    void _reloadInternal(OperationContext* opCtx);
+
     /**
      * Factory to create shards. Never changed after startup so safe to access outside of _mutex.
      */