diff options
20 files changed, 218 insertions, 48 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml index 6a59e155d51..fa0130bfdb9 100644 --- a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml +++ b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml @@ -117,6 +117,7 @@ selector: - jstests/sharding/ssv_config_check.js # Runs replSetGetStatus -- via awaitLastOpCommitted -- directly against the config server: # retries aren't supported. + - jstests/sharding/catalog_refresh_while_set_fcv.js - jstests/sharding/coll_epoch_test1.js - jstests/sharding/move_stale_mongos.js - jstests/sharding/shard4.js diff --git a/jstests/sharding/catalog_refresh_while_set_fcv.js b/jstests/sharding/catalog_refresh_while_set_fcv.js new file mode 100644 index 00000000000..1109794a2d2 --- /dev/null +++ b/jstests/sharding/catalog_refresh_while_set_fcv.js @@ -0,0 +1,71 @@ +/* + * Checks that if the ConfigServerCatalogCacheLoader is in the middle of a refresh (it has read the + * config.collections entry, but not config.chunks yet) when we change FCV, the cache is able to + * refresh correctly. + */ + +// @tags: [multiversion_incompatible] + +(function() { +'use strict'; + +load("jstests/libs/fail_point_util.js"); +load('jstests/libs/parallel_shell_helpers.js'); + +let st = new ShardingTest({mongos: 2, shards: 2}); + +const dbName = "test"; +const collName = "foo"; +const ns = dbName + "." 
+ collName; + +let csrs_config_db = st.configRS.getPrimary().getDB('config'); +const isfeatureFlagShardingFullDDLSupportTimestampedVersionEnabled = + csrs_config_db + .adminCommand({getParameter: 1, featureFlagShardingFullDDLSupportTimestampedVersion: 1}) + .featureFlagShardingFullDDLSupportTimestampedVersion.value; + +function refreshCatalogCacheWhileChangingFCV(newFCVVersion) { + const numRefreshesBefore = st.s0.adminCommand({serverStatus: 1}) + .shardingStatistics.catalogCache.countFullRefreshesStarted; + + assert.commandWorked(st.s0.adminCommand({flushRouterConfig: ns})); + const fp = configureFailPoint(st.s0, "hangBeforeReadingChunks"); + let awaitShell = startParallelShell( + funWithArgs(function(dbName, collName) { + assert.eq(1, db.getSiblingDB(dbName).getCollection(collName).find({x: 1}).itcount()); + }, dbName, collName), st.s0.port); + + fp.wait(); + assert.commandWorked(st.s1.adminCommand({setFeatureCompatibilityVersion: newFCVVersion})); + // Ensure all config servers have replicated the patched-up metadata, so the catalog refresh + // on s0 won't possibly pick a lagged secondary that hasn't replicated it yet. + st.configRS.awaitLastOpCommitted(); + fp.off(); + awaitShell(); + + const numRefreshesAfter = st.s0.adminCommand({serverStatus: 1}) + .shardingStatistics.catalogCache.countFullRefreshesStarted; + + if (isfeatureFlagShardingFullDDLSupportTimestampedVersionEnabled) { + // TODO SERVER-53283 Remove once 5.0 has branched out. + // Expect that the refresh had to be retried due to the ConfigServerCatalogCacheLoader + // finding that the config.chunks format has changed since it had read the + // config.collections earlier. 
+ assert.eq(2, numRefreshesAfter - numRefreshesBefore); + } else { + assert.eq(1, numRefreshesAfter - numRefreshesBefore); + } +} + +assert.commandWorked(st.s0.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV})); + +assert.commandWorked(st.s0.adminCommand({enableSharding: dbName})); +assert.commandWorked(st.s0.adminCommand({movePrimary: dbName, to: st.shard0.shardName})); +assert.commandWorked(st.s0.adminCommand({shardCollection: ns, key: {x: 1}})); +assert.commandWorked(st.s0.getDB(dbName).getCollection(collName).insert({x: 1})); + +refreshCatalogCacheWhileChangingFCV(latestFCV); +refreshCatalogCacheWhileChangingFCV(lastLTSFCV); + +st.stop(); +})(); diff --git a/src/mongo/db/rs_local_client.cpp b/src/mongo/db/rs_local_client.cpp index 41d5ed47cc7..b1d8894d57a 100644 --- a/src/mongo/db/rs_local_client.cpp +++ b/src/mongo/db/rs_local_client.cpp @@ -97,7 +97,8 @@ StatusWith<Shard::QueryResponse> RSLocalClient::queryOnce( const NamespaceString& nss, const BSONObj& query, const BSONObj& sort, - boost::optional<long long> limit) { + boost::optional<long long> limit, + const boost::optional<BSONObj>& hint) { auto replCoord = repl::ReplicationCoordinator::get(opCtx); if (readConcernLevel == repl::ReadConcernLevel::kMajorityReadConcern) { @@ -127,6 +128,9 @@ StatusWith<Shard::QueryResponse> RSLocalClient::queryOnce( if (!sort.isEmpty()) { fullQuery.sort(sort); } + if (hint) { + fullQuery.hint(*hint); + } fullQuery.readPref(readPref.pref, BSONArray()); try { diff --git a/src/mongo/db/rs_local_client.h b/src/mongo/db/rs_local_client.h index 233732b5c8e..db04990e7dc 100644 --- a/src/mongo/db/rs_local_client.h +++ b/src/mongo/db/rs_local_client.h @@ -67,7 +67,8 @@ public: const NamespaceString& nss, const BSONObj& query, const BSONObj& sort, - boost::optional<long long> limit); + boost::optional<long long> limit, + const boost::optional<BSONObj>& hint = boost::none); private: /** diff --git a/src/mongo/db/s/resharding/resharding_coordinator_test.cpp 
b/src/mongo/db/s/resharding/resharding_coordinator_test.cpp index ff0cb7adc5f..08741817814 100644 --- a/src/mongo/db/s/resharding/resharding_coordinator_test.cpp +++ b/src/mongo/db/s/resharding/resharding_coordinator_test.cpp @@ -39,6 +39,7 @@ #include "mongo/db/repl/storage_interface_impl.h" #include "mongo/db/repl/storage_interface_mock.h" #include "mongo/db/s/config/config_server_test_fixture.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/db/s/resharding/resharding_coordinator_service.h" #include "mongo/db/s/resharding_util.h" #include "mongo/db/s/transaction_coordinator_service.h" @@ -167,6 +168,7 @@ protected: ChunkVersion version(1, 0, epoch, boost::none /* timestamp */); ChunkType chunk1(nss, chunkRanges[0], version, ShardId("shard0000")); chunk1.setName(ids[0]); + version.incMinor(); ChunkType chunk2(nss, chunkRanges[1], version, ShardId("shard0001")); chunk2.setName(ids[1]); @@ -530,6 +532,13 @@ protected: client.createCollection(ChunkType::ConfigNS.ns()); client.createCollection(TagsType::ConfigNS.ns()); + + auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard(); + ASSERT_OK(configShard->createIndexOnConfig( + opCtx, + ChunkType::ConfigNS, + BSON(ChunkType::ns() << 1 << ChunkType::lastmod() << 1), + true)); } resharding::insertCoordDocAndChangeOrigCollEntry(opCtx, expectedCoordinatorDoc); diff --git a/src/mongo/db/s/resharding/resharding_donor_service_test.cpp b/src/mongo/db/s/resharding/resharding_donor_service_test.cpp index 8fb8f0c69ad..76463cb7941 100644 --- a/src/mongo/db/s/resharding/resharding_donor_service_test.cpp +++ b/src/mongo/db/s/resharding/resharding_donor_service_test.cpp @@ -90,12 +90,14 @@ protected: return chunks; } - StatusWith<std::vector<ChunkType>> getChunks(OperationContext* opCtx, - const BSONObj& filter, - const BSONObj& sort, - boost::optional<int> limit, - repl::OpTime* opTime, - repl::ReadConcernLevel readConcern) override { + StatusWith<std::vector<ChunkType>> getChunks( + 
OperationContext* opCtx, + const BSONObj& filter, + const BSONObj& sort, + boost::optional<int> limit, + repl::OpTime* opTime, + repl::ReadConcernLevel readConcern, + const boost::optional<BSONObj>& hint) override { auto version = ChunkVersion(1, 0, OID::gen(), boost::none /* timestamp */); return makeChunks(reshardingTempNss(_existingUUID), _recipients, version); } diff --git a/src/mongo/db/s/shard_local.cpp b/src/mongo/db/s/shard_local.cpp index 1467b1cdd65..94429958639 100644 --- a/src/mongo/db/s/shard_local.cpp +++ b/src/mongo/db/s/shard_local.cpp @@ -129,8 +129,10 @@ StatusWith<Shard::QueryResponse> ShardLocal::_exhaustiveFindOnConfig( const NamespaceString& nss, const BSONObj& query, const BSONObj& sort, - boost::optional<long long> limit) { - return _rsLocalClient.queryOnce(opCtx, readPref, readConcernLevel, nss, query, sort, limit); + boost::optional<long long> limit, + const boost::optional<BSONObj>& hint) { + return _rsLocalClient.queryOnce( + opCtx, readPref, readConcernLevel, nss, query, sort, limit, hint); } Status ShardLocal::createIndexOnConfig(OperationContext* opCtx, diff --git a/src/mongo/db/s/shard_local.h b/src/mongo/db/s/shard_local.h index ef50595971c..3991e0999a0 100644 --- a/src/mongo/db/s/shard_local.h +++ b/src/mongo/db/s/shard_local.h @@ -95,7 +95,8 @@ private: const NamespaceString& nss, const BSONObj& query, const BSONObj& sort, - boost::optional<long long> limit) final; + boost::optional<long long> limit, + const boost::optional<BSONObj>& hint = boost::none) final; RSLocalClient _rsLocalClient; }; diff --git a/src/mongo/db/s/sharding_ddl_util_test.cpp b/src/mongo/db/s/sharding_ddl_util_test.cpp index 229049c2545..6d6be9de4e1 100644 --- a/src/mongo/db/s/sharding_ddl_util_test.cpp +++ b/src/mongo/db/s/sharding_ddl_util_test.cpp @@ -54,7 +54,7 @@ protected: ShardType shard0; void setUp() override { - ConfigServerTestFixture::setUp(); + setUpAndInitializeConfigDb(); // Create config.transactions collection auto opCtx = operationContext(); 
diff --git a/src/mongo/s/catalog/sharding_catalog_client.h b/src/mongo/s/catalog/sharding_catalog_client.h index 43c2c160a18..d2d7fde1165 100644 --- a/src/mongo/s/catalog/sharding_catalog_client.h +++ b/src/mongo/s/catalog/sharding_catalog_client.h @@ -173,12 +173,14 @@ public: * * Returns a vector of ChunkTypes, or a !OK status if an error occurs. */ - virtual StatusWith<std::vector<ChunkType>> getChunks(OperationContext* opCtx, - const BSONObj& filter, - const BSONObj& sort, - boost::optional<int> limit, - repl::OpTime* opTime, - repl::ReadConcernLevel readConcern) = 0; + virtual StatusWith<std::vector<ChunkType>> getChunks( + OperationContext* opCtx, + const BSONObj& filter, + const BSONObj& sort, + boost::optional<int> limit, + repl::OpTime* opTime, + repl::ReadConcernLevel readConcern, + const boost::optional<BSONObj>& hint = boost::none) = 0; /** * Retrieves all zones defined for the specified collection. The returned vector is sorted based @@ -341,7 +343,8 @@ private: const NamespaceString& nss, const BSONObj& query, const BSONObj& sort, - boost::optional<long long> limit) = 0; + boost::optional<long long> limit, + const boost::optional<BSONObj>& hint = boost::none) = 0; }; } // namespace mongo diff --git a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp index ff7053ab8ee..d740406f842 100644 --- a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp +++ b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp @@ -404,14 +404,15 @@ StatusWith<std::vector<ChunkType>> ShardingCatalogClientImpl::getChunks( const BSONObj& sort, boost::optional<int> limit, OpTime* opTime, - repl::ReadConcernLevel readConcern) { + repl::ReadConcernLevel readConcern, + const boost::optional<BSONObj>& hint) { invariant(serverGlobalParams.clusterRole == ClusterRole::ConfigServer || readConcern == repl::ReadConcernLevel::kMajorityReadConcern); // Convert boost::optional<int> to boost::optional<long long>. 
auto longLimit = limit ? boost::optional<long long>(*limit) : boost::none; auto findStatus = _exhaustiveFindOnConfig( - opCtx, kConfigReadSelector, readConcern, ChunkType::ConfigNS, query, sort, longLimit); + opCtx, kConfigReadSelector, readConcern, ChunkType::ConfigNS, query, sort, longLimit, hint); if (!findStatus.isOK()) { return findStatus.getStatus().withContext("Failed to load chunks"); } @@ -877,9 +878,10 @@ StatusWith<repl::OpTimeWith<vector<BSONObj>>> ShardingCatalogClientImpl::_exhaus const NamespaceString& nss, const BSONObj& query, const BSONObj& sort, - boost::optional<long long> limit) { + boost::optional<long long> limit, + const boost::optional<BSONObj>& hint) { auto response = Grid::get(opCtx)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig( - opCtx, readPref, readConcern, nss, query, sort, limit); + opCtx, readPref, readConcern, nss, query, sort, limit, hint); if (!response.isOK()) { return response.getStatus(); } diff --git a/src/mongo/s/catalog/sharding_catalog_client_impl.h b/src/mongo/s/catalog/sharding_catalog_client_impl.h index 66f25e71fb3..767b4ae3c17 100644 --- a/src/mongo/s/catalog/sharding_catalog_client_impl.h +++ b/src/mongo/s/catalog/sharding_catalog_client_impl.h @@ -85,12 +85,14 @@ public: StatusWith<std::vector<std::string>> getDatabasesForShard(OperationContext* opCtx, const ShardId& shardName) override; - StatusWith<std::vector<ChunkType>> getChunks(OperationContext* opCtx, - const BSONObj& query, - const BSONObj& sort, - boost::optional<int> limit, - repl::OpTime* opTime, - repl::ReadConcernLevel readConcern) override; + StatusWith<std::vector<ChunkType>> getChunks( + OperationContext* opCtx, + const BSONObj& query, + const BSONObj& sort, + boost::optional<int> limit, + repl::OpTime* opTime, + repl::ReadConcernLevel readConcern, + const boost::optional<BSONObj>& hint = boost::none) override; StatusWith<std::vector<TagsType>> getTagsForCollection(OperationContext* opCtx, const NamespaceString& nss) override; @@ -178,7 
+180,8 @@ private: const NamespaceString& nss, const BSONObj& query, const BSONObj& sort, - boost::optional<long long> limit) override; + boost::optional<long long> limit, + const boost::optional<BSONObj>& hint = boost::none) override; /** * Queries the config servers for the database metadata for the given database, using the diff --git a/src/mongo/s/catalog/sharding_catalog_client_mock.cpp b/src/mongo/s/catalog/sharding_catalog_client_mock.cpp index 7f3c5a90299..deec7539db3 100644 --- a/src/mongo/s/catalog/sharding_catalog_client_mock.cpp +++ b/src/mongo/s/catalog/sharding_catalog_client_mock.cpp @@ -82,7 +82,8 @@ StatusWith<std::vector<ChunkType>> ShardingCatalogClientMock::getChunks( const BSONObj& sort, boost::optional<int> limit, repl::OpTime* opTime, - repl::ReadConcernLevel readConcern) { + repl::ReadConcernLevel readConcern, + const boost::optional<BSONObj>& hint) { return {ErrorCodes::InternalError, "Method not implemented"}; } @@ -174,7 +175,8 @@ ShardingCatalogClientMock::_exhaustiveFindOnConfig(OperationContext* opCtx, const NamespaceString& nss, const BSONObj& query, const BSONObj& sort, - boost::optional<long long> limit) { + boost::optional<long long> limit, + const boost::optional<BSONObj>& hint) { return {ErrorCodes::InternalError, "Method not implemented"}; } diff --git a/src/mongo/s/catalog/sharding_catalog_client_mock.h b/src/mongo/s/catalog/sharding_catalog_client_mock.h index ba7ec3ba81e..0930579d55d 100644 --- a/src/mongo/s/catalog/sharding_catalog_client_mock.h +++ b/src/mongo/s/catalog/sharding_catalog_client_mock.h @@ -67,7 +67,8 @@ public: const BSONObj& sort, boost::optional<int> limit, repl::OpTime* opTime, - repl::ReadConcernLevel readConcern) override; + repl::ReadConcernLevel readConcern, + const boost::optional<BSONObj>& hint) override; StatusWith<std::vector<TagsType>> getTagsForCollection(OperationContext* opCtx, const NamespaceString& nss) override; @@ -139,7 +140,8 @@ private: const NamespaceString& nss, const BSONObj& query, 
const BSONObj& sort, - boost::optional<long long> limit) override; + boost::optional<long long> limit, + const boost::optional<BSONObj>& hint) override; }; } // namespace mongo diff --git a/src/mongo/s/catalog_cache.cpp b/src/mongo/s/catalog_cache.cpp index 2f9c7aca271..d2d494237a2 100644 --- a/src/mongo/s/catalog_cache.cpp +++ b/src/mongo/s/catalog_cache.cpp @@ -207,6 +207,41 @@ StatusWith<ChunkManager> CatalogCache::_getCollectionRoutingInfoAt( std::move(collEntry), atClusterTime); } catch (ExceptionFor<ErrorCodes::ConflictingOperationInProgress>& ex) { + LOGV2_FOR_CATALOG_REFRESH(5310501, + 0, + "Collection refresh failed", + "namespace"_attr = nss, + "exception"_attr = redact(ex)); + _stats.totalRefreshWaitTimeMicros.addAndFetch(t.micros()); + acquireTries++; + if (acquireTries == kMaxInconsistentRoutingInfoRefreshAttempts) { + return ex.toStatus(); + } + } catch (ExceptionFor<ErrorCodes::BadValue>& ex) { + // TODO SERVER-53283: Remove once 5.0 has branched out. + // This would happen when the query to config.chunks fails because the index + // specified in the 'hint' provided by the ConfigServerCatalogCacheLoader no + // longer exists because it was dropped as part of the FCV upgrade/downgrade process + // to/from 5.0. + LOGV2_FOR_CATALOG_REFRESH(5310502, + 0, + "Collection refresh failed", + "namespace"_attr = nss, + "exception"_attr = redact(ex)); + _stats.totalRefreshWaitTimeMicros.addAndFetch(t.micros()); + acquireTries++; + if (acquireTries == kMaxInconsistentRoutingInfoRefreshAttempts) { + return ex.toStatus(); + } + } catch (ExceptionFor<ErrorCodes::QueryPlanKilled>& ex) { + // TODO SERVER-53283: Remove once 5.0 has branched out. + // This would happen when the query to config.chunks is killed because the index it + // relied on has been dropped while the query was ongoing. 
+ LOGV2_FOR_CATALOG_REFRESH(5310503, + 0, + "Collection refresh failed", + "namespace"_attr = nss, + "exception"_attr = redact(ex)); _stats.totalRefreshWaitTimeMicros.addAndFetch(t.micros()); acquireTries++; if (acquireTries == kMaxInconsistentRoutingInfoRefreshAttempts) { diff --git a/src/mongo/s/client/shard.cpp b/src/mongo/s/client/shard.cpp index 90159e313fd..69aefb113eb 100644 --- a/src/mongo/s/client/shard.cpp +++ b/src/mongo/s/client/shard.cpp @@ -237,13 +237,14 @@ StatusWith<Shard::QueryResponse> Shard::exhaustiveFindOnConfig( const NamespaceString& nss, const BSONObj& query, const BSONObj& sort, - const boost::optional<long long> limit) { + const boost::optional<long long> limit, + const boost::optional<BSONObj>& hint) { // Do not allow exhaustive finds to be run against regular shards. invariant(isConfig()); for (int retry = 1; retry <= kOnErrorNumRetries; retry++) { - auto result = - _exhaustiveFindOnConfig(opCtx, readPref, readConcernLevel, nss, query, sort, limit); + auto result = _exhaustiveFindOnConfig( + opCtx, readPref, readConcernLevel, nss, query, sort, limit, hint); if (retry < kOnErrorNumRetries && isRetriableError(result.getStatus().code(), RetryPolicy::kIdempotent)) { diff --git a/src/mongo/s/client/shard.h b/src/mongo/s/client/shard.h index d4ea6cedfc4..6e707423414 100644 --- a/src/mongo/s/client/shard.h +++ b/src/mongo/s/client/shard.h @@ -238,13 +238,15 @@ public: * ShardRemote instances expect "readConcernLevel" to always be kMajorityReadConcern, whereas * ShardLocal instances expect either kLocalReadConcern or kMajorityReadConcern. 
*/ - StatusWith<QueryResponse> exhaustiveFindOnConfig(OperationContext* opCtx, - const ReadPreferenceSetting& readPref, - const repl::ReadConcernLevel& readConcernLevel, - const NamespaceString& nss, - const BSONObj& query, - const BSONObj& sort, - const boost::optional<long long> limit); + StatusWith<QueryResponse> exhaustiveFindOnConfig( + OperationContext* opCtx, + const ReadPreferenceSetting& readPref, + const repl::ReadConcernLevel& readConcernLevel, + const NamespaceString& nss, + const BSONObj& query, + const BSONObj& sort, + const boost::optional<long long> limit, + const boost::optional<BSONObj>& hint = boost::none); /** * Builds an index on a config server collection. @@ -318,7 +320,8 @@ private: const NamespaceString& nss, const BSONObj& query, const BSONObj& sort, - boost::optional<long long> limit) = 0; + boost::optional<long long> limit, + const boost::optional<BSONObj>& hint = boost::none) = 0; /** * Identifier of the shard as obtained from the configuration data (i.e. shard0000). diff --git a/src/mongo/s/client/shard_remote.cpp b/src/mongo/s/client/shard_remote.cpp index 6487164a274..a9d12cf30b3 100644 --- a/src/mongo/s/client/shard_remote.cpp +++ b/src/mongo/s/client/shard_remote.cpp @@ -353,7 +353,8 @@ StatusWith<Shard::QueryResponse> ShardRemote::_exhaustiveFindOnConfig( const NamespaceString& nss, const BSONObj& query, const BSONObj& sort, - boost::optional<long long> limit) { + boost::optional<long long> limit, + const boost::optional<BSONObj>& hint) { invariant(isConfig()); auto const grid = Grid::get(opCtx); @@ -380,6 +381,9 @@ StatusWith<Shard::QueryResponse> ShardRemote::_exhaustiveFindOnConfig( qr.setSort(sort); qr.setReadConcern(readConcernObj); qr.setLimit(limit ? 
static_cast<boost::optional<std::int64_t>>(*limit) : boost::none); + if (hint) { + qr.setHint(*hint); + } if (maxTimeMS < Milliseconds::max()) { qr.setMaxTimeMS(durationCount<Milliseconds>(maxTimeMS)); diff --git a/src/mongo/s/client/shard_remote.h b/src/mongo/s/client/shard_remote.h index da84f2b633b..7cf83cb5a39 100644 --- a/src/mongo/s/client/shard_remote.h +++ b/src/mongo/s/client/shard_remote.h @@ -122,7 +122,8 @@ private: const NamespaceString& nss, const BSONObj& query, const BSONObj& sort, - boost::optional<long long> limit) final; + boost::optional<long long> limit, + const boost::optional<BSONObj>& hint = boost::none) final; StatusWith<AsyncCmdHandle> _scheduleCommand( OperationContext* opCtx, diff --git a/src/mongo/s/config_server_catalog_cache_loader.cpp b/src/mongo/s/config_server_catalog_cache_loader.cpp index cd7133c2ff1..1befe47c719 100644 --- a/src/mongo/s/config_server_catalog_cache_loader.cpp +++ b/src/mongo/s/config_server_catalog_cache_loader.cpp @@ -37,6 +37,7 @@ #include "mongo/db/client.h" #include "mongo/db/operation_context.h" +#include "mongo/logv2/log.h" #include "mongo/s/catalog/sharding_catalog_client.h" #include "mongo/s/grid.h" #include "mongo/util/fail_point.h" @@ -47,6 +48,8 @@ using CollectionAndChangedChunks = CatalogCacheLoader::CollectionAndChangedChunk namespace { +MONGO_FAIL_POINT_DEFINE(hangBeforeReadingChunks); + /** * Structure repsenting the generated query and sort order for a chunk diffing operation. */ @@ -106,6 +109,25 @@ CollectionAndChangedChunks getChangedChunks(OperationContext* opCtx, } }(); + if (MONGO_unlikely(hangBeforeReadingChunks.shouldFail())) { + LOGV2(5310504, "Hit hangBeforeReadingChunks failpoint"); + hangBeforeReadingChunks.pauseWhileSet(opCtx); + } + + // TODO SERVER-53283: Remove once 5.0 has branched out. + // Use a hint to make sure the query will use an index. 
This ensures that the query on + // config.chunks will only execute if config.chunks is guaranteed to still have the same + // metadata format as we inferred from the config.collections entry we read. + // This is because when the config.chunks are patched up as part of the FCV upgrade (or + // downgrade), first the ns_1_lastmod_1 index (or uuid_1_lastmod_1) is dropped, then the 'ns' + // (or 'uuid') fields are unset from config.chunks. If the query is forced to use the expected + // index, we can guarantee that the config.chunks we will read will have the expected format. If + // it doesn't, it means that it's being patched-up. Then the query will fail and the refresh + // will be retried, this time expecting the new metadata format. + const auto hint = coll.getTimestamp() + ? BSON(ChunkType::collectionUUID() << 1 << ChunkType::lastmod() << 1) + : BSON(ChunkType::ns() << 1 << ChunkType::lastmod() << 1); + // Query the chunks which have changed repl::OpTime opTime; const std::vector<ChunkType> changedChunks = uassertStatusOK( @@ -114,7 +136,8 @@ CollectionAndChangedChunks getChangedChunks(OperationContext* opCtx, diffQuery.sort, boost::none, &opTime, - repl::ReadConcernLevel::kMajorityReadConcern)); + repl::ReadConcernLevel::kMajorityReadConcern, + hint)); uassert(ErrorCodes::ConflictingOperationInProgress, "No chunks were found for the collection", |