From 0efb63b64441f935b5d06f4180d0969434327551 Mon Sep 17 00:00:00 2001 From: Dianna Hohensee Date: Fri, 23 Oct 2020 01:48:29 -0400 Subject: SERVER-51319 Call DatabaseShardingState::checkDbVersion() and CSS::getCollectionDescription() safely in AutoGetCollectionLockFree and expand lock-free sharding testing. CollectionShardingState and DatabaseShardingState can now be accessed without a lock via new getSharedForLockFreeReads() functions on their respective interfaces. The shard key will now be available in the AutoGetCollection* RAII types via the CollectionPtr. This allows lock-free reads to acquire a single view of the sharding state to use throughout a read operation. --- etc/evergreen.yml | 13 +-- .../unionWith_sharded_unsharded_mix.js | 1 - .../lookup_graph_lookup_foreign_becomes_sharded.js | 2 +- .../sharding/sharding_statistics_server_status.js | 6 +- ..._and_restarted_shards_agree_on_shard_version.js | 4 +- src/mongo/db/catalog/collection.cpp | 5 ++ src/mongo/db/catalog/collection.h | 13 +++ src/mongo/db/catalog_raii.cpp | 30 ++++++- src/mongo/db/catalog_raii.h | 4 +- src/mongo/db/commands/count_cmd.cpp | 10 +-- src/mongo/db/commands/distinct.cpp | 5 +- src/mongo/db/query/get_executor.cpp | 6 +- src/mongo/db/s/collection_sharding_state.cpp | 14 +++- src/mongo/db/s/collection_sharding_state.h | 8 ++ src/mongo/db/s/database_sharding_state.cpp | 12 ++- src/mongo/db/s/database_sharding_state.h | 8 ++ .../db/s/shard_filtering_metadata_refresh.cpp | 93 +++++++++++++--------- 17 files changed, 165 insertions(+), 69 deletions(-) diff --git a/etc/evergreen.yml b/etc/evergreen.yml index 3fd25aab266..a639fabfa32 100644 --- a/etc/evergreen.yml +++ b/etc/evergreen.yml @@ -11034,14 +11034,17 @@ buildvariants: - name: compile_TG distros: - rhel62-large - - name: .aggregation !.sharded - - name: .concurrency !.large !.ubsan !.no_txns !.debug_only !.sharded - - name: .concurrency .large !.ubsan !.no_txns !.debug_only !.sharded + - name: .aggregation + - name: .concurrency !.large !.ubsan !.no_txns !.debug_only + - name: .concurrency .large !.ubsan !.no_txns !.debug_only distros: - rhel62-medium - - name: .jscore .common !.sharding - - name: .misc_js !.sharded + - name: .jscore .common + - name: .misc_js - name: .replica_sets + - name: .sharding .common + - name: .sharding .jscore + - name: .sharding .txns - name: enterprise-rhel-62-64-bit-time-series-collection display_name: "Enterprise RHEL 6.2 (Time-series Collection)" diff --git a/jstests/noPassthrough/unionWith_sharded_unsharded_mix.js b/jstests/noPassthrough/unionWith_sharded_unsharded_mix.js index f3c5ab0ca60..15051dbd076 100644 --- a/jstests/noPassthrough/unionWith_sharded_unsharded_mix.js +++ b/jstests/noPassthrough/unionWith_sharded_unsharded_mix.js @@ -6,7 +6,6 @@ * do_not_wrap_aggregations_in_facets, * requires_replication, * requires_sharding, - * incompatible_with_lockfreereads, // SERVER-51319 * ] */ diff --git a/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js b/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js index 4bfc3e3e7a5..586df49f9ad 100644 --- a/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js +++ b/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js @@ -169,7 +169,7 @@ profilerHasSingleMatchingEntryOrThrow({ filter: { ns: sourceCollection.getFullName(), errCode: ErrorCodes.StaleConfig, - errMsg: {$regex: `${foreignCollection.getFullName()} is not currently known`} + errMsg: {$regex: `${foreignCollection.getFullName()} is not currently available`} } }); diff --git a/jstests/sharding/sharding_statistics_server_status.js b/jstests/sharding/sharding_statistics_server_status.js index a61a93c60a8..aec7cd04a07 100644 --- a/jstests/sharding/sharding_statistics_server_status.js +++ b/jstests/sharding/sharding_statistics_server_status.js @@ -2,7 +2,11 @@ // Tests that serverStatus includes sharding statistics by default and the sharding statistics are // indeed the correct values. Does not test the catalog cache portion of sharding statistics. // -// @tags: [uses_transactions] +// @tags: [ +// uses_transactions, +// # This test expects to block a migration by hanging (failpoint) a read that holds a lock. +// incompatible_with_lockfreereads, +// ] (function() { 'use strict'; diff --git a/jstests/sharding/stale_mongos_and_restarted_shards_agree_on_shard_version.js b/jstests/sharding/stale_mongos_and_restarted_shards_agree_on_shard_version.js index 341142562e9..4648cb17272 100644 --- a/jstests/sharding/stale_mongos_and_restarted_shards_agree_on_shard_version.js +++ b/jstests/sharding/stale_mongos_and_restarted_shards_agree_on_shard_version.js @@ -212,10 +212,10 @@ const staleMongoS = st.s1; assert.eq(kNumThreadsForConvoyTest, freshMongoS.getDB(kDatabaseName).TestConvoyColl.countDocuments({a: 1})); - // Check if we're convoying counting the number of refresh. + // Check for the expected number of refreshes. const catalogCacheStatistics = st.shard0.adminCommand({serverStatus: 1}).shardingStatistics.catalogCache; - assert.eq(1, catalogCacheStatistics.countFullRefreshesStarted); + assert.eq(kNumThreadsForConvoyTest, catalogCacheStatistics.countFullRefreshesStarted); assert.eq(0, catalogCacheStatistics.countIncrementalRefreshesStarted); } diff --git a/src/mongo/db/catalog/collection.cpp b/src/mongo/db/catalog/collection.cpp index 5a4eeb5c584..59513b73ad1 100644 --- a/src/mongo/db/catalog/collection.cpp +++ b/src/mongo/db/catalog/collection.cpp @@ -103,6 +103,11 @@ void CollectionPtr::restore() const { } } +const BSONObj& CollectionPtr::getShardKeyPattern() const { + dassert(_shardKeyPattern); + return _shardKeyPattern.get(); +} + // ---- namespace { diff --git a/src/mongo/db/catalog/collection.h b/src/mongo/db/catalog/collection.h index 3eab72b82ea..d3f680f8e14 100644 --- a/src/mongo/db/catalog/collection.h +++ b/src/mongo/db/catalog/collection.h @@ -660,6 +660,15 @@ public: friend std::ostream& operator<<(std::ostream& os, const CollectionPtr& coll); + void setShardKeyPattern(const BSONObj& shardKeyPattern) { + _shardKeyPattern = shardKeyPattern.getOwned(); + } + const BSONObj& getShardKeyPattern() const; + + bool isSharded() const { + return static_cast(_shardKeyPattern); + } + private: bool _canYield() const; @@ -670,6 +679,10 @@ private: OperationContext* _opCtx; RestoreFn _restoreFn; + + // Stores a consistent view of shard key with the collection that will be needed during the + // operation. If _shardKeyPattern is set, that indicates that the collection is sharded. + boost::optional _shardKeyPattern = boost::none; }; inline std::ostream& operator<<(std::ostream& os, const CollectionPtr& coll) { diff --git a/src/mongo/db/catalog_raii.cpp b/src/mongo/db/catalog_raii.cpp index 7298e938145..002fbaeda87 100644 --- a/src/mongo/db/catalog_raii.cpp +++ b/src/mongo/db/catalog_raii.cpp @@ -33,6 +33,7 @@ #include "mongo/db/catalog/collection_catalog.h" #include "mongo/db/catalog/database_holder.h" +#include "mongo/db/s/collection_sharding_state.h" #include "mongo/db/s/database_sharding_state.h" #include "mongo/db/views/view_catalog.h" #include "mongo/util/fail_point.h" @@ -150,6 +151,16 @@ AutoGetCollection::AutoGetCollection(OperationContext* opCtx, } } + // Fetch and store the sharding collection description data needed for use during the + // operation. The shardVersion will be checked later if the shard filtering metadata is + // fetched, ensuring both that the collection description info used here and the routing + // table are consistent with the read request's shardVersion. + auto collDesc = + CollectionShardingState::get(opCtx, getNss())->getCollectionDescription(opCtx); + if (collDesc.isSharded()) { + _coll.setShardKeyPattern(collDesc.getKeyPattern()); + } + // If the collection exists, there is no need to check for views. return; } @@ -232,9 +243,26 @@ AutoGetCollectionLockFree::AutoGetCollectionLockFree(OperationContext* opCtx, return _collection.get(); }); - // TODO (SERVER-51319): add DatabaseShardingState::checkDbVersion somewhere. + { + // Check that the sharding database version matches our read. + // Note: this must always be checked, regardless of whether the collection exists, so that + // the dbVersion of this node or the caller gets updated quickly in case either is stale. + auto dss = DatabaseShardingState::getSharedForLockFreeReads(opCtx, _resolvedNss.db()); + auto dssLock = DatabaseShardingState::DSSLock::lockShared(opCtx, dss.get()); + dss->checkDbVersion(opCtx, dssLock); + } if (_collection) { + // Fetch and store the sharding collection description data needed for use during the + // operation. The shardVersion will be checked later if the shard filtering metadata is + // fetched, ensuring both that the collection description info fetched here and the routing + // table are consistent with the read request's shardVersion. + auto collDesc = CollectionShardingState::getSharedForLockFreeReads(opCtx, _collection->ns()) + ->getCollectionDescription(opCtx); + if (collDesc.isSharded()) { + _collectionPtr.setShardKeyPattern(collDesc.getKeyPattern()); + } + // If the collection exists, there is no need to check for views. return; } diff --git a/src/mongo/db/catalog_raii.h b/src/mongo/db/catalog_raii.h index 5ec8b22daa5..df94dbc2f4b 100644 --- a/src/mongo/db/catalog_raii.h +++ b/src/mongo/db/catalog_raii.h @@ -207,7 +207,9 @@ protected: * object goes out of scope. This object ensures the continued existence of a Collection reference, * if the collection exists when this object is instantiated. * - * This class is only used by AutoGetCollectionForReadLockFree. + * NOTE: this class is not safe to instantiate outside of AutoGetCollectionForReadLockFree. For + * example, it does not perform database or collection level shard version checks; nor does it + * establish a consistent storage snapshot with which to read. */ class AutoGetCollectionLockFree { AutoGetCollectionLockFree(const AutoGetCollectionLockFree&) = delete; diff --git a/src/mongo/db/commands/count_cmd.cpp b/src/mongo/db/commands/count_cmd.cpp index ef7e1b07ef8..bb5b45a5c9f 100644 --- a/src/mongo/db/commands/count_cmd.cpp +++ b/src/mongo/db/commands/count_cmd.cpp @@ -177,11 +177,10 @@ public: // Prevent chunks from being cleaned up during yields - this allows us to only check the // version on initial entry into count. - auto* const css = CollectionShardingState::get(opCtx, nss); boost::optional rangePreserver; - if (css->getCollectionDescription(opCtx).isSharded()) { + if (collection.isSharded()) { rangePreserver.emplace( - CollectionShardingState::get(opCtx, nss) + CollectionShardingState::getSharedForLockFreeReads(opCtx, nss) ->getOwnershipFilter( opCtx, CollectionShardingState::OrphanCleanupPolicy::kDisallowOrphanCleanup)); @@ -245,11 +244,10 @@ public: // Prevent chunks from being cleaned up during yields - this allows us to only check the // version on initial entry into count. - auto* const css = CollectionShardingState::get(opCtx, nss); boost::optional rangePreserver; - if (css->getCollectionDescription(opCtx).isSharded()) { + if (collection.isSharded()) { rangePreserver.emplace( - CollectionShardingState::get(opCtx, nss) + CollectionShardingState::getSharedForLockFreeReads(opCtx, nss) ->getOwnershipFilter( opCtx, CollectionShardingState::OrphanCleanupPolicy::kDisallowOrphanCleanup)); diff --git a/src/mongo/db/commands/distinct.cpp b/src/mongo/db/commands/distinct.cpp index f261336321f..78c835cb68d 100644 --- a/src/mongo/db/commands/distinct.cpp +++ b/src/mongo/db/commands/distinct.cpp @@ -206,10 +206,7 @@ public: "Cannot run 'distinct' on a sharded collection in a multi-document transaction. " "Please see http://dochub.mongodb.org/core/transaction-distinct for a recommended " "alternative.", - !opCtx->inMultiDocumentTransaction() || - !CollectionShardingState::get(opCtx, nss) - ->getCollectionDescription(opCtx) - .isSharded()); + !opCtx->inMultiDocumentTransaction() || !ctx->getCollection().isSharded()); } const ExtensionsCallbackReal extensionsCallback(opCtx, &nss); diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 2e9d425424f..884bbc9c311 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -303,10 +303,8 @@ void fillOutPlannerParams(OperationContext* opCtx, // If the caller wants a shard filter, make sure we're actually sharded. if (plannerParams->options & QueryPlannerParams::INCLUDE_SHARD_FILTER) { - auto collDesc = CollectionShardingState::get(opCtx, canonicalQuery->nss()) - ->getCollectionDescription(opCtx); - if (collDesc.isSharded()) { - const auto& keyPattern = collDesc.getKeyPattern(); + if (collection.isSharded()) { + const auto& keyPattern = collection.getShardKeyPattern(); ShardKeyPattern shardKeyPattern(keyPattern); // If the shard key is specified exactly, the query is guaranteed to only target one diff --git a/src/mongo/db/s/collection_sharding_state.cpp b/src/mongo/db/s/collection_sharding_state.cpp index 894291ab676..58055904265 100644 --- a/src/mongo/db/s/collection_sharding_state.cpp +++ b/src/mongo/db/s/collection_sharding_state.cpp @@ -59,7 +59,7 @@ public: _factory->join(); } - CollectionShardingState& getOrCreate(const NamespaceString& nss) { + std::shared_ptr getOrCreate(const NamespaceString& nss) { stdx::lock_guard lg(_mutex); auto it = _collections.find(nss.ns()); @@ -69,7 +69,7 @@ public: it = std::move(inserted.first); } - return *it->second; + return it->second; } void appendInfoForShardingStateCommand(BSONObjBuilder* builder) { @@ -121,13 +121,19 @@ CollectionShardingState* CollectionShardingState::get(OperationContext* opCtx, dassert(opCtx->lockState()->isCollectionLockedForMode(nss, MODE_IS)); auto& collectionsMap = CollectionShardingStateMap::get(opCtx->getServiceContext()); - return &collectionsMap->getOrCreate(nss); + return collectionsMap->getOrCreate(nss).get(); +} + +std::shared_ptr CollectionShardingState::getSharedForLockFreeReads( + OperationContext* opCtx, const NamespaceString& nss) { + auto& collectionsMap = CollectionShardingStateMap::get(opCtx->getServiceContext()); + return collectionsMap->getOrCreate(nss); } CollectionShardingState* CollectionShardingState::get_UNSAFE(ServiceContext* svcCtx, const NamespaceString& nss) { auto& collectionsMap = CollectionShardingStateMap::get(svcCtx); - return &collectionsMap->getOrCreate(nss); + return collectionsMap->getOrCreate(nss).get(); } void CollectionShardingState::appendInfoForShardingStateCommand(OperationContext* opCtx, diff --git a/src/mongo/db/s/collection_sharding_state.h b/src/mongo/db/s/collection_sharding_state.h index 5cf19dfc8ad..cec50383478 100644 --- a/src/mongo/db/s/collection_sharding_state.h +++ b/src/mongo/db/s/collection_sharding_state.h @@ -73,6 +73,14 @@ public: */ static CollectionShardingState* get(OperationContext* opCtx, const NamespaceString& nss); + /** + * Obtain a pointer to the CollectionShardingState that remains safe to access without holding + * a collection lock. Should be called instead of the regular get() if no collection lock is + * held. The returned CollectionShardingState instance should not be modified! + */ + static std::shared_ptr getSharedForLockFreeReads( + OperationContext* opCtx, const NamespaceString& nss); + /** * Reports all collections which have filtering information associated. */ diff --git a/src/mongo/db/s/database_sharding_state.cpp b/src/mongo/db/s/database_sharding_state.cpp index 7d2f2c0ca44..0da6c244c05 100644 --- a/src/mongo/db/s/database_sharding_state.cpp +++ b/src/mongo/db/s/database_sharding_state.cpp @@ -52,7 +52,7 @@ public: DatabaseShardingStateMap() {} - DatabaseShardingState& getOrCreate(const StringData dbName) { + std::shared_ptr getOrCreate(const StringData dbName) { stdx::lock_guard lg(_mutex); auto it = _databases.find(dbName); @@ -63,7 +63,7 @@ public: it = std::move(inserted.first); } - return *it->second; + return it->second; } private: @@ -87,7 +87,13 @@ DatabaseShardingState* DatabaseShardingState::get(OperationContext* opCtx, dassert(opCtx->lockState()->isDbLockedForMode(dbName, MODE_IS)); auto& databasesMap = DatabaseShardingStateMap::get(opCtx->getServiceContext()); - return &databasesMap.getOrCreate(dbName); + return databasesMap.getOrCreate(dbName).get(); +} + +std::shared_ptr DatabaseShardingState::getSharedForLockFreeReads( + OperationContext* opCtx, const StringData dbName) { + auto& databasesMap = DatabaseShardingStateMap::get(opCtx->getServiceContext()); + return databasesMap.getOrCreate(dbName); } void DatabaseShardingState::enterCriticalSectionCatchUpPhase(OperationContext* opCtx, DSSLock&) { diff --git a/src/mongo/db/s/database_sharding_state.h b/src/mongo/db/s/database_sharding_state.h index 0a14b690484..c52b34babd2 100644 --- a/src/mongo/db/s/database_sharding_state.h +++ b/src/mongo/db/s/database_sharding_state.h @@ -65,6 +65,14 @@ public: */ static DatabaseShardingState* get(OperationContext* opCtx, const StringData dbName); + /** + * Obtain a pointer to the DatabaseShardingState that remains safe to access without holding + * a database lock. Should be called instead of the regular get() if no database lock is held. + * The returned DatabaseShardingState instance should not be modified! + */ + static std::shared_ptr getSharedForLockFreeReads( + OperationContext* opCtx, const StringData dbName); + /** * Methods to control the databases's critical section. Must be called with the database X lock * held. diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp index 9a6d18e7b32..b8c7bbff27f 100644 --- a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp +++ b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp @@ -117,9 +117,12 @@ SharedSemiFuture recoverRefreshShardVersion(ServiceContext* serviceContext ON_BLOCK_EXIT([&] { UninterruptibleLockGuard noInterrupt(opCtx->lockState()); // A view can potentially be created after spawning a thread to recover nss's shard - // version. It is then ok to lock also views in order to clear filtering metadata. - AutoGetCollection autoColl( - opCtx, nss, MODE_IX, AutoGetCollectionViewMode::kViewsPermitted); + // version. It is then ok to lock views in order to clear filtering metadata. + // + // DBLock and CollectionLock are used here to avoid throwing further recursive stale + // config errors. + Lock::DBLock dbLock(opCtx, nss.db(), MODE_IX); + Lock::CollectionLock collLock(opCtx, nss, MODE_IX); auto* const csr = CollectionShardingRuntime::get(opCtx, nss); @@ -161,7 +164,8 @@ SharedSemiFuture recoverRefreshShardVersion(ServiceContext* serviceContext // Return true if joins a shard version update/recover/refresh (in that case, all locks are dropped) bool joinShardVersionOperation(OperationContext* opCtx, CollectionShardingRuntime* csr, - boost::optional* collLock, + boost::optional* dbLock, + boost::optional* collLock, boost::optional* csrLock) { invariant(collLock->has_value()); invariant(csrLock->has_value()); @@ -176,6 +180,7 @@ bool joinShardVersionOperation(OperationContext* opCtx, // Drop the locks and wait for an ongoing shard version's recovery/refresh/update csrLock->reset(); collLock->reset(); + dbLock->reset(); if (critSecSignal) { critSecSignal->get(opCtx); @@ -214,14 +219,16 @@ void onShardVersionMismatch(OperationContext* opCtx, boost::optional> inRecoverOrRefresh; while (true) { - boost::optional autoColl; - autoColl.emplace(opCtx, nss, MODE_IS, AutoGetCollectionViewMode::kViewsForbidden); + boost::optional dbLock; + boost::optional collLock; + dbLock.emplace(opCtx, nss.db(), MODE_IS); + collLock.emplace(opCtx, nss, MODE_IS); auto* const csr = CollectionShardingRuntime::get(opCtx, nss); boost::optional csrLock = CollectionShardingRuntime::CSRLock::lockShared(opCtx, csr); - if (joinShardVersionOperation(opCtx, csr, &autoColl, &csrLock)) { + if (joinShardVersionOperation(opCtx, csr, &dbLock, &collLock, &csrLock)) { continue; } @@ -243,7 +250,7 @@ void onShardVersionMismatch(OperationContext* opCtx, // If there is no ongoing shard version operation, initialize the RecoverRefreshThread // thread and associate it to the CSR. - if (!joinShardVersionOperation(opCtx, csr, &autoColl, &csrLock)) { + if (!joinShardVersionOperation(opCtx, csr, &dbLock, &collLock, &csrLock)) { // If the shard doesn't yet know its filtering metadata, recovery needs to be run const bool runRecover = metadata ? false : true; csr->setShardVersionRecoverRefreshFuture( @@ -261,21 +268,27 @@ ScopedShardVersionCriticalSection::ScopedShardVersionCriticalSection(OperationCo : _opCtx(opCtx), _nss(std::move(nss)) { while (true) { + uassert(ErrorCodes::InvalidNamespace, + str::stream() << "Namespace " << nss << " is not a valid collection name", + _nss.isValid()); + // This acquisition is performed with collection lock MODE_S in order to ensure that any - // ongoing writes have completed and become visible - boost::optional autoColl; - autoColl.emplace(_opCtx, - _nss, - MODE_S, - AutoGetCollectionViewMode::kViewsForbidden, - _opCtx->getServiceContext()->getPreciseClockSource()->now() + - Milliseconds(migrationLockAcquisitionMaxWaitMS.load())); + // ongoing writes have completed and become visible. + // + // DBLock and CollectionLock are used here to avoid throwing further recursive stale config + // errors. + boost::optional dbLock; + boost::optional collLock; + auto deadline = _opCtx->getServiceContext()->getPreciseClockSource()->now() + + Milliseconds(migrationLockAcquisitionMaxWaitMS.load()); + dbLock.emplace(_opCtx, _nss.db(), MODE_IS, deadline); + collLock.emplace(_opCtx, _nss, MODE_S, deadline); auto* const csr = CollectionShardingRuntime::get(_opCtx, _nss); boost::optional csrLock = CollectionShardingRuntime::CSRLock::lockShared(_opCtx, csr); - if (joinShardVersionOperation(_opCtx, csr, &autoColl, &csrLock)) { + if (joinShardVersionOperation(_opCtx, csr, &dbLock, &collLock, &csrLock)) { continue; } @@ -283,7 +296,8 @@ ScopedShardVersionCriticalSection::ScopedShardVersionCriticalSection(OperationCo auto metadata = csr->getCurrentMetadataIfKnown(); if (!metadata) { csrLock.reset(); - autoColl.reset(); + collLock.reset(); + dbLock.reset(); onShardVersionMismatch(_opCtx, _nss, boost::none); continue; } @@ -291,7 +305,7 @@ ScopedShardVersionCriticalSection::ScopedShardVersionCriticalSection(OperationCo csrLock.reset(); csrLock.emplace(CollectionShardingRuntime::CSRLock::lockExclusive(_opCtx, csr)); - if (!joinShardVersionOperation(_opCtx, csr, &autoColl, &csrLock)) { + if (!joinShardVersionOperation(_opCtx, csr, &dbLock, &collLock, &csrLock)) { CollectionShardingRuntime::get(_opCtx, _nss) ->enterCriticalSectionCatchUpPhase(*csrLock); break; @@ -303,18 +317,21 @@ ScopedShardVersionCriticalSection::ScopedShardVersionCriticalSection(OperationCo ScopedShardVersionCriticalSection::~ScopedShardVersionCriticalSection() { UninterruptibleLockGuard noInterrupt(_opCtx->lockState()); - AutoGetCollection autoColl(_opCtx, _nss, MODE_IX); + // DBLock and CollectionLock are used here to avoid throwing further recursive stale config + // errors. + Lock::DBLock dbLock(_opCtx, _nss.db(), MODE_IX); + Lock::CollectionLock collLock(_opCtx, _nss, MODE_IX); auto* const csr = CollectionShardingRuntime::get(_opCtx, _nss); csr->exitCriticalSection(_opCtx); } void ScopedShardVersionCriticalSection::enterCommitPhase() { - AutoGetCollection autoColl(_opCtx, - _nss, - MODE_IS, - AutoGetCollectionViewMode::kViewsForbidden, - _opCtx->getServiceContext()->getPreciseClockSource()->now() + - Milliseconds(migrationLockAcquisitionMaxWaitMS.load())); + auto deadline = _opCtx->getServiceContext()->getPreciseClockSource()->now() + + Milliseconds(migrationLockAcquisitionMaxWaitMS.load()); + // DBLock and CollectionLock are used here to avoid throwing further recursive stale config + // errors. + Lock::DBLock dbLock(_opCtx, _nss.db(), MODE_IS, deadline); + Lock::CollectionLock collLock(_opCtx, _nss, MODE_IS, deadline); auto* const csr = CollectionShardingRuntime::get(_opCtx, _nss); auto csrLock = CollectionShardingRuntime::CSRLock::lockExclusive(_opCtx, csr); csr->enterCriticalSectionCommitPhase(csrLock); @@ -382,9 +399,10 @@ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx, Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh(opCtx, nss)); if (!cm.isSharded()) { - // The collection is not sharded. Avoid using AutoGetCollection() as it returns the - // InvalidViewDefinition error code if an invalid view is in the 'system.views' collection. - AutoGetDb autoDb(opCtx, nss.db(), MODE_IX); + // DBLock and CollectionLock are used here to avoid throwing further recursive stale config + // errors, as well as a possible InvalidViewDefinition error if an invalid view is in the + // 'system.views' collection. + Lock::DBLock dbLock(opCtx, nss.db(), MODE_IX); Lock::CollectionLock collLock(opCtx, nss, MODE_IX); CollectionShardingRuntime::get(opCtx, nss) ->setFilteringMetadata(opCtx, CollectionMetadata()); @@ -395,9 +413,10 @@ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx, // Optimistic check with only IS lock in order to avoid threads piling up on the collection X // lock below { - // Avoid using AutoGetCollection() as it returns the InvalidViewDefinition error code - // if an invalid view is in the 'system.views' collection. - AutoGetDb autoDb(opCtx, nss.db(), MODE_IS); + // DBLock and CollectionLock are used here to avoid throwing further recursive stale config + // errors, as well as a possible InvalidViewDefinition error if an invalid view is in the + // 'system.views' collection. + Lock::DBLock dbLock(opCtx, nss.db(), MODE_IS); Lock::CollectionLock collLock(opCtx, nss, MODE_IS); auto optMetadata = CollectionShardingRuntime::get(opCtx, nss)->getCurrentMetadataIfKnown(); @@ -421,10 +440,12 @@ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx, } } - // Exclusive collection lock needed since we're now changing the metadata. Avoid using - // AutoGetCollection() as it returns the InvalidViewDefinition error code if an invalid view is - // in the 'system.views' collection. - AutoGetDb autoDb(opCtx, nss.db(), MODE_IX); + // Exclusive collection lock needed since we're now changing the metadata. + // + // DBLock and CollectionLock are used here to avoid throwing further recursive stale config + // errors, as well as a possible InvalidViewDefinition error if an invalid view is in the + // 'system.views' collection. + Lock::DBLock dbLock(opCtx, nss.db(), MODE_IX); Lock::CollectionLock collLock(opCtx, nss, MODE_IX); auto* const csr = CollectionShardingRuntime::get(opCtx, nss); -- cgit v1.2.1