Diffstat (limited to 'src/mongo/db/s')
 src/mongo/db/s/collection_metadata_filtering_test.cpp         | 221
 src/mongo/db/s/collection_range_deleter.cpp                   |  24
 src/mongo/db/s/collection_range_deleter.h                     |   1
 src/mongo/db/s/collection_range_deleter_test.cpp              |   7
 src/mongo/db/s/collection_sharding_runtime.cpp                |  20
 src/mongo/db/s/collection_sharding_runtime.h                  |  26
 src/mongo/db/s/collection_sharding_state.cpp                  |  21
 src/mongo/db/s/collection_sharding_state.h                    |   9
 src/mongo/db/s/collection_sharding_state_factory_embedded.cpp |  19
 src/mongo/db/s/collection_sharding_state_test.cpp             |  14
 src/mongo/db/s/metadata_manager.cpp                           |  89
 src/mongo/db/s/metadata_manager.h                             |  31
 src/mongo/db/s/metadata_manager_test.cpp                      | 229
 src/mongo/db/s/migration_source_manager.cpp                   |   4
 src/mongo/db/s/shard_filtering_metadata_refresh.cpp           |   7
 src/mongo/db/s/shard_server_op_observer.cpp                   |  15
16 files changed, 409 insertions(+), 328 deletions(-)
diff --git a/src/mongo/db/s/collection_metadata_filtering_test.cpp b/src/mongo/db/s/collection_metadata_filtering_test.cpp
index b9344b364f8..7f777dfa179 100644
--- a/src/mongo/db/s/collection_metadata_filtering_test.cpp
+++ b/src/mongo/db/s/collection_metadata_filtering_test.cpp
@@ -47,15 +47,19 @@ protected:
         _manager = std::make_shared<MetadataManager>(getServiceContext(), kNss, executor());
     }
 
-    // Prepares data with a history array populated:
-    //      chunk1 - [min, -100)
-    //      chunk2 - [-100, 0)
-    //      chunk3 - [0, 100)
-    //      chunk4 - [100, max)
-    // and the history:
-    //      time (now,75) shard0(chunk1, chunk3) shard1(chunk2, chunk4)
-    //      time (75,25) shard0(chunk2, chunk4) shard1(chunk1, chunk3)
-    //      time (25,0) - no history
+    /**
+     * Prepares the CSS for 'kNss' and the standalone '_manager' to have their metadata be a history
+     * array populated as follows:
+     *      chunk1 - [min, -100)
+     *      chunk2 - [-100, 0)
+     *      chunk3 - [0, 100)
+     *      chunk4 - [100, max)
+     *
+     * and the history:
+     *      time (now,75) shard0(chunk1, chunk3) shard1(chunk2, chunk4)
+     *      time (75,25) shard0(chunk2, chunk4) shard1(chunk1, chunk3)
+     *      time (25,0) - no history
+     */
     void prepareTestData() {
         const OID epoch = OID::gen();
         const ShardKeyPattern shardKeyPattern(BSON("_id" << 1));
@@ -93,140 +97,121 @@ protected:
             return std::vector<ChunkType>{chunk1, chunk2, chunk3, chunk4};
         }());
 
-        auto cm = std::make_shared<ChunkManager>(rt, Timestamp(100, 0));
+        auto cm = std::make_shared<ChunkManager>(rt, boost::none);
         ASSERT_EQ(4, cm->numChunks());
+
         {
             AutoGetCollection autoColl(operationContext(), kNss, MODE_X);
             auto* const css = CollectionShardingRuntime::get(operationContext(), kNss);
-
-            css->refreshMetadata(operationContext(),
-                                 std::make_unique<CollectionMetadata>(cm, ShardId("0")));
+            css->setFilteringMetadata(operationContext(), CollectionMetadata(cm, ShardId("0")));
         }
 
-        _manager->refreshActiveMetadata(std::make_unique<CollectionMetadata>(cm, ShardId("0")));
+        _manager->setFilteringMetadata(CollectionMetadata(cm, ShardId("0")));
     }
 
     std::shared_ptr<MetadataManager> _manager;
 };
 
-// Verifies that right set of documents is visible.
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsPresent) {
-    prepareTestData();
-
-    auto metadata = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(100, 0)));
-
-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -500)));
-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 50)));
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -50)));
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 500)));
-}
-
-// Verifies that a different set of documents is visible for a timestamp in the past.
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsPast) {
+// Verifies that right set of documents is visible
+TEST_F(CollectionMetadataFilteringTest, FilterDocumentsInTheFuture) {
     prepareTestData();
 
-    auto metadata = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(50, 0)));
-
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -500)));
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 50)));
-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -50)));
-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 500)));
-}
-
-// Verifies that when accessing too far into the past we get the stale error.
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsStale) {
-    prepareTestData();
+    const auto testFn = [](const ScopedCollectionMetadata& metadata) {
+        ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -500)));
+        ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 50)));
+        ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -50)));
+        ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 500)));
+    };
+
+    {
+        BSONObj readConcern = BSON("readConcern" << BSON("level"
+                                                         << "snapshot"
+                                                         << "atClusterTime"
+                                                         << Timestamp(100, 0)));
+
+        auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
+        ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
+
+        AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
+        auto* const css = CollectionShardingState::get(operationContext(), kNss);
+        testFn(css->getMetadata(operationContext()));
+    }
 
-    auto metadata = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(10, 0)));
-
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -500)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 50)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -50)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 500)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
+    {
+        const auto scm = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(100, 0)));
+        testFn(*scm);
+    }
 }
 
-// The same test as FilterDocumentsPresent but using "readConcern"
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsPresentShardingState) {
+// Verifies that a different set of documents is visible for a timestamp in the past
+TEST_F(CollectionMetadataFilteringTest, FilterDocumentsInThePast) {
     prepareTestData();
 
-    BSONObj readConcern = BSON("readConcern" << BSON("level"
-                                                     << "snapshot"
-                                                     << "atClusterTime"
-                                                     << Timestamp(100, 0)));
-
-    auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
-    ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
-
-    AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
-    auto const css = CollectionShardingState::get(operationContext(), kNss);
-    auto metadata = css->getMetadata(operationContext());
+    const auto testFn = [](const ScopedCollectionMetadata& metadata) {
+        ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -500)));
+        ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 50)));
+        ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -50)));
+        ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 500)));
+    };
+
+    {
+        BSONObj readConcern = BSON("readConcern" << BSON("level"
+                                                         << "snapshot"
+                                                         << "atClusterTime"
+                                                         << Timestamp(50, 0)));
+
+        auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
+        ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
+
+        AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
+        auto* const css = CollectionShardingState::get(operationContext(), kNss);
+        testFn(css->getMetadata(operationContext()));
+    }
 
-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -500)));
-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 50)));
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -50)));
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 500)));
+    {
+        const auto scm = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(50, 0)));
+        testFn(*scm);
+    }
 }
 
-// The same test as FilterDocumentsPast but using "readConcern"
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsPastShardingState) {
+// Verifies that when accessing too far into the past we get the stale error
+TEST_F(CollectionMetadataFilteringTest, FilterDocumentsTooFarInThePastThrowsStaleChunkHistory) {
    prepareTestData();
 
-    BSONObj readConcern = BSON("readConcern" << BSON("level"
-                                                     << "snapshot"
-                                                     << "atClusterTime"
-                                                     << Timestamp(50, 0)));
-
-    auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
-    ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
-
-    AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
-    auto const css = CollectionShardingState::get(operationContext(), kNss);
-    auto metadata = css->getMetadata(operationContext());
-
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -500)));
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 50)));
-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -50)));
-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 500)));
-}
-
-// The same test as FilterDocumentsStale but using "readConcern"
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsStaleShardingState) {
-    prepareTestData();
+    const auto testFn = [](const ScopedCollectionMetadata& metadata) {
+        ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -500)),
+                           AssertionException,
+                           ErrorCodes::StaleChunkHistory);
+        ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 50)),
+                           AssertionException,
+                           ErrorCodes::StaleChunkHistory);
+        ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -50)),
+                           AssertionException,
+                           ErrorCodes::StaleChunkHistory);
+        ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 500)),
+                           AssertionException,
+                           ErrorCodes::StaleChunkHistory);
+    };
+
+    {
+        BSONObj readConcern = BSON("readConcern" << BSON("level"
+                                                         << "snapshot"
+                                                         << "atClusterTime"
+                                                         << Timestamp(10, 0)));
+
+        auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
+        ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
+
+        AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
+        auto* const css = CollectionShardingState::get(operationContext(), kNss);
+        testFn(css->getMetadata(operationContext()));
+    }
 
-    BSONObj readConcern = BSON("readConcern" << BSON("level"
-                                                     << "snapshot"
-                                                     << "atClusterTime"
-                                                     << Timestamp(10, 0)));
-
-    auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
-    ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
-
-    AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
-    auto const css = CollectionShardingState::get(operationContext(), kNss);
-    auto metadata = css->getMetadata(operationContext());
-
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -500)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 50)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -50)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 500)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
+    {
+        const auto scm = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(10, 0)));
+        testFn(*scm);
+    }
 }
-
 }  // namespace
 }  // namespace mongo
diff --git a/src/mongo/db/s/collection_range_deleter.cpp b/src/mongo/db/s/collection_range_deleter.cpp
index 3832d495e88..adfae399073 100644
--- a/src/mongo/db/s/collection_range_deleter.cpp
+++ b/src/mongo/db/s/collection_range_deleter.cpp
@@ -136,7 +136,7 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
         auto& metadataManager = csr->_metadataManager;
 
         if (!_checkCollectionMetadataStillValid(
-                opCtx, nss, epoch, forTestOnly, collection, metadataManager)) {
+                nss, epoch, forTestOnly, collection, metadataManager)) {
             return boost::none;
         }
 
@@ -209,10 +209,11 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
 
         const auto scopedCollectionMetadata =
             metadataManager->getActiveMetadata(metadataManager, boost::none);
+        const auto& metadata = *scopedCollectionMetadata;
 
         try {
             wrote = self->_doDeletion(
-                opCtx, collection, scopedCollectionMetadata->getKeyPattern(), *range, maxToDelete);
+                opCtx, collection, metadata->getKeyPattern(), *range, maxToDelete);
         } catch (const DBException& e) {
             wrote = e.toStatus();
             warning() << e.what();
@@ -255,7 +256,7 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
         auto& metadataManager = csr->_metadataManager;
 
         if (!_checkCollectionMetadataStillValid(
-                opCtx, nss, epoch, forTestOnly, collection, metadataManager)) {
+                nss, epoch, forTestOnly, collection, metadataManager)) {
             return boost::none;
         }
 
@@ -300,7 +301,6 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
 }
 
 bool CollectionRangeDeleter::_checkCollectionMetadataStillValid(
-    OperationContext* opCtx,
     const NamespaceString& nss,
     OID const& epoch,
     CollectionRangeDeleter* forTestOnly,
@@ -310,7 +310,17 @@ bool CollectionRangeDeleter::_checkCollectionMetadataStillValid(
     const auto scopedCollectionMetadata =
         metadataManager->getActiveMetadata(metadataManager, boost::none);
 
-    if (!forTestOnly && (!collection || !scopedCollectionMetadata->isSharded())) {
+    if (!scopedCollectionMetadata) {
+        LOG(0) << "Abandoning any range deletions because the metadata for " << nss.ns()
+               << " was reset";
+        stdx::lock_guard<stdx::mutex> lk(metadataManager->_managerLock);
+        metadataManager->_clearAllCleanups(lk);
+        return false;
+    }
+
+    const auto& metadata = *scopedCollectionMetadata;
+
+    if (!forTestOnly && (!collection || !metadata->isSharded())) {
         if (!collection) {
             LOG(0) << "Abandoning any range deletions left over from dropped " << nss.ns();
         } else {
@@ -323,9 +333,9 @@ bool CollectionRangeDeleter::_checkCollectionMetadataStillValid(
         return false;
     }
 
-    if (!forTestOnly && scopedCollectionMetadata->getCollVersion().epoch() != epoch) {
+    if (!forTestOnly && metadata->getCollVersion().epoch() != epoch) {
         LOG(1) << "Range deletion task for " << nss.ns() << " epoch " << epoch << " woke;"
-               << " (current is " << scopedCollectionMetadata->getCollVersion() << ")";
+               << " (current is " << metadata->getCollVersion() << ")";
         return false;
     }
 
diff --git a/src/mongo/db/s/collection_range_deleter.h b/src/mongo/db/s/collection_range_deleter.h
index f6b9f161a47..7e6cd26f93a 100644
--- a/src/mongo/db/s/collection_range_deleter.h
+++ b/src/mongo/db/s/collection_range_deleter.h
@@ -190,7 +190,6 @@ private:
      * the collection has not been dropped (or dropped then recreated).
     */
     static bool _checkCollectionMetadataStillValid(
-        OperationContext* opCtx,
         const NamespaceString& nss,
         OID const& epoch,
         CollectionRangeDeleter* forTestOnly,
diff --git a/src/mongo/db/s/collection_range_deleter_test.cpp b/src/mongo/db/s/collection_range_deleter_test.cpp
index 3d0d430dd30..a1967d821ec 100644
--- a/src/mongo/db/s/collection_range_deleter_test.cpp
+++ b/src/mongo/db/s/collection_range_deleter_test.cpp
@@ -87,17 +87,14 @@ protected:
 
         AutoGetCollection autoColl(operationContext(), kNss, MODE_IX);
         auto* const css = CollectionShardingRuntime::get(operationContext(), kNss);
-
-        css->refreshMetadata(operationContext(),
-                             stdx::make_unique<CollectionMetadata>(cm, ShardId("thisShard")));
+        css->setFilteringMetadata(operationContext(), CollectionMetadata(cm, ShardId("thisShard")));
     }
 
     void tearDown() override {
         {
             AutoGetCollection autoColl(operationContext(), kNss, MODE_IX);
             auto* const css = CollectionShardingRuntime::get(operationContext(), kNss);
-
-            css->refreshMetadata(operationContext(), nullptr);
+            css->clearFilteringMetadata();
         }
 
         ShardServerTestFixture::tearDown();
diff --git a/src/mongo/db/s/collection_sharding_runtime.cpp b/src/mongo/db/s/collection_sharding_runtime.cpp
index 80a006c61bb..f6f92850408 100644
--- a/src/mongo/db/s/collection_sharding_runtime.cpp
+++ b/src/mongo/db/s/collection_sharding_runtime.cpp
@@ -60,15 +60,15 @@ CollectionShardingRuntime* CollectionShardingRuntime::get(OperationContext* opCt
     return checked_cast<CollectionShardingRuntime*>(css);
 }
 
-void CollectionShardingRuntime::refreshMetadata(OperationContext* opCtx,
-                                                std::unique_ptr<CollectionMetadata> newMetadata) {
+void CollectionShardingRuntime::setFilteringMetadata(OperationContext* opCtx,
+                                                     CollectionMetadata newMetadata) {
     invariant(opCtx->lockState()->isCollectionLockedForMode(_nss.ns(), MODE_X));
 
-    _metadataManager->refreshActiveMetadata(std::move(newMetadata));
+    _metadataManager->setFilteringMetadata(std::move(newMetadata));
 }
 
-void CollectionShardingRuntime::markNotShardedAtStepdown() {
-    _metadataManager->refreshActiveMetadata(nullptr);
+void CollectionShardingRuntime::clearFilteringMetadata() {
+    _metadataManager->clearFilteringMetadata();
 }
 
 auto CollectionShardingRuntime::beginReceive(ChunkRange const& range) -> CleanupNotification {
@@ -100,9 +100,13 @@ Status CollectionShardingRuntime::waitForClean(OperationContext* opCtx,
         {
             // First, see if collection was dropped, but do it in a separate scope in order to
             // not hold reference on it, which would make it appear in use
-            auto metadata =
+            const auto optMetadata =
                 self->_metadataManager->getActiveMetadata(self->_metadataManager, boost::none);
 
+            if (!optMetadata)
+                return {ErrorCodes::ConflictingOperationInProgress,
+                        "Collection being migrated had its metadata reset"};
+
+            const auto& metadata = *optMetadata;
             if (!metadata->isSharded() || metadata->getCollVersion().epoch() != epoch) {
                 return {ErrorCodes::ConflictingOperationInProgress,
                         "Collection being migrated was dropped"};
@@ -139,8 +143,8 @@ boost::optional<ChunkRange> CollectionShardingRuntime::getNextOrphanRange(BSONOb
     return _metadataManager->getNextOrphanRange(from);
 }
 
-ScopedCollectionMetadata CollectionShardingRuntime::_getMetadata(OperationContext* opCtx) {
-    auto atClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAtClusterTime();
+boost::optional<ScopedCollectionMetadata> CollectionShardingRuntime::_getMetadata(
+    const boost::optional<mongo::LogicalTime>& atClusterTime) {
     return _metadataManager->getActiveMetadata(_metadataManager, atClusterTime);
 }
 
diff --git a/src/mongo/db/s/collection_sharding_runtime.h b/src/mongo/db/s/collection_sharding_runtime.h
index 427f5dbd8ba..fe678268ca6 100644
--- a/src/mongo/db/s/collection_sharding_runtime.h
+++ b/src/mongo/db/s/collection_sharding_runtime.h
@@ -61,19 +61,25 @@ public:
     static CollectionShardingRuntime* get(OperationContext* opCtx, const NamespaceString& nss);
 
     /**
-     * Updates the metadata based on changes received from the config server and also resolves the
-     * pending receives map in case some of these pending receives have completed or have been
-     * abandoned. If newMetadata is null, unshard the collection.
+     * Updates the collection's filtering metadata based on changes received from the config server
+     * and also resolves the pending receives map in case some of these pending receives have
+     * committed on the config server or have been abandoned by the donor shard.
      *
-     * Must always be called with an exclusive collection lock.
+     * This method must be called with an exclusive collection lock and it does not acquire any
+     * locks itself.
     */
-    void refreshMetadata(OperationContext* opCtx, std::unique_ptr<CollectionMetadata> newMetadata);
+    void setFilteringMetadata(OperationContext* opCtx, CollectionMetadata newMetadata);
 
     /**
-     * Marks the collection as not sharded at stepdown time so that no filtering will occur for
-     * slaveOk queries.
+     * Marks the collection's filtering metadata as UNKNOWN, meaning that all attempts to check for
+     * shard version match will fail with StaleConfig errors in order to trigger an update.
+     *
+     * It is safe to call this method with only an intent lock on the collection (as opposed to
+     * setFilteringMetadata which requires exclusive), however note that clearing a collection's
+     * filtering metadata will interrupt all in-progress orphan cleanups in which case orphaned data
+     * will remain behind on disk.
     */
-    void markNotShardedAtStepdown();
+    void clearFilteringMetadata();
 
     /**
     * Schedules any documents in `range` for immediate cleanup iff no running queries can depend
@@ -135,7 +141,6 @@ public:
         _metadataManager->toBSONPending(bb);
     }
 
-
 private:
     friend boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
         OperationContext*, NamespaceString const&, OID const&, int, CollectionRangeDeleter*);
@@ -146,7 +151,8 @@ private:
     // Contains all the metadata associated with this collection.
     std::shared_ptr<MetadataManager> _metadataManager;
 
-    ScopedCollectionMetadata _getMetadata(OperationContext* opCtx) override;
+    boost::optional<ScopedCollectionMetadata> _getMetadata(
+        const boost::optional<mongo::LogicalTime>& atClusterTime) override;
 };
 
 /**
diff --git a/src/mongo/db/s/collection_sharding_state.cpp b/src/mongo/db/s/collection_sharding_state.cpp
index 7ba81f81709..45645383969 100644
--- a/src/mongo/db/s/collection_sharding_state.cpp
+++ b/src/mongo/db/s/collection_sharding_state.cpp
@@ -95,6 +95,20 @@ const ServiceContext::Decoration<boost::optional<CollectionShardingStateMap>>
     CollectionShardingStateMap::get =
         ServiceContext::declareDecoration<boost::optional<CollectionShardingStateMap>>();
 
+class UnshardedCollection : public ScopedCollectionMetadata::Impl {
+public:
+    UnshardedCollection() = default;
+
+    const CollectionMetadata& get() override {
+        return _metadata;
+    }
+
+private:
+    CollectionMetadata _metadata;
+};
+
+const auto kUnshardedCollection = std::make_shared<UnshardedCollection>();
+
 }  // namespace
 
 CollectionShardingState::CollectionShardingState(NamespaceString nss) : _nss(std::move(nss)) {}
@@ -114,7 +128,12 @@ void CollectionShardingState::report(OperationContext* opCtx, BSONObjBuilder* bu
 }
 
 ScopedCollectionMetadata CollectionShardingState::getMetadata(OperationContext* opCtx) {
-    return _getMetadata(opCtx);
+    const auto atClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAtClusterTime();
+    auto optMetadata = _getMetadata(atClusterTime);
+    if (!optMetadata)
+        return {kUnshardedCollection};
+
+    return std::move(*optMetadata);
 }
 
 void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) {
diff --git a/src/mongo/db/s/collection_sharding_state.h b/src/mongo/db/s/collection_sharding_state.h
index 74f856924ec..77e11534f73 100644
--- a/src/mongo/db/s/collection_sharding_state.h
+++ b/src/mongo/db/s/collection_sharding_state.h
@@ -31,6 +31,7 @@
 #pragma once
 
 #include "mongo/base/disallow_copying.h"
+#include "mongo/db/logical_time.h"
 #include "mongo/db/namespace_string.h"
 #include "mongo/db/s/scoped_collection_metadata.h"
 #include "mongo/db/s/sharding_migration_critical_section.h"
@@ -114,8 +115,12 @@ private:
     // Tracks the migration critical section state for this collection.
     ShardingMigrationCriticalSection _critSec;
 
-    // Obtains the current metadata for the collection
-    virtual ScopedCollectionMetadata _getMetadata(OperationContext* opCtx) = 0;
+    /**
+     * Obtains the current metadata for the collection or boost::none if the metadata is not yet
+     * known
+     */
+    virtual boost::optional<ScopedCollectionMetadata> _getMetadata(
+        const boost::optional<mongo::LogicalTime>& atClusterTime) = 0;
 };
 
 /**
diff --git a/src/mongo/db/s/collection_sharding_state_factory_embedded.cpp b/src/mongo/db/s/collection_sharding_state_factory_embedded.cpp
index 12b0ad69f5a..4984ef8a9af 100644
--- a/src/mongo/db/s/collection_sharding_state_factory_embedded.cpp
+++ b/src/mongo/db/s/collection_sharding_state_factory_embedded.cpp
@@ -38,20 +38,6 @@
 namespace mongo {
 namespace {
 
-class UnshardedCollection : public ScopedCollectionMetadata::Impl {
-public:
-    UnshardedCollection() = default;
-
-    const CollectionMetadata& get() override {
-        return _metadata;
-    }
-
-private:
-    CollectionMetadata _metadata;
-};
-
-const auto kUnshardedCollection = std::make_shared<UnshardedCollection>();
-
 class CollectionShardingStateFactoryEmbedded final : public CollectionShardingStateFactory {
 public:
     CollectionShardingStateFactoryEmbedded(ServiceContext* serviceContext)
@@ -63,8 +49,9 @@ public:
         CollectionShardingStateStandalone(NamespaceString nss) : CollectionShardingState(nss) {}
 
     private:
-        ScopedCollectionMetadata _getMetadata(OperationContext* opCtx) override {
-            return {kUnshardedCollection};
+        boost::optional<ScopedCollectionMetadata> _getMetadata(
+            const boost::optional<mongo::LogicalTime>& atClusterTime) override {
+            return boost::none;
         }
     };
 
diff --git a/src/mongo/db/s/collection_sharding_state_test.cpp b/src/mongo/db/s/collection_sharding_state_test.cpp
index 2c1386eaae0..f5c4a3aad43 100644
--- a/src/mongo/db/s/collection_sharding_state_test.cpp
+++ b/src/mongo/db/s/collection_sharding_state_test.cpp
@@ -47,7 +47,7 @@ const NamespaceString kTestNss("TestDB", "TestColl");
 * that DeleteState's constructor will extract from its `doc` argument into its member
 * DeleteState::documentKey.
 */
-std::unique_ptr<CollectionMetadata> makeAMetadata(BSONObj const& keyPattern) {
+CollectionMetadata makeAMetadata(BSONObj const& keyPattern) {
     const OID epoch = OID::gen();
     auto range = ChunkRange(BSON("key" << MINKEY), BSON("key" << MAXKEY));
     auto chunk = ChunkType(kTestNss, std::move(range), ChunkVersion(1, 0, epoch), ShardId("other"));
@@ -55,13 +55,15 @@ std::unique_ptr<CollectionMetadata> makeAMetadata(BSONObj const& keyPattern) {
         kTestNss, UUID::gen(), KeyPattern(keyPattern), nullptr, false, epoch, {std::move(chunk)});
     std::shared_ptr<ChunkManager> cm = std::make_shared<ChunkManager>(rt, Timestamp(100, 0));
 
-    return stdx::make_unique<CollectionMetadata>(std::move(cm), ShardId("this"));
+    return CollectionMetadata(std::move(cm), ShardId("this"));
 }
 
 using DeleteStateTest = ShardServerTestFixture;
 
 TEST_F(DeleteStateTest, MakeDeleteStateUnsharded) {
     AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX);
+    auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss);
+    css->setFilteringMetadata(operationContext(), CollectionMetadata());
 
     auto doc = BSON("key3"
                     << "abc"
@@ -85,7 +87,7 @@ TEST_F(DeleteStateTest, MakeDeleteStateShardedWithoutIdInShardKey) {
     auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss);
 
     // Push a CollectionMetadata with a shard key not including "_id"...
-    css->refreshMetadata(operationContext(), makeAMetadata(BSON("key" << 1 << "key3" << 1)));
+    css->setFilteringMetadata(operationContext(), makeAMetadata(BSON("key" << 1 << "key3" << 1)));
 
     // The order of fields in `doc` deliberately does not match the shard key
     auto doc = BSON("key3"
@@ -111,8 +113,8 @@ TEST_F(DeleteStateTest, MakeDeleteStateShardedWithIdInShardKey) {
     auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss);
 
     // Push a CollectionMetadata with a shard key that does have "_id" in the middle...
-    css->refreshMetadata(operationContext(),
-                         makeAMetadata(BSON("key" << 1 << "_id" << 1 << "key2" << 1)));
+    css->setFilteringMetadata(operationContext(),
+                              makeAMetadata(BSON("key" << 1 << "_id" << 1 << "key2" << 1)));
 
     // The order of fields in `doc` deliberately does not match the shard key
     auto doc = BSON("key2" << true << "key3"
@@ -138,7 +140,7 @@ TEST_F(DeleteStateTest, MakeDeleteStateShardedWithIdHashInShardKey) {
     // Push a CollectionMetadata with a shard key "_id", hashed.
     auto aMetadata = makeAMetadata(BSON("_id"
                                         << "hashed"));
-    css->refreshMetadata(operationContext(), std::move(aMetadata));
+    css->setFilteringMetadata(operationContext(), std::move(aMetadata));
 
     auto doc = BSON("key2" << true << "_id"
                            << "hello"
diff --git a/src/mongo/db/s/metadata_manager.cpp b/src/mongo/db/s/metadata_manager.cpp
index f4d9bc1f13f..836370d5d0e 100644
--- a/src/mongo/db/s/metadata_manager.cpp
+++ b/src/mongo/db/s/metadata_manager.cpp
@@ -200,7 +200,7 @@ public:
     }
 
 private:
-    friend ScopedCollectionMetadata MetadataManager::getActiveMetadata(
+    friend boost::optional<ScopedCollectionMetadata> MetadataManager::getActiveMetadata(
         std::shared_ptr<MetadataManager>, const boost::optional<LogicalTime>&);
 
     std::shared_ptr<MetadataManager> _metadataManager;
@@ -216,9 +216,7 @@ MetadataManager::MetadataManager(ServiceContext* serviceContext,
       _receivingChunks(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<BSONObj>()) {}
 
 MetadataManager::~MetadataManager() {
-    stdx::lock_guard<stdx::mutex> lg(_managerLock);
-    _clearAllCleanups(lg);
-    _metadata.clear();
+    clearFilteringMetadata();
 }
 
 void MetadataManager::_clearAllCleanups(WithLock lock) {
@@ -236,17 +234,18 @@ void MetadataManager::_clearAllCleanups(WithLock, Status status) {
     _rangesToClean.clear(status);
 }
 
-ScopedCollectionMetadata MetadataManager::getActiveMetadata(
+boost::optional<ScopedCollectionMetadata> MetadataManager::getActiveMetadata(
     std::shared_ptr<MetadataManager> self, const boost::optional<LogicalTime>& atClusterTime) {
     stdx::lock_guard<stdx::mutex> lg(_managerLock);
 
     if (_metadata.empty()) {
-        return {kUnshardedCollection};
+        return boost::none;
     }
 
     auto metadataTracker = _metadata.back();
     if (!atClusterTime) {
-        return {std::make_shared<RangePreserver>(lg, std::move(self), std::move(metadataTracker))};
+        return ScopedCollectionMetadata(
+            std::make_shared<RangePreserver>(lg, std::move(self), std::move(metadataTracker)));
     }
 
     auto chunkManager = metadataTracker->metadata->getChunkManager();
@@ -266,8 +265,8 @@ ScopedCollectionMetadata MetadataManager::getActiveMetadata(
         CollectionMetadata _metadata;
     };
 
-    return {std::make_shared<MetadataAtTimestamp>(
-        CollectionMetadata(chunkManagerAtClusterTime, metadataTracker->metadata->shardId()))};
+    return ScopedCollectionMetadata(std::make_shared<MetadataAtTimestamp>(
+        CollectionMetadata(chunkManagerAtClusterTime, metadataTracker->metadata->shardId())));
 }
 
 size_t MetadataManager::numberOfMetadataSnapshots() const {
@@ -290,70 +289,53 @@ int MetadataManager::numberOfEmptyMetadataSnapshots() const {
     return emptyMetadataSnapshots;
 }
 
-void MetadataManager::refreshActiveMetadata(std::unique_ptr<CollectionMetadata> remoteMetadata) {
+void MetadataManager::setFilteringMetadata(CollectionMetadata remoteMetadata) {
     stdx::lock_guard<stdx::mutex> lg(_managerLock);
 
-    // Collection was never sharded in the first place. This check is necessary in order to avoid
-    // extraneous logging in the not-a-shard case, because all call sites always try to get the
-    // collection sharding information regardless of whether the node is sharded or not.
-    if (!remoteMetadata && _metadata.empty()) {
-        invariant(_receivingChunks.empty());
-        invariant(_rangesToClean.isEmpty());
-        return;
-    }
-
-    // Collection is becoming unsharded
-    if (!remoteMetadata) {
-        log() << "Marking collection " << _nss.ns() << " with "
-              << redact(_metadata.back()->metadata->toStringBasic()) << " as unsharded";
-
-        _receivingChunks.clear();
-        _clearAllCleanups(lg);
-        _metadata.clear();
-        return;
-    }
-
     // Collection is becoming sharded
     if (_metadata.empty()) {
-        log() << "Marking collection " << _nss.ns() << " as sharded with "
-              << remoteMetadata->toStringBasic();
+        LOG(0) << "Marking collection " << _nss.ns() << " as " << remoteMetadata.toStringBasic();
 
         invariant(_receivingChunks.empty());
-        _setActiveMetadata(lg, std::move(*remoteMetadata));
         invariant(_rangesToClean.isEmpty());
+
+        _setActiveMetadata(lg, std::move(remoteMetadata));
         return;
     }
 
-    auto* const activeMetadata = &_metadata.back()->metadata.get();
+    const auto& activeMetadata = _metadata.back()->metadata;
 
     // If the metadata being installed has a different epoch from ours, this means the collection
     // was dropped and recreated, so we must entirely reset the metadata state
-    if (activeMetadata->getCollVersion().epoch() != remoteMetadata->getCollVersion().epoch()) {
-        log() << "Overwriting metadata for collection " << _nss.ns() << " from "
-              << activeMetadata->toStringBasic() << " to " << remoteMetadata->toStringBasic()
-              << " due to epoch change";
+    if (activeMetadata->getCollVersion().epoch() != remoteMetadata.getCollVersion().epoch()) {
+        LOG(0) << "Updating metadata for collection " << _nss.ns() << " from "
+               << activeMetadata->toStringBasic() << " to " << remoteMetadata.toStringBasic()
+               << " due to epoch change";
 
         _receivingChunks.clear();
-        _setActiveMetadata(lg, std::move(*remoteMetadata));
         _clearAllCleanups(lg);
+        _metadata.clear();
+
+        _setActiveMetadata(lg, std::move(remoteMetadata));
         return;
     }
 
-    // We already have newer version
-    if (activeMetadata->getCollVersion() >= remoteMetadata->getCollVersion()) {
+    // We already have the same or newer version
+    if (activeMetadata->getCollVersion() >= remoteMetadata.getCollVersion()) {
         LOG(1) << "Ignoring update of active metadata " << activeMetadata->toStringBasic()
-               << " with an older " << remoteMetadata->toStringBasic();
+               << " with an older " << remoteMetadata.toStringBasic();
         return;
     }
 
-    log() << "Updating collection metadata for " << _nss.ns() << " from "
-          << activeMetadata->toStringBasic() << " to " << remoteMetadata->toStringBasic();
+    LOG(0) << "Updating metadata for collection " << _nss.ns() << " from "
+           << activeMetadata->toStringBasic() << " to " << remoteMetadata.toStringBasic()
+           << " due to version change";
 
     // Resolve any receiving chunks, which might have completed by now
     for (auto it = _receivingChunks.begin(); it != _receivingChunks.end();) {
         const ChunkRange receivingRange(it->first, it->second);
 
-        if (!remoteMetadata->rangeOverlapsChunk(receivingRange)) {
+        if (!remoteMetadata.rangeOverlapsChunk(receivingRange)) {
             ++it;
             continue;
         }
@@ -367,7 +349,14 @@ void MetadataManager::refreshActiveMetadata(std::unique_ptr<CollectionMetadata>
         it = _receivingChunks.begin();
     }
 
-    _setActiveMetadata(lg, std::move(*remoteMetadata));
+    _setActiveMetadata(lg, std::move(remoteMetadata));
+}
+
+void MetadataManager::clearFilteringMetadata() {
+    stdx::lock_guard<stdx::mutex> lg(_managerLock);
+    _receivingChunks.clear();
+    _clearAllCleanups(lg);
+    _metadata.clear();
 }
 
 void MetadataManager::_setActiveMetadata(WithLock wl, CollectionMetadata newMetadata) {
@@ -381,9 +370,11 @@ void MetadataManager::_retireExpiredMetadata(WithLock lock) {
     // not 0) could have a query that is actually still accessing those documents.
     while (_metadata.size() > 1 && !_metadata.front()->usageCounter) {
         if (!_metadata.front()->orphans.empty()) {
-            log() << "Queries possibly dependent on " << _nss.ns()
-                  << " range(s) finished; scheduling ranges for deletion";
+            LOG(0) << "Queries possibly dependent on " << _nss.ns()
+                   << " range(s) finished; scheduling ranges for deletion";
 
+            // It is safe to push orphan ranges from _metadata.back(), even though new queries might
+            // start any time, because any request to delete a range it maps is rejected.
             _pushListToClean(lock, std::move(_metadata.front()->orphans));
         }
 
@@ -459,7 +450,6 @@ void MetadataManager::_pushListToClean(WithLock, std::list<Deletion> ranges) {
         scheduleCleanup(
             _executor, _nss, _metadata.back()->metadata->getCollVersion().epoch(), *when);
     }
-    invariant(ranges.empty());
 }
 
 auto MetadataManager::beginReceive(ChunkRange const& range) -> CleanupNotification {
@@ -531,6 +521,7 @@ auto MetadataManager::cleanUpRange(ChunkRange const& range, Date_t whenToDelete)
     auto& orphans = overlapMetadata->orphans;
     orphans.emplace_back(ChunkRange(range.getMin().getOwned(), range.getMax().getOwned()),
                          whenToDelete);
+
     return orphans.back().notification;
 }
 
diff --git a/src/mongo/db/s/metadata_manager.h b/src/mongo/db/s/metadata_manager.h
index b6cbf10cbc7..ba83f3e6ada 100644
--- a/src/mongo/db/s/metadata_manager.h
+++ b/src/mongo/db/s/metadata_manager.h
@@ -34,6 +34,7 @@
 
 #include "mongo/base/disallow_copying.h"
 #include "mongo/bson/simple_bsonobj_comparator.h"
+#include "mongo/db/logical_time.h"
 #include "mongo/db/namespace_string.h"
 #include "mongo/db/range_arithmetic.h"
 #include "mongo/db/s/collection_range_deleter.h"
@@ -62,14 +63,17 @@ public:
     ~MetadataManager();
 
     /**
-     * An ActiveMetadata must be set before this function can be called.
+     * If there is no filtering metadata set yet (setFilteringMetadata has not been called) returns
+     * boost::none. Otherwise increments the usage counter of the active metadata and returns an
+     * RAII object, which corresponds to it.
      *
-     * Increments the usage counter of the active metadata and returns an RAII object, which
-     * contains the currently active metadata. When the usageCounter goes to zero, the RAII
-     * object going out of scope will call _removeMetadata.
+     * Holding a reference on a particular instance of the metadata means that orphan cleanup is not
+     * allowed to run and delete chunks which are covered by that metadata. When the returned
+     * ScopedCollectionMetadata goes out of scope, the reference counter on the metadata will be
+     * decremented and if it reaches to zero, orphan cleanup may proceed.
    */
-    ScopedCollectionMetadata getActiveMetadata(std::shared_ptr<MetadataManager> self,
-                                               const boost::optional<LogicalTime>& atClusterTime);
+    boost::optional<ScopedCollectionMetadata> getActiveMetadata(
+        std::shared_ptr<MetadataManager> self, const boost::optional<LogicalTime>& atClusterTime);
 
     /**
     * Returns the number of CollectionMetadata objects being maintained on behalf of running
@@ -85,10 +89,9 @@ public:
     */
     int numberOfEmptyMetadataSnapshots() const;
 
-    /**
-     * Uses the contents of the specified metadata as a way to purge any pending chunks.
-     */
-    void refreshActiveMetadata(std::unique_ptr<CollectionMetadata> newMetadata);
+    void setFilteringMetadata(CollectionMetadata newMetadata);
+
+    void clearFilteringMetadata();
 
     void toBSONPending(BSONArrayBuilder& bb) const;
 
@@ -239,10 +242,10 @@ private:
     // Mutex to protect the state below
     mutable stdx::mutex _managerLock;
 
-    // Contains a list of collection metadata ordered in chronological order based on the refreshes
-    // that occurred. The entry at _metadata.back() is the most recent metadata and is what is
-    // returned to new queries. The rest are previously active collection metadata instances still
-    // in use by active server operations or cursors.
+    // Contains a list of collection metadata for the same collection epoch, ordered in
+    // chronological order based on the refreshes that occurred. The entry at _metadata.back() is
+    // the most recent metadata and is what is returned to new queries. The rest are previously
+    // active collection metadata instances still in use by active server operations or cursors.
     std::list<std::shared_ptr<CollectionMetadataTracker>> _metadata;
 
     // Chunk ranges being migrated into to the shard. Indexed by the min key of the range.
diff --git a/src/mongo/db/s/metadata_manager_test.cpp b/src/mongo/db/s/metadata_manager_test.cpp
index 47241fdee53..29810e0d2da 100644
--- a/src/mongo/db/s/metadata_manager_test.cpp
+++ b/src/mongo/db/s/metadata_manager_test.cpp
@@ -71,7 +71,7 @@ protected:
     /**
     * Returns an instance of CollectionMetadata which has no chunks owned by 'thisShard'.
    */
-    static std::unique_ptr<CollectionMetadata> makeEmptyMetadata() {
+    static CollectionMetadata makeEmptyMetadata() {
         const OID epoch = OID::gen();
 
         auto rt = RoutingTableHistory::makeNew(
@@ -88,7 +88,7 @@ protected:
 
         std::shared_ptr<ChunkManager> cm = std::make_shared<ChunkManager>(rt, boost::none);
 
-        return stdx::make_unique<CollectionMetadata>(cm, kThisShard);
+        return CollectionMetadata(cm, kThisShard);
     }
 
     /**
@@ -99,46 +99,93 @@ protected:
     * It will fassert if the chunk bounds are incorrect or overlap an existing chunk or if the
     * chunk version is lower than the maximum one.
    */
-    static std::unique_ptr<CollectionMetadata> cloneMetadataPlusChunk(
-        const ScopedCollectionMetadata& metadata, const ChunkRange range) {
+    static CollectionMetadata cloneMetadataPlusChunk(const ScopedCollectionMetadata& metadata,
+                                                     const ChunkRange& range) {
         const BSONObj& minKey = range.getMin();
         const BSONObj& maxKey = range.getMax();
-
-        ASSERT(SimpleBSONObjComparator::kInstance.evaluate(minKey < maxKey))
-            << "minKey == " << minKey << "; maxKey == " << maxKey;
         ASSERT(!rangeMapOverlaps(metadata->getChunks(), minKey, maxKey));
 
         auto cm = metadata->getChunkManager();
 
         const auto chunkToSplit = cm->findIntersectingChunkWithSimpleCollation(minKey);
-        ASSERT(SimpleBSONObjComparator::kInstance.evaluate(maxKey <= chunkToSplit.getMax()))
-            << "maxKey == " << maxKey << " and chunkToSplit.getMax() == " << chunkToSplit.getMax();
+        ASSERT_BSONOBJ_GTE(minKey, chunkToSplit.getMin());
+        ASSERT_BSONOBJ_LT(maxKey, chunkToSplit.getMax());
+
+        std::vector<ChunkType> splitChunks;
+
+        auto chunkVersion = cm->getVersion();
+
+        if (SimpleBSONObjComparator::kInstance.evaluate(chunkToSplit.getMin() < minKey)) {
+            chunkVersion.incMajor();
+            splitChunks.emplace_back(
+                kNss, ChunkRange(chunkToSplit.getMin(), minKey), chunkVersion, kOtherShard);
+        }
+
+        chunkVersion.incMajor();
+        splitChunks.emplace_back(kNss, ChunkRange(minKey, maxKey), chunkVersion, kThisShard);
+
+        chunkVersion.incMajor();
+        splitChunks.emplace_back(
+            kNss, ChunkRange(maxKey, chunkToSplit.getMax()), chunkVersion, kOtherShard);
 
-        auto v1 = cm->getVersion();
-        v1.incMajor();
-        auto v2 = v1;
-        v2.incMajor();
-        auto v3 = v2;
-        v3.incMajor();
+        auto rt = cm->getRoutingHistory()->makeUpdated(splitChunks);
+
+        return CollectionMetadata(std::make_shared<ChunkManager>(rt, boost::none), kThisShard);
+    }
+
+    static CollectionMetadata cloneMetadataMinusChunk(const ScopedCollectionMetadata& metadata,
+                                                      const ChunkRange& range) {
+        const BSONObj& minKey = range.getMin();
+        const BSONObj& maxKey = range.getMax();
+        ASSERT(rangeMapOverlaps(metadata->getChunks(), minKey, maxKey));
+
+        auto cm = metadata->getChunkManager();
+
+        const auto chunkToMoveOut = cm->findIntersectingChunkWithSimpleCollation(minKey);
+        ASSERT_BSONOBJ_EQ(minKey, chunkToMoveOut.getMin());
+        ASSERT_BSONOBJ_EQ(maxKey, chunkToMoveOut.getMax());
+
+        auto chunkVersion = cm->getVersion();
+        chunkVersion.incMajor();
 
         auto rt = cm->getRoutingHistory()->makeUpdated(
-            {ChunkType{kNss, ChunkRange{chunkToSplit.getMin(), minKey}, v1, kOtherShard},
-             ChunkType{kNss, ChunkRange{minKey, maxKey}, v2, kThisShard},
-             ChunkType{kNss, ChunkRange{maxKey, chunkToSplit.getMax()}, v3, kOtherShard}});
+            {ChunkType(kNss, ChunkRange(minKey, maxKey), chunkVersion, kOtherShard)});
 
-        return stdx::make_unique<CollectionMetadata>(
-            std::make_shared<ChunkManager>(rt, boost::none), kThisShard);
+        return CollectionMetadata(std::make_shared<ChunkManager>(rt, boost::none), kThisShard);
     }
 
     std::shared_ptr<MetadataManager> _manager;
 };
 
+TEST_F(MetadataManagerTest, InitialMetadataIsUnknown) {
+    ASSERT(!_manager->getActiveMetadata(_manager, boost::none));
+    ASSERT(!_manager->getActiveMetadata(_manager, LogicalTime(Timestamp(10))));
+
+    ASSERT_EQ(0UL, _manager->numberOfMetadataSnapshots());
+    ASSERT_EQ(0UL, _manager->numberOfRangesToClean());
+    ASSERT_EQ(0UL, _manager->numberOfRangesToCleanStillInUse());
+}
+
+TEST_F(MetadataManagerTest, MetadataAfterClearIsUnknown) {
+    _manager->setFilteringMetadata(makeEmptyMetadata());
+    ASSERT(_manager->getActiveMetadata(_manager, boost::none));
+    ASSERT(_manager->getActiveMetadata(_manager, LogicalTime(Timestamp(10))));
+
+    _manager->clearFilteringMetadata();
+    ASSERT(!_manager->getActiveMetadata(_manager, boost::none));
+    ASSERT(!_manager->getActiveMetadata(_manager, LogicalTime(Timestamp(10))));
+
+    ASSERT_EQ(0UL, _manager->numberOfMetadataSnapshots());
+    ASSERT_EQ(0UL, _manager->numberOfRangesToClean());
+    ASSERT_EQ(0UL, _manager->numberOfRangesToCleanStillInUse());
+}
+
 TEST_F(MetadataManagerTest, CleanUpForMigrateIn) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());
 
     // Sanity checks
-    ASSERT(_manager->getActiveMetadata(_manager, boost::none)->isSharded());
-    ASSERT_EQ(0UL, _manager->getActiveMetadata(_manager, boost::none)->getChunks().size());
+    ASSERT((*_manager->getActiveMetadata(_manager, boost::none))->isSharded());
+    ASSERT_EQ(0UL, (*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size());
 
     ChunkRange range1(BSON("key" << 0), BSON("key" << 10));
     ChunkRange range2(BSON("key" << 10), BSON("key" << 20));
@@ -157,12 +204,14 @@ TEST_F(MetadataManagerTest, CleanUpForMigrateIn) {
 }
 
 TEST_F(MetadataManagerTest, AddRangeNotificationsBlockAndYield) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());
 
     ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
+
     auto notifn1 = _manager->cleanUpRange(cr1, Date_t{});
     ASSERT_FALSE(notifn1.ready());
     ASSERT_EQ(_manager->numberOfRangesToClean(), 1UL);
+
     auto optNotifn = _manager->trackOrphanedDataCleanup(cr1);
     ASSERT_FALSE(notifn1.ready());
     ASSERT_FALSE(optNotifn->ready());
@@ -172,7 +221,7 @@ TEST_F(MetadataManagerTest, AddRangeNotificationsBlockAndYield) {
 }
 
 TEST_F(MetadataManagerTest, NotificationBlocksUntilDeletion) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());
 
     ChunkRange cr1(BSON("key" << 20), BSON("key" << 30));
     auto optNotif = _manager->trackOrphanedDataCleanup(cr1);
@@ -185,28 +234,28 @@ TEST_F(MetadataManagerTest, NotificationBlocksUntilDeletion) {
         auto scm1 = _manager->getActiveMetadata(_manager, boost::none);  // and increment refcount
 
         const auto addChunk = [this] {
-            _manager->refreshActiveMetadata(
-                cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none),
+            _manager->setFilteringMetadata(
+                cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none),
                                        {BSON("key" << 0), BSON("key" << 20)}));
         };
 
        addChunk();                                                      // push new metadata
        auto scm2 = _manager->getActiveMetadata(_manager, boost::none);  // and increment refcount
-        ASSERT_EQ(1ULL, scm2->getChunks().size());
+        ASSERT_EQ(1ULL, (*scm2)->getChunks().size());
 
        // Simulate drop and recreate
-        _manager->refreshActiveMetadata(makeEmptyMetadata());
+        _manager->setFilteringMetadata(makeEmptyMetadata());
 
        addChunk();                                                      // push new metadata
        auto scm3 = _manager->getActiveMetadata(_manager, boost::none);  // and increment refcount
-        ASSERT_EQ(1ULL, scm3->getChunks().size());
+        ASSERT_EQ(1ULL, (*scm3)->getChunks().size());
 
-        ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 3UL);
-        ASSERT_EQ(_manager->numberOfRangesToClean(), 0UL);  // not yet...
+        ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 0UL);
+        ASSERT_EQ(_manager->numberOfRangesToClean(), 0UL);
 
        optNotif = _manager->cleanUpRange(cr1, Date_t{});
        ASSERT(optNotif);
-        ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 3UL);
+        ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 0UL);
        ASSERT_EQ(_manager->numberOfRangesToClean(), 1UL);
     }
 
@@ -227,74 +276,102 @@ TEST_F(MetadataManagerTest, NotificationBlocksUntilDeletion) {
     optNotif2->abandon();
 }
 
+TEST_F(MetadataManagerTest, CleanupNotificationsAreSignaledOnDropAndRecreate) {
+    const ChunkRange rangeToClean(BSON("key" << 20), BSON("key" << 30));
+
+    _manager->setFilteringMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none),
+                               {BSON("key" << 0), BSON("key" << 20)}));
+
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), rangeToClean));
+    auto cursorOnMovedMetadata = _manager->getActiveMetadata(_manager, boost::none);
+
+    _manager->setFilteringMetadata(
+        cloneMetadataMinusChunk(*_manager->getActiveMetadata(_manager, boost::none), rangeToClean));
+
+    auto notif = _manager->cleanUpRange(rangeToClean, Date_t{});
+    ASSERT(!notif.ready());
+
+    auto optNotif = _manager->trackOrphanedDataCleanup(rangeToClean);
+    ASSERT(optNotif);
+    ASSERT(!optNotif->ready());
+
+    _manager->setFilteringMetadata(makeEmptyMetadata());
+    ASSERT(notif.ready());
+    ASSERT(optNotif->ready());
+}
+
 TEST_F(MetadataManagerTest, RefreshAfterSuccessfulMigrationSinglePending) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());
 
     ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
 
-    _manager->refreshActiveMetadata(
-        cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none), cr1));
-    ASSERT_EQ(_manager->getActiveMetadata(_manager, boost::none)->getChunks().size(), 1UL);
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), cr1));
+    ASSERT_EQ((*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 1UL);
 }
 
 TEST_F(MetadataManagerTest, RefreshAfterSuccessfulMigrationMultiplePending) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());
 
     ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
     ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));
 
     {
-        _manager->refreshActiveMetadata(
-            cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none), cr1));
+        _manager->setFilteringMetadata(
+            cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), cr1));
        ASSERT_EQ(_manager->numberOfRangesToClean(), 0UL);
-        ASSERT_EQ(_manager->getActiveMetadata(_manager, boost::none)->getChunks().size(), 1UL);
+        ASSERT_EQ((*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 1UL);
     }
 
     {
-        _manager->refreshActiveMetadata(
-            cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none), cr2));
-        ASSERT_EQ(_manager->getActiveMetadata(_manager, boost::none)->getChunks().size(), 2UL);
+        _manager->setFilteringMetadata(
+            cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), cr2));
+        ASSERT_EQ((*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 2UL);
     }
 }
 
 TEST_F(MetadataManagerTest, RefreshAfterNotYetCompletedMigrationMultiplePending) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());
 
     ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
     ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));
 
-    _manager->refreshActiveMetadata(
-        cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none),
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none),
                                {BSON("key" << 50), BSON("key" << 60)}));
-    ASSERT_EQ(_manager->getActiveMetadata(_manager, boost::none)->getChunks().size(), 1UL);
+    ASSERT_EQ((*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 1UL);
 }
 
 TEST_F(MetadataManagerTest, BeginReceiveWithOverlappingRange) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());
 
     ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
     ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));
 
-    _manager->refreshActiveMetadata(
-        cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none), cr1));
-    _manager->refreshActiveMetadata(
-        cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none), cr2));
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), cr1));
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), cr2));
 
     ChunkRange crOverlap(BSON("key" << 5), BSON("key" << 35));
 }
 
 TEST_F(MetadataManagerTest, RefreshMetadataAfterDropAndRecreate) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
-    _manager->refreshActiveMetadata(cloneMetadataPlusChunk(
-        _manager->getActiveMetadata(_manager, boost::none), {BSON("key" << 0), BSON("key" << 10)}));
+    _manager->setFilteringMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none),
+                               {BSON("key" << 0), BSON("key" << 10)}));
 
     // Now, pretend that the collection was dropped and recreated
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
-    _manager->refreshActiveMetadata(
-        cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none),
+    _manager->setFilteringMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none),
                                {BSON("key" << 20), BSON("key" << 30)}));
 
-    const auto chunks = _manager->getActiveMetadata(_manager, boost::none)->getChunks();
+    const auto chunks = (*_manager->getActiveMetadata(_manager, boost::none))->getChunks();
     ASSERT_EQ(1UL, chunks.size());
     const auto chunkEntry = chunks.begin();
     ASSERT_BSONOBJ_EQ(BSON("key" << 20), chunkEntry->first);
@@ -303,7 +380,7 @@ TEST_F(MetadataManagerTest, RefreshMetadataAfterDropAndRecreate) {
 
 // Tests membership functions for _rangesToClean
 TEST_F(MetadataManagerTest, RangesToCleanMembership) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());
 
     ChunkRange cr(BSON("key" << 0), BSON("key" << 10));
 
@@ -317,19 +394,19 @@ TEST_F(MetadataManagerTest, RangesToCleanMembership) {
 }
 
 TEST_F(MetadataManagerTest, ClearUnneededChunkManagerObjectsLastSnapshotInList) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());
 
     ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
     ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));
 
-    auto scm1 = _manager->getActiveMetadata(_manager, boost::none);
+    auto scm1 = *_manager->getActiveMetadata(_manager, boost::none);
     {
-        _manager->refreshActiveMetadata(cloneMetadataPlusChunk(scm1, cr1));
+        _manager->setFilteringMetadata(cloneMetadataPlusChunk(scm1, cr1));
        ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 1UL);
        ASSERT_EQ(_manager->numberOfRangesToClean(), 0UL);
 
-        auto scm2 = _manager->getActiveMetadata(_manager, boost::none);
+        auto scm2 = *_manager->getActiveMetadata(_manager, boost::none);
        ASSERT_EQ(scm2->getChunks().size(), 1UL);
-        _manager->refreshActiveMetadata(cloneMetadataPlusChunk(scm2, cr2));
+        _manager->setFilteringMetadata(cloneMetadataPlusChunk(scm2, cr2));
        ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 2UL);
        ASSERT_EQ(_manager->numberOfEmptyMetadataSnapshots(), 0);
     }
@@ -338,29 +415,29 @@ TEST_F(MetadataManagerTest, ClearUnneededChunkManagerObjectsLastSnapshotInList)
     // is now out of scope, but that in scm1 should remain
     ASSERT_EQ(_manager->numberOfEmptyMetadataSnapshots(), 1);
     ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 2UL);
-    ASSERT_EQ((_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 2UL);
+    ASSERT_EQ((*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 2UL);
 }
 
 TEST_F(MetadataManagerTest, ClearUnneededChunkManagerObjectSnapshotInMiddleOfList) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());
 
     ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
     ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));
     ChunkRange cr3(BSON("key" << 50), BSON("key" << 80));
     ChunkRange cr4(BSON("key" << 90), BSON("key" << 100));
 
-    auto scm = _manager->getActiveMetadata(_manager, boost::none);
-    _manager->refreshActiveMetadata(cloneMetadataPlusChunk(scm, cr1));
+    auto scm = *_manager->getActiveMetadata(_manager, boost::none);
+    _manager->setFilteringMetadata(cloneMetadataPlusChunk(scm, cr1));
     ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 1UL);
     ASSERT_EQ(_manager->numberOfRangesToClean(), 0UL);
 
-    auto scm2 = _manager->getActiveMetadata(_manager, boost::none);
+    auto scm2 = *_manager->getActiveMetadata(_manager, boost::none);
     ASSERT_EQ(scm2->getChunks().size(), 1UL);
-    _manager->refreshActiveMetadata(cloneMetadataPlusChunk(scm2, cr2));
+    _manager->setFilteringMetadata(cloneMetadataPlusChunk(scm2, cr2));
 
     {
-        auto scm3 = _manager->getActiveMetadata(_manager, boost::none);
+        auto scm3 = *_manager->getActiveMetadata(_manager, boost::none);
        ASSERT_EQ(scm3->getChunks().size(), 2UL);
-        _manager->refreshActiveMetadata(cloneMetadataPlusChunk(scm3, cr3));
+        _manager->setFilteringMetadata(cloneMetadataPlusChunk(scm3, cr3));
 
        ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 3UL);
        ASSERT_EQ(_manager->numberOfEmptyMetadataSnapshots(), 0);
@@ -374,9 +451,9 @@ TEST_F(MetadataManagerTest, ClearUnneededChunkManagerObjectSnapshotInMiddleOfLis
     *      CollectionMetadataTracker{ metadata: xxx, orphans: [], usageCounter: 1}
     * ]
    */
-    scm2 = _manager->getActiveMetadata(_manager, boost::none);
+    scm2 = *_manager->getActiveMetadata(_manager, boost::none);
     ASSERT_EQ(scm2->getChunks().size(), 3UL);
-    _manager->refreshActiveMetadata(cloneMetadataPlusChunk(scm2, cr4));
+    _manager->setFilteringMetadata(cloneMetadataPlusChunk(scm2, cr4));
 
     ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 4UL);
     ASSERT_EQ(_manager->numberOfEmptyMetadataSnapshots(), 1);
 }
diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp
index eb3026e084b..57937d9c5fe 100644
--- a/src/mongo/db/s/migration_source_manager.cpp
+++ b/src/mongo/db/s/migration_source_manager.cpp
@@ -482,7 +482,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig(OperationContext* opC
         UninterruptibleLockGuard noInterrupt(opCtx->lockState());
         AutoGetCollection autoColl(opCtx, getNss(), MODE_IX, MODE_X);
         if (!repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesFor(opCtx, getNss())) {
-            CollectionShardingRuntime::get(opCtx, getNss())->refreshMetadata(opCtx, nullptr);
+            CollectionShardingRuntime::get(opCtx, getNss())->clearFilteringMetadata();
             uassertStatusOK(status.withContext(
                 str::stream() << "Unable to verify migration commit for chunk: "
                               << redact(_args.toString())
@@ -518,7 +518,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig(OperationContext* opC
         UninterruptibleLockGuard noInterrupt(opCtx->lockState());
         AutoGetCollection autoColl(opCtx, getNss(), MODE_IX, MODE_X);
 
-        CollectionShardingRuntime::get(opCtx, getNss())->refreshMetadata(opCtx, nullptr);
+        CollectionShardingRuntime::get(opCtx, getNss())->clearFilteringMetadata();
 
         log() << "Failed to refresh metadata after a "
               << (migrationCommitStatus.isOK() ? "failed commit attempt" : "successful commit")
diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
index d6fd4b3d567..1fb93a6213f 100644
--- a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
+++ b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
@@ -151,7 +151,7 @@ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx,
         AutoGetCollection autoColl(opCtx, nss, MODE_IX, MODE_X);
         auto* const css = CollectionShardingRuntime::get(opCtx, nss);
 
-        css->refreshMetadata(opCtx, nullptr);
+        css->setFilteringMetadata(opCtx, CollectionMetadata());
 
         return ChunkVersion::UNSHARDED();
     }
@@ -185,10 +185,7 @@ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx,
         return metadata->getShardVersion();
     }
 
-    std::unique_ptr<CollectionMetadata> newCollectionMetadata =
-        stdx::make_unique<CollectionMetadata>(cm, shardingState->shardId());
-
-    css->refreshMetadata(opCtx, std::move(newCollectionMetadata));
+    css->setFilteringMetadata(opCtx, CollectionMetadata(cm, shardingState->shardId()));
 
     return css->getMetadata(opCtx)->getShardVersion();
 }
diff --git a/src/mongo/db/s/shard_server_op_observer.cpp b/src/mongo/db/s/shard_server_op_observer.cpp
index 4d0f59a36df..622225a9977 100644
--- a/src/mongo/db/s/shard_server_op_observer.cpp
+++ b/src/mongo/db/s/shard_server_op_observer.cpp
@@ -76,10 +76,9 @@ public:
 
         CatalogCacheLoader::get(_opCtx).notifyOfCollectionVersionUpdate(_nss);
 
-        // This is a hack to get around CollectionShardingState::refreshMetadata() requiring the X
-        // lock: markNotShardedAtStepdown() doesn't have a lock check. Temporary measure until
-        // SERVER-31595 removes the X lock requirement.
-        CollectionShardingRuntime::get(_opCtx, _nss)->markNotShardedAtStepdown();
+        // Force subsequent uses of the namespace to refresh the filtering metadata so they can
+        // synchronize with any work happening on the primary (e.g., migration critical section).
+        CollectionShardingRuntime::get(_opCtx, _nss)->clearFilteringMetadata();
     }
 
     void rollback() override {}
@@ -281,10 +280,10 @@ void ShardServerOpObserver::onUpdate(OperationContext* opCtx, const OplogUpdateE
             }
 
             if (setField.hasField(ShardCollectionType::enterCriticalSectionCounter.name())) {
-                // This is a hack to get around CollectionShardingState::refreshMetadata()
-                // requiring the X lock: markNotShardedAtStepdown() doesn't have a lock check.
-                // Temporary measure until SERVER-31595 removes the X lock requirement.
-                CollectionShardingRuntime::get(opCtx, updatedNss)->markNotShardedAtStepdown();
+                // Force subsequent uses of the namespace to refresh the filtering metadata so they
+                // can synchronize with any work happening on the primary (e.g., migration critical
+                // section).
+                CollectionShardingRuntime::get(opCtx, updatedNss)->clearFilteringMetadata();
             }
         }
     }
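The behavioral core of this patch is visible in metadata_manager.h/.cpp: getActiveMetadata() now returns boost::optional<ScopedCollectionMetadata>, so callers can distinguish "filtering metadata unknown, trigger a refresh" (boost::none) from "metadata known", where a default-constructed CollectionMetadata means "known unsharded". Below is a minimal, self-contained C++17 sketch of that three-state idea using std::optional; all names in it (FilteringMetadataCache, the simplified CollectionMetadata struct) are hypothetical stand-ins for illustration, not MongoDB's actual classes.

    // Simplified model of the unknown/unsharded/sharded metadata states this
    // commit introduces; names below are hypothetical, not MongoDB's API.
    #include <cassert>
    #include <iostream>
    #include <optional>
    #include <string>

    struct CollectionMetadata {
        bool sharded = false;  // default-constructed value models "known unsharded"
        std::string description() const {
            return sharded ? "sharded" : "unsharded";
        }
    };

    class FilteringMetadataCache {
    public:
        // Counterpart of setFilteringMetadata(): installs known metadata.
        void setFilteringMetadata(CollectionMetadata metadata) {
            _metadata = std::move(metadata);
        }

        // Counterpart of clearFilteringMetadata(): marks the metadata UNKNOWN so
        // the next access can trigger a refresh instead of silently not filtering.
        void clearFilteringMetadata() {
            _metadata.reset();
        }

        // Counterpart of getActiveMetadata(): std::nullopt means "unknown, go refresh".
        std::optional<CollectionMetadata> getActiveMetadata() const {
            return _metadata;
        }

    private:
        std::optional<CollectionMetadata> _metadata;
    };

    int main() {
        FilteringMetadataCache cache;

        // Before any refresh the metadata is unknown, not "unsharded".
        assert(!cache.getActiveMetadata());

        cache.setFilteringMetadata(CollectionMetadata{true});
        assert(cache.getActiveMetadata());
        std::cout << "after refresh: " << cache.getActiveMetadata()->description() << '\n';

        // A step-down or critical-section signal clears the metadata back to unknown.
        cache.clearFilteringMetadata();
        assert(!cache.getActiveMetadata());
        std::cout << "after clear: metadata unknown, refresh required\n";
        return 0;
    }

The design point, as reflected in the diff, is that the old refreshMetadata(nullptr) conflated "unshard the collection" with "forget what we know"; splitting the API into setFilteringMetadata()/clearFilteringMetadata() and making the getter optional-returning separates those two cases.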