author     Kaloian Manassiev <kaloian.manassiev@mongodb.com>    2018-08-14 10:53:56 -0400
committer  Kaloian Manassiev <kaloian.manassiev@mongodb.com>    2018-09-11 04:18:17 -0400
commit     20117b8ee9678794be675eb4d728bfcc8f9d75f4
tree       1a27ea6e9c4a73617922e487b90e8ef2c3e8fff9 /src
parent     47826721dd85248b8acb569694687db0e71257cd
SERVER-32198 Make MetadataManager support an 'UNKNOWN' filtering metadata state
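
In brief: before this change, a shard had no way to represent "I do not know the filtering metadata for this collection" — an unset state was conflated with "unsharded". This commit teaches MetadataManager (and the CollectionShardingRuntime/CollectionShardingState layers above it) to track an explicit UNKNOWN state, entered via the new clearFilteringMetadata() and surfaced to callers as an empty boost::optional. A hedged sketch of the three resulting states, using the names introduced by the patch (the wrapper function and the ShardId value below are illustrative, not part of the commit):

// Sketch only; assumes the patched mongod headers.
void illustrateFilteringMetadataStates(OperationContext* opCtx,
                                       CollectionShardingRuntime* css,
                                       std::shared_ptr<ChunkManager> cm) {
    // 1. UNKNOWN: nothing is known about the collection, so shard-version checks throw
    //    StaleConfig and force a refresh. Safe to call with only an intent lock held.
    css->clearFilteringMetadata();

    // 2. Known and UNSHARDED: an explicitly default-constructed CollectionMetadata.
    //    Like setFilteringMetadata generally, this requires MODE_X on the collection.
    css->setFilteringMetadata(opCtx, CollectionMetadata());

    // 3. Known and SHARDED: metadata built from a routing table snapshot plus this
    //    shard's identity (the ShardId value here is a placeholder).
    css->setFilteringMetadata(opCtx, CollectionMetadata(std::move(cm), ShardId("shard0")));
}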
Diffstat (limited to 'src')

 src/mongo/db/s/collection_metadata_filtering_test.cpp | 221
 src/mongo/db/s/collection_range_deleter.cpp           |  18
 src/mongo/db/s/collection_range_deleter_test.cpp      |   7
 src/mongo/db/s/collection_sharding_runtime.cpp        |  20
 src/mongo/db/s/collection_sharding_runtime.h          |  26
 src/mongo/db/s/collection_sharding_state.cpp          |  21
 src/mongo/db/s/collection_sharding_state.h            |   9
 src/mongo/db/s/collection_sharding_state_test.cpp     |  16
 src/mongo/db/s/metadata_manager.cpp                   |  90
 src/mongo/db/s/metadata_manager.h                     |  31
 src/mongo/db/s/metadata_manager_test.cpp              | 199
 src/mongo/db/s/migration_source_manager.cpp           |   4
 src/mongo/db/s/shard_filtering_metadata_refresh.cpp   |   7
 src/mongo/db/s/shard_server_op_observer.cpp           |  15
 src/mongo/s/catalog/type_chunk.cpp                    |   4

 15 files changed, 391 insertions, 297 deletions
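
The pivotal signature change in the diff below is that MetadataManager::getActiveMetadata() — and the private CollectionShardingState::_getMetadata() hook it backs — now returns boost::optional<ScopedCollectionMetadata>, where boost::none encodes the UNKNOWN state; each caller checks the optional before dereferencing. A minimal sketch of that caller pattern under the patched headers (the function itself is illustrative, not from the commit):

// Illustrative caller of the new optional-returning API.
bool exampleShardOwnsKey(std::shared_ptr<MetadataManager> manager, const BSONObj& shardKey) {
    auto optMetadata = manager->getActiveMetadata(manager, boost::none);
    if (!optMetadata) {
        // Filtering metadata is UNKNOWN (never set, or cleared at stepdown); the caller
        // must trigger a refresh rather than assume the collection is unsharded.
        return false;
    }

    const auto& metadata = *optMetadata;  // ScopedCollectionMetadata: RAII usage counter
    return metadata->isSharded() && metadata->keyBelongsToMe(shardKey);
}

Note that CollectionShardingState::getMetadata() deliberately hides the optional from higher layers by substituting a kUnshardedCollection sentinel, so pre-existing call sites keep working unchanged.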
diff --git a/src/mongo/db/s/collection_metadata_filtering_test.cpp b/src/mongo/db/s/collection_metadata_filtering_test.cpp
index 3193a8e53a8..05a52f2dd16 100644
--- a/src/mongo/db/s/collection_metadata_filtering_test.cpp
+++ b/src/mongo/db/s/collection_metadata_filtering_test.cpp
@@ -45,15 +45,19 @@ protected:
         _manager = std::make_shared<MetadataManager>(getServiceContext(), kNss, executor());
     }

-    // Prepares data with a history array populated:
-    //      chunk1 - [min, -100)
-    //      chunk2 - [100, 0)
-    //      chunk3 - [0, 100)
-    //      chunk4 - [100, max)
-    // and the history:
-    //      time (now,75) shard0(chunk1, chunk3) shard1(chunk2, chunk4)
-    //      time (75,25) shard0(chunk2, chunk4) shard1(chunk1, chunk3)
-    //      time (25,0) - no history
+    /**
+     * Prepares the CSS for 'kNss' and the standalone '_manager' to have their metadata be a history
+     * array populated as follows:
+     *  chunk1 - [min, -100)
+     *  chunk2 - [100, 0)
+     *  chunk3 - [0, 100)
+     *  chunk4 - [100, max)
+     *
+     * and the history:
+     *  time (now,75) shard0(chunk1, chunk3) shard1(chunk2, chunk4)
+     *  time (75,25) shard0(chunk2, chunk4) shard1(chunk1, chunk3)
+     *  time (25,0) - no history
+     */
     void prepareTestData() {
         const OID epoch = OID::gen();
         const ShardKeyPattern shardKeyPattern(BSON("_id" << 1));
@@ -91,140 +95,121 @@ protected:
             return std::vector<ChunkType>{chunk1, chunk2, chunk3, chunk4};
         }());

-        auto cm = std::make_shared<ChunkManager>(rt, Timestamp(100, 0));
+        auto cm = std::make_shared<ChunkManager>(rt, boost::none);
         ASSERT_EQ(4, cm->numChunks());
+
         {
             AutoGetCollection autoColl(operationContext(), kNss, MODE_X);
             auto* const css = CollectionShardingRuntime::get(operationContext(), kNss);
-
-            css->refreshMetadata(operationContext(),
-                                 std::make_unique<CollectionMetadata>(cm, ShardId("0")));
+            css->setFilteringMetadata(operationContext(), CollectionMetadata(cm, ShardId("0")));
         }

-        _manager->refreshActiveMetadata(std::make_unique<CollectionMetadata>(cm, ShardId("0")));
+        _manager->setFilteringMetadata(CollectionMetadata(cm, ShardId("0")));
     }

     std::shared_ptr<MetadataManager> _manager;
 };

-// Verifies that right set of documents is visible.
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsPresent) {
-    prepareTestData();
-
-    auto metadata = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(100, 0)));
-
-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -500)));
-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 50)));
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -50)));
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 500)));
-}
-
-// Verifies that a different set of documents is visible for a timestamp in the past.
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsPast) {
+// Verifies that right set of documents is visible
+TEST_F(CollectionMetadataFilteringTest, FilterDocumentsInTheFuture) {
     prepareTestData();

-    auto metadata = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(50, 0)));
-
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -500)));
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 50)));
-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -50)));
-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 500)));
-}
-
-// Verifies that when accessing too far into the past we get the stale error.
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsStale) {
-    prepareTestData();
+    const auto testFn = [](const ScopedCollectionMetadata& metadata) {
+        ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -500)));
+        ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 50)));
+        ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -50)));
+        ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 500)));
+    };
+
+    {
+        BSONObj readConcern = BSON("readConcern" << BSON("level"
+                                                         << "snapshot"
+                                                         << "atClusterTime"
+                                                         << Timestamp(100, 0)));
+
+        auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
+        ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
+
+        AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
+        auto* const css = CollectionShardingState::get(operationContext(), kNss);
+        testFn(css->getMetadata(operationContext()));
+    }

-    auto metadata = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(10, 0)));
-
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -500)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 50)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -50)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 500)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
+    {
+        const auto scm = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(100, 0)));
+        testFn(*scm);
+    }
 }

-// The same test as FilterDocumentsPresent but using "readConcern"
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsPresentShardingState) {
+// Verifies that a different set of documents is visible for a timestamp in the past
+TEST_F(CollectionMetadataFilteringTest, FilterDocumentsInThePast) {
     prepareTestData();

-    BSONObj readConcern = BSON("readConcern" << BSON("level"
-                                                     << "snapshot"
-                                                     << "atClusterTime"
-                                                     << Timestamp(100, 0)));
-
-    auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
-    ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
-
-    AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
-    auto const css = CollectionShardingState::get(operationContext(), kNss);
-    auto metadata = css->getMetadata(operationContext());
+    const auto testFn = [](const ScopedCollectionMetadata& metadata) {
+        ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -500)));
+        ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 50)));
+        ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -50)));
+        ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 500)));
+    };
+
+    {
+        BSONObj readConcern = BSON("readConcern" << BSON("level"
+                                                         << "snapshot"
+                                                         << "atClusterTime"
+                                                         << Timestamp(50, 0)));
+
+        auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
+        ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
+
+        AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
+        auto* const css = CollectionShardingState::get(operationContext(), kNss);
+        testFn(css->getMetadata(operationContext()));
+    }

-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -500)));
-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 50)));
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -50)));
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 500)));
+    {
+        const auto scm = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(50, 0)));
+        testFn(*scm);
+    }
 }

-// The same test as FilterDocumentsPast but using "readConcern"
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsPastShardingState) {
+// Verifies that when accessing too far into the past we get the stale error
+TEST_F(CollectionMetadataFilteringTest, FilterDocumentsTooFarInThePastThrowsStaleChunkHistory) {
     prepareTestData();

-    BSONObj readConcern = BSON("readConcern" << BSON("level"
-                                                     << "snapshot"
-                                                     << "atClusterTime"
-                                                     << Timestamp(50, 0)));
-
-    auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
-    ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
-
-    AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
-    auto const css = CollectionShardingState::get(operationContext(), kNss);
-    auto metadata = css->getMetadata(operationContext());
-
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -500)));
-    ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 50)));
-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -50)));
-    ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 500)));
-}
-
-// The same test as FilterDocumentsStale but using "readConcern"
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsStaleShardingState) {
-    prepareTestData();
+    const auto testFn = [](const ScopedCollectionMetadata& metadata) {
+        ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -500)),
+                           AssertionException,
+                           ErrorCodes::StaleChunkHistory);
+        ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 50)),
+                           AssertionException,
+                           ErrorCodes::StaleChunkHistory);
+        ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -50)),
+                           AssertionException,
+                           ErrorCodes::StaleChunkHistory);
+        ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 500)),
+                           AssertionException,
+                           ErrorCodes::StaleChunkHistory);
+    };
+
+    {
+        BSONObj readConcern = BSON("readConcern" << BSON("level"
+                                                         << "snapshot"
+                                                         << "atClusterTime"
+                                                         << Timestamp(10, 0)));
+
+        auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
+        ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
+
+        AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
+        auto* const css = CollectionShardingState::get(operationContext(), kNss);
+        testFn(css->getMetadata(operationContext()));
+    }

-    BSONObj readConcern = BSON("readConcern" << BSON("level"
-                                                     << "snapshot"
-                                                     << "atClusterTime"
-                                                     << Timestamp(10, 0)));
-
-    auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
-    ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
-
-    AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
-    auto const css = CollectionShardingState::get(operationContext(), kNss);
-    auto metadata = css->getMetadata(operationContext());
-
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -500)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 50)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -50)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
-    ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 500)),
-                       AssertionException,
-                       ErrorCodes::StaleChunkHistory);
+    {
+        const auto scm = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(10, 0)));
+        testFn(*scm);
+    }
 }
-
 }  // namespace
 }  // namespace mongo
diff --git a/src/mongo/db/s/collection_range_deleter.cpp b/src/mongo/db/s/collection_range_deleter.cpp
index 4d79f8a43ac..f3b3415ff52 100644
--- a/src/mongo/db/s/collection_range_deleter.cpp
+++ b/src/mongo/db/s/collection_range_deleter.cpp
@@ -124,7 +124,17 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
     const auto scopedCollectionMetadata =
         metadataManager->getActiveMetadata(metadataManager, boost::none);

-    if (!forTestOnly && (!collection || !scopedCollectionMetadata->isSharded())) {
+    if (!scopedCollectionMetadata) {
+        LOG(0) << "Abandoning any range deletions because the metadata for " << nss.ns()
+               << " was reset";
+        stdx::lock_guard<stdx::mutex> lk(css->_metadataManager->_managerLock);
+        css->_metadataManager->_clearAllCleanups(lk);
+        return boost::none;
+    }
+
+    const auto& metadata = *scopedCollectionMetadata;
+
+    if (!forTestOnly && (!collection || !metadata->isSharded())) {
         if (!collection) {
             LOG(0) << "Abandoning any range deletions left over from dropped " << nss.ns();
         } else {
@@ -137,9 +147,9 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
         return boost::none;
     }

-    if (!forTestOnly && scopedCollectionMetadata->getCollVersion().epoch() != epoch) {
+    if (!forTestOnly && metadata->getCollVersion().epoch() != epoch) {
         LOG(1) << "Range deletion task for " << nss.ns() << " epoch " << epoch << " woke;"
-               << " (current is " << scopedCollectionMetadata->getCollVersion() << ")";
+               << " (current is " << metadata->getCollVersion() << ")";
         return boost::none;
     }

@@ -210,7 +220,7 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
         try {
             wrote = self->_doDeletion(
-                opCtx, collection, scopedCollectionMetadata->getKeyPattern(), *range, maxToDelete);
+                opCtx, collection, metadata->getKeyPattern(), *range, maxToDelete);
         } catch (const DBException& e) {
             wrote = e.toStatus();
             warning() << e.what();
diff --git a/src/mongo/db/s/collection_range_deleter_test.cpp b/src/mongo/db/s/collection_range_deleter_test.cpp
index 61768efce87..4ce2ee1a6cc 100644
--- a/src/mongo/db/s/collection_range_deleter_test.cpp
+++ b/src/mongo/db/s/collection_range_deleter_test.cpp
@@ -85,17 +85,14 @@ protected:
         AutoGetCollection autoColl(operationContext(), kNss, MODE_IX);
         auto* const css = CollectionShardingRuntime::get(operationContext(), kNss);
-
-        css->refreshMetadata(operationContext(),
-                             stdx::make_unique<CollectionMetadata>(cm, ShardId("thisShard")));
+        css->setFilteringMetadata(operationContext(), CollectionMetadata(cm, ShardId("thisShard")));
     }

     void tearDown() override {
         {
             AutoGetCollection autoColl(operationContext(), kNss, MODE_IX);
             auto* const css = CollectionShardingRuntime::get(operationContext(), kNss);
-
-            css->refreshMetadata(operationContext(), nullptr);
+            css->clearFilteringMetadata();
         }

         ShardServerTestFixture::tearDown();
diff --git a/src/mongo/db/s/collection_sharding_runtime.cpp b/src/mongo/db/s/collection_sharding_runtime.cpp
index 770d4a2af4b..38df95d32d6 100644
--- a/src/mongo/db/s/collection_sharding_runtime.cpp
+++ b/src/mongo/db/s/collection_sharding_runtime.cpp
@@ -58,15 +58,15 @@ CollectionShardingRuntime* CollectionShardingRuntime::get(OperationContext* opCt
     return checked_cast<CollectionShardingRuntime*>(css);
 }

-void CollectionShardingRuntime::refreshMetadata(OperationContext* opCtx,
-                                                std::unique_ptr<CollectionMetadata> newMetadata) {
+void CollectionShardingRuntime::setFilteringMetadata(OperationContext* opCtx,
+                                                     CollectionMetadata newMetadata) {
     invariant(opCtx->lockState()->isCollectionLockedForMode(_nss.ns(), MODE_X));

-    _metadataManager->refreshActiveMetadata(std::move(newMetadata));
+    _metadataManager->setFilteringMetadata(std::move(newMetadata));
 }

-void CollectionShardingRuntime::markNotShardedAtStepdown() {
-    _metadataManager->refreshActiveMetadata(nullptr);
+void CollectionShardingRuntime::clearFilteringMetadata() {
+    _metadataManager->clearFilteringMetadata();
 }

 auto CollectionShardingRuntime::beginReceive(ChunkRange const& range) -> CleanupNotification {
@@ -98,9 +98,13 @@ Status CollectionShardingRuntime::waitForClean(OperationContext* opCtx,
         {
             // First, see if collection was dropped, but do it in a separate scope in order to
             // not hold reference on it, which would make it appear in use
-            auto metadata =
+            const auto optMetadata =
                 self->_metadataManager->getActiveMetadata(self->_metadataManager, boost::none);

+            if (!optMetadata)
+                return {ErrorCodes::ConflictingOperationInProgress,
+                        "Collection being migrated had its metadata reset"};
+
+            const auto& metadata = *optMetadata;
             if (!metadata->isSharded() || metadata->getCollVersion().epoch() != epoch) {
                 return {ErrorCodes::ConflictingOperationInProgress,
                         "Collection being migrated was dropped"};
@@ -137,8 +141,8 @@ boost::optional<ChunkRange> CollectionShardingRuntime::getNextOrphanRange(BSONOb
     return _metadataManager->getNextOrphanRange(from);
 }

-ScopedCollectionMetadata CollectionShardingRuntime::_getMetadata(OperationContext* opCtx) {
-    auto atClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAtClusterTime();
+boost::optional<ScopedCollectionMetadata> CollectionShardingRuntime::_getMetadata(
+    const boost::optional<mongo::LogicalTime>& atClusterTime) {
     return _metadataManager->getActiveMetadata(_metadataManager, atClusterTime);
 }

diff --git a/src/mongo/db/s/collection_sharding_runtime.h b/src/mongo/db/s/collection_sharding_runtime.h
index 0515fde5ab0..28526a79e3e 100644
--- a/src/mongo/db/s/collection_sharding_runtime.h
+++ b/src/mongo/db/s/collection_sharding_runtime.h
@@ -59,19 +59,25 @@ public:
     static CollectionShardingRuntime* get(OperationContext* opCtx, const NamespaceString& nss);

     /**
-     * Updates the metadata based on changes received from the config server and also resolves the
-     * pending receives map in case some of these pending receives have completed or have been
-     * abandoned. If newMetadata is null, unshard the collection.
+     * Updates the collection's filtering metadata based on changes received from the config server
+     * and also resolves the pending receives map in case some of these pending receives have
+     * committed on the config server or have been abandoned by the donor shard.
      *
-     * Must always be called with an exclusive collection lock.
+     * This method must be called with an exclusive collection lock and it does not acquire any
+     * locks itself.
      */
-    void refreshMetadata(OperationContext* opCtx, std::unique_ptr<CollectionMetadata> newMetadata);
+    void setFilteringMetadata(OperationContext* opCtx, CollectionMetadata newMetadata);

     /**
-     * Marks the collection as not sharded at stepdown time so that no filtering will occur for
-     * slaveOk queries.
+     * Marks the collection's filtering metadata as UNKNOWN, meaning that all attempts to check for
+     * shard version match will fail with StaleConfig errors in order to trigger an update.
+     *
+     * It is safe to call this method with only an intent lock on the collection (as opposed to
+     * setFilteringMetadata which requires exclusive), however note that clearing a collection's
+     * filtering metadata will interrupt all in-progress orphan cleanups in which case orphaned data
+     * will remain behind on disk.
      */
-    void markNotShardedAtStepdown();
+    void clearFilteringMetadata();

     /**
      * Schedules any documents in `range` for immediate cleanup iff no running queries can depend
@@ -133,7 +139,6 @@ public:
         _metadataManager->toBSONPending(bb);
     }

-
 private:
     friend boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
         OperationContext*, NamespaceString const&, OID const&, int, CollectionRangeDeleter*);
@@ -144,7 +149,8 @@ private:
     // Contains all the metadata associated with this collection.
     std::shared_ptr<MetadataManager> _metadataManager;

-    ScopedCollectionMetadata _getMetadata(OperationContext* opCtx) override;
+    boost::optional<ScopedCollectionMetadata> _getMetadata(
+        const boost::optional<mongo::LogicalTime>& atClusterTime) override;
 };

 /**
diff --git a/src/mongo/db/s/collection_sharding_state.cpp b/src/mongo/db/s/collection_sharding_state.cpp
index 2968870b834..5f83a439fa5 100644
--- a/src/mongo/db/s/collection_sharding_state.cpp
+++ b/src/mongo/db/s/collection_sharding_state.cpp
@@ -93,6 +93,20 @@ const ServiceContext::Decoration<boost::optional<CollectionShardingStateMap>>
     CollectionShardingStateMap::get =
         ServiceContext::declareDecoration<boost::optional<CollectionShardingStateMap>>();

+class UnshardedCollection : public ScopedCollectionMetadata::Impl {
+public:
+    UnshardedCollection() = default;
+
+    const CollectionMetadata& get() override {
+        return _metadata;
+    }
+
+private:
+    CollectionMetadata _metadata;
+};
+
+const auto kUnshardedCollection = std::make_shared<UnshardedCollection>();
+
 }  // namespace

 CollectionShardingState::CollectionShardingState(NamespaceString nss) : _nss(std::move(nss)) {}
@@ -112,7 +126,12 @@ void CollectionShardingState::report(OperationContext* opCtx, BSONObjBuilder* bu
 }

 ScopedCollectionMetadata CollectionShardingState::getMetadata(OperationContext* opCtx) {
-    return _getMetadata(opCtx);
+    const auto atClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAtClusterTime();
+    auto optMetadata = _getMetadata(atClusterTime);
+    if (!optMetadata)
+        return {kUnshardedCollection};
+
+    return std::move(*optMetadata);
 }

 void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) {
diff --git a/src/mongo/db/s/collection_sharding_state.h b/src/mongo/db/s/collection_sharding_state.h
index daa0c3ca5e8..29132a39bcd 100644
--- a/src/mongo/db/s/collection_sharding_state.h
+++ b/src/mongo/db/s/collection_sharding_state.h
@@ -29,6 +29,7 @@
 #pragma once

 #include "mongo/base/disallow_copying.h"
+#include "mongo/db/logical_time.h"
 #include "mongo/db/namespace_string.h"
 #include "mongo/db/s/scoped_collection_metadata.h"
 #include "mongo/db/s/sharding_migration_critical_section.h"
@@ -112,8 +113,12 @@ private:
     // Tracks the migration critical section state for this collection.
     ShardingMigrationCriticalSection _critSec;

-    // Obtains the current metadata for the collection
-    virtual ScopedCollectionMetadata _getMetadata(OperationContext* opCtx) = 0;
+    /**
+     * Obtains the current metadata for the collection or boost::none if the metadata is not yet
+     * known
+     */
+    virtual boost::optional<ScopedCollectionMetadata> _getMetadata(
+        const boost::optional<mongo::LogicalTime>& atClusterTime) = 0;
 };

 /**
diff --git a/src/mongo/db/s/collection_sharding_state_test.cpp b/src/mongo/db/s/collection_sharding_state_test.cpp
index 10fb4937c1d..ca60bb77db2 100644
--- a/src/mongo/db/s/collection_sharding_state_test.cpp
+++ b/src/mongo/db/s/collection_sharding_state_test.cpp
@@ -44,7 +44,7 @@ const NamespaceString kTestNss("TestDB", "TestColl");
  * that DeleteState's constructor will extract from its `doc` argument into its member
  * DeleteState::documentKey.
  */
-std::unique_ptr<CollectionMetadata> makeAMetadata(BSONObj const& keyPattern) {
+CollectionMetadata makeAMetadata(BSONObj const& keyPattern) {
     const OID epoch = OID::gen();
     auto range = ChunkRange(BSON("key" << MINKEY), BSON("key" << MAXKEY));
     auto chunk = ChunkType(kTestNss, std::move(range), ChunkVersion(1, 0, epoch), ShardId("other"));
@@ -52,7 +52,7 @@ std::unique_ptr<CollectionMetadata> makeAMetadata(BSONObj const& keyPattern) {
         kTestNss, UUID::gen(), KeyPattern(keyPattern), nullptr, false, epoch, {std::move(chunk)});
     std::shared_ptr<ChunkManager> cm = std::make_shared<ChunkManager>(rt, Timestamp(100, 0));

-    return stdx::make_unique<CollectionMetadata>(std::move(cm), ShardId("this"));
+    return CollectionMetadata(std::move(cm), ShardId("this"));
 }

 using DeleteStateTest = ShardServerTestFixture;
@@ -60,6 +60,7 @@ using DeleteStateTest = ShardServerTestFixture;
 TEST_F(DeleteStateTest, MakeDeleteStateUnsharded) {
     AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX);
     auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss);
+    css->setFilteringMetadata(operationContext(), CollectionMetadata());

     auto doc = BSON("key3"
                     << "abc"
@@ -70,8 +71,7 @@ TEST_F(DeleteStateTest, MakeDeleteStateUnsharded) {
                     << "key2"
                     << true);

-    // First, check that an order for deletion from an unsharded collection (where css has not been
-    // "refreshed" with chunk metadata) extracts just the "_id" field:
+    // Check that an order for deletion from an unsharded collection extracts just the "_id" field
     auto deleteState = ShardObserverDeleteState::make(operationContext(), css, doc);
     ASSERT_BSONOBJ_EQ(deleteState.documentKey,
                       BSON("_id"
@@ -84,7 +84,7 @@ TEST_F(DeleteStateTest, MakeDeleteStateShardedWithoutIdInShardKey) {
     auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss);

     // Push a CollectionMetadata with a shard key not including "_id"...
-    css->refreshMetadata(operationContext(), makeAMetadata(BSON("key" << 1 << "key3" << 1)));
+    css->setFilteringMetadata(operationContext(), makeAMetadata(BSON("key" << 1 << "key3" << 1)));

     // The order of fields in `doc` deliberately does not match the shard key
     auto doc = BSON("key3"
@@ -111,8 +111,8 @@ TEST_F(DeleteStateTest, MakeDeleteStateShardedWithIdInShardKey) {
     auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss);

     // Push a CollectionMetadata with a shard key that does have "_id" in the middle...
-    css->refreshMetadata(operationContext(),
-                         makeAMetadata(BSON("key" << 1 << "_id" << 1 << "key2" << 1)));
+    css->setFilteringMetadata(operationContext(),
+                              makeAMetadata(BSON("key" << 1 << "_id" << 1 << "key2" << 1)));

     // The order of fields in `doc` deliberately does not match the shard key
     auto doc = BSON("key2" << true << "key3"
@@ -139,7 +139,7 @@ TEST_F(DeleteStateTest, MakeDeleteStateShardedWithIdHashInShardKey) {
     // Push a CollectionMetadata with a shard key "_id", hashed.
     auto aMetadata = makeAMetadata(BSON("_id"
                                         << "hashed"));
-    css->refreshMetadata(operationContext(), std::move(aMetadata));
+    css->setFilteringMetadata(operationContext(), std::move(aMetadata));

     auto doc = BSON("key2" << true << "_id"
                            << "hello"
diff --git a/src/mongo/db/s/metadata_manager.cpp b/src/mongo/db/s/metadata_manager.cpp
index b48d3721b35..1b9fba1282d 100644
--- a/src/mongo/db/s/metadata_manager.cpp
+++ b/src/mongo/db/s/metadata_manager.cpp
@@ -197,7 +197,7 @@ public:
     }

 private:
-    friend ScopedCollectionMetadata MetadataManager::getActiveMetadata(
+    friend boost::optional<ScopedCollectionMetadata> MetadataManager::getActiveMetadata(
         std::shared_ptr<MetadataManager>, const boost::optional<LogicalTime>&);

     std::shared_ptr<MetadataManager> _metadataManager;
@@ -213,9 +213,7 @@ MetadataManager::MetadataManager(ServiceContext* serviceContext,
       _receivingChunks(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<BSONObj>()) {}

 MetadataManager::~MetadataManager() {
-    stdx::lock_guard<stdx::mutex> lg(_managerLock);
-    _clearAllCleanups(lg);
-    _metadata.clear();
+    clearFilteringMetadata();
 }

 void MetadataManager::_clearAllCleanups(WithLock lock) {
@@ -233,17 +231,18 @@ void MetadataManager::_clearAllCleanups(WithLock, Status status) {
     _rangesToClean.clear(status);
 }

-ScopedCollectionMetadata MetadataManager::getActiveMetadata(
+boost::optional<ScopedCollectionMetadata> MetadataManager::getActiveMetadata(
     std::shared_ptr<MetadataManager> self, const boost::optional<LogicalTime>& atClusterTime) {
     stdx::lock_guard<stdx::mutex> lg(_managerLock);

     if (_metadata.empty()) {
-        return {kUnshardedCollection};
+        return boost::none;
     }

     auto metadataTracker = _metadata.back();
     if (!atClusterTime) {
-        return {std::make_shared<RangePreserver>(lg, std::move(self), std::move(metadataTracker))};
+        return ScopedCollectionMetadata(
+            std::make_shared<RangePreserver>(lg, std::move(self), std::move(metadataTracker)));
     }

     auto chunkManager = metadataTracker->metadata.getChunkManager();
@@ -262,8 +261,8 @@ ScopedCollectionMetadata MetadataManager::getActiveMetadata(
         CollectionMetadata _metadata;
     };

-    return {std::make_shared<MetadataAtTimestamp>(
-        CollectionMetadata(chunkManagerAtClusterTime, metadataTracker->metadata.shardId()))};
+    return ScopedCollectionMetadata(std::make_shared<MetadataAtTimestamp>(
+        CollectionMetadata(chunkManagerAtClusterTime, metadataTracker->metadata.shardId())));
 }

 size_t MetadataManager::numberOfMetadataSnapshots() const {
@@ -274,70 +273,53 @@ size_t MetadataManager::numberOfMetadataSnapshots() const {
     return _metadata.size() - 1;
 }

-void MetadataManager::refreshActiveMetadata(std::unique_ptr<CollectionMetadata> remoteMetadata) {
+void MetadataManager::setFilteringMetadata(CollectionMetadata remoteMetadata) {
     stdx::lock_guard<stdx::mutex> lg(_managerLock);

-    // Collection was never sharded in the first place. This check is necessary in order to avoid
-    // extraneous logging in the not-a-shard case, because all call sites always try to get the
-    // collection sharding information regardless of whether the node is sharded or not.
-    if (!remoteMetadata && _metadata.empty()) {
-        invariant(_receivingChunks.empty());
-        invariant(_rangesToClean.isEmpty());
-        return;
-    }
-
-    // Collection is becoming unsharded
-    if (!remoteMetadata) {
-        log() << "Marking collection " << _nss.ns() << " with "
-              << redact(_metadata.back()->metadata.toStringBasic()) << " as unsharded";
-
-        _receivingChunks.clear();
-        _clearAllCleanups(lg);
-        _metadata.clear();
-        return;
-    }
-
     // Collection is becoming sharded
     if (_metadata.empty()) {
-        log() << "Marking collection " << _nss.ns() << " as sharded with "
-              << remoteMetadata->toStringBasic();
+        LOG(0) << "Marking collection " << _nss.ns() << " as " << remoteMetadata.toStringBasic();

         invariant(_receivingChunks.empty());
-        _setActiveMetadata(lg, std::move(*remoteMetadata));
         invariant(_rangesToClean.isEmpty());
+
+        _setActiveMetadata(lg, std::move(remoteMetadata));
         return;
     }

-    auto* const activeMetadata = &_metadata.back()->metadata;
+    const auto& activeMetadata = _metadata.back()->metadata;

     // If the metadata being installed has a different epoch from ours, this means the collection
     // was dropped and recreated, so we must entirely reset the metadata state
-    if (activeMetadata->getCollVersion().epoch() != remoteMetadata->getCollVersion().epoch()) {
-        log() << "Overwriting metadata for collection " << _nss.ns() << " from "
-              << activeMetadata->toStringBasic() << " to " << remoteMetadata->toStringBasic()
-              << " due to epoch change";
+    if (activeMetadata.getCollVersion().epoch() != remoteMetadata.getCollVersion().epoch()) {
+        LOG(0) << "Updating metadata for collection " << _nss.ns() << " from "
+               << activeMetadata.toStringBasic() << " to " << remoteMetadata.toStringBasic()
+               << " due to epoch change";

         _receivingChunks.clear();
-        _setActiveMetadata(lg, std::move(*remoteMetadata));
         _clearAllCleanups(lg);
+        _metadata.clear();
+
+        _setActiveMetadata(lg, std::move(remoteMetadata));
         return;
     }

-    // We already have newer version
-    if (activeMetadata->getCollVersion() >= remoteMetadata->getCollVersion()) {
-        LOG(1) << "Ignoring update of active metadata " << activeMetadata->toStringBasic()
-               << " with an older " << remoteMetadata->toStringBasic();
+    // We already have the same or newer version
+    if (activeMetadata.getCollVersion() >= remoteMetadata.getCollVersion()) {
+        LOG(1) << "Ignoring update of active metadata " << activeMetadata.toStringBasic()
+               << " with an older " << remoteMetadata.toStringBasic();
         return;
     }

-    log() << "Updating collection metadata for " << _nss.ns() << " from "
-          << activeMetadata->toStringBasic() << " to " << remoteMetadata->toStringBasic();
+    LOG(0) << "Updating metadata for collection " << _nss.ns() << " from "
+           << activeMetadata.toStringBasic() << " to " << remoteMetadata.toStringBasic()
+           << " due to version change";

     // Resolve any receiving chunks, which might have completed by now
     for (auto it = _receivingChunks.begin(); it != _receivingChunks.end();) {
         const ChunkRange receivingRange(it->first, it->second);

-        if (!remoteMetadata->rangeOverlapsChunk(receivingRange)) {
+        if (!remoteMetadata.rangeOverlapsChunk(receivingRange)) {
             ++it;
             continue;
         }
@@ -351,7 +333,14 @@ void MetadataManager::refreshActiveMetadata(std::unique_ptr<CollectionMetadata>
         it = _receivingChunks.begin();
     }

-    _setActiveMetadata(lg, std::move(*remoteMetadata));
+    _setActiveMetadata(lg, std::move(remoteMetadata));
+}
+
+void MetadataManager::clearFilteringMetadata() {
+    stdx::lock_guard<stdx::mutex> lg(_managerLock);
+    _receivingChunks.clear();
+    _clearAllCleanups(lg);
+    _metadata.clear();
 }

 void MetadataManager::_setActiveMetadata(WithLock wl, CollectionMetadata newMetadata) {
@@ -362,8 +351,9 @@ void MetadataManager::_setActiveMetadata(WithLock wl, CollectionMetadata newMeta
 void MetadataManager::_retireExpiredMetadata(WithLock lock) {
     while (_metadata.size() > 1 && !_metadata.front()->usageCounter) {
         if (!_metadata.front()->orphans.empty()) {
-            log() << "Queries possibly dependent on " << _nss.ns()
-                  << " range(s) finished; scheduling ranges for deletion";
+            LOG(0) << "Queries possibly dependent on " << _nss.ns()
+                   << " range(s) finished; scheduling ranges for deletion";
+
             // It is safe to push orphan ranges from _metadata.back(), even though new queries might
             // start any time, because any request to delete a range it maps is rejected.
             _pushListToClean(lock, std::move(_metadata.front()->orphans));
@@ -427,7 +417,6 @@ void MetadataManager::_pushListToClean(WithLock, std::list<Deletion> ranges) {
         scheduleCleanup(
             _executor, _nss, _metadata.back()->metadata.getCollVersion().epoch(), *when);
     }
-    invariant(ranges.empty());
 }

 auto MetadataManager::beginReceive(ChunkRange const& range) -> CleanupNotification {
@@ -499,6 +488,7 @@ auto MetadataManager::cleanUpRange(ChunkRange const& range, Date_t whenToDelete)
     auto& orphans = overlapMetadata->orphans;
     orphans.emplace_back(ChunkRange(range.getMin().getOwned(), range.getMax().getOwned()),
                          whenToDelete);
+
     return orphans.back().notification;
 }

diff --git a/src/mongo/db/s/metadata_manager.h b/src/mongo/db/s/metadata_manager.h
index 19ba890e48c..f63948c8d7c 100644
--- a/src/mongo/db/s/metadata_manager.h
+++ b/src/mongo/db/s/metadata_manager.h
@@ -32,6 +32,7 @@

 #include "mongo/base/disallow_copying.h"
 #include "mongo/bson/simple_bsonobj_comparator.h"
+#include "mongo/db/logical_time.h"
 #include "mongo/db/namespace_string.h"
 #include "mongo/db/range_arithmetic.h"
 #include "mongo/db/s/collection_range_deleter.h"
@@ -60,14 +61,17 @@ public:
     ~MetadataManager();

     /**
-     * An ActiveMetadata must be set before this function can be called.
+     * If there is no filtering metadata set yet (setFilteringMetadata has not been called) returns
+     * boost::none. Otherwise increments the usage counter of the active metadata and returns an
+     * RAII object, which corresponds to it.
      *
-     * Increments the usage counter of the active metadata and returns an RAII object, which
-     * contains the currently active metadata. When the usageCounter goes to zero, the RAII
-     * object going out of scope will call _removeMetadata.
+     * Holding a reference on a particular instance of the metadata means that orphan cleanup is not
+     * allowed to run and delete chunks which are covered by that metadata. When the returned
+     * ScopedCollectionMetadata goes out of scope, the reference counter on the metadata will be
+     * decremented and if it reaches to zero, orphan cleanup may proceed.
      */
-    ScopedCollectionMetadata getActiveMetadata(std::shared_ptr<MetadataManager> self,
-                                               const boost::optional<LogicalTime>& atClusterTime);
+    boost::optional<ScopedCollectionMetadata> getActiveMetadata(
+        std::shared_ptr<MetadataManager> self, const boost::optional<LogicalTime>& atClusterTime);

     /**
      * Returns the number of CollectionMetadata objects being maintained on behalf of running
@@ -76,10 +80,9 @@ public:
      */
     size_t numberOfMetadataSnapshots() const;

-    /**
-     * Uses the contents of the specified metadata as a way to purge any pending chunks.
-     */
-    void refreshActiveMetadata(std::unique_ptr<CollectionMetadata> newMetadata);
+    void setFilteringMetadata(CollectionMetadata newMetadata);
+
+    void clearFilteringMetadata();

     void toBSONPending(BSONArrayBuilder& bb) const;

@@ -230,10 +233,10 @@ private:
     // Mutex to protect the state below
     mutable stdx::mutex _managerLock;

-    // Contains a list of collection metadata ordered in chronological order based on the refreshes
-    // that occurred. The entry at _metadata.back() is the most recent metadata and is what is
-    // returned to new queries. The rest are previously active collection metadata instances still
-    // in use by active server operations or cursors.
+    // Contains a list of collection metadata for the same collection epoch, ordered in
+    // chronological order based on the refreshes that occurred. The entry at _metadata.back() is
+    // the most recent metadata and is what is returned to new queries. The rest are previously
+    // active collection metadata instances still in use by active server operations or cursors.
     std::list<std::shared_ptr<CollectionMetadataTracker>> _metadata;

     // Chunk ranges being migrated into to the shard. Indexed by the min key of the range.
diff --git a/src/mongo/db/s/metadata_manager_test.cpp b/src/mongo/db/s/metadata_manager_test.cpp
index 5eb5991aca2..f089acfb493 100644
--- a/src/mongo/db/s/metadata_manager_test.cpp
+++ b/src/mongo/db/s/metadata_manager_test.cpp
@@ -69,7 +69,7 @@ protected:
     /**
      * Returns an instance of CollectionMetadata which has no chunks owned by 'thisShard'.
      */
-    static std::unique_ptr<CollectionMetadata> makeEmptyMetadata() {
+    static CollectionMetadata makeEmptyMetadata() {
         const OID epoch = OID::gen();

         auto rt = RoutingTableHistory::makeNew(
@@ -86,7 +86,7 @@ protected:

         std::shared_ptr<ChunkManager> cm = std::make_shared<ChunkManager>(rt, boost::none);

-        return stdx::make_unique<CollectionMetadata>(cm, kThisShard);
+        return CollectionMetadata(cm, kThisShard);
     }

     /**
@@ -97,46 +97,93 @@ protected:
      * It will fassert if the chunk bounds are incorrect or overlap an existing chunk or if the
      * chunk version is lower than the maximum one.
      */
-    static std::unique_ptr<CollectionMetadata> cloneMetadataPlusChunk(
-        const ScopedCollectionMetadata& metadata, const ChunkRange range) {
+    static CollectionMetadata cloneMetadataPlusChunk(const ScopedCollectionMetadata& metadata,
+                                                     const ChunkRange& range) {
         const BSONObj& minKey = range.getMin();
         const BSONObj& maxKey = range.getMax();
-
-        ASSERT(SimpleBSONObjComparator::kInstance.evaluate(minKey < maxKey))
-            << "minKey == " << minKey << "; maxKey == " << maxKey;
         ASSERT(!rangeMapOverlaps(metadata->getChunks(), minKey, maxKey));

         auto cm = metadata->getChunkManager();

         const auto chunkToSplit = cm->findIntersectingChunkWithSimpleCollation(minKey);
-        ASSERT(SimpleBSONObjComparator::kInstance.evaluate(maxKey <= chunkToSplit.getMax()))
-            << "maxKey == " << maxKey << " and chunkToSplit.getMax() == " << chunkToSplit.getMax();
+        ASSERT_BSONOBJ_GTE(minKey, chunkToSplit.getMin());
+        ASSERT_BSONOBJ_LT(maxKey, chunkToSplit.getMax());
+
+        std::vector<ChunkType> splitChunks;
+
+        auto chunkVersion = cm->getVersion();
+
+        if (SimpleBSONObjComparator::kInstance.evaluate(chunkToSplit.getMin() < minKey)) {
+            chunkVersion.incMajor();
+            splitChunks.emplace_back(
+                kNss, ChunkRange(chunkToSplit.getMin(), minKey), chunkVersion, kOtherShard);
+        }
+
+        chunkVersion.incMajor();
+        splitChunks.emplace_back(kNss, ChunkRange(minKey, maxKey), chunkVersion, kThisShard);
+
+        chunkVersion.incMajor();
+        splitChunks.emplace_back(
+            kNss, ChunkRange(maxKey, chunkToSplit.getMax()), chunkVersion, kOtherShard);
+
+        auto rt = cm->getRoutingHistory()->makeUpdated(splitChunks);
+
+        return CollectionMetadata(std::make_shared<ChunkManager>(rt, boost::none), kThisShard);
+    }
+
+    static CollectionMetadata cloneMetadataMinusChunk(const ScopedCollectionMetadata& metadata,
+                                                      const ChunkRange& range) {
+        const BSONObj& minKey = range.getMin();
+        const BSONObj& maxKey = range.getMax();
+        ASSERT(rangeMapOverlaps(metadata->getChunks(), minKey, maxKey));
+
+        auto cm = metadata->getChunkManager();
+
+        const auto chunkToMoveOut = cm->findIntersectingChunkWithSimpleCollation(minKey);
+        ASSERT_BSONOBJ_EQ(minKey, chunkToMoveOut.getMin());
+        ASSERT_BSONOBJ_EQ(maxKey, chunkToMoveOut.getMax());

-        auto v1 = cm->getVersion();
-        v1.incMajor();
-        auto v2 = v1;
-        v2.incMajor();
-        auto v3 = v2;
-        v3.incMajor();
+        auto chunkVersion = cm->getVersion();
+        chunkVersion.incMajor();

         auto rt = cm->getRoutingHistory()->makeUpdated(
-            {ChunkType{kNss, ChunkRange{chunkToSplit.getMin(), minKey}, v1, kOtherShard},
-             ChunkType{kNss, ChunkRange{minKey, maxKey}, v2, kThisShard},
-             ChunkType{kNss, ChunkRange{maxKey, chunkToSplit.getMax()}, v3, kOtherShard}});
+            {ChunkType(kNss, ChunkRange(minKey, maxKey), chunkVersion, kOtherShard)});

-        return stdx::make_unique<CollectionMetadata>(
-            std::make_shared<ChunkManager>(rt, boost::none), kThisShard);
+        return CollectionMetadata(std::make_shared<ChunkManager>(rt, boost::none), kThisShard);
     }

     std::shared_ptr<MetadataManager> _manager;
 };

+TEST_F(MetadataManagerTest, InitialMetadataIsUnknown) {
+    ASSERT(!_manager->getActiveMetadata(_manager, boost::none));
+    ASSERT(!_manager->getActiveMetadata(_manager, LogicalTime(Timestamp(10))));
+
+    ASSERT_EQ(0UL, _manager->numberOfMetadataSnapshots());
+    ASSERT_EQ(0UL, _manager->numberOfRangesToClean());
+    ASSERT_EQ(0UL, _manager->numberOfRangesToCleanStillInUse());
+}
+
+TEST_F(MetadataManagerTest, MetadataAfterClearIsUnknown) {
+    _manager->setFilteringMetadata(makeEmptyMetadata());
+    ASSERT(_manager->getActiveMetadata(_manager, boost::none));
+    ASSERT(_manager->getActiveMetadata(_manager, LogicalTime(Timestamp(10))));
+
+    _manager->clearFilteringMetadata();
+    ASSERT(!_manager->getActiveMetadata(_manager, boost::none));
+    ASSERT(!_manager->getActiveMetadata(_manager, LogicalTime(Timestamp(10))));
+
+    ASSERT_EQ(0UL, _manager->numberOfMetadataSnapshots());
+    ASSERT_EQ(0UL, _manager->numberOfRangesToClean());
+    ASSERT_EQ(0UL, _manager->numberOfRangesToCleanStillInUse());
+}
+
 TEST_F(MetadataManagerTest, CleanUpForMigrateIn) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());

     // Sanity checks
-    ASSERT(_manager->getActiveMetadata(_manager, boost::none)->isSharded());
-    ASSERT_EQ(0UL, _manager->getActiveMetadata(_manager, boost::none)->getChunks().size());
+    ASSERT((*_manager->getActiveMetadata(_manager, boost::none))->isSharded());
+    ASSERT_EQ(0UL, (*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size());

     ChunkRange range1(BSON("key" << 0), BSON("key" << 10));
     ChunkRange range2(BSON("key" << 10), BSON("key" << 20));
@@ -155,12 +202,14 @@ TEST_F(MetadataManagerTest, CleanUpForMigrateIn) {
 }

 TEST_F(MetadataManagerTest, AddRangeNotificationsBlockAndYield) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());

     ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
+
     auto notifn1 = _manager->cleanUpRange(cr1, Date_t{});
     ASSERT_FALSE(notifn1.ready());
     ASSERT_EQ(_manager->numberOfRangesToClean(), 1UL);
+
     auto optNotifn = _manager->trackOrphanedDataCleanup(cr1);
     ASSERT_FALSE(notifn1.ready());
     ASSERT_FALSE(optNotifn->ready());
@@ -170,7 +219,7 @@ TEST_F(MetadataManagerTest, AddRangeNotificationsBlockAndYield) {
 }

 TEST_F(MetadataManagerTest, NotificationBlocksUntilDeletion) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());

     ChunkRange cr1(BSON("key" << 20), BSON("key" << 30));
     auto optNotif = _manager->trackOrphanedDataCleanup(cr1);
@@ -183,28 +232,28 @@ TEST_F(MetadataManagerTest, NotificationBlocksUntilDeletion) {
         auto scm1 = _manager->getActiveMetadata(_manager, boost::none);  // and increment refcount

         const auto addChunk = [this] {
-            _manager->refreshActiveMetadata(
-                cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none),
+            _manager->setFilteringMetadata(
+                cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none),
                                        {BSON("key" << 0), BSON("key" << 20)}));
         };

         addChunk();                                                      // push new metadata
         auto scm2 = _manager->getActiveMetadata(_manager, boost::none);  // and increment refcount
-        ASSERT_EQ(1ULL, scm2->getChunks().size());
+        ASSERT_EQ(1ULL, (*scm2)->getChunks().size());

         // Simulate drop and recreate
-        _manager->refreshActiveMetadata(makeEmptyMetadata());
+        _manager->setFilteringMetadata(makeEmptyMetadata());

         addChunk();                                                      // push new metadata
         auto scm3 = _manager->getActiveMetadata(_manager, boost::none);  // and increment refcount
-        ASSERT_EQ(1ULL, scm3->getChunks().size());
+        ASSERT_EQ(1ULL, (*scm3)->getChunks().size());

-        ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 3UL);
-        ASSERT_EQ(_manager->numberOfRangesToClean(), 0UL);  // not yet...
+        ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 0UL);
+        ASSERT_EQ(_manager->numberOfRangesToClean(), 0UL);

         optNotif = _manager->cleanUpRange(cr1, Date_t{});
         ASSERT(optNotif);
-        ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 3UL);
+        ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 0UL);
         ASSERT_EQ(_manager->numberOfRangesToClean(), 1UL);
     }

@@ -225,74 +274,102 @@ TEST_F(MetadataManagerTest, NotificationBlocksUntilDeletion) {
     optNotif2->abandon();
 }

+TEST_F(MetadataManagerTest, CleanupNotificationsAreSignaledOnDropAndRecreate) {
+    const ChunkRange rangeToClean(BSON("key" << 20), BSON("key" << 30));
+
+    _manager->setFilteringMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none),
+                               {BSON("key" << 0), BSON("key" << 20)}));
+
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), rangeToClean));
+    auto cursorOnMovedMetadata = _manager->getActiveMetadata(_manager, boost::none);
+
+    _manager->setFilteringMetadata(
+        cloneMetadataMinusChunk(*_manager->getActiveMetadata(_manager, boost::none), rangeToClean));
+
+    auto notif = _manager->cleanUpRange(rangeToClean, Date_t{});
+    ASSERT(!notif.ready());
+
+    auto optNotif = _manager->trackOrphanedDataCleanup(rangeToClean);
+    ASSERT(optNotif);
+    ASSERT(!optNotif->ready());
+
+    _manager->setFilteringMetadata(makeEmptyMetadata());
+    ASSERT(notif.ready());
+    ASSERT(optNotif->ready());
+}
+
 TEST_F(MetadataManagerTest, RefreshAfterSuccessfulMigrationSinglePending) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());

     ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));

-    _manager->refreshActiveMetadata(
-        cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none), cr1));
-    ASSERT_EQ(_manager->getActiveMetadata(_manager, boost::none)->getChunks().size(), 1UL);
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), cr1));
+    ASSERT_EQ((*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 1UL);
 }

 TEST_F(MetadataManagerTest, RefreshAfterSuccessfulMigrationMultiplePending) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());

     ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
     ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));

     {
-        _manager->refreshActiveMetadata(
-            cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none), cr1));
+        _manager->setFilteringMetadata(
+            cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), cr1));
         ASSERT_EQ(_manager->numberOfRangesToClean(), 0UL);
-        ASSERT_EQ(_manager->getActiveMetadata(_manager, boost::none)->getChunks().size(), 1UL);
+        ASSERT_EQ((*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 1UL);
     }

     {
-        _manager->refreshActiveMetadata(
-            cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none), cr2));
-        ASSERT_EQ(_manager->getActiveMetadata(_manager, boost::none)->getChunks().size(), 2UL);
+        _manager->setFilteringMetadata(
+            cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), cr2));
+        ASSERT_EQ((*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 2UL);
     }
 }

 TEST_F(MetadataManagerTest, RefreshAfterNotYetCompletedMigrationMultiplePending) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());

     ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
     ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));

-    _manager->refreshActiveMetadata(
-        cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none),
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none),
                                {BSON("key" << 50), BSON("key" << 60)}));
-    ASSERT_EQ(_manager->getActiveMetadata(_manager, boost::none)->getChunks().size(), 1UL);
+    ASSERT_EQ((*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 1UL);
 }

 TEST_F(MetadataManagerTest, BeginReceiveWithOverlappingRange) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());

     ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
     ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));

-    _manager->refreshActiveMetadata(
-        cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none), cr1));
-    _manager->refreshActiveMetadata(
-        cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none), cr2));
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), cr1));
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), cr2));

     ChunkRange crOverlap(BSON("key" << 5), BSON("key" << 35));
 }

 TEST_F(MetadataManagerTest, RefreshMetadataAfterDropAndRecreate) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
-    _manager->refreshActiveMetadata(cloneMetadataPlusChunk(
-        _manager->getActiveMetadata(_manager, boost::none), {BSON("key" << 0), BSON("key" << 10)}));
+    _manager->setFilteringMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none),
+                               {BSON("key" << 0), BSON("key" << 10)}));

     // Now, pretend that the collection was dropped and recreated
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
-    _manager->refreshActiveMetadata(
-        cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none),
+    _manager->setFilteringMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(
+        cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none),
                                {BSON("key" << 20), BSON("key" << 30)}));

-    const auto chunks = _manager->getActiveMetadata(_manager, boost::none)->getChunks();
+    const auto chunks = (*_manager->getActiveMetadata(_manager, boost::none))->getChunks();
     ASSERT_EQ(1UL, chunks.size());
     const auto chunkEntry = chunks.begin();
     ASSERT_BSONOBJ_EQ(BSON("key" << 20), chunkEntry->first);
@@ -301,7 +378,7 @@ TEST_F(MetadataManagerTest, RefreshMetadataAfterDropAndRecreate) {

 // Tests membership functions for _rangesToClean
 TEST_F(MetadataManagerTest, RangesToCleanMembership) {
-    _manager->refreshActiveMetadata(makeEmptyMetadata());
+    _manager->setFilteringMetadata(makeEmptyMetadata());

     ChunkRange cr(BSON("key" << 0), BSON("key" << 10));

diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp
index 83848a26cd0..deb72b68e06 100644
--- a/src/mongo/db/s/migration_source_manager.cpp
+++ b/src/mongo/db/s/migration_source_manager.cpp
@@ -471,7 +471,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig(OperationContext* opC
         UninterruptibleLockGuard noInterrupt(opCtx->lockState());
         AutoGetCollection autoColl(opCtx, getNss(), MODE_IX, MODE_X);
         if (!repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesFor(opCtx, getNss())) {
-            CollectionShardingRuntime::get(opCtx, getNss())->refreshMetadata(opCtx, nullptr);
+            CollectionShardingRuntime::get(opCtx, getNss())->clearFilteringMetadata();

             uassertStatusOK(status.withContext(
                 str::stream() << "Unable to verify migration commit for chunk: "
                               << redact(_args.toString())
@@ -507,7 +507,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig(OperationContext* opC
             UninterruptibleLockGuard noInterrupt(opCtx->lockState());
             AutoGetCollection autoColl(opCtx, getNss(), MODE_IX, MODE_X);
-            CollectionShardingRuntime::get(opCtx, getNss())->refreshMetadata(opCtx, nullptr);
+            CollectionShardingRuntime::get(opCtx, getNss())->clearFilteringMetadata();

             log() << "Failed to refresh metadata after a "
                   << (migrationCommitStatus.isOK() ? "failed commit attempt" : "successful commit")
diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
index 2eaeb841bdc..3475891e19d 100644
--- a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
+++ b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
@@ -146,7 +146,7 @@ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx,
         AutoGetCollection autoColl(opCtx, nss, MODE_IX, MODE_X);
         auto* const css = CollectionShardingRuntime::get(opCtx, nss);

-        css->refreshMetadata(opCtx, nullptr);
+        css->setFilteringMetadata(opCtx, CollectionMetadata());

         return ChunkVersion::UNSHARDED();
     }
@@ -180,10 +180,7 @@ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx,
             return metadata->getShardVersion();
         }

-    std::unique_ptr<CollectionMetadata> newCollectionMetadata =
-        stdx::make_unique<CollectionMetadata>(cm, shardingState->shardId());
-
-    css->refreshMetadata(opCtx, std::move(newCollectionMetadata));
+    css->setFilteringMetadata(opCtx, CollectionMetadata(cm, shardingState->shardId()));

     return css->getMetadata(opCtx)->getShardVersion();
 }
diff --git a/src/mongo/db/s/shard_server_op_observer.cpp b/src/mongo/db/s/shard_server_op_observer.cpp
index 7294b6e2e42..f199098a677 100644
--- a/src/mongo/db/s/shard_server_op_observer.cpp
+++ b/src/mongo/db/s/shard_server_op_observer.cpp
@@ -75,10 +75,9 @@ public:
         CatalogCacheLoader::get(_opCtx).notifyOfCollectionVersionUpdate(_nss);

-        // This is a hack to get around CollectionShardingState::refreshMetadata() requiring the X
-        // lock: markNotShardedAtStepdown() doesn't have a lock check. Temporary measure until
-        // SERVER-31595 removes the X lock requirement.
-        CollectionShardingRuntime::get(_opCtx, _nss)->markNotShardedAtStepdown();
+        // Force subsequent uses of the namespace to refresh the filtering metadata so they can
+        // synchronize with any work happening on the primary (e.g., migration critical section).
+        CollectionShardingRuntime::get(_opCtx, _nss)->clearFilteringMetadata();
     }

     void rollback() override {}
@@ -283,10 +282,10 @@ void ShardServerOpObserver::onUpdate(OperationContext* opCtx, const OplogUpdateE
             }

             if (setField.hasField(ShardCollectionType::enterCriticalSectionCounter.name())) {
-                // This is a hack to get around CollectionShardingState::refreshMetadata() requiring
-                // the X lock: markNotShardedAtStepdown() doesn't have a lock check. Temporary
-                // measure until SERVER-31595 removes the X lock requirement.
-                CollectionShardingRuntime::get(opCtx, updatedNss)->markNotShardedAtStepdown();
+                // Force subsequent uses of the namespace to refresh the filtering metadata so they
+                // can synchronize with any work happening on the primary (e.g., migration critical
+                // section).
+                CollectionShardingRuntime::get(opCtx, updatedNss)->clearFilteringMetadata();
             }
         }
     }
diff --git a/src/mongo/s/catalog/type_chunk.cpp b/src/mongo/s/catalog/type_chunk.cpp
index c9e6dd9a8f4..e60a916276b 100644
--- a/src/mongo/s/catalog/type_chunk.cpp
+++ b/src/mongo/s/catalog/type_chunk.cpp
@@ -83,7 +83,9 @@ Status extractObject(const BSONObj& obj, const std::string& fieldName, BSONEleme

 ChunkRange::ChunkRange(BSONObj minKey, BSONObj maxKey)
     : _minKey(std::move(minKey)), _maxKey(std::move(maxKey)) {
-    dassert(SimpleBSONObjComparator::kInstance.evaluate(_minKey < _maxKey));
+    dassert(SimpleBSONObjComparator::kInstance.evaluate(_minKey < _maxKey),
+            str::stream() << "Illegal chunk range: " << _minKey.toString() << ", "
+                          << _maxKey.toString());
 }

 StatusWith<ChunkRange> ChunkRange::fromBSON(const BSONObj& obj) {
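
One behavioral consequence visible in collection_sharding_runtime.cpp above: clearing the filtering metadata interrupts in-progress orphan-range cleanup, and waiters now observe this as ConflictingOperationInProgress instead of blocking indefinitely. A condensed sketch of the waiting side, assuming the patched waitForClean() signature (the surrounding function and its handling policy are illustrative, not part of the commit):

// Illustrative waiter; not part of the commit.
Status exampleAwaitOrphanCleanup(OperationContext* opCtx,
                                 const NamespaceString& nss,
                                 const OID& epoch,
                                 ChunkRange range) {
    auto status = CollectionShardingRuntime::waitForClean(opCtx, nss, epoch, std::move(range));
    if (status == ErrorCodes::ConflictingOperationInProgress) {
        // The filtering metadata was reset, or the collection was dropped and recreated,
        // while waiting. Orphaned documents may remain on disk; cleanup has to be
        // re-scheduled after the next metadata refresh.
    }
    return status;
}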