author     Kaloian Manassiev <kaloian.manassiev@mongodb.com>   2020-02-25 16:04:41 -0500
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>    2020-02-26 08:00:01 +0000
commit     96ced970aef7c5dbf81b660009bb80f33d8026e8 (patch)
tree       f598b0172107a76a72a8f36c6a66ac28d1346f39 /src/mongo/db
parent     615e3c3946c57956069689eaadf1a77058b881f5 (diff)
download   mongo-96ced970aef7c5dbf81b660009bb80f33d8026e8.tar.gz
SERVER-45599 Backport of SERVER-32198: Make MetadataManager support an 'UNKNOWN' filtering metadata state
This is a partial cherry-pick of the changes made under 20117b8ee9678794be675eb4d728bfcc8f9d75f4 to make the 4.0 logic match that of 4.2.
Diffstat (limited to 'src/mongo/db')
-rw-r--r--  src/mongo/db/s/collection_metadata_filtering_test.cpp          221
-rw-r--r--  src/mongo/db/s/collection_range_deleter.cpp                      24
-rw-r--r--  src/mongo/db/s/collection_range_deleter.h                         1
-rw-r--r--  src/mongo/db/s/collection_range_deleter_test.cpp                  7
-rw-r--r--  src/mongo/db/s/collection_sharding_runtime.cpp                   20
-rw-r--r--  src/mongo/db/s/collection_sharding_runtime.h                     26
-rw-r--r--  src/mongo/db/s/collection_sharding_state.cpp                     21
-rw-r--r--  src/mongo/db/s/collection_sharding_state.h                        9
-rw-r--r--  src/mongo/db/s/collection_sharding_state_factory_embedded.cpp   19
-rw-r--r--  src/mongo/db/s/collection_sharding_state_test.cpp               14
-rw-r--r--  src/mongo/db/s/metadata_manager.cpp                              89
-rw-r--r--  src/mongo/db/s/metadata_manager.h                                31
-rw-r--r--  src/mongo/db/s/metadata_manager_test.cpp                        229
-rw-r--r--  src/mongo/db/s/migration_source_manager.cpp                       4
-rw-r--r--  src/mongo/db/s/shard_filtering_metadata_refresh.cpp               7
-rw-r--r--  src/mongo/db/s/shard_server_op_observer.cpp                      15
16 files changed, 409 insertions, 328 deletions
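
At a high level, this backport replaces the implicit "no metadata means unsharded" default with an explicit UNKNOWN state: MetadataManager::getActiveMetadata() now returns boost::none until setFilteringMetadata() has installed metadata, and clearFilteringMetadata() puts the collection back into that state. The standalone sketch below models only that state machine; it uses std::optional and a simplified Metadata struct as stand-ins for boost::optional, CollectionMetadata and ScopedCollectionMetadata, so apart from the three method names everything in it is illustrative rather than the real API.

// Minimal C++17 sketch (not the MongoDB classes themselves) of the state machine this
// backport introduces: filtering metadata is UNKNOWN until setFilteringMetadata() is
// called, and clearFilteringMetadata() returns it to UNKNOWN. std::optional stands in for
// boost::optional; all other names are invented for the sketch.
#include <cassert>
#include <iostream>
#include <optional>
#include <string>

struct Metadata {
    bool sharded = false;                  // default-constructed means "known to be UNSHARDED"
    std::string description = "UNSHARDED";
};

class MetadataManagerModel {
public:
    // Returns none while the metadata is UNKNOWN (never refreshed, or cleared at stepdown).
    std::optional<Metadata> getActiveMetadata() const {
        return _metadata;
    }

    void setFilteringMetadata(Metadata newMetadata) {
        _metadata = std::move(newMetadata);
    }

    // Moves the collection back to the UNKNOWN state so the next access forces a refresh.
    void clearFilteringMetadata() {
        _metadata.reset();
    }

private:
    std::optional<Metadata> _metadata;  // empty == UNKNOWN
};

int main() {
    MetadataManagerModel manager;

    // UNKNOWN: in the real code, CollectionShardingState::getMetadata() falls back to an
    // "unsharded" placeholder here, while the range deleter abandons scheduled cleanups.
    assert(!manager.getActiveMetadata());

    manager.setFilteringMetadata(Metadata{true, "sharded, epoch A"});
    assert(manager.getActiveMetadata() && manager.getActiveMetadata()->sharded);

    manager.clearFilteringMetadata();  // e.g. at stepdown or on migration-commit failure
    assert(!manager.getActiveMetadata());

    std::cout << "UNKNOWN -> sharded -> UNKNOWN transitions behave as expected\n";
    return 0;
}
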
diff --git a/src/mongo/db/s/collection_metadata_filtering_test.cpp b/src/mongo/db/s/collection_metadata_filtering_test.cpp
index b9344b364f8..7f777dfa179 100644
--- a/src/mongo/db/s/collection_metadata_filtering_test.cpp
+++ b/src/mongo/db/s/collection_metadata_filtering_test.cpp
@@ -47,15 +47,19 @@ protected:
_manager = std::make_shared<MetadataManager>(getServiceContext(), kNss, executor());
}
- // Prepares data with a history array populated:
- // chunk1 - [min, -100)
- // chunk2 - [100, 0)
- // chunk3 - [0, 100)
- // chunk4 - [100, max)
- // and the history:
- // time (now,75) shard0(chunk1, chunk3) shard1(chunk2, chunk4)
- // time (75,25) shard0(chunk2, chunk4) shard1(chunk1, chunk3)
- // time (25,0) - no history
+ /**
+ * Prepares the CSS for 'kNss' and the standalone '_manager' to have their metadata be a history
+ * array populated as follows:
+ * chunk1 - [min, -100)
+ * chunk2 - [-100, 0)
+ * chunk3 - [0, 100)
+ * chunk4 - [100, max)
+ *
+ * and the history:
+ * time (now,75) shard0(chunk1, chunk3) shard1(chunk2, chunk4)
+ * time (75,25) shard0(chunk2, chunk4) shard1(chunk1, chunk3)
+ * time (25,0) - no history
+ */
void prepareTestData() {
const OID epoch = OID::gen();
const ShardKeyPattern shardKeyPattern(BSON("_id" << 1));
@@ -93,140 +97,121 @@ protected:
return std::vector<ChunkType>{chunk1, chunk2, chunk3, chunk4};
}());
- auto cm = std::make_shared<ChunkManager>(rt, Timestamp(100, 0));
+ auto cm = std::make_shared<ChunkManager>(rt, boost::none);
ASSERT_EQ(4, cm->numChunks());
+
{
AutoGetCollection autoColl(operationContext(), kNss, MODE_X);
auto* const css = CollectionShardingRuntime::get(operationContext(), kNss);
-
- css->refreshMetadata(operationContext(),
- std::make_unique<CollectionMetadata>(cm, ShardId("0")));
+ css->setFilteringMetadata(operationContext(), CollectionMetadata(cm, ShardId("0")));
}
- _manager->refreshActiveMetadata(std::make_unique<CollectionMetadata>(cm, ShardId("0")));
+ _manager->setFilteringMetadata(CollectionMetadata(cm, ShardId("0")));
}
std::shared_ptr<MetadataManager> _manager;
};
-// Verifies that right set of documents is visible.
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsPresent) {
- prepareTestData();
-
- auto metadata = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(100, 0)));
-
- ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -500)));
- ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 50)));
- ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -50)));
- ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 500)));
-}
-
-// Verifies that a different set of documents is visible for a timestamp in the past.
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsPast) {
+// Verifies that the right set of documents is visible
+TEST_F(CollectionMetadataFilteringTest, FilterDocumentsInTheFuture) {
prepareTestData();
- auto metadata = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(50, 0)));
-
- ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -500)));
- ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 50)));
- ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -50)));
- ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 500)));
-}
-
-// Verifies that when accessing too far into the past we get the stale error.
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsStale) {
- prepareTestData();
+ const auto testFn = [](const ScopedCollectionMetadata& metadata) {
+ ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -500)));
+ ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 50)));
+ ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -50)));
+ ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 500)));
+ };
+
+ {
+ BSONObj readConcern = BSON("readConcern" << BSON("level"
+ << "snapshot"
+ << "atClusterTime"
+ << Timestamp(100, 0)));
+
+ auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
+ ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
+
+ AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
+ auto* const css = CollectionShardingState::get(operationContext(), kNss);
+ testFn(css->getMetadata(operationContext()));
+ }
- auto metadata = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(10, 0)));
-
- ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -500)),
- AssertionException,
- ErrorCodes::StaleChunkHistory);
- ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 50)),
- AssertionException,
- ErrorCodes::StaleChunkHistory);
- ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -50)),
- AssertionException,
- ErrorCodes::StaleChunkHistory);
- ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 500)),
- AssertionException,
- ErrorCodes::StaleChunkHistory);
+ {
+ const auto scm = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(100, 0)));
+ testFn(*scm);
+ }
}
-// The same test as FilterDocumentsPresent but using "readConcern"
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsPresentShardingState) {
+// Verifies that a different set of documents is visible for a timestamp in the past
+TEST_F(CollectionMetadataFilteringTest, FilterDocumentsInThePast) {
prepareTestData();
- BSONObj readConcern = BSON("readConcern" << BSON("level"
- << "snapshot"
- << "atClusterTime"
- << Timestamp(100, 0)));
-
- auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
- ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
-
- AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
- auto const css = CollectionShardingState::get(operationContext(), kNss);
- auto metadata = css->getMetadata(operationContext());
+ const auto testFn = [](const ScopedCollectionMetadata& metadata) {
+ ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -500)));
+ ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 50)));
+ ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -50)));
+ ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 500)));
+ };
+
+ {
+ BSONObj readConcern = BSON("readConcern" << BSON("level"
+ << "snapshot"
+ << "atClusterTime"
+ << Timestamp(50, 0)));
+
+ auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
+ ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
+
+ AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
+ auto* const css = CollectionShardingState::get(operationContext(), kNss);
+ testFn(css->getMetadata(operationContext()));
+ }
- ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -500)));
- ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 50)));
- ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -50)));
- ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 500)));
+ {
+ const auto scm = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(50, 0)));
+ testFn(*scm);
+ }
}
-// The same test as FilterDocumentsPast but using "readConcern"
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsPastShardingState) {
+// Verifies that when accessing too far into the past we get the stale error
+TEST_F(CollectionMetadataFilteringTest, FilterDocumentsTooFarInThePastThrowsStaleChunkHistory) {
prepareTestData();
- BSONObj readConcern = BSON("readConcern" << BSON("level"
- << "snapshot"
- << "atClusterTime"
- << Timestamp(50, 0)));
-
- auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
- ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
-
- AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
- auto const css = CollectionShardingState::get(operationContext(), kNss);
- auto metadata = css->getMetadata(operationContext());
-
- ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << -500)));
- ASSERT_FALSE(metadata->keyBelongsToMe(BSON("_id" << 50)));
- ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << -50)));
- ASSERT_TRUE(metadata->keyBelongsToMe(BSON("_id" << 500)));
-}
-
-// The same test as FilterDocumentsStale but using "readConcern"
-TEST_F(CollectionMetadataFilteringTest, FilterDocumentsStaleShardingState) {
- prepareTestData();
+ const auto testFn = [](const ScopedCollectionMetadata& metadata) {
+ ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -500)),
+ AssertionException,
+ ErrorCodes::StaleChunkHistory);
+ ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 50)),
+ AssertionException,
+ ErrorCodes::StaleChunkHistory);
+ ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -50)),
+ AssertionException,
+ ErrorCodes::StaleChunkHistory);
+ ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 500)),
+ AssertionException,
+ ErrorCodes::StaleChunkHistory);
+ };
+
+ {
+ BSONObj readConcern = BSON("readConcern" << BSON("level"
+ << "snapshot"
+ << "atClusterTime"
+ << Timestamp(10, 0)));
+
+ auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
+ ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
+
+ AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
+ auto* const css = CollectionShardingState::get(operationContext(), kNss);
+ testFn(css->getMetadata(operationContext()));
+ }
- BSONObj readConcern = BSON("readConcern" << BSON("level"
- << "snapshot"
- << "atClusterTime"
- << Timestamp(10, 0)));
-
- auto&& readConcernArgs = repl::ReadConcernArgs::get(operationContext());
- ASSERT_OK(readConcernArgs.initialize(readConcern["readConcern"]));
-
- AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
- auto const css = CollectionShardingState::get(operationContext(), kNss);
- auto metadata = css->getMetadata(operationContext());
-
- ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -500)),
- AssertionException,
- ErrorCodes::StaleChunkHistory);
- ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 50)),
- AssertionException,
- ErrorCodes::StaleChunkHistory);
- ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << -50)),
- AssertionException,
- ErrorCodes::StaleChunkHistory);
- ASSERT_THROWS_CODE(metadata->keyBelongsToMe(BSON("_id" << 500)),
- AssertionException,
- ErrorCodes::StaleChunkHistory);
+ {
+ const auto scm = _manager->getActiveMetadata(_manager, LogicalTime(Timestamp(10, 0)));
+ testFn(*scm);
+ }
}
-
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/s/collection_range_deleter.cpp b/src/mongo/db/s/collection_range_deleter.cpp
index 3832d495e88..adfae399073 100644
--- a/src/mongo/db/s/collection_range_deleter.cpp
+++ b/src/mongo/db/s/collection_range_deleter.cpp
@@ -136,7 +136,7 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
auto& metadataManager = csr->_metadataManager;
if (!_checkCollectionMetadataStillValid(
- opCtx, nss, epoch, forTestOnly, collection, metadataManager)) {
+ nss, epoch, forTestOnly, collection, metadataManager)) {
return boost::none;
}
@@ -209,10 +209,11 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
const auto scopedCollectionMetadata =
metadataManager->getActiveMetadata(metadataManager, boost::none);
+ const auto& metadata = *scopedCollectionMetadata;
try {
wrote = self->_doDeletion(
- opCtx, collection, scopedCollectionMetadata->getKeyPattern(), *range, maxToDelete);
+ opCtx, collection, metadata->getKeyPattern(), *range, maxToDelete);
} catch (const DBException& e) {
wrote = e.toStatus();
warning() << e.what();
@@ -255,7 +256,7 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
auto& metadataManager = csr->_metadataManager;
if (!_checkCollectionMetadataStillValid(
- opCtx, nss, epoch, forTestOnly, collection, metadataManager)) {
+ nss, epoch, forTestOnly, collection, metadataManager)) {
return boost::none;
}
@@ -300,7 +301,6 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
}
bool CollectionRangeDeleter::_checkCollectionMetadataStillValid(
- OperationContext* opCtx,
const NamespaceString& nss,
OID const& epoch,
CollectionRangeDeleter* forTestOnly,
@@ -310,7 +310,17 @@ bool CollectionRangeDeleter::_checkCollectionMetadataStillValid(
const auto scopedCollectionMetadata =
metadataManager->getActiveMetadata(metadataManager, boost::none);
- if (!forTestOnly && (!collection || !scopedCollectionMetadata->isSharded())) {
+ if (!scopedCollectionMetadata) {
+ LOG(0) << "Abandoning any range deletions because the metadata for " << nss.ns()
+ << " was reset";
+ stdx::lock_guard<stdx::mutex> lk(metadataManager->_managerLock);
+ metadataManager->_clearAllCleanups(lk);
+ return false;
+ }
+
+ const auto& metadata = *scopedCollectionMetadata;
+
+ if (!forTestOnly && (!collection || !metadata->isSharded())) {
if (!collection) {
LOG(0) << "Abandoning any range deletions left over from dropped " << nss.ns();
} else {
@@ -323,9 +333,9 @@ bool CollectionRangeDeleter::_checkCollectionMetadataStillValid(
return false;
}
- if (!forTestOnly && scopedCollectionMetadata->getCollVersion().epoch() != epoch) {
+ if (!forTestOnly && metadata->getCollVersion().epoch() != epoch) {
LOG(1) << "Range deletion task for " << nss.ns() << " epoch " << epoch << " woke;"
- << " (current is " << scopedCollectionMetadata->getCollVersion() << ")";
+ << " (current is " << metadata->getCollVersion() << ")";
return false;
}
diff --git a/src/mongo/db/s/collection_range_deleter.h b/src/mongo/db/s/collection_range_deleter.h
index f6b9f161a47..7e6cd26f93a 100644
--- a/src/mongo/db/s/collection_range_deleter.h
+++ b/src/mongo/db/s/collection_range_deleter.h
@@ -190,7 +190,6 @@ private:
* the collection has not been dropped (or dropped then recreated).
*/
static bool _checkCollectionMetadataStillValid(
- OperationContext* opCtx,
const NamespaceString& nss,
OID const& epoch,
CollectionRangeDeleter* forTestOnly,
diff --git a/src/mongo/db/s/collection_range_deleter_test.cpp b/src/mongo/db/s/collection_range_deleter_test.cpp
index 3d0d430dd30..a1967d821ec 100644
--- a/src/mongo/db/s/collection_range_deleter_test.cpp
+++ b/src/mongo/db/s/collection_range_deleter_test.cpp
@@ -87,17 +87,14 @@ protected:
AutoGetCollection autoColl(operationContext(), kNss, MODE_IX);
auto* const css = CollectionShardingRuntime::get(operationContext(), kNss);
-
- css->refreshMetadata(operationContext(),
- stdx::make_unique<CollectionMetadata>(cm, ShardId("thisShard")));
+ css->setFilteringMetadata(operationContext(), CollectionMetadata(cm, ShardId("thisShard")));
}
void tearDown() override {
{
AutoGetCollection autoColl(operationContext(), kNss, MODE_IX);
auto* const css = CollectionShardingRuntime::get(operationContext(), kNss);
-
- css->refreshMetadata(operationContext(), nullptr);
+ css->clearFilteringMetadata();
}
ShardServerTestFixture::tearDown();
diff --git a/src/mongo/db/s/collection_sharding_runtime.cpp b/src/mongo/db/s/collection_sharding_runtime.cpp
index 80a006c61bb..f6f92850408 100644
--- a/src/mongo/db/s/collection_sharding_runtime.cpp
+++ b/src/mongo/db/s/collection_sharding_runtime.cpp
@@ -60,15 +60,15 @@ CollectionShardingRuntime* CollectionShardingRuntime::get(OperationContext* opCt
return checked_cast<CollectionShardingRuntime*>(css);
}
-void CollectionShardingRuntime::refreshMetadata(OperationContext* opCtx,
- std::unique_ptr<CollectionMetadata> newMetadata) {
+void CollectionShardingRuntime::setFilteringMetadata(OperationContext* opCtx,
+ CollectionMetadata newMetadata) {
invariant(opCtx->lockState()->isCollectionLockedForMode(_nss.ns(), MODE_X));
- _metadataManager->refreshActiveMetadata(std::move(newMetadata));
+ _metadataManager->setFilteringMetadata(std::move(newMetadata));
}
-void CollectionShardingRuntime::markNotShardedAtStepdown() {
- _metadataManager->refreshActiveMetadata(nullptr);
+void CollectionShardingRuntime::clearFilteringMetadata() {
+ _metadataManager->clearFilteringMetadata();
}
auto CollectionShardingRuntime::beginReceive(ChunkRange const& range) -> CleanupNotification {
@@ -100,9 +100,13 @@ Status CollectionShardingRuntime::waitForClean(OperationContext* opCtx,
{
// First, see if collection was dropped, but do it in a separate scope in order to
// not hold reference on it, which would make it appear in use
- auto metadata =
+ const auto optMetadata =
self->_metadataManager->getActiveMetadata(self->_metadataManager, boost::none);
+ if (!optMetadata)
+ return {ErrorCodes::ConflictingOperationInProgress,
+ "Collection being migrated had its metadata reset"};
+ const auto& metadata = *optMetadata;
if (!metadata->isSharded() || metadata->getCollVersion().epoch() != epoch) {
return {ErrorCodes::ConflictingOperationInProgress,
"Collection being migrated was dropped"};
@@ -139,8 +143,8 @@ boost::optional<ChunkRange> CollectionShardingRuntime::getNextOrphanRange(BSONOb
return _metadataManager->getNextOrphanRange(from);
}
-ScopedCollectionMetadata CollectionShardingRuntime::_getMetadata(OperationContext* opCtx) {
- auto atClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAtClusterTime();
+boost::optional<ScopedCollectionMetadata> CollectionShardingRuntime::_getMetadata(
+ const boost::optional<mongo::LogicalTime>& atClusterTime) {
return _metadataManager->getActiveMetadata(_metadataManager, atClusterTime);
}
diff --git a/src/mongo/db/s/collection_sharding_runtime.h b/src/mongo/db/s/collection_sharding_runtime.h
index 427f5dbd8ba..fe678268ca6 100644
--- a/src/mongo/db/s/collection_sharding_runtime.h
+++ b/src/mongo/db/s/collection_sharding_runtime.h
@@ -61,19 +61,25 @@ public:
static CollectionShardingRuntime* get(OperationContext* opCtx, const NamespaceString& nss);
/**
- * Updates the metadata based on changes received from the config server and also resolves the
- * pending receives map in case some of these pending receives have completed or have been
- * abandoned. If newMetadata is null, unshard the collection.
+ * Updates the collection's filtering metadata based on changes received from the config server
+ * and also resolves the pending receives map in case some of these pending receives have
+ * committed on the config server or have been abandoned by the donor shard.
*
- * Must always be called with an exclusive collection lock.
+ * This method must be called with an exclusive collection lock and it does not acquire any
+ * locks itself.
*/
- void refreshMetadata(OperationContext* opCtx, std::unique_ptr<CollectionMetadata> newMetadata);
+ void setFilteringMetadata(OperationContext* opCtx, CollectionMetadata newMetadata);
/**
- * Marks the collection as not sharded at stepdown time so that no filtering will occur for
- * slaveOk queries.
+ * Marks the collection's filtering metadata as UNKNOWN, meaning that all attempts to check for
+ * shard version match will fail with StaleConfig errors in order to trigger an update.
+ *
+ * It is safe to call this method with only an intent lock on the collection (as opposed to
+ * setFilteringMetadata which requires exclusive), however note that clearing a collection's
+ * filtering metadata will interrupt all in-progress orphan cleanups in which case orphaned data
+ * will remain behind on disk.
*/
- void markNotShardedAtStepdown();
+ void clearFilteringMetadata();
/**
* Schedules any documents in `range` for immediate cleanup iff no running queries can depend
@@ -135,7 +141,6 @@ public:
_metadataManager->toBSONPending(bb);
}
-
private:
friend boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
OperationContext*, NamespaceString const&, OID const&, int, CollectionRangeDeleter*);
@@ -146,7 +151,8 @@ private:
// Contains all the metadata associated with this collection.
std::shared_ptr<MetadataManager> _metadataManager;
- ScopedCollectionMetadata _getMetadata(OperationContext* opCtx) override;
+ boost::optional<ScopedCollectionMetadata> _getMetadata(
+ const boost::optional<mongo::LogicalTime>& atClusterTime) override;
};
/**
diff --git a/src/mongo/db/s/collection_sharding_state.cpp b/src/mongo/db/s/collection_sharding_state.cpp
index 7ba81f81709..45645383969 100644
--- a/src/mongo/db/s/collection_sharding_state.cpp
+++ b/src/mongo/db/s/collection_sharding_state.cpp
@@ -95,6 +95,20 @@ const ServiceContext::Decoration<boost::optional<CollectionShardingStateMap>>
CollectionShardingStateMap::get =
ServiceContext::declareDecoration<boost::optional<CollectionShardingStateMap>>();
+class UnshardedCollection : public ScopedCollectionMetadata::Impl {
+public:
+ UnshardedCollection() = default;
+
+ const CollectionMetadata& get() override {
+ return _metadata;
+ }
+
+private:
+ CollectionMetadata _metadata;
+};
+
+const auto kUnshardedCollection = std::make_shared<UnshardedCollection>();
+
} // namespace
CollectionShardingState::CollectionShardingState(NamespaceString nss) : _nss(std::move(nss)) {}
@@ -114,7 +128,12 @@ void CollectionShardingState::report(OperationContext* opCtx, BSONObjBuilder* bu
}
ScopedCollectionMetadata CollectionShardingState::getMetadata(OperationContext* opCtx) {
- return _getMetadata(opCtx);
+ const auto atClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAtClusterTime();
+ auto optMetadata = _getMetadata(atClusterTime);
+ if (!optMetadata)
+ return {kUnshardedCollection};
+
+ return std::move(*optMetadata);
}
void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) {
diff --git a/src/mongo/db/s/collection_sharding_state.h b/src/mongo/db/s/collection_sharding_state.h
index 74f856924ec..77e11534f73 100644
--- a/src/mongo/db/s/collection_sharding_state.h
+++ b/src/mongo/db/s/collection_sharding_state.h
@@ -31,6 +31,7 @@
#pragma once
#include "mongo/base/disallow_copying.h"
+#include "mongo/db/logical_time.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/s/scoped_collection_metadata.h"
#include "mongo/db/s/sharding_migration_critical_section.h"
@@ -114,8 +115,12 @@ private:
// Tracks the migration critical section state for this collection.
ShardingMigrationCriticalSection _critSec;
- // Obtains the current metadata for the collection
- virtual ScopedCollectionMetadata _getMetadata(OperationContext* opCtx) = 0;
+ /**
+ * Obtains the current metadata for the collection or boost::none if the metadata is not yet
+ * known
+ */
+ virtual boost::optional<ScopedCollectionMetadata> _getMetadata(
+ const boost::optional<mongo::LogicalTime>& atClusterTime) = 0;
};
/**
diff --git a/src/mongo/db/s/collection_sharding_state_factory_embedded.cpp b/src/mongo/db/s/collection_sharding_state_factory_embedded.cpp
index 12b0ad69f5a..4984ef8a9af 100644
--- a/src/mongo/db/s/collection_sharding_state_factory_embedded.cpp
+++ b/src/mongo/db/s/collection_sharding_state_factory_embedded.cpp
@@ -38,20 +38,6 @@
namespace mongo {
namespace {
-class UnshardedCollection : public ScopedCollectionMetadata::Impl {
-public:
- UnshardedCollection() = default;
-
- const CollectionMetadata& get() override {
- return _metadata;
- }
-
-private:
- CollectionMetadata _metadata;
-};
-
-const auto kUnshardedCollection = std::make_shared<UnshardedCollection>();
-
class CollectionShardingStateFactoryEmbedded final : public CollectionShardingStateFactory {
public:
CollectionShardingStateFactoryEmbedded(ServiceContext* serviceContext)
@@ -63,8 +49,9 @@ public:
CollectionShardingStateStandalone(NamespaceString nss) : CollectionShardingState(nss) {}
private:
- ScopedCollectionMetadata _getMetadata(OperationContext* opCtx) override {
- return {kUnshardedCollection};
+ boost::optional<ScopedCollectionMetadata> _getMetadata(
+ const boost::optional<mongo::LogicalTime>& atClusterTime) override {
+ return boost::none;
}
};
diff --git a/src/mongo/db/s/collection_sharding_state_test.cpp b/src/mongo/db/s/collection_sharding_state_test.cpp
index 2c1386eaae0..f5c4a3aad43 100644
--- a/src/mongo/db/s/collection_sharding_state_test.cpp
+++ b/src/mongo/db/s/collection_sharding_state_test.cpp
@@ -47,7 +47,7 @@ const NamespaceString kTestNss("TestDB", "TestColl");
* that DeleteState's constructor will extract from its `doc` argument into its member
* DeleteState::documentKey.
*/
-std::unique_ptr<CollectionMetadata> makeAMetadata(BSONObj const& keyPattern) {
+CollectionMetadata makeAMetadata(BSONObj const& keyPattern) {
const OID epoch = OID::gen();
auto range = ChunkRange(BSON("key" << MINKEY), BSON("key" << MAXKEY));
auto chunk = ChunkType(kTestNss, std::move(range), ChunkVersion(1, 0, epoch), ShardId("other"));
@@ -55,13 +55,15 @@ std::unique_ptr<CollectionMetadata> makeAMetadata(BSONObj const& keyPattern) {
kTestNss, UUID::gen(), KeyPattern(keyPattern), nullptr, false, epoch, {std::move(chunk)});
std::shared_ptr<ChunkManager> cm = std::make_shared<ChunkManager>(rt, Timestamp(100, 0));
- return stdx::make_unique<CollectionMetadata>(std::move(cm), ShardId("this"));
+ return CollectionMetadata(std::move(cm), ShardId("this"));
}
using DeleteStateTest = ShardServerTestFixture;
TEST_F(DeleteStateTest, MakeDeleteStateUnsharded) {
AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX);
+ auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss);
+ css->setFilteringMetadata(operationContext(), CollectionMetadata());
auto doc = BSON("key3"
<< "abc"
@@ -85,7 +87,7 @@ TEST_F(DeleteStateTest, MakeDeleteStateShardedWithoutIdInShardKey) {
auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss);
// Push a CollectionMetadata with a shard key not including "_id"...
- css->refreshMetadata(operationContext(), makeAMetadata(BSON("key" << 1 << "key3" << 1)));
+ css->setFilteringMetadata(operationContext(), makeAMetadata(BSON("key" << 1 << "key3" << 1)));
// The order of fields in `doc` deliberately does not match the shard key
auto doc = BSON("key3"
@@ -111,8 +113,8 @@ TEST_F(DeleteStateTest, MakeDeleteStateShardedWithIdInShardKey) {
auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss);
// Push a CollectionMetadata with a shard key that does have "_id" in the middle...
- css->refreshMetadata(operationContext(),
- makeAMetadata(BSON("key" << 1 << "_id" << 1 << "key2" << 1)));
+ css->setFilteringMetadata(operationContext(),
+ makeAMetadata(BSON("key" << 1 << "_id" << 1 << "key2" << 1)));
// The order of fields in `doc` deliberately does not match the shard key
auto doc = BSON("key2" << true << "key3"
@@ -138,7 +140,7 @@ TEST_F(DeleteStateTest, MakeDeleteStateShardedWithIdHashInShardKey) {
// Push a CollectionMetadata with a shard key "_id", hashed.
auto aMetadata = makeAMetadata(BSON("_id"
<< "hashed"));
- css->refreshMetadata(operationContext(), std::move(aMetadata));
+ css->setFilteringMetadata(operationContext(), std::move(aMetadata));
auto doc = BSON("key2" << true << "_id"
<< "hello"
diff --git a/src/mongo/db/s/metadata_manager.cpp b/src/mongo/db/s/metadata_manager.cpp
index f4d9bc1f13f..836370d5d0e 100644
--- a/src/mongo/db/s/metadata_manager.cpp
+++ b/src/mongo/db/s/metadata_manager.cpp
@@ -200,7 +200,7 @@ public:
}
private:
- friend ScopedCollectionMetadata MetadataManager::getActiveMetadata(
+ friend boost::optional<ScopedCollectionMetadata> MetadataManager::getActiveMetadata(
std::shared_ptr<MetadataManager>, const boost::optional<LogicalTime>&);
std::shared_ptr<MetadataManager> _metadataManager;
@@ -216,9 +216,7 @@ MetadataManager::MetadataManager(ServiceContext* serviceContext,
_receivingChunks(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<BSONObj>()) {}
MetadataManager::~MetadataManager() {
- stdx::lock_guard<stdx::mutex> lg(_managerLock);
- _clearAllCleanups(lg);
- _metadata.clear();
+ clearFilteringMetadata();
}
void MetadataManager::_clearAllCleanups(WithLock lock) {
@@ -236,17 +234,18 @@ void MetadataManager::_clearAllCleanups(WithLock, Status status) {
_rangesToClean.clear(status);
}
-ScopedCollectionMetadata MetadataManager::getActiveMetadata(
+boost::optional<ScopedCollectionMetadata> MetadataManager::getActiveMetadata(
std::shared_ptr<MetadataManager> self, const boost::optional<LogicalTime>& atClusterTime) {
stdx::lock_guard<stdx::mutex> lg(_managerLock);
if (_metadata.empty()) {
- return {kUnshardedCollection};
+ return boost::none;
}
auto metadataTracker = _metadata.back();
if (!atClusterTime) {
- return {std::make_shared<RangePreserver>(lg, std::move(self), std::move(metadataTracker))};
+ return ScopedCollectionMetadata(
+ std::make_shared<RangePreserver>(lg, std::move(self), std::move(metadataTracker)));
}
auto chunkManager = metadataTracker->metadata->getChunkManager();
@@ -266,8 +265,8 @@ ScopedCollectionMetadata MetadataManager::getActiveMetadata(
CollectionMetadata _metadata;
};
- return {std::make_shared<MetadataAtTimestamp>(
- CollectionMetadata(chunkManagerAtClusterTime, metadataTracker->metadata->shardId()))};
+ return ScopedCollectionMetadata(std::make_shared<MetadataAtTimestamp>(
+ CollectionMetadata(chunkManagerAtClusterTime, metadataTracker->metadata->shardId())));
}
size_t MetadataManager::numberOfMetadataSnapshots() const {
@@ -290,70 +289,53 @@ int MetadataManager::numberOfEmptyMetadataSnapshots() const {
return emptyMetadataSnapshots;
}
-void MetadataManager::refreshActiveMetadata(std::unique_ptr<CollectionMetadata> remoteMetadata) {
+void MetadataManager::setFilteringMetadata(CollectionMetadata remoteMetadata) {
stdx::lock_guard<stdx::mutex> lg(_managerLock);
- // Collection was never sharded in the first place. This check is necessary in order to avoid
- // extraneous logging in the not-a-shard case, because all call sites always try to get the
- // collection sharding information regardless of whether the node is sharded or not.
- if (!remoteMetadata && _metadata.empty()) {
- invariant(_receivingChunks.empty());
- invariant(_rangesToClean.isEmpty());
- return;
- }
-
- // Collection is becoming unsharded
- if (!remoteMetadata) {
- log() << "Marking collection " << _nss.ns() << " with "
- << redact(_metadata.back()->metadata->toStringBasic()) << " as unsharded";
-
- _receivingChunks.clear();
- _clearAllCleanups(lg);
- _metadata.clear();
- return;
- }
-
// Collection is becoming sharded
if (_metadata.empty()) {
- log() << "Marking collection " << _nss.ns() << " as sharded with "
- << remoteMetadata->toStringBasic();
+ LOG(0) << "Marking collection " << _nss.ns() << " as " << remoteMetadata.toStringBasic();
invariant(_receivingChunks.empty());
- _setActiveMetadata(lg, std::move(*remoteMetadata));
invariant(_rangesToClean.isEmpty());
+
+ _setActiveMetadata(lg, std::move(remoteMetadata));
return;
}
- auto* const activeMetadata = &_metadata.back()->metadata.get();
+ const auto& activeMetadata = _metadata.back()->metadata;
// If the metadata being installed has a different epoch from ours, this means the collection
// was dropped and recreated, so we must entirely reset the metadata state
- if (activeMetadata->getCollVersion().epoch() != remoteMetadata->getCollVersion().epoch()) {
- log() << "Overwriting metadata for collection " << _nss.ns() << " from "
- << activeMetadata->toStringBasic() << " to " << remoteMetadata->toStringBasic()
- << " due to epoch change";
+ if (activeMetadata->getCollVersion().epoch() != remoteMetadata.getCollVersion().epoch()) {
+ LOG(0) << "Updating metadata for collection " << _nss.ns() << " from "
+ << activeMetadata->toStringBasic() << " to " << remoteMetadata.toStringBasic()
+ << " due to epoch change";
_receivingChunks.clear();
- _setActiveMetadata(lg, std::move(*remoteMetadata));
_clearAllCleanups(lg);
+ _metadata.clear();
+
+ _setActiveMetadata(lg, std::move(remoteMetadata));
return;
}
- // We already have newer version
- if (activeMetadata->getCollVersion() >= remoteMetadata->getCollVersion()) {
+ // We already have the same or newer version
+ if (activeMetadata->getCollVersion() >= remoteMetadata.getCollVersion()) {
LOG(1) << "Ignoring update of active metadata " << activeMetadata->toStringBasic()
- << " with an older " << remoteMetadata->toStringBasic();
+ << " with an older " << remoteMetadata.toStringBasic();
return;
}
- log() << "Updating collection metadata for " << _nss.ns() << " from "
- << activeMetadata->toStringBasic() << " to " << remoteMetadata->toStringBasic();
+ LOG(0) << "Updating metadata for collection " << _nss.ns() << " from "
+ << activeMetadata->toStringBasic() << " to " << remoteMetadata.toStringBasic()
+ << " due to version change";
// Resolve any receiving chunks, which might have completed by now
for (auto it = _receivingChunks.begin(); it != _receivingChunks.end();) {
const ChunkRange receivingRange(it->first, it->second);
- if (!remoteMetadata->rangeOverlapsChunk(receivingRange)) {
+ if (!remoteMetadata.rangeOverlapsChunk(receivingRange)) {
++it;
continue;
}
@@ -367,7 +349,14 @@ void MetadataManager::refreshActiveMetadata(std::unique_ptr<CollectionMetadata>
it = _receivingChunks.begin();
}
- _setActiveMetadata(lg, std::move(*remoteMetadata));
+ _setActiveMetadata(lg, std::move(remoteMetadata));
+}
+
+void MetadataManager::clearFilteringMetadata() {
+ stdx::lock_guard<stdx::mutex> lg(_managerLock);
+ _receivingChunks.clear();
+ _clearAllCleanups(lg);
+ _metadata.clear();
}
void MetadataManager::_setActiveMetadata(WithLock wl, CollectionMetadata newMetadata) {
@@ -381,9 +370,11 @@ void MetadataManager::_retireExpiredMetadata(WithLock lock) {
// not 0) could have a query that is actually still accessing those documents.
while (_metadata.size() > 1 && !_metadata.front()->usageCounter) {
if (!_metadata.front()->orphans.empty()) {
- log() << "Queries possibly dependent on " << _nss.ns()
- << " range(s) finished; scheduling ranges for deletion";
+ LOG(0) << "Queries possibly dependent on " << _nss.ns()
+ << " range(s) finished; scheduling ranges for deletion";
+ // It is safe to push orphan ranges from _metadata.back(), even though new queries might
+ // start any time, because any request to delete a range it maps is rejected.
_pushListToClean(lock, std::move(_metadata.front()->orphans));
}
@@ -459,7 +450,6 @@ void MetadataManager::_pushListToClean(WithLock, std::list<Deletion> ranges) {
scheduleCleanup(
_executor, _nss, _metadata.back()->metadata->getCollVersion().epoch(), *when);
}
- invariant(ranges.empty());
}
auto MetadataManager::beginReceive(ChunkRange const& range) -> CleanupNotification {
@@ -531,6 +521,7 @@ auto MetadataManager::cleanUpRange(ChunkRange const& range, Date_t whenToDelete)
auto& orphans = overlapMetadata->orphans;
orphans.emplace_back(ChunkRange(range.getMin().getOwned(), range.getMax().getOwned()),
whenToDelete);
+
return orphans.back().notification;
}
diff --git a/src/mongo/db/s/metadata_manager.h b/src/mongo/db/s/metadata_manager.h
index b6cbf10cbc7..ba83f3e6ada 100644
--- a/src/mongo/db/s/metadata_manager.h
+++ b/src/mongo/db/s/metadata_manager.h
@@ -34,6 +34,7 @@
#include "mongo/base/disallow_copying.h"
#include "mongo/bson/simple_bsonobj_comparator.h"
+#include "mongo/db/logical_time.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/range_arithmetic.h"
#include "mongo/db/s/collection_range_deleter.h"
@@ -62,14 +63,17 @@ public:
~MetadataManager();
/**
- * An ActiveMetadata must be set before this function can be called.
+ * If there is no filtering metadata set yet (setFilteringMetadata has not been called) returns
+ * boost::none. Otherwise increments the usage counter of the active metadata and returns an
+ * RAII object, which corresponds to it.
*
- * Increments the usage counter of the active metadata and returns an RAII object, which
- * contains the currently active metadata. When the usageCounter goes to zero, the RAII
- * object going out of scope will call _removeMetadata.
+ * Holding a reference on a particular instance of the metadata means that orphan cleanup is not
+ * allowed to run and delete chunks which are covered by that metadata. When the returned
+ * ScopedCollectionMetadata goes out of scope, the reference counter on the metadata will be
+ * decremented and if it reaches zero, orphan cleanup may proceed.
*/
- ScopedCollectionMetadata getActiveMetadata(std::shared_ptr<MetadataManager> self,
- const boost::optional<LogicalTime>& atClusterTime);
+ boost::optional<ScopedCollectionMetadata> getActiveMetadata(
+ std::shared_ptr<MetadataManager> self, const boost::optional<LogicalTime>& atClusterTime);
/**
* Returns the number of CollectionMetadata objects being maintained on behalf of running
@@ -85,10 +89,9 @@ public:
*/
int numberOfEmptyMetadataSnapshots() const;
- /**
- * Uses the contents of the specified metadata as a way to purge any pending chunks.
- */
- void refreshActiveMetadata(std::unique_ptr<CollectionMetadata> newMetadata);
+ void setFilteringMetadata(CollectionMetadata newMetadata);
+
+ void clearFilteringMetadata();
void toBSONPending(BSONArrayBuilder& bb) const;
@@ -239,10 +242,10 @@ private:
// Mutex to protect the state below
mutable stdx::mutex _managerLock;
- // Contains a list of collection metadata ordered in chronological order based on the refreshes
- // that occurred. The entry at _metadata.back() is the most recent metadata and is what is
- // returned to new queries. The rest are previously active collection metadata instances still
- // in use by active server operations or cursors.
+ // Contains a list of collection metadata for the same collection epoch, ordered in
+ // chronological order based on the refreshes that occurred. The entry at _metadata.back() is
+ // the most recent metadata and is what is returned to new queries. The rest are previously
+ // active collection metadata instances still in use by active server operations or cursors.
std::list<std::shared_ptr<CollectionMetadataTracker>> _metadata;
// Chunk ranges being migrated into to the shard. Indexed by the min key of the range.
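
The updated getActiveMetadata() comment above describes an RAII reference-counting contract: while any returned handle is alive, the snapshot it pins cannot be retired and ranges covered by it cannot be scheduled for orphan cleanup; once the last handle goes out of scope the snapshot may be retired. The following is a minimal standalone model of that contract, with invented names (ManagerModel, ScopedSnapshot, Snapshot) standing in for MetadataManager, ScopedCollectionMetadata and CollectionMetadataTracker, and with all locking omitted.

// Illustrative-only sketch of the reference-counting behaviour; not the real internals.
#include <cassert>
#include <list>
#include <memory>

struct Snapshot {
    int usageCounter = 0;
    bool orphansScheduledForCleanup = false;
};

class ManagerModel {
public:
    class ScopedSnapshot {  // plays the role of ScopedCollectionMetadata
    public:
        ScopedSnapshot(ManagerModel* mgr, std::shared_ptr<Snapshot> snap)
            : _mgr(mgr), _snap(std::move(snap)) {
            ++_snap->usageCounter;
        }
        ~ScopedSnapshot() {
            --_snap->usageCounter;
            _mgr->retireExpiredSnapshots();
        }

    private:
        ManagerModel* _mgr;
        std::shared_ptr<Snapshot> _snap;
    };

    ScopedSnapshot acquireActive() {
        return ScopedSnapshot(this, _snapshots.back());
    }

    void installNewSnapshot() {
        _snapshots.push_back(std::make_shared<Snapshot>());
    }

    // Mirrors the idea behind _retireExpiredMetadata(): only unreferenced, non-active
    // snapshots may have their orphan ranges pushed to the range deleter.
    void retireExpiredSnapshots() {
        while (_snapshots.size() > 1 && _snapshots.front()->usageCounter == 0) {
            _snapshots.front()->orphansScheduledForCleanup = true;
            _snapshots.pop_front();
        }
    }

    size_t numberOfSnapshots() const {
        return _snapshots.size();
    }

private:
    std::list<std::shared_ptr<Snapshot>> _snapshots;
};

int main() {
    ManagerModel mgr;
    mgr.installNewSnapshot();
    {
        auto held = mgr.acquireActive();       // a query keeps the old snapshot referenced
        mgr.installNewSnapshot();              // a refresh installs newer metadata
        assert(mgr.numberOfSnapshots() == 2);  // the old one is pinned, cleanup must wait
    }
    // The query finished; the old snapshot can be retired and its orphans cleaned up.
    assert(mgr.numberOfSnapshots() == 1);
    return 0;
}
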
diff --git a/src/mongo/db/s/metadata_manager_test.cpp b/src/mongo/db/s/metadata_manager_test.cpp
index 47241fdee53..29810e0d2da 100644
--- a/src/mongo/db/s/metadata_manager_test.cpp
+++ b/src/mongo/db/s/metadata_manager_test.cpp
@@ -71,7 +71,7 @@ protected:
/**
* Returns an instance of CollectionMetadata which has no chunks owned by 'thisShard'.
*/
- static std::unique_ptr<CollectionMetadata> makeEmptyMetadata() {
+ static CollectionMetadata makeEmptyMetadata() {
const OID epoch = OID::gen();
auto rt = RoutingTableHistory::makeNew(
@@ -88,7 +88,7 @@ protected:
std::shared_ptr<ChunkManager> cm = std::make_shared<ChunkManager>(rt, boost::none);
- return stdx::make_unique<CollectionMetadata>(cm, kThisShard);
+ return CollectionMetadata(cm, kThisShard);
}
/**
@@ -99,46 +99,93 @@ protected:
* It will fassert if the chunk bounds are incorrect or overlap an existing chunk or if the
* chunk version is lower than the maximum one.
*/
- static std::unique_ptr<CollectionMetadata> cloneMetadataPlusChunk(
- const ScopedCollectionMetadata& metadata, const ChunkRange range) {
+ static CollectionMetadata cloneMetadataPlusChunk(const ScopedCollectionMetadata& metadata,
+ const ChunkRange& range) {
const BSONObj& minKey = range.getMin();
const BSONObj& maxKey = range.getMax();
-
- ASSERT(SimpleBSONObjComparator::kInstance.evaluate(minKey < maxKey))
- << "minKey == " << minKey << "; maxKey == " << maxKey;
ASSERT(!rangeMapOverlaps(metadata->getChunks(), minKey, maxKey));
auto cm = metadata->getChunkManager();
const auto chunkToSplit = cm->findIntersectingChunkWithSimpleCollation(minKey);
- ASSERT(SimpleBSONObjComparator::kInstance.evaluate(maxKey <= chunkToSplit.getMax()))
- << "maxKey == " << maxKey << " and chunkToSplit.getMax() == " << chunkToSplit.getMax();
+ ASSERT_BSONOBJ_GTE(minKey, chunkToSplit.getMin());
+ ASSERT_BSONOBJ_LT(maxKey, chunkToSplit.getMax());
+
+ std::vector<ChunkType> splitChunks;
+
+ auto chunkVersion = cm->getVersion();
+
+ if (SimpleBSONObjComparator::kInstance.evaluate(chunkToSplit.getMin() < minKey)) {
+ chunkVersion.incMajor();
+ splitChunks.emplace_back(
+ kNss, ChunkRange(chunkToSplit.getMin(), minKey), chunkVersion, kOtherShard);
+ }
+
+ chunkVersion.incMajor();
+ splitChunks.emplace_back(kNss, ChunkRange(minKey, maxKey), chunkVersion, kThisShard);
+
+ chunkVersion.incMajor();
+ splitChunks.emplace_back(
+ kNss, ChunkRange(maxKey, chunkToSplit.getMax()), chunkVersion, kOtherShard);
- auto v1 = cm->getVersion();
- v1.incMajor();
- auto v2 = v1;
- v2.incMajor();
- auto v3 = v2;
- v3.incMajor();
+ auto rt = cm->getRoutingHistory()->makeUpdated(splitChunks);
+
+ return CollectionMetadata(std::make_shared<ChunkManager>(rt, boost::none), kThisShard);
+ }
+
+ static CollectionMetadata cloneMetadataMinusChunk(const ScopedCollectionMetadata& metadata,
+ const ChunkRange& range) {
+ const BSONObj& minKey = range.getMin();
+ const BSONObj& maxKey = range.getMax();
+ ASSERT(rangeMapOverlaps(metadata->getChunks(), minKey, maxKey));
+
+ auto cm = metadata->getChunkManager();
+
+ const auto chunkToMoveOut = cm->findIntersectingChunkWithSimpleCollation(minKey);
+ ASSERT_BSONOBJ_EQ(minKey, chunkToMoveOut.getMin());
+ ASSERT_BSONOBJ_EQ(maxKey, chunkToMoveOut.getMax());
+
+ auto chunkVersion = cm->getVersion();
+ chunkVersion.incMajor();
auto rt = cm->getRoutingHistory()->makeUpdated(
- {ChunkType{kNss, ChunkRange{chunkToSplit.getMin(), minKey}, v1, kOtherShard},
- ChunkType{kNss, ChunkRange{minKey, maxKey}, v2, kThisShard},
- ChunkType{kNss, ChunkRange{maxKey, chunkToSplit.getMax()}, v3, kOtherShard}});
+ {ChunkType(kNss, ChunkRange(minKey, maxKey), chunkVersion, kOtherShard)});
- return stdx::make_unique<CollectionMetadata>(
- std::make_shared<ChunkManager>(rt, boost::none), kThisShard);
+ return CollectionMetadata(std::make_shared<ChunkManager>(rt, boost::none), kThisShard);
}
std::shared_ptr<MetadataManager> _manager;
};
+TEST_F(MetadataManagerTest, InitialMetadataIsUnknown) {
+ ASSERT(!_manager->getActiveMetadata(_manager, boost::none));
+ ASSERT(!_manager->getActiveMetadata(_manager, LogicalTime(Timestamp(10))));
+
+ ASSERT_EQ(0UL, _manager->numberOfMetadataSnapshots());
+ ASSERT_EQ(0UL, _manager->numberOfRangesToClean());
+ ASSERT_EQ(0UL, _manager->numberOfRangesToCleanStillInUse());
+}
+
+TEST_F(MetadataManagerTest, MetadataAfterClearIsUnknown) {
+ _manager->setFilteringMetadata(makeEmptyMetadata());
+ ASSERT(_manager->getActiveMetadata(_manager, boost::none));
+ ASSERT(_manager->getActiveMetadata(_manager, LogicalTime(Timestamp(10))));
+
+ _manager->clearFilteringMetadata();
+ ASSERT(!_manager->getActiveMetadata(_manager, boost::none));
+ ASSERT(!_manager->getActiveMetadata(_manager, LogicalTime(Timestamp(10))));
+
+ ASSERT_EQ(0UL, _manager->numberOfMetadataSnapshots());
+ ASSERT_EQ(0UL, _manager->numberOfRangesToClean());
+ ASSERT_EQ(0UL, _manager->numberOfRangesToCleanStillInUse());
+}
+
TEST_F(MetadataManagerTest, CleanUpForMigrateIn) {
- _manager->refreshActiveMetadata(makeEmptyMetadata());
+ _manager->setFilteringMetadata(makeEmptyMetadata());
// Sanity checks
- ASSERT(_manager->getActiveMetadata(_manager, boost::none)->isSharded());
- ASSERT_EQ(0UL, _manager->getActiveMetadata(_manager, boost::none)->getChunks().size());
+ ASSERT((*_manager->getActiveMetadata(_manager, boost::none))->isSharded());
+ ASSERT_EQ(0UL, (*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size());
ChunkRange range1(BSON("key" << 0), BSON("key" << 10));
ChunkRange range2(BSON("key" << 10), BSON("key" << 20));
@@ -157,12 +204,14 @@ TEST_F(MetadataManagerTest, CleanUpForMigrateIn) {
}
TEST_F(MetadataManagerTest, AddRangeNotificationsBlockAndYield) {
- _manager->refreshActiveMetadata(makeEmptyMetadata());
+ _manager->setFilteringMetadata(makeEmptyMetadata());
ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
+
auto notifn1 = _manager->cleanUpRange(cr1, Date_t{});
ASSERT_FALSE(notifn1.ready());
ASSERT_EQ(_manager->numberOfRangesToClean(), 1UL);
+
auto optNotifn = _manager->trackOrphanedDataCleanup(cr1);
ASSERT_FALSE(notifn1.ready());
ASSERT_FALSE(optNotifn->ready());
@@ -172,7 +221,7 @@ TEST_F(MetadataManagerTest, AddRangeNotificationsBlockAndYield) {
}
TEST_F(MetadataManagerTest, NotificationBlocksUntilDeletion) {
- _manager->refreshActiveMetadata(makeEmptyMetadata());
+ _manager->setFilteringMetadata(makeEmptyMetadata());
ChunkRange cr1(BSON("key" << 20), BSON("key" << 30));
auto optNotif = _manager->trackOrphanedDataCleanup(cr1);
@@ -185,28 +234,28 @@ TEST_F(MetadataManagerTest, NotificationBlocksUntilDeletion) {
auto scm1 = _manager->getActiveMetadata(_manager, boost::none); // and increment refcount
const auto addChunk = [this] {
- _manager->refreshActiveMetadata(
- cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none),
+ _manager->setFilteringMetadata(
+ cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none),
{BSON("key" << 0), BSON("key" << 20)}));
};
addChunk(); // push new metadata
auto scm2 = _manager->getActiveMetadata(_manager, boost::none); // and increment refcount
- ASSERT_EQ(1ULL, scm2->getChunks().size());
+ ASSERT_EQ(1ULL, (*scm2)->getChunks().size());
// Simulate drop and recreate
- _manager->refreshActiveMetadata(makeEmptyMetadata());
+ _manager->setFilteringMetadata(makeEmptyMetadata());
addChunk(); // push new metadata
auto scm3 = _manager->getActiveMetadata(_manager, boost::none); // and increment refcount
- ASSERT_EQ(1ULL, scm3->getChunks().size());
+ ASSERT_EQ(1ULL, (*scm3)->getChunks().size());
- ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 3UL);
- ASSERT_EQ(_manager->numberOfRangesToClean(), 0UL); // not yet...
+ ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 0UL);
+ ASSERT_EQ(_manager->numberOfRangesToClean(), 0UL);
optNotif = _manager->cleanUpRange(cr1, Date_t{});
ASSERT(optNotif);
- ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 3UL);
+ ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 0UL);
ASSERT_EQ(_manager->numberOfRangesToClean(), 1UL);
}
@@ -227,74 +276,102 @@ TEST_F(MetadataManagerTest, NotificationBlocksUntilDeletion) {
optNotif2->abandon();
}
+TEST_F(MetadataManagerTest, CleanupNotificationsAreSignaledOnDropAndRecreate) {
+ const ChunkRange rangeToClean(BSON("key" << 20), BSON("key" << 30));
+
+ _manager->setFilteringMetadata(makeEmptyMetadata());
+ _manager->setFilteringMetadata(
+ cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none),
+ {BSON("key" << 0), BSON("key" << 20)}));
+
+ _manager->setFilteringMetadata(
+ cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), rangeToClean));
+ auto cursorOnMovedMetadata = _manager->getActiveMetadata(_manager, boost::none);
+
+ _manager->setFilteringMetadata(
+ cloneMetadataMinusChunk(*_manager->getActiveMetadata(_manager, boost::none), rangeToClean));
+
+ auto notif = _manager->cleanUpRange(rangeToClean, Date_t{});
+ ASSERT(!notif.ready());
+
+ auto optNotif = _manager->trackOrphanedDataCleanup(rangeToClean);
+ ASSERT(optNotif);
+ ASSERT(!optNotif->ready());
+
+ _manager->setFilteringMetadata(makeEmptyMetadata());
+ ASSERT(notif.ready());
+ ASSERT(optNotif->ready());
+}
+
TEST_F(MetadataManagerTest, RefreshAfterSuccessfulMigrationSinglePending) {
- _manager->refreshActiveMetadata(makeEmptyMetadata());
+ _manager->setFilteringMetadata(makeEmptyMetadata());
ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
- _manager->refreshActiveMetadata(
- cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none), cr1));
- ASSERT_EQ(_manager->getActiveMetadata(_manager, boost::none)->getChunks().size(), 1UL);
+ _manager->setFilteringMetadata(
+ cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), cr1));
+ ASSERT_EQ((*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 1UL);
}
TEST_F(MetadataManagerTest, RefreshAfterSuccessfulMigrationMultiplePending) {
- _manager->refreshActiveMetadata(makeEmptyMetadata());
+ _manager->setFilteringMetadata(makeEmptyMetadata());
ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));
{
- _manager->refreshActiveMetadata(
- cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none), cr1));
+ _manager->setFilteringMetadata(
+ cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), cr1));
ASSERT_EQ(_manager->numberOfRangesToClean(), 0UL);
- ASSERT_EQ(_manager->getActiveMetadata(_manager, boost::none)->getChunks().size(), 1UL);
+ ASSERT_EQ((*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 1UL);
}
{
- _manager->refreshActiveMetadata(
- cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none), cr2));
- ASSERT_EQ(_manager->getActiveMetadata(_manager, boost::none)->getChunks().size(), 2UL);
+ _manager->setFilteringMetadata(
+ cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), cr2));
+ ASSERT_EQ((*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 2UL);
}
}
TEST_F(MetadataManagerTest, RefreshAfterNotYetCompletedMigrationMultiplePending) {
- _manager->refreshActiveMetadata(makeEmptyMetadata());
+ _manager->setFilteringMetadata(makeEmptyMetadata());
ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));
- _manager->refreshActiveMetadata(
- cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none),
+ _manager->setFilteringMetadata(
+ cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none),
{BSON("key" << 50), BSON("key" << 60)}));
- ASSERT_EQ(_manager->getActiveMetadata(_manager, boost::none)->getChunks().size(), 1UL);
+ ASSERT_EQ((*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 1UL);
}
TEST_F(MetadataManagerTest, BeginReceiveWithOverlappingRange) {
- _manager->refreshActiveMetadata(makeEmptyMetadata());
+ _manager->setFilteringMetadata(makeEmptyMetadata());
ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));
- _manager->refreshActiveMetadata(
- cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none), cr1));
- _manager->refreshActiveMetadata(
- cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none), cr2));
+ _manager->setFilteringMetadata(
+ cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), cr1));
+ _manager->setFilteringMetadata(
+ cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none), cr2));
ChunkRange crOverlap(BSON("key" << 5), BSON("key" << 35));
}
TEST_F(MetadataManagerTest, RefreshMetadataAfterDropAndRecreate) {
- _manager->refreshActiveMetadata(makeEmptyMetadata());
- _manager->refreshActiveMetadata(cloneMetadataPlusChunk(
- _manager->getActiveMetadata(_manager, boost::none), {BSON("key" << 0), BSON("key" << 10)}));
+ _manager->setFilteringMetadata(makeEmptyMetadata());
+ _manager->setFilteringMetadata(
+ cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none),
+ {BSON("key" << 0), BSON("key" << 10)}));
// Now, pretend that the collection was dropped and recreated
- _manager->refreshActiveMetadata(makeEmptyMetadata());
- _manager->refreshActiveMetadata(
- cloneMetadataPlusChunk(_manager->getActiveMetadata(_manager, boost::none),
+ _manager->setFilteringMetadata(makeEmptyMetadata());
+ _manager->setFilteringMetadata(
+ cloneMetadataPlusChunk(*_manager->getActiveMetadata(_manager, boost::none),
{BSON("key" << 20), BSON("key" << 30)}));
- const auto chunks = _manager->getActiveMetadata(_manager, boost::none)->getChunks();
+ const auto chunks = (*_manager->getActiveMetadata(_manager, boost::none))->getChunks();
ASSERT_EQ(1UL, chunks.size());
const auto chunkEntry = chunks.begin();
ASSERT_BSONOBJ_EQ(BSON("key" << 20), chunkEntry->first);
@@ -303,7 +380,7 @@ TEST_F(MetadataManagerTest, RefreshMetadataAfterDropAndRecreate) {
// Tests membership functions for _rangesToClean
TEST_F(MetadataManagerTest, RangesToCleanMembership) {
- _manager->refreshActiveMetadata(makeEmptyMetadata());
+ _manager->setFilteringMetadata(makeEmptyMetadata());
ChunkRange cr(BSON("key" << 0), BSON("key" << 10));
@@ -317,19 +394,19 @@ TEST_F(MetadataManagerTest, RangesToCleanMembership) {
}
TEST_F(MetadataManagerTest, ClearUnneededChunkManagerObjectsLastSnapshotInList) {
- _manager->refreshActiveMetadata(makeEmptyMetadata());
+ _manager->setFilteringMetadata(makeEmptyMetadata());
ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));
- auto scm1 = _manager->getActiveMetadata(_manager, boost::none);
+ auto scm1 = *_manager->getActiveMetadata(_manager, boost::none);
{
- _manager->refreshActiveMetadata(cloneMetadataPlusChunk(scm1, cr1));
+ _manager->setFilteringMetadata(cloneMetadataPlusChunk(scm1, cr1));
ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 1UL);
ASSERT_EQ(_manager->numberOfRangesToClean(), 0UL);
- auto scm2 = _manager->getActiveMetadata(_manager, boost::none);
+ auto scm2 = *_manager->getActiveMetadata(_manager, boost::none);
ASSERT_EQ(scm2->getChunks().size(), 1UL);
- _manager->refreshActiveMetadata(cloneMetadataPlusChunk(scm2, cr2));
+ _manager->setFilteringMetadata(cloneMetadataPlusChunk(scm2, cr2));
ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 2UL);
ASSERT_EQ(_manager->numberOfEmptyMetadataSnapshots(), 0);
}
@@ -338,29 +415,29 @@ TEST_F(MetadataManagerTest, ClearUnneededChunkManagerObjectsLastSnapshotInList)
// is now out of scope, but that in scm1 should remain
ASSERT_EQ(_manager->numberOfEmptyMetadataSnapshots(), 1);
ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 2UL);
- ASSERT_EQ((_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 2UL);
+ ASSERT_EQ((*_manager->getActiveMetadata(_manager, boost::none))->getChunks().size(), 2UL);
}
TEST_F(MetadataManagerTest, ClearUnneededChunkManagerObjectSnapshotInMiddleOfList) {
- _manager->refreshActiveMetadata(makeEmptyMetadata());
+ _manager->setFilteringMetadata(makeEmptyMetadata());
ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
ChunkRange cr2(BSON("key" << 30), BSON("key" << 40));
ChunkRange cr3(BSON("key" << 50), BSON("key" << 80));
ChunkRange cr4(BSON("key" << 90), BSON("key" << 100));
- auto scm = _manager->getActiveMetadata(_manager, boost::none);
- _manager->refreshActiveMetadata(cloneMetadataPlusChunk(scm, cr1));
+ auto scm = *_manager->getActiveMetadata(_manager, boost::none);
+ _manager->setFilteringMetadata(cloneMetadataPlusChunk(scm, cr1));
ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 1UL);
ASSERT_EQ(_manager->numberOfRangesToClean(), 0UL);
- auto scm2 = _manager->getActiveMetadata(_manager, boost::none);
+ auto scm2 = *_manager->getActiveMetadata(_manager, boost::none);
ASSERT_EQ(scm2->getChunks().size(), 1UL);
- _manager->refreshActiveMetadata(cloneMetadataPlusChunk(scm2, cr2));
+ _manager->setFilteringMetadata(cloneMetadataPlusChunk(scm2, cr2));
{
- auto scm3 = _manager->getActiveMetadata(_manager, boost::none);
+ auto scm3 = *_manager->getActiveMetadata(_manager, boost::none);
ASSERT_EQ(scm3->getChunks().size(), 2UL);
- _manager->refreshActiveMetadata(cloneMetadataPlusChunk(scm3, cr3));
+ _manager->setFilteringMetadata(cloneMetadataPlusChunk(scm3, cr3));
ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 3UL);
ASSERT_EQ(_manager->numberOfEmptyMetadataSnapshots(), 0);
@@ -374,9 +451,9 @@ TEST_F(MetadataManagerTest, ClearUnneededChunkManagerObjectSnapshotInMiddleOfLis
* CollectionMetadataTracker{ metadata: xxx, orphans: [], usageCounter: 1}
* ]
*/
- scm2 = _manager->getActiveMetadata(_manager, boost::none);
+ scm2 = *_manager->getActiveMetadata(_manager, boost::none);
ASSERT_EQ(scm2->getChunks().size(), 3UL);
- _manager->refreshActiveMetadata(cloneMetadataPlusChunk(scm2, cr4));
+ _manager->setFilteringMetadata(cloneMetadataPlusChunk(scm2, cr4));
ASSERT_EQ(_manager->numberOfMetadataSnapshots(), 4UL);
ASSERT_EQ(_manager->numberOfEmptyMetadataSnapshots(), 1);
}
diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp
index eb3026e084b..57937d9c5fe 100644
--- a/src/mongo/db/s/migration_source_manager.cpp
+++ b/src/mongo/db/s/migration_source_manager.cpp
@@ -482,7 +482,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig(OperationContext* opC
UninterruptibleLockGuard noInterrupt(opCtx->lockState());
AutoGetCollection autoColl(opCtx, getNss(), MODE_IX, MODE_X);
if (!repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesFor(opCtx, getNss())) {
- CollectionShardingRuntime::get(opCtx, getNss())->refreshMetadata(opCtx, nullptr);
+ CollectionShardingRuntime::get(opCtx, getNss())->clearFilteringMetadata();
uassertStatusOK(status.withContext(
str::stream() << "Unable to verify migration commit for chunk: "
<< redact(_args.toString())
@@ -518,7 +518,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig(OperationContext* opC
UninterruptibleLockGuard noInterrupt(opCtx->lockState());
AutoGetCollection autoColl(opCtx, getNss(), MODE_IX, MODE_X);
- CollectionShardingRuntime::get(opCtx, getNss())->refreshMetadata(opCtx, nullptr);
+ CollectionShardingRuntime::get(opCtx, getNss())->clearFilteringMetadata();
log() << "Failed to refresh metadata after a "
<< (migrationCommitStatus.isOK() ? "failed commit attempt" : "successful commit")
diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
index d6fd4b3d567..1fb93a6213f 100644
--- a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
+++ b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
@@ -151,7 +151,7 @@ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx,
AutoGetCollection autoColl(opCtx, nss, MODE_IX, MODE_X);
auto* const css = CollectionShardingRuntime::get(opCtx, nss);
- css->refreshMetadata(opCtx, nullptr);
+ css->setFilteringMetadata(opCtx, CollectionMetadata());
return ChunkVersion::UNSHARDED();
}
@@ -185,10 +185,7 @@ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx,
return metadata->getShardVersion();
}
- std::unique_ptr<CollectionMetadata> newCollectionMetadata =
- stdx::make_unique<CollectionMetadata>(cm, shardingState->shardId());
-
- css->refreshMetadata(opCtx, std::move(newCollectionMetadata));
+ css->setFilteringMetadata(opCtx, CollectionMetadata(cm, shardingState->shardId()));
return css->getMetadata(opCtx)->getShardVersion();
}
diff --git a/src/mongo/db/s/shard_server_op_observer.cpp b/src/mongo/db/s/shard_server_op_observer.cpp
index 4d0f59a36df..622225a9977 100644
--- a/src/mongo/db/s/shard_server_op_observer.cpp
+++ b/src/mongo/db/s/shard_server_op_observer.cpp
@@ -76,10 +76,9 @@ public:
CatalogCacheLoader::get(_opCtx).notifyOfCollectionVersionUpdate(_nss);
- // This is a hack to get around CollectionShardingState::refreshMetadata() requiring the X
- // lock: markNotShardedAtStepdown() doesn't have a lock check. Temporary measure until
- // SERVER-31595 removes the X lock requirement.
- CollectionShardingRuntime::get(_opCtx, _nss)->markNotShardedAtStepdown();
+ // Force subsequent uses of the namespace to refresh the filtering metadata so they can
+ // synchronize with any work happening on the primary (e.g., migration critical section).
+ CollectionShardingRuntime::get(_opCtx, _nss)->clearFilteringMetadata();
}
void rollback() override {}
@@ -281,10 +280,10 @@ void ShardServerOpObserver::onUpdate(OperationContext* opCtx, const OplogUpdateE
}
if (setField.hasField(ShardCollectionType::enterCriticalSectionCounter.name())) {
- // This is a hack to get around CollectionShardingState::refreshMetadata() requiring
- // the X lock: markNotShardedAtStepdown() doesn't have a lock check. Temporary
- // measure until SERVER-31595 removes the X lock requirement.
- CollectionShardingRuntime::get(opCtx, updatedNss)->markNotShardedAtStepdown();
+ // Force subsequent uses of the namespace to refresh the filtering metadata so they
+ // can synchronize with any work happening on the primary (e.g., migration critical
+ // section).
+ CollectionShardingRuntime::get(opCtx, updatedNss)->clearFilteringMetadata();
}
}
}
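
The op observer changes above depend on the semantics documented for clearFilteringMetadata(): once the filtering metadata is UNKNOWN, shard version checks fail with StaleConfig instead of treating the collection as unsharded, which is what forces the node to refresh after a critical section bump or a rollback. Below is a simplified sketch of that behaviour; the types and the error class are stand-ins and do not reflect the real CollectionShardingState::checkShardVersionOrThrow() implementation.

// Hedged sketch: an UNKNOWN shard version cannot satisfy any received version, so the
// check must fail and trigger a refresh. ChunkVersionModel and StaleConfigError are
// invented names for illustration only.
#include <iostream>
#include <optional>
#include <stdexcept>

struct ChunkVersionModel {
    unsigned major = 0;
    unsigned minor = 0;
};

struct StaleConfigError : std::runtime_error {
    using std::runtime_error::runtime_error;
};

void checkShardVersionOrThrow(const std::optional<ChunkVersionModel>& knownVersion,
                              const ChunkVersionModel& receivedVersion) {
    if (!knownVersion) {
        // UNKNOWN metadata: nothing to compare against, so force the caller to refresh.
        throw StaleConfigError("shard version unknown, refresh required");
    }
    if (knownVersion->major != receivedVersion.major) {
        throw StaleConfigError("shard version mismatch");
    }
}

int main() {
    try {
        checkShardVersionOrThrow(std::nullopt, ChunkVersionModel{1, 0});
    } catch (const StaleConfigError& ex) {
        std::cout << "node would refresh its filtering metadata: " << ex.what() << "\n";
    }
    return 0;
}
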