author     Nathan Myers <nathan.myers@10gen.com>    2017-06-01 17:52:56 -0400
committer  Nathan Myers <nathan.myers@10gen.com>    2017-06-01 17:52:56 -0400
commit     3c47c2f2992fb881622195ef5c045f08061e278b (patch)
tree       f3374723c26e8464f7697000baf4c1b2ae152426
parent     6bf062db95d69f06b02da708b54f5e3efbb9dfa5 (diff)
download   mongo-3c47c2f2992fb881622195ef5c045f08061e278b.tar.gz
Revert "SERVER-29342 CollectionShardState/RangeDeleter support for safe secondary reads"
This reverts commit 6bf062db95d69f06b02da708b54f5e3efbb9dfa5.
-rw-r--r--  jstests/core/set1.js                              |   2
-rw-r--r--  jstests/core/update_replace.js                    |   8
-rw-r--r--  jstests/core/upsert_fields.js                     |   2
-rw-r--r--  src/mongo/db/exec/update.cpp                      |   6
-rw-r--r--  src/mongo/db/s/collection_range_deleter.cpp       |  57
-rw-r--r--  src/mongo/db/s/collection_range_deleter.h         |  37
-rw-r--r--  src/mongo/db/s/collection_range_deleter_test.cpp  | 131
-rw-r--r--  src/mongo/db/s/collection_sharding_state.cpp      |   6
-rw-r--r--  src/mongo/db/s/collection_sharding_state.h        |  14
-rw-r--r--  src/mongo/db/s/metadata_manager.cpp               | 157
-rw-r--r--  src/mongo/db/s/metadata_manager.h                 |  42
-rw-r--r--  src/mongo/db/s/metadata_manager_test.cpp          |  33

12 files changed, 152 insertions(+), 343 deletions(-)
diff --git a/jstests/core/set1.js b/jstests/core/set1.js
index bae41fc5803..33840e3f431 100644
--- a/jstests/core/set1.js
+++ b/jstests/core/set1.js
@@ -4,4 +4,4 @@
 t.drop();
 t.insert({_id: 1, emb: {}});
 t.update({_id: 1}, {$set: {emb: {'a.dot': 'data'}}});
-assert.eq({_id: 1, emb: {"a.dot": 'data'}}, t.findOne(), "A");
+assert.eq({_id: 1, emb: {}}, t.findOne(), "A");
diff --git a/jstests/core/update_replace.js b/jstests/core/update_replace.js
index e62d03eb09f..44099851ef4 100644
--- a/jstests/core/update_replace.js
+++ b/jstests/core/update_replace.js
@@ -13,13 +13,13 @@ var res;
 // Bypass validation in shell so we can test the server.
 conn._skipValidation = true;
 
-// Allow "." in field names
+// Should not allow "." in field names
 res = t.save({_id: 1, "a.a": 1});
-assert(!res.hasWriteError(), "a.a");
+assert(res.hasWriteError(), "a.a");
 
-// Allow "." in field names, embedded
+// Should not allow "." in field names, embedded
 res = t.save({_id: 1, a: {"a.a": 1}});
-assert(!res.hasWriteError(), "a: a.a");
+assert(res.hasWriteError(), "a: a.a");
 
 // Should not allow "$"-prefixed field names, caught before "." check
 res = t.save({_id: 1, $a: {"a.a": 1}});
diff --git a/jstests/core/upsert_fields.js b/jstests/core/upsert_fields.js
index 490625c23db..311d6984ce9 100644
--- a/jstests/core/upsert_fields.js
+++ b/jstests/core/upsert_fields.js
@@ -154,7 +154,6 @@ for (var i = 0; i < 3; i++) {
         assert.eq(value, upsertedXVal({$or: [{x: {$eq: 1}}]}, expr));
 
         // Special types extracted
         assert.eq(isReplStyle ? undefined : [1, 2], upsertedXVal({x: [1, 2]}, expr));
-        assert.eq({'x.x': 1}, upsertedXVal({x: {'x.x': 1}}, expr));
 
         // field not extracted
         assert.eq(undefined, upsertedXVal({x: {$gt: 1}}, expr));
@@ -173,6 +172,7 @@ for (var i = 0; i < 3; i++) {
         assert.writeError(upsertedResult({x: undefined}, expr));
 
         if (!isReplStyle) {
+            assert.writeError(upsertedResult({x: {'x.x': 1}}, expr));
             assert.writeError(upsertedResult({x: {$all: [1, 2]}}, expr));
             assert.writeError(upsertedResult({$and: [{x: 1}, {x: 1}]}, expr));
             assert.writeError(upsertedResult({$and: [{x: {$eq: 1}}, {x: 2}]}, expr));
diff --git a/src/mongo/db/exec/update.cpp b/src/mongo/db/exec/update.cpp
index 90cd46ab423..9bcc5460290 100644
--- a/src/mongo/db/exec/update.cpp
+++ b/src/mongo/db/exec/update.cpp
@@ -254,6 +254,12 @@ StatusWith<std::uint32_t> storageValid(const mb::ConstElement& elem,
             Status status = validateDollarPrefixElement(elem, deep);
             if (!status.isOK())
                 return status;
+        } else if (fieldName.find(".") != string::npos) {
+            // Field name cannot have a "." in it.
+            return Status(ErrorCodes::DottedFieldName,
+                          str::stream() << "The dotted field '" << elem.getFieldName() << "' in '"
+                                        << mb::getFullName(elem)
+                                        << "' is not valid for storage.");
         }
     }
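The functional heart of this part of the revert is the re-added rejection of dotted field names in storageValid() above. A minimal standalone sketch of the rule it enforces (illustrative only; the server's real check also validates "$"-prefixed names and recurses into subdocuments):

    #include <string>

    // A field name is storable only if it contains no '.', mirroring the
    // ErrorCodes::DottedFieldName path restored in update.cpp above.
    bool fieldNameValidForStorage(const std::string& fieldName) {
        return fieldName.find('.') == std::string::npos;
    }

With this check back in place, fieldNameValidForStorage("a.a") is false, which is why the assertions in update_replace.js and upsert_fields.js flip from accepting such writes to expecting write errors.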
diff --git a/src/mongo/db/s/collection_range_deleter.cpp b/src/mongo/db/s/collection_range_deleter.cpp
index 2392305fe5d..df690affe3d 100644
--- a/src/mongo/db/s/collection_range_deleter.cpp
+++ b/src/mongo/db/s/collection_range_deleter.cpp
@@ -76,16 +76,13 @@ const WriteConcernOptions kMajorityWriteConcern(WriteConcernOptions::kMajority,
 CollectionRangeDeleter::~CollectionRangeDeleter() {
     // notify anybody still sleeping on orphan ranges
     clear(Status{ErrorCodes::InterruptedDueToReplStateChange,
-                 "Collection sharding metadata discarded"});
+                 "Collection sharding metadata destroyed"});
 }
 
-auto CollectionRangeDeleter::cleanUpNextRange(OperationContext* opCtx,
+bool CollectionRangeDeleter::cleanUpNextRange(OperationContext* opCtx,
                                               NamespaceString const& nss,
-                                              Action action,
                                               int maxToDelete,
-                                              CollectionRangeDeleter* forTestOnly) -> Action {
-
-    invariant(action != Action::kFinished);
+                                              CollectionRangeDeleter* rangeDeleterForTestOnly) {
     StatusWith<int> wrote = 0;
     auto range = boost::optional<ChunkRange>(boost::none);
     auto notification = DeleteNotification();
@@ -95,55 +92,28 @@ auto CollectionRangeDeleter::cleanUpNextRange(OperationContext* opCtx,
     auto* css = CollectionShardingState::get(opCtx, nss);
     {
         auto scopedCollectionMetadata = css->getMetadata();
-        if ((!collection || !scopedCollectionMetadata) && !forTestOnly) {
-            log() << "Abandoning range deletions left over from previously sharded collection"
-                  << nss.ns();
+        if ((!collection || !scopedCollectionMetadata) && !rangeDeleterForTestOnly) {
+            log() << "Abandoning range deletions in collection " << nss.ns()
+                  << " left over from sharded state";
             stdx::lock_guard<stdx::mutex> lk(css->_metadataManager->_managerLock);
             css->_metadataManager->_clearAllCleanups();
-            return Action::kFinished;
+            return false;  // collection was unsharded
         }
 
         // We don't actually know if this is the same collection that we were originally
         // scheduled to do deletions on, or another one with the same name. But it doesn't
-        // matter: if it has a record of deletions scheduled, now is as good a time as any
-        // to do them.
-
-        auto self = forTestOnly ? forTestOnly : &css->_metadataManager->_rangesToClean;
+        // matter: if it has deletions scheduled, now is as good a time as any to do them.
+        auto self = rangeDeleterForTestOnly ? rangeDeleterForTestOnly
+                                            : &css->_metadataManager->_rangesToClean;
         {
             stdx::lock_guard<stdx::mutex> scopedLock(css->_metadataManager->_managerLock);
             if (self->isEmpty())
-                return Action::kFinished;
+                return false;
 
             const auto& frontRange = self->_orphans.front().range;
             range.emplace(frontRange.getMin().getOwned(), frontRange.getMax().getOwned());
             notification = self->_orphans.front().notification;
        }
-        invariant(range);
-
-        if (action == Action::kWriteOpLog) {
-            // clang-format off
-            // Secondaries will watch for this update, and kill any queries that may depend on
-            // documents in the range -- excepting any queries with a read-concern option
-            // 'ignoreChunkMigration'
-            try {
-                auto& adminSystemVersion = NamespaceString::kConfigCollectionNamespace;
-                auto epoch = scopedCollectionMetadata->getCollVersion().epoch();
-                AutoGetCollection autoAdmin(opCtx, adminSystemVersion, MODE_IX);
-
-                Helpers::upsert(opCtx, adminSystemVersion.ns(),
-                    BSON("_id" << "startRangeDeletion" << "ns" << nss.ns() << "epoch" << epoch
-                      << "min" << range->getMin() << "max" << range->getMax()));
-
-            } catch (DBException const& e) {
-                stdx::lock_guard<stdx::mutex> scopedLock(css->_metadataManager->_managerLock);
-                css->_metadataManager->_clearAllCleanups(
-                    {ErrorCodes::fromInt(e.getCode()),
-                     str::stream() << "cannot push startRangeDeletion record to Op Log,"
                                      " abandoning scheduled range deletions: " << e.what()});
-                return Action::kFinished;
-            }
-            // clang-format on
-        }
 
         try {
             auto keyPattern = scopedCollectionMetadata->getKeyPattern();
@@ -153,6 +123,7 @@ auto CollectionRangeDeleter::cleanUpNextRange(OperationContext* opCtx,
             wrote = e.toStatus();
             warning() << e.what();
         }
+
         if (!wrote.isOK() || wrote.getValue() == 0) {
             if (wrote.isOK()) {
                 log() << "No documents remain to delete in " << nss << " range "
@@ -160,7 +131,7 @@ auto CollectionRangeDeleter::cleanUpNextRange(OperationContext* opCtx,
             }
             stdx::lock_guard<stdx::mutex> scopedLock(css->_metadataManager->_managerLock);
             self->_pop(wrote.getStatus());
-            return Action::kWriteOpLog;
+            return true;
         }
     }  // drop scopedCollectionMetadata
 }      // drop autoColl
@@ -203,7 +174,7 @@ auto CollectionRangeDeleter::cleanUpNextRange(OperationContext* opCtx,
     }
 
     notification.abandon();
-    return Action::kMore;
+    return true;
 }
 
 StatusWith<int> CollectionRangeDeleter::_doDeletion(OperationContext* opCtx,
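With the Action enum gone, cleanUpNextRange() degenerates to a simple contract: true means "schedule another pass", false means "nothing left to do". A self-contained sketch of that driver pattern, using a plain task queue as a stand-in for the real executor::TaskExecutor (all names below are stand-ins, not MongoDB APIs):

    #include <algorithm>
    #include <functional>
    #include <queue>

    std::queue<std::function<void()>> pendingTasks;  // stand-in for TaskExecutor

    void scheduleWork(std::function<void()> task) {
        pendingTasks.push(std::move(task));
    }

    int documentsLeft = 250;  // pretend orphaned documents in a range

    // Stand-in for CollectionRangeDeleter::cleanUpNextRange: deletes up to
    // maxToDelete documents and reports whether more work may remain.
    bool cleanUpNextRangeStub(int maxToDelete) {
        documentsLeft -= std::min(documentsLeft, maxToDelete);
        return documentsLeft > 0;
    }

    void scheduleCleanup() {
        scheduleWork([] {
            if (cleanUpNextRangeStub(100)) {
                scheduleCleanup();  // reschedule until a pass reports false
            }
        });
    }

    int main() {
        scheduleCleanup();
        while (!pendingTasks.empty()) {  // the executor's event loop
            auto task = std::move(pendingTasks.front());
            pendingTasks.pop();
            task();
        }
    }

This is the same shape as MetadataManager::_scheduleCleanup() further down in this commit, minus the OperationContext and thread bookkeeping.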
diff --git a/src/mongo/db/s/collection_range_deleter.h b/src/mongo/db/s/collection_range_deleter.h
index 52d3ec44256..a0a6625ba02 100644
--- a/src/mongo/db/s/collection_range_deleter.h
+++ b/src/mongo/db/s/collection_range_deleter.h
@@ -46,8 +46,7 @@ public:
     /**
      * This is an object n that asynchronously changes state when a scheduled range deletion
      * completes or fails. Call n.ready() to discover if the event has already occurred. Call
-     * n.waitStatus(opCtx) to sleep waiting for the event, and get its result. If the wait is
-     * interrupted, waitStatus throws.
+     * n.waitStatus(opCtx) to sleep waiting for the event, and get its result.
      *
      * It is an error to destroy a returned CleanupNotification object n unless either n.ready()
      * is true or n.abandon() has been called. After n.abandon(), n is in a moved-from state.
@@ -86,9 +85,6 @@ public:
         bool operator==(DeleteNotification const& other) const {
             return notification == other.notification;
         }
-        bool operator!=(DeleteNotification const& other) const {
-            return notification != other.notification;
-        }
 
     private:
         std::shared_ptr<Notification<Status>> notification;
@@ -100,8 +96,6 @@ public:
         DeleteNotification notification{};
     };
 
-    enum class Action { kFinished, kMore, kWriteOpLog };
-
     CollectionRangeDeleter() = default;
     ~CollectionRangeDeleter();
 
@@ -134,8 +128,8 @@ public:
     bool isEmpty() const;
 
     /*
-     * Notifies with the specified status anything waiting on ranges scheduled, and then discards
-     * the ranges and notifications. Is called in the destructor.
+     * Notify with the specified status anything waiting on ranges scheduled, before discarding the
+     * ranges and notifications.
      */
     void clear(Status);
 
@@ -145,22 +139,21 @@ public:
     void append(BSONObjBuilder* builder) const;
 
     /**
-     * If any range deletions are scheduled, deletes up to maxToDelete documents, notifying
-     * watchers of ranges as they are done being deleted. It performs its own collection locking,
-     * so it must be called without locks.
+     * If any ranges are scheduled to clean, deletes up to maxToDelete documents, notifying
+     * watchers of ranges as they are done being deleted. It performs its own collection locking
+     * so it must be called without locks.
      *
-     * Returns kMore or kWriteOpLog if it should be scheduled to run again because there might be
-     * more documents to delete, or kFinished otherwise. When calling again, pass the value
-     * returned.
+     * The 'rangeDeleterForTestOnly' is used as a utility for unit-tests that directly test the
+     * CollectionRangeDeleter class so they do not need to set up CollectionShardingState and
+     * MetadataManager objects.
      *
-     * Argument 'forTestOnly' is used in unit tests that exercise the CollectionRangeDeleter
-     * class, so that they do not need to set up CollectionShardingState and MetadataManager
-     * objects.
+     * Returns true if it should be scheduled to run again because there might be more documents
+     * to delete, or false otherwise.
      */
-    static Action cleanUpNextRange(OperationContext*,
-                                   NamespaceString const& nss,
-                                   Action,
-                                   int maxToDelete,
-                                   CollectionRangeDeleter* forTestOnly = nullptr);
+    static bool cleanUpNextRange(OperationContext*,
+                                 NamespaceString const& nss,
+                                 int maxToDelete,
+                                 CollectionRangeDeleter* rangeDeleterForTestOnly = nullptr);
 
 private:
     /**
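The header comment above pins down the DeleteNotification contract: poll with ready(), block with waitStatus(opCtx), and call abandon() before destroying an unready notification. A simplified condition-variable model of that contract (the real class wraps Notification<Status> with an OperationContext-aware wait; this sketch substitutes a bool for Status):

    #include <condition_variable>
    #include <mutex>
    #include <optional>

    class DeleteNotificationModel {
    public:
        // True once the range deletion has completed or failed.
        bool ready() {
            std::lock_guard<std::mutex> lk(_m);
            return _result.has_value();
        }

        // Blocks until set() fires; the real waitStatus() takes an
        // OperationContext and can be interrupted.
        bool waitStatus() {
            std::unique_lock<std::mutex> lk(_m);
            _cv.wait(lk, [&] { return _result.has_value(); });
            return *_result;
        }

        // Called by the deleter when the deletion completes or is abandoned.
        void set(bool ok) {
            {
                std::lock_guard<std::mutex> lk(_m);
                _result = ok;
            }
            _cv.notify_all();
        }

    private:
        std::mutex _m;
        std::condition_variable _cv;
        std::optional<bool> _result;
    };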
diff --git a/src/mongo/db/s/collection_range_deleter_test.cpp b/src/mongo/db/s/collection_range_deleter_test.cpp
index adaf90e7d0f..6442734d927 100644
--- a/src/mongo/db/s/collection_range_deleter_test.cpp
+++ b/src/mongo/db/s/collection_range_deleter_test.cpp
@@ -60,25 +60,18 @@
 const std::string kPattern = "_id";
 const BSONObj kKeyPattern = BSON(kPattern << 1);
 const std::string kShardName{"a"};
 const HostAndPort dummyHost("dummy", 123);
-const NamespaceString kAdminSystemVersion = NamespaceString("admin", "system.version");
 
 class CollectionRangeDeleterTest : public ShardingMongodTestFixture {
-public:
-    using Deletion = CollectionRangeDeleter::Deletion;
-    using Action = CollectionRangeDeleter::Action;
-
 protected:
-    auto next(CollectionRangeDeleter& rangeDeleter, Action action, int maxToDelete)
-        -> CollectionRangeDeleter::Action {
+    bool next(CollectionRangeDeleter& rangeDeleter, int maxToDelete) {
         return CollectionRangeDeleter::cleanUpNextRange(
-            operationContext(), kNss, action, maxToDelete, &rangeDeleter);
+            operationContext(), kNss, maxToDelete, &rangeDeleter);
     }
 
     std::shared_ptr<RemoteCommandTargeterMock> configTargeter() {
         return RemoteCommandTargeterMock::get(shardRegistry()->getConfigShard()->getTargeter());
     }
-    OID const& epoch() {
-        return _epoch;
-    }
+
+    using Deletion = CollectionRangeDeleter::Deletion;
 
 private:
     void setUp() override;
@@ -98,19 +91,9 @@ private:
         std::unique_ptr<DistLockManager> distLockManager) override {
         return stdx::make_unique<ShardingCatalogClientMock>(std::move(distLockManager));
     }
-
-    OID _epoch;
 };
 
-bool operator==(CollectionRangeDeleter::Action a, CollectionRangeDeleter::Action b) {
-    return (int)a == (int)b;
-}
-std::ostream& operator<<(std::ostream& os, CollectionRangeDeleter::Action a) {
-    return os << (int)a;
-}
-
 void CollectionRangeDeleterTest::setUp() {
-    _epoch = OID::gen();
     serverGlobalParams.clusterRole = ClusterRole::ShardServer;
     ShardingMongodTestFixture::setUp();
     replicationCoordinator()->alwaysAllowWrites(true);
@@ -125,12 +108,13 @@ void CollectionRangeDeleterTest::setUp() {
     {
         AutoGetCollection autoColl(operationContext(), kNss, MODE_IX);
         auto collectionShardingState = CollectionShardingState::get(operationContext(), kNss);
+        const OID epoch = OID::gen();
         collectionShardingState->refreshMetadata(
             operationContext(),
             stdx::make_unique<CollectionMetadata>(
                 kKeyPattern,
-                ChunkVersion(1, 0, epoch()),
-                ChunkVersion(0, 0, epoch()),
+                ChunkVersion(1, 0, epoch),
+                ChunkVersion(0, 0, epoch),
                 SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<CachedChunkInfo>()));
     }
 }
@@ -149,7 +133,7 @@ namespace {
 // Tests the case that there is nothing in the database.
 TEST_F(CollectionRangeDeleterTest, EmptyDatabase) {
     CollectionRangeDeleter rangeDeleter;
-    ASSERT_EQ(Action::kFinished, next(rangeDeleter, Action::kWriteOpLog, 1));
+    ASSERT_FALSE(next(rangeDeleter, 1));
 }
 
 // Tests the case that there is data, but it is not in a range to clean.
@@ -161,14 +145,14 @@ TEST_F(CollectionRangeDeleterTest, NoDataInGivenRangeToClean) {
     ASSERT_BSONOBJ_EQ(insertedDoc, dbclient.findOne(kNss.toString(), QUERY(kPattern << 25)));
 
     std::list<Deletion> ranges;
     ranges.emplace_back(Deletion(ChunkRange(BSON(kPattern << 0), BSON(kPattern << 10))));
-    ASSERT_TRUE(rangeDeleter.add(std::move(ranges)));
+    rangeDeleter.add(std::move(ranges));
     ASSERT_EQ(1u, rangeDeleter.size());
-    ASSERT_EQ(Action::kWriteOpLog, next(rangeDeleter, Action::kWriteOpLog, 1));
+    ASSERT_TRUE(next(rangeDeleter, 1));
     ASSERT_EQ(0u, rangeDeleter.size());
 
     ASSERT_BSONOBJ_EQ(insertedDoc, dbclient.findOne(kNss.toString(), QUERY(kPattern << 25)));
-    ASSERT_EQ(Action::kFinished, next(rangeDeleter, Action::kWriteOpLog, 1));
+    ASSERT_FALSE(next(rangeDeleter, 1));
 }
 
 // Tests the case that there is a single document within a range to clean.
@@ -182,24 +166,23 @@ TEST_F(CollectionRangeDeleterTest, OneDocumentInOneRangeToClean) {
     std::list<Deletion> ranges;
     Deletion deletion{ChunkRange(BSON(kPattern << 0), BSON(kPattern << 10))};
     ranges.emplace_back(std::move(deletion));
-    ASSERT_TRUE(rangeDeleter.add(std::move(ranges)));
+    rangeDeleter.add(std::move(ranges));
     ASSERT_TRUE(ranges.empty());  // spliced elements out of it
 
     auto optNotifn = rangeDeleter.overlaps(ChunkRange(BSON(kPattern << 0), BSON(kPattern << 10)));
     ASSERT(optNotifn);
     auto notifn = *optNotifn;
     ASSERT(!notifn.ready());
 
-    ASSERT_EQ(Action::kMore, next(rangeDeleter, Action::kWriteOpLog, 1));  // actually delete one
+    ASSERT_TRUE(next(rangeDeleter, 1));  // actually delete one
     ASSERT(!notifn.ready());
 
     ASSERT_EQ(rangeDeleter.size(), 1u);
-    // range empty, pop range, notify
-    ASSERT_EQ(Action::kWriteOpLog, next(rangeDeleter, Action::kMore, 1));
+    ASSERT_TRUE(next(rangeDeleter, 1));  // range empty, pop range, notify
     ASSERT_TRUE(rangeDeleter.isEmpty());
     ASSERT(notifn.ready() && notifn.waitStatus(operationContext()).isOK());
 
     ASSERT_TRUE(dbclient.findOne(kNss.toString(), QUERY(kPattern << 5)).isEmpty());
-    ASSERT_EQ(Action::kFinished, next(rangeDeleter, Action::kWriteOpLog, 1));
+    ASSERT_FALSE(next(rangeDeleter, 1));
 }
 
 // Tests the case that there are multiple documents within a range to clean.
@@ -214,12 +197,12 @@ TEST_F(CollectionRangeDeleterTest, MultipleDocumentsInOneRangeToClean) {
     std::list<Deletion> ranges;
     Deletion deletion{ChunkRange(BSON(kPattern << 0), BSON(kPattern << 10))};
     ranges.emplace_back(std::move(deletion));
-    ASSERT_TRUE(rangeDeleter.add(std::move(ranges)));
+    rangeDeleter.add(std::move(ranges));
 
-    ASSERT_EQ(Action::kMore, next(rangeDeleter, Action::kWriteOpLog, 100));
-    ASSERT_EQ(Action::kWriteOpLog, next(rangeDeleter, Action::kMore, 100));
+    ASSERT_TRUE(next(rangeDeleter, 100));
+    ASSERT_TRUE(next(rangeDeleter, 100));
     ASSERT_EQUALS(0ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 5)));
-    ASSERT_EQ(Action::kFinished, next(rangeDeleter, Action::kWriteOpLog, 100));
+    ASSERT_FALSE(next(rangeDeleter, 100));
 }
 
 // Tests the case that there are multiple documents within a range to clean, and the range deleter
@@ -235,20 +218,21 @@ TEST_F(CollectionRangeDeleterTest, MultipleCleanupNextRangeCalls) {
     std::list<Deletion> ranges;
     Deletion deletion{ChunkRange(BSON(kPattern << 0), BSON(kPattern << 10))};
     ranges.emplace_back(std::move(deletion));
-    ASSERT_TRUE(rangeDeleter.add(std::move(ranges)));
+    rangeDeleter.add(std::move(ranges));
 
-    ASSERT_EQ(Action::kMore, next(rangeDeleter, Action::kWriteOpLog, 1));
+    ASSERT_TRUE(next(rangeDeleter, 1));
     ASSERT_EQUALS(2ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 5)));
 
-    ASSERT_EQ(Action::kMore, next(rangeDeleter, Action::kMore, 1));
+    ASSERT_TRUE(next(rangeDeleter, 1));
     ASSERT_EQUALS(1ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 5)));
 
-    ASSERT_EQ(Action::kMore, next(rangeDeleter, Action::kMore, 1));
-    ASSERT_EQ(Action::kWriteOpLog, next(rangeDeleter, Action::kMore, 1));
+    ASSERT_TRUE(next(rangeDeleter, 1));
+    ASSERT_TRUE(next(rangeDeleter, 1));
     ASSERT_EQUALS(0ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 5)));
-    ASSERT_EQ(Action::kFinished, next(rangeDeleter, Action::kWriteOpLog, 1));
+    ASSERT_FALSE(next(rangeDeleter, 1));
 }
 
+
 // Tests the case that there are two ranges to clean, each containing multiple documents.
 TEST_F(CollectionRangeDeleterTest, MultipleDocumentsInMultipleRangesToClean) {
     CollectionRangeDeleter rangeDeleter;
@@ -263,10 +247,10 @@ TEST_F(CollectionRangeDeleterTest, MultipleDocumentsInMultipleRangesToClean) {
 
     std::list<Deletion> ranges;
     ranges.emplace_back(Deletion{ChunkRange{BSON(kPattern << 0), BSON(kPattern << 4)}});
-    ASSERT_TRUE(rangeDeleter.add(std::move(ranges)));
+    rangeDeleter.add(std::move(ranges));
     ASSERT_TRUE(ranges.empty());
     ranges.emplace_back(Deletion{ChunkRange{BSON(kPattern << 4), BSON(kPattern << 7)}});
-    ASSERT_FALSE(rangeDeleter.add(std::move(ranges)));
+    rangeDeleter.add(std::move(ranges));
 
     auto optNotifn1 = rangeDeleter.overlaps(ChunkRange{BSON(kPattern << 0), BSON(kPattern << 4)});
     ASSERT_TRUE(optNotifn1);
@@ -277,68 +261,29 @@ TEST_F(CollectionRangeDeleterTest, MultipleDocumentsInMultipleRangesToClean) {
     auto& notifn2 = *optNotifn2;
     ASSERT_FALSE(notifn2.ready());
 
-    // test op== on notifications
-    ASSERT_TRUE(notifn1 == *optNotifn1);
-    ASSERT_FALSE(notifn1 == *optNotifn2);
-    ASSERT_TRUE(notifn1 != *optNotifn2);
-    ASSERT_FALSE(notifn1 != *optNotifn1);
-
-    ASSERT_EQUALS(0ULL,
-                  dbclient.count(kAdminSystemVersion.ns(), BSON(kPattern << "startRangeDeletion")));
-
-    ASSERT_EQ(Action::kMore, next(rangeDeleter, Action::kWriteOpLog, 100));
+    ASSERT_TRUE(next(rangeDeleter, 100));
     ASSERT_FALSE(notifn1.ready());  // no trigger yet
     ASSERT_FALSE(notifn2.ready());  // no trigger yet
 
-    ASSERT_EQUALS(1ULL,
-                  dbclient.count(kAdminSystemVersion.ns(), BSON(kPattern << "startRangeDeletion")));
-    // clang-format off
-    ASSERT_BSONOBJ_EQ(
-        BSON("_id" << "startRangeDeletion" << "ns" << kNss.ns()
-          << "epoch" << epoch() << "min" << BSON("_id" << 0) << "max" << BSON("_id" << 4)),
-        dbclient.findOne(kAdminSystemVersion.ns(), QUERY("_id" << "startRangeDeletion")));
-    // clang-format on
-
-    ASSERT_EQUALS(0ULL, dbclient.count(kNss.ns(), BSON(kPattern << LT << 4)));
-    ASSERT_EQUALS(3ULL, dbclient.count(kNss.ns(), BSON(kPattern << LT << 10)));
-
-    // discover there are no more < 4, pop range 1
-    ASSERT_EQ(Action::kWriteOpLog, next(rangeDeleter, Action::kMore, 100));
-
-    ASSERT_EQUALS(1ULL,
-                  dbclient.count(kAdminSystemVersion.ns(), BSON(kPattern << "startRangeDeletion")));
-    // clang-format off
-    ASSERT_BSONOBJ_EQ(
-        BSON("_id" << "startRangeDeletion" << "ns" << kNss.ns()
-          << "epoch" << epoch() << "min" << BSON("_id" << 0) << "max" << BSON("_id" << 4)),
-        dbclient.findOne(kAdminSystemVersion.ns(), QUERY("_id" << "startRangeDeletion")));
-    // clang-format on
+    ASSERT_EQUALS(0ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 4)));
+    ASSERT_EQUALS(3ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 10)));
+
+    ASSERT_TRUE(next(rangeDeleter, 100));  // discover there are no more < 4, pop range 1
 
     ASSERT_TRUE(notifn1.ready() && notifn1.waitStatus(operationContext()).isOK());
     ASSERT_FALSE(notifn2.ready());
 
-    ASSERT_EQUALS(3ULL, dbclient.count(kNss.ns(), BSON(kPattern << LT << 10)));
+    ASSERT_EQUALS(3ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 10)));
 
-    // delete the remaining documents
-    ASSERT_EQ(Action::kMore, next(rangeDeleter, Action::kWriteOpLog, 100));
+    ASSERT_TRUE(next(rangeDeleter, 100));  // delete the remaining documents
     ASSERT_FALSE(notifn2.ready());
 
-    // clang-format off
-    ASSERT_BSONOBJ_EQ(
-        BSON("_id" << "startRangeDeletion" << "ns" << kNss.ns()
-          << "epoch" << epoch() << "min" << BSON("_id" << 4) << "max" << BSON("_id" << 7)),
-        dbclient.findOne(kAdminSystemVersion.ns(), QUERY("_id" << "startRangeDeletion")));
-    // clang-format on
-
-    ASSERT_EQUALS(0ULL, dbclient.count(kNss.ns(), BSON(kPattern << LT << 10)));
-
-    // discover there are no more, pop range 2
-    ASSERT_EQ(Action::kWriteOpLog, next(rangeDeleter, Action::kMore, 1));
+    ASSERT_EQUALS(0ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 10)));
 
+    ASSERT_TRUE(next(rangeDeleter, 1));  // discover there are no more, pop range 2
     ASSERT_TRUE(notifn2.ready() && notifn2.waitStatus(operationContext()).isOK());
 
-    // discover there are no more ranges
-    ASSERT_EQ(Action::kFinished, next(rangeDeleter, Action::kWriteOpLog, 1));
+    ASSERT_FALSE(next(rangeDeleter, 1));  // discover there are no more ranges
 }
 
 }  // unnamed namespace
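Taken together, the updated tests all follow one pattern: queue ranges, then pump next() until it returns false. A condensed sketch of that pattern, assuming the fixture's next() helper from the diff above (illustrative, not an actual test in this commit):

    // Queue one range, then drive the deleter to completion.
    CollectionRangeDeleter rangeDeleter;
    std::list<Deletion> ranges;
    ranges.emplace_back(Deletion{ChunkRange(BSON(kPattern << 0), BSON(kPattern << 10))});
    rangeDeleter.add(std::move(ranges));  // splices the list empty

    while (next(rangeDeleter, 100)) {
        // true: another pass may still have documents (or a pop) to do
    }
    ASSERT_TRUE(rangeDeleter.isEmpty());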
diff --git a/src/mongo/db/s/collection_sharding_state.cpp b/src/mongo/db/s/collection_sharding_state.cpp
index e4a04ed2aaf..6440415d511 100644
--- a/src/mongo/db/s/collection_sharding_state.cpp
+++ b/src/mongo/db/s/collection_sharding_state.cpp
@@ -134,12 +134,6 @@ auto CollectionShardingState::cleanUpRange(ChunkRange const& range) -> CleanupNo
     return _metadataManager->cleanUpRange(range);
 }
 
-auto CollectionShardingState::overlappingMetadata(ChunkRange const& range) const
-    -> std::vector<ScopedCollectionMetadata> {
-    return _metadataManager->overlappingMetadata(_metadataManager, range);
-}
-
-
 MigrationSourceManager* CollectionShardingState::getMigrationSourceManager() {
     return _sourceMgr;
 }
diff --git a/src/mongo/db/s/collection_sharding_state.h b/src/mongo/db/s/collection_sharding_state.h
index 28ad2ad12d1..ff0489f4d9a 100644
--- a/src/mongo/db/s/collection_sharding_state.h
+++ b/src/mongo/db/s/collection_sharding_state.h
@@ -141,14 +141,6 @@ public:
     auto cleanUpRange(ChunkRange const& range) -> CleanupNotification;
 
     /**
-     * Returns a vector of ScopedCollectionMetadata objects representing metadata instances in use
-     * by running queries that overlap the argument range, suitable for identifying and invalidating
-     * those queries.
-     */
-    auto overlappingMetadata(ChunkRange const& range) const
-        -> std::vector<ScopedCollectionMetadata>;
-
-    /**
      * Returns the active migration source manager, if one is available.
      */
     MigrationSourceManager* getMigrationSourceManager();
@@ -251,12 +243,10 @@ private:
     MigrationSourceManager* _sourceMgr{nullptr};
 
     // for access to _metadataManager
-    friend auto CollectionRangeDeleter::cleanUpNextRange(OperationContext*,
+    friend bool CollectionRangeDeleter::cleanUpNextRange(OperationContext*,
                                                          NamespaceString const&,
-                                                         CollectionRangeDeleter::Action,
                                                          int maxToDelete,
-                                                         CollectionRangeDeleter*)
-        -> CollectionRangeDeleter::Action;
+                                                         CollectionRangeDeleter*);
 };
 
 }  // namespace mongo
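Note that CollectionShardingState keeps its cleanUpRange() entry point; only the overlappingMetadata() query-invalidation hook goes away. A hedged sketch of how a caller can still block on a scheduled deletion through the surviving API (waitForClean() here is hypothetical; real callers sit in the migration code paths):

    // Schedule deletion of an orphaned range and wait for the outcome.
    Status waitForClean(OperationContext* opCtx, CollectionShardingState* css,
                        ChunkRange range) {
        auto notification = css->cleanUpRange(range);
        // Blocks until the deletion completes, fails, or is abandoned.
        return notification.waitStatus(opCtx);
    }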
diff --git a/src/mongo/db/s/metadata_manager.cpp b/src/mongo/db/s/metadata_manager.cpp
index de6cfcabd96..70cc7de5ee5 100644
--- a/src/mongo/db/s/metadata_manager.cpp
+++ b/src/mongo/db/s/metadata_manager.cpp
@@ -44,67 +44,60 @@
 #include "mongo/util/assert_util.h"
 #include "mongo/util/log.h"
 
-// MetadataManager maintains pointers to CollectionMetadata objects in a member list named
-// _metadata. Each CollectionMetadata contains an immutable _chunksMap of chunks assigned to this
-// shard, along with details related to its own lifecycle in a member _tracker.
+// MetadataManager maintains std::shared_ptr<CollectionMetadataManager> pointers in a list
+// _metadata. It also contains a CollectionRangeDeleter that queues orphan ranges to delete in
+// a background thread, and a record of the ranges being migrated in, to avoid deleting them.
 //
-// The current chunk mapping, used by queries starting up, is at _metadata.back(). Each query,
-// when it starts up, requests and holds a ScopedCollectionMetadata object, and destroys it on
-// termination. Each ScopedCollectionMetadata keeps a shared_ptr to its CollectionMetadata chunk
-// mapping, and to the MetadataManager itself. CollectionMetadata mappings also keep a record of
-// chunk ranges that may be deleted when it is determined that the range can no longer be in use.
+// Free-floating CollectionMetadata objects are maintained by these pointers, and also by clients
+// via shared pointers in ScopedCollectionMetadata objects.
 //
-// ScopedCollectionMetadata's destructor decrements the CollectionMetadata's usageCounter.
-// Whenever a usageCounter drops to zero, we check whether any now-unused CollectionMetadata
-// elements can be popped off the front of _metadata. We need to keep the unused elements in the
-// middle (as seen below) because they may schedule deletions of chunks depended on by older
-// mappings.
+// The _tracker member of CollectionMetadata keeps:
+//   a count of the ScopedCollectionMetadata objects that have pointers to the CollectionMetadata
+//   a list of key ranges [min,max) of orphaned documents that may be deleted when the count goes
+//     to zero
+//                                  ____________________________
+//  (s): std::shared_ptr<> Clients:| ScopedCollectionMetadata   |
+//   _________________________  +----(s) manager  metadata (s)-----------------+
+//  | CollectionShardingState |  |  |____________________________|    |        |
+//  |  _metadataManager (s)   |  +-------(s) manager  metadata (s)-------------+ |
+//  |____________________|____|  |  |____________________________|    |      | |
+//   ____________________v_______  +----------(s) manager  metadata (s)      | |
+//  | MetadataManager             |  |  |________________________|___|       | |
+//  |                             |<---+                         |           | |
+//  |                             |     ________________________ |           | |
+//  |              /----------->| CollectionMetadata         |<----+ (1 use) | |
+//  | [(s),----/                 | | ______________________|_  |             | |
+//  |  (s),------------------->| CollectionMetadata        | (0 uses)        | |
+//  | _metadata: (s)]----\      | | | ______________________|_ |             | |
+//  |              \--------------->| CollectionMetadata     |               | |
+//  |                           | | |                        |               | |
+//  | _rangesToClean:           | | | _tracker:              |<------------+ |
+//  |  ________________________ | | |  ____________________  |<--------------+
+//  | | CollectionRangeDeleter || | | | Tracker            | |  (2 uses)
+//  | |                        || | | |                    | |
+//  | | _orphans [[min,max),   || | | | usageCounter       | |
+//  | |           [min,max),   || | | | orphans [min,max), | |
+//  | |                ... ]   || | | |             ... ]  | |
+//  | |________________________|| |_| |____________________| |
+//  |____________________________|  | _chunksMap             |
+//                                |_| _chunkVersion           |
+//                                  |  ...                    |
+//                                  |________________________|
 //
-// New chunk mappings are pushed onto the back of _metadata. Subsequently started queries use the
-// new mapping while still-running queries continue using the older "snapshot" mappings. We treat
-// _metadata.back()'s usage count differently from the snapshots because it can't reliably be
-// compared to zero; a new query may increment it at any time.
+// A ScopedCollectionMetadata object is created and held during a query, and destroyed when the
+// query no longer needs access to the collection. Its destructor decrements the CollectionMetadata
+// _tracker member's usageCounter. Note that the collection may become unsharded, and even get
+// sharded again, between construction and destruction of a ScopedCollectionMetadata.
 //
-// (Note that the collection may be dropped or become unsharded, and even get made and sharded
-// again, between construction and destruction of a ScopedCollectionMetadata).
+// When a new chunk mapping replaces the active mapping, it is pushed onto the back of _metadata.
 //
-// MetadataManager also contains a CollectionRangeDeleter _rangesToClean that queues orphan ranges
-// being deleted in a background thread, and a mapping _receivingChunks of the ranges being migrated
-// in, to avoid deleting them. Each range deletion is paired with a notification object triggered
-// when the deletion is completed or abandoned.
-//
-//                                  ____________________________
-//  (s): std::shared_ptr<> Clients:| ScopedCollectionMetadata   |
-//   _________________________  +----(s) manager  metadata (s)------------------+
-//  | CollectionShardingState |  |  |____________________________|    |         |
-//  |  _metadataManager (s)   |  +-------(s) manager  metadata (s)--------------+ |
-//  |____________________|____|  |  |____________________________|    |       | |
-//   ____________________v________  +------------(s) manager  metadata (s)-----+ | |
-//  | MetadataManager              |  |  |____________________________|        | | |
-//  |                              |<--+                                       | | |
-//  |                              |     ___________________________ (1 use)   | | |
-//  | getActiveMetadata():  /---------->| CollectionMetadata        |<---------+ | |
-//  | back(): [(s),------/         | |  _________________________|_ |            | |
-//  |          (s),-------------------->| CollectionMetadata       | (0 uses)    | |
-//  | _metadata: (s)]------\       | |  | _________________________|_            | |
-//  |                \-------------->| CollectionMetadata           |            | |
-//  | _receivingChunks             | | |                             | (2 uses)  | |
-//  | _rangesToClean:              | | | _tracker:                   |<---------+ |
-//  |  _________________________   | | |  _______________________    |<-----------+
-//  | | CollectionRangeDeleter  |  | | | | Tracker               |   |
-//  | |                         |  | | | |                       |   |
-//  | | _orphans [range,notif,  |  | | | | usageCounter          |   |
-//  | |            range,notif, |  | | | | orphans [range,notif, |   |
-//  | |                  ... ]  |  | | | |          range,notif, |   |
-//  | |                         |  | | | |               ... ]   |   |
-//  | |_________________________|  | |_| |_______________________|   |
-//  |_____________________________|   | _chunksMap                   |
-//                                  |_| _chunkVersion                |
-//                                    |  ...                         |
-//                                    |___________________________|
+// A CollectionMetadata object pointed to from _metadata is maintained at least as long as any
+// query holds a ScopedCollectionMetadata object referring to it, or to any older one. In the
+// diagram above, the middle CollectionMetadata is kept until the one below it is disposed of.
 //
 // Note that _metadata as shown here has its front() at the bottom, back() at the top. As usual,
-// new entries are pushed onto the back, popped off the front.
+// new entries are pushed onto the back, popped off the front. The "active" metadata used by new
+// queries (when there is one), is _metadata.back().
 
 namespace mongo {
 
@@ -125,17 +118,13 @@ MetadataManager::~MetadataManager() {
 }
 
 void MetadataManager::_clearAllCleanups() {
-    _clearAllCleanups(
-        {ErrorCodes::InterruptedDueToReplStateChange,
-         str::stream() << "Range deletions in " << _nss.ns()
-                       << " abandoned because collection was dropped or became unsharded"});
-}
-
-void MetadataManager::_clearAllCleanups(Status status) {
     for (auto& metadata : _metadata) {
         _pushListToClean(std::move(metadata->_tracker.orphans));
     }
-    _rangesToClean.clear(status);
+    _rangesToClean.clear({ErrorCodes::InterruptedDueToReplStateChange,
+                          str::stream() << "Range deletions in " << _nss.ns()
+                                        << " abandoned because collection was"
+                                           " dropped or became unsharded"});
 }
 
 ScopedCollectionMetadata MetadataManager::getActiveMetadata(std::shared_ptr<MetadataManager> self)
@@ -252,12 +241,10 @@ void MetadataManager::_retireExpiredMetadata() {
         if (!_metadata.front()->_tracker.orphans.empty()) {
             log() << "Queries possibly dependent on " << _nss.ns()
                   << " range(s) finished; scheduling for deletion";
-            // It is safe to push orphan ranges from _metadata.back(), even though new queries
-            // might start any time, because any request to delete a range it maps is rejected.
             _pushListToClean(std::move(_metadata.front()->_tracker.orphans));
         }
         if (&_metadata.front() == &_metadata.back())
-            break;  // do not pop the active chunk mapping!
+            break;  // do not retire current chunk metadata.
     }
 }
 
@@ -267,8 +254,6 @@
 ScopedCollectionMetadata::ScopedCollectionMetadata(std::shared_ptr<MetadataManager> manager,
                                                    std::shared_ptr<CollectionMetadata> metadata)
     : _metadata(std::move(metadata)), _manager(std::move(manager)) {
-    invariant(_metadata);
-    invariant(_manager);
     ++_metadata->_tracker.usageCounter;
 }
 
@@ -357,17 +342,15 @@ void MetadataManager::append(BSONObjBuilder* builder) {
     amrArr.done();
 }
 
-void MetadataManager::_scheduleCleanup(executor::TaskExecutor* executor,
-                                       NamespaceString nss,
-                                       CollectionRangeDeleter::Action action) {
-    executor->scheduleWork([executor, nss, action](auto&) {
+void MetadataManager::_scheduleCleanup(executor::TaskExecutor* executor, NamespaceString nss) {
+    executor->scheduleWork([executor, nss](auto&) {
         const int maxToDelete = std::max(int(internalQueryExecYieldIterations.load()), 1);
         Client::initThreadIfNotAlready("Collection Range Deleter");
         auto UniqueOpCtx = Client::getCurrent()->makeOperationContext();
         auto opCtx = UniqueOpCtx.get();
-        auto next = CollectionRangeDeleter::cleanUpNextRange(opCtx, nss, action, maxToDelete);
-        if (next != CollectionRangeDeleter::Action::kFinished) {
-            _scheduleCleanup(executor, nss, next);
+        bool again = CollectionRangeDeleter::cleanUpNextRange(opCtx, nss, maxToDelete);
+        if (again) {
+            _scheduleCleanup(executor, nss);
        }
     });
 }
@@ -382,9 +365,9 @@ auto MetadataManager::_pushRangeToClean(ChunkRange const& range) -> CleanupNotif
 
 void MetadataManager::_pushListToClean(std::list<Deletion> ranges) {
     if (_rangesToClean.add(std::move(ranges))) {
-        _scheduleCleanup(_executor, _nss, CollectionRangeDeleter::Action::kWriteOpLog);
+        _scheduleCleanup(_executor, _nss);
     }
-    invariant(ranges.empty());
+    dassert(ranges.empty());
 }
 
 void MetadataManager::_addToReceiving(ChunkRange const& range) {
@@ -459,28 +442,6 @@ auto MetadataManager::cleanUpRange(ChunkRange const& range) -> CleanupNotificati
     return activeMetadata->_tracker.orphans.back().notification;
 }
 
-auto MetadataManager::overlappingMetadata(std::shared_ptr<MetadataManager> const& self,
-                                          ChunkRange const& range)
-    -> std::vector<ScopedCollectionMetadata> {
-    invariant(!_metadata.empty());
-    stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
-    std::vector<ScopedCollectionMetadata> result;
-    result.reserve(_metadata.size());
-    auto it = _metadata.crbegin();  // start with the current active chunk mapping
-    if ((*it)->rangeOverlapsChunk(range)) {
-        // We ignore the refcount of the active mapping; effectively, we assume it is in use.
-        result.push_back(ScopedCollectionMetadata(self, *it));
-    }
-    ++it;  // step to snapshots
-    for (auto end = _metadata.crend(); it != end; ++it) {
-        // We want all the overlapping snapshot mappings still possibly in use by a query.
-        if ((*it)->_tracker.usageCounter > 0 && (*it)->rangeOverlapsChunk(range)) {
-            result.push_back(ScopedCollectionMetadata(self, *it));
-        }
-    }
-    return result;
-}
-
 size_t MetadataManager::numberOfRangesToCleanStillInUse() {
     stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
     size_t count = 0;
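The switch from invariant(ranges.empty()) to dassert(ranges.empty()) in _pushListToClean() leans on the fact that CollectionRangeDeleter::add() splices the caller's std::list rather than copying it, as the test comment "spliced elements out of it" confirms. That emptying is guaranteed std::list::splice behavior, shown standalone:

    #include <cassert>
    #include <list>

    int main() {
        std::list<int> source{1, 2, 3};
        std::list<int> destination;

        // splice() relinks nodes in O(1) and leaves the source empty, which
        // is why the caller can assert ranges.empty() after add().
        destination.splice(destination.end(), source);

        assert(source.empty());
        assert(destination.size() == 3);
    }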
diff --git a/src/mongo/db/s/metadata_manager.h b/src/mongo/db/s/metadata_manager.h
index ff0770f450e..46da84db9e2 100644
--- a/src/mongo/db/s/metadata_manager.h
+++ b/src/mongo/db/s/metadata_manager.h
@@ -114,14 +114,6 @@ public:
     CleanupNotification cleanUpRange(ChunkRange const& range);
 
     /**
-     * Returns a vector of ScopedCollectionMetadata objects representing metadata instances in use
-     * by running queries that overlap the argument range, suitable for identifying and invalidating
-     * those queries.
-     */
-    auto overlappingMetadata(std::shared_ptr<MetadataManager> const& itself,
-                             ChunkRange const& range) -> std::vector<ScopedCollectionMetadata>;
-
-    /**
      * Returns the number of ranges scheduled to be cleaned, exclusive of such ranges that might
     * still be in use by running queries. Outside of test drivers, the actual number may vary
     * after it returns, so this is really only useful for unit tests.
@@ -154,19 +146,12 @@ private:
     * Each time it completes cleaning up a range, it wakes up clients waiting on completion of
     * that range, which may then verify their range has no more deletions scheduled, and proceed.
     */
-    static void _scheduleCleanup(executor::TaskExecutor*,
-                                 NamespaceString nss,
-                                 CollectionRangeDeleter::Action);
+    static void _scheduleCleanup(executor::TaskExecutor*, NamespaceString nss);
 
     // All of the following functions must be called while holding _managerLock.
 
     /**
-     * Cancels all scheduled deletions of orphan ranges, notifying listeners with specified status.
-     */
-    void _clearAllCleanups(Status);
-
-    /**
-     * Cancels all scheduled deletions of orphan ranges, notifying listeners with status
+     * Cancel all scheduled deletions of orphan ranges, notifying listeners with status
      * InterruptedDueToReplStateChange.
      */
     void _clearAllCleanups();
@@ -244,12 +229,10 @@ private:
     // friends
     // for access to _rangesToClean and _managerLock under task callback
-    friend auto CollectionRangeDeleter::cleanUpNextRange(OperationContext*,
+    friend bool CollectionRangeDeleter::cleanUpNextRange(OperationContext*,
                                                          NamespaceString const&,
-                                                         CollectionRangeDeleter::Action,
                                                          int maxToDelete,
-                                                         CollectionRangeDeleter*)
-        -> CollectionRangeDeleter::Action;
+                                                         CollectionRangeDeleter*);
 
     friend class ScopedCollectionMetadata;
 };
@@ -282,17 +265,6 @@ public:
      */
     operator bool() const;
 
-    /**
-     * Checks whether both objects refer to the identically the same metadata.
-     */
-    bool operator==(ScopedCollectionMetadata const& other) const {
-        return _metadata == other._metadata;
-    }
-    bool operator!=(ScopedCollectionMetadata const& other) const {
-        return _metadata != other._metadata;
-    }
-
-
 private:
     /**
      * Increments the usageCounter in the specified CollectionMetadata.
@@ -313,12 +285,8 @@ private:
 
     std::shared_ptr<MetadataManager> _manager{nullptr};
 
-    // These use our private ctor
     friend ScopedCollectionMetadata MetadataManager::getActiveMetadata(
-        std::shared_ptr<MetadataManager>);
-    friend auto MetadataManager::overlappingMetadata(std::shared_ptr<MetadataManager> const& itself,
-                                                     ChunkRange const& range)
-        -> std::vector<ScopedCollectionMetadata>;
+        std::shared_ptr<MetadataManager>);  // uses our private ctor
 };
 
 }  // namespace mongo
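The rewritten comment block at the top of metadata_manager.cpp describes ScopedCollectionMetadata as an RAII usage counter over a shared CollectionMetadata snapshot. Stripped of the sharding specifics, the lifecycle looks like this (a simplified model, not the class itself):

    #include <cassert>
    #include <memory>

    struct TrackedMetadata {
        int usageCounter = 0;  // models CollectionMetadata::_tracker.usageCounter
    };

    class ScopedMetadataModel {
    public:
        explicit ScopedMetadataModel(std::shared_ptr<TrackedMetadata> m) : _m(std::move(m)) {
            ++_m->usageCounter;  // the real ctor does this under _managerLock
        }
        ScopedMetadataModel(const ScopedMetadataModel&) = delete;
        ScopedMetadataModel& operator=(const ScopedMetadataModel&) = delete;
        ~ScopedMetadataModel() {
            --_m->usageCounter;  // the real dtor may also retire expired snapshots
        }

    private:
        std::shared_ptr<TrackedMetadata> _m;
    };

    int main() {
        auto metadata = std::make_shared<TrackedMetadata>();
        {
            ScopedMetadataModel guard(metadata);  // a running query
            assert(metadata->usageCounter == 1);
        }
        assert(metadata->usageCounter == 0);  // snapshot now retirable
    }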
diff --git a/src/mongo/db/s/metadata_manager_test.cpp b/src/mongo/db/s/metadata_manager_test.cpp
index 5ba653ac731..a3375634d9f 100644
--- a/src/mongo/db/s/metadata_manager_test.cpp
+++ b/src/mongo/db/s/metadata_manager_test.cpp
@@ -222,36 +222,17 @@ TEST_F(MetadataManagerTest, NotificationBlocksUntilDeletion) {
     ASSERT_EQ(manager->numberOfMetadataSnapshots(), 0UL);
     ASSERT_EQ(manager->numberOfRangesToClean(), 0UL);
 
-        auto scm1 = manager->getActiveMetadata(manager);  // and increment refcount
-        ASSERT_TRUE(bool(scm1));
-        ASSERT_EQ(0ULL, scm1->getChunks().size());
-
-        addChunk(manager);                                // push new metadata
-        auto scm2 = manager->getActiveMetadata(manager);  // and increment refcount
-        ASSERT_EQ(1ULL, scm2->getChunks().size());
-
-        // this is here solely to pacify an invariant in addChunk
-        manager->refreshActiveMetadata(makeEmptyMetadata());
-
-        addChunk(manager);                                // push new metadata
-        auto scm3 = manager->getActiveMetadata(manager);  // and increment refcount
-        ASSERT_EQ(1ULL, scm3->getChunks().size());
-
-        auto overlaps =
-            manager->overlappingMetadata(manager, ChunkRange(BSON("key" << 0), BSON("key" << 10)));
-        ASSERT_EQ(2ULL, overlaps.size());
-        std::vector<ScopedCollectionMetadata> ref;
-        ref.push_back(std::move(scm3));
-        ref.push_back(std::move(scm2));
-        ASSERT(ref == overlaps);
-
-        ASSERT_EQ(manager->numberOfMetadataSnapshots(), 3UL);
+        auto scm = manager->getActiveMetadata(manager);  // and increment scm's refcount
+        ASSERT(bool(scm));
+        addChunk(manager);  // push new metadata
+
+        ASSERT_EQ(manager->numberOfMetadataSnapshots(), 1UL);
         ASSERT_EQ(manager->numberOfRangesToClean(), 0UL);  // not yet...
 
         optNotif = manager->cleanUpRange(cr1);
-        ASSERT_EQ(manager->numberOfMetadataSnapshots(), 3UL);
+        ASSERT_EQ(manager->numberOfMetadataSnapshots(), 1UL);
         ASSERT_EQ(manager->numberOfRangesToClean(), 1UL);
-    }  // scm1,2,3 destroyed, refcount of each metadata goes to zero
+    }  // scm destroyed, refcount of metadata goes to zero
     ASSERT_EQ(manager->numberOfMetadataSnapshots(), 0UL);
     ASSERT_EQ(manager->numberOfRangesToClean(), 1UL);
     ASSERT_FALSE(optNotif->ready());