diff options
author | Blake Oler <blake.oler@mongodb.com> | 2018-12-04 13:23:27 -0500 |
---|---|---|
committer | Blake Oler <blake.oler@mongodb.com> | 2018-12-31 16:43:30 -0500 |
commit | b5d7f2f919c57ee029f76629234613761ff05f4b (patch) | |
tree | b4a176b3f32a393deb22a97575789e86069c89e7 | |
parent | 0df66cd7b29f507f31ea78149b04537b32fe4a80 (diff) | |
download | mongo-b5d7f2f919c57ee029f76629234613761ff05f4b.tar.gz |
SERVER-38050 Validate the sameness of collection metadata after the range deleter's deletion loop
(cherry picked from commit cee9c4deed8bbf0c612b465be4625d5d0775d204)
-rw-r--r-- | src/mongo/db/s/collection_range_deleter.cpp | 76 | ||||
-rw-r--r-- | src/mongo/db/s/collection_range_deleter.h | 13 | ||||
-rw-r--r-- | src/mongo/db/s/metadata_manager.h | 5 |
3 files changed, 67 insertions, 27 deletions
diff --git a/src/mongo/db/s/collection_range_deleter.cpp b/src/mongo/db/s/collection_range_deleter.cpp index ed7c303a041..1ed2d2caa8e 100644 --- a/src/mongo/db/s/collection_range_deleter.cpp +++ b/src/mongo/db/s/collection_range_deleter.cpp @@ -130,31 +130,16 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange( { AutoGetCollection autoColl(opCtx, nss, MODE_IX); - auto* const collection = autoColl.getCollection(); auto* const css = CollectionShardingState::get(opCtx, nss); - auto* const self = forTestOnly ? forTestOnly : &css->_metadataManager->_rangesToClean; - - auto scopedCollectionMetadata = css->getMetadata(); - - if (!forTestOnly && (!collection || !scopedCollectionMetadata)) { - if (!collection) { - LOG(0) << "Abandoning any range deletions left over from dropped " << nss.ns(); - } else { - LOG(0) << "Abandoning any range deletions left over from previously sharded" - << nss.ns(); - } + auto& metadataManager = css->_metadataManager; - stdx::lock_guard<stdx::mutex> lk(css->_metadataManager->_managerLock); - css->_metadataManager->_clearAllCleanups(lk); + if (!_checkCollectionMetadataStillValid( + opCtx, nss, epoch, forTestOnly, collection, metadataManager)) { return boost::none; } - if (!forTestOnly && scopedCollectionMetadata->getCollVersion().epoch() != epoch) { - LOG(1) << "Range deletion task for " << nss.ns() << " epoch " << epoch << " woke;" - << " (current is " << scopedCollectionMetadata->getCollVersion() << ")"; - return boost::none; - } + auto* const self = forTestOnly ? forTestOnly : &metadataManager->_rangesToClean; bool writeOpLog = false; @@ -223,6 +208,8 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange( } } + const auto scopedCollectionMetadata = metadataManager->getActiveMetadata(metadataManager); + try { const auto keyPattern = scopedCollectionMetadata->getKeyPattern(); wrote = self->_doDeletion(opCtx, collection, keyPattern, *range, maxToDelete); @@ -249,7 +236,7 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange( const auto clientOpTime = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp(); // Wait for replication outside the lock - const auto status = [&] { + const auto replicationStatus = [&] { try { WriteConcernResult unusedWCResult; return waitForWriteConcern( @@ -261,13 +248,22 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange( // Get the lock again to finish off this range (including notifying, if necessary). AutoGetCollection autoColl(opCtx, nss, MODE_IX); + auto* const collection = autoColl.getCollection(); auto* const css = CollectionShardingState::get(opCtx, nss); - auto* const self = forTestOnly ? forTestOnly : &css->_metadataManager->_rangesToClean; + auto& metadataManager = css->_metadataManager; + + if (!_checkCollectionMetadataStillValid( + opCtx, nss, epoch, forTestOnly, collection, metadataManager)) { + return boost::none; + } + + auto* const self = forTestOnly ? forTestOnly : &metadataManager->_rangesToClean; + stdx::lock_guard<stdx::mutex> scopedLock(css->_metadataManager->_managerLock); - if (!status.isOK()) { + if (!replicationStatus.isOK()) { LOG(0) << "Error when waiting for write concern after removing " << nss << " range " - << redact(range->toString()) << " : " << redact(status.reason()); + << redact(range->toString()) << " : " << redact(replicationStatus.reason()); // If range were already popped (e.g. by dropping nss during the waitForWriteConcern // above) its notification would have been triggered, so this check suffices to ensure @@ -276,7 +272,7 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange( invariant(!self->isEmpty() && self->_orphans.front().notification == notification); LOG(0) << "Abandoning deletion of latest range in " << nss.ns() << " after local " << "deletions because of replication failure"; - self->_pop(status); + self->_pop(replicationStatus); } } else { LOG(0) << "Finished deleting documents in " << nss.ns() << " range " @@ -301,6 +297,38 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange( return Date_t::now() + stdx::chrono::milliseconds{rangeDeleterBatchDelayMS.load()}; } +bool CollectionRangeDeleter::_checkCollectionMetadataStillValid( + OperationContext* opCtx, + const NamespaceString& nss, + OID const& epoch, + CollectionRangeDeleter* forTestOnly, + Collection* collection, + std::shared_ptr<MetadataManager> metadataManager) { + + const auto scopedCollectionMetadata = metadataManager->getActiveMetadata(metadataManager); + + if (!forTestOnly && (!collection || !scopedCollectionMetadata)) { + if (!collection) { + LOG(0) << "Abandoning any range deletions left over from dropped " << nss.ns(); + } else { + LOG(0) << "Abandoning any range deletions left over from previously sharded" + << nss.ns(); + } + + stdx::lock_guard<stdx::mutex> lk(metadataManager->_managerLock); + metadataManager->_clearAllCleanups(lk); + return false; + } + + if (!forTestOnly && scopedCollectionMetadata->getCollVersion().epoch() != epoch) { + LOG(1) << "Range deletion task for " << nss.ns() << " epoch " << epoch << " woke;" + << " (current is " << scopedCollectionMetadata->getCollVersion() << ")"; + return false; + } + + return true; +} + StatusWith<int> CollectionRangeDeleter::_doDeletion(OperationContext* opCtx, Collection* collection, BSONObj const& keyPattern, diff --git a/src/mongo/db/s/collection_range_deleter.h b/src/mongo/db/s/collection_range_deleter.h index 4949c1febde..f6b9f161a47 100644 --- a/src/mongo/db/s/collection_range_deleter.h +++ b/src/mongo/db/s/collection_range_deleter.h @@ -42,6 +42,7 @@ namespace mongo { class BSONObj; class Collection; +class MetadataManager; class OperationContext; // The maximum number of documents to delete in a single batch during range deletion. @@ -185,6 +186,18 @@ public: private: /** + * Verifies that the metadata for the collection to be cleaned up is still valid. Makes sure + * the collection has not been dropped (or dropped then recreated). + */ + static bool _checkCollectionMetadataStillValid( + OperationContext* opCtx, + const NamespaceString& nss, + OID const& epoch, + CollectionRangeDeleter* forTestOnly, + Collection* collection, + std::shared_ptr<MetadataManager> metadataManager); + + /** * Performs the deletion of up to maxToDelete entries within the range in progress. Must be * called under the collection lock. * diff --git a/src/mongo/db/s/metadata_manager.h b/src/mongo/db/s/metadata_manager.h index 20b3f4d1895..cbac10ab3d4 100644 --- a/src/mongo/db/s/metadata_manager.h +++ b/src/mongo/db/s/metadata_manager.h @@ -148,9 +148,8 @@ private: // Management of the _metadata list is implemented in ScopedCollectionMetadata friend class ScopedCollectionMetadata; - // For access to _rangesToClean and _managerLock under task callback - friend boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange( - OperationContext*, NamespaceString const&, OID const&, int, CollectionRangeDeleter*); + // For access to _managerLock, _rangesToClean, and _clearAllCleanups under task callback + friend class CollectionRangeDeleter; /** * Represents an instance of what the filtering metadata for this collection was at a particular |