diff options
49 files changed, 602 insertions, 116 deletions
diff --git a/src/mongo/db/catalog/collection.h b/src/mongo/db/catalog/collection.h index a990135de69..ea37ea27943 100644 --- a/src/mongo/db/catalog/collection.h +++ b/src/mongo/db/catalog/collection.h @@ -215,6 +215,8 @@ public: virtual const CollectionInfoCache* infoCache() const = 0; virtual const NamespaceString& ns() const = 0; + virtual void setNs(NamespaceString) = 0; + virtual OptionalCollectionUUID uuid() const = 0; virtual const IndexCatalog* getIndexCatalog() const = 0; @@ -397,6 +399,10 @@ public: return this->_impl().ns(); } + inline void setNs(NamespaceString nss) { + this->_impl().setNs(std::move(nss)); + } + inline OptionalCollectionUUID uuid() const { return this->_impl().uuid(); } diff --git a/src/mongo/db/catalog/collection_catalog_entry.h b/src/mongo/db/catalog/collection_catalog_entry.h index c84e587f1cd..729adb86bb9 100644 --- a/src/mongo/db/catalog/collection_catalog_entry.h +++ b/src/mongo/db/catalog/collection_catalog_entry.h @@ -55,6 +55,10 @@ public: return _ns; } + void setNs(NamespaceString ns) { + _ns = std::move(ns); + } + // ------- indexes ---------- virtual CollectionOptions getCollectionOptions(OperationContext* opCtx) const = 0; diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp index e0259d1d4ab..c2d8c76a754 100644 --- a/src/mongo/db/catalog/collection_impl.cpp +++ b/src/mongo/db/catalog/collection_impl.cpp @@ -168,7 +168,7 @@ CollectionImpl::CollectionImpl(Collection* _this_init, parseValidationAction(_details->getCollectionOptions(opCtx).validationAction))), _validationLevel(uassertStatusOK( parseValidationLevel(_details->getCollectionOptions(opCtx).validationLevel))), - _cursorManager(_ns), + _cursorManager(std::make_unique<CursorManager>(_ns)), _cappedNotifier(_recordStore->isCapped() ? stdx::make_unique<CappedInsertNotifier>() : nullptr), _this(_this_init) {} @@ -792,7 +792,7 @@ Status CollectionImpl::truncate(OperationContext* opCtx) { // 2) drop indexes _indexCatalog->dropAllIndexes(opCtx, true); - _cursorManager.invalidateAll(opCtx, false, "collection truncated"); + _cursorManager->invalidateAll(opCtx, false, "collection truncated"); // 3) truncate record store auto status = _recordStore->truncate(opCtx); @@ -815,7 +815,7 @@ void CollectionImpl::cappedTruncateAfter(OperationContext* opCtx, RecordId end, BackgroundOperation::assertNoBgOpInProgForNs(ns()); invariant(_indexCatalog->numIndexesInProgress(opCtx) == 0); - _cursorManager.invalidateAll(opCtx, false, "capped collection truncated"); + _cursorManager->invalidateAll(opCtx, false, "capped collection truncated"); _recordStore->cappedTruncateAfter(opCtx, end, inclusive); } @@ -1293,4 +1293,19 @@ std::unique_ptr<MultiIndexBlock> CollectionImpl::createMultiIndexBlock(Operation return std::make_unique<MultiIndexBlockImpl>(opCtx, _this); } +void CollectionImpl::setNs(NamespaceString nss) { + _ns = std::move(nss); + _indexCatalog->setNs(_ns); + _infoCache.setNs(_ns); + _recordStore->setNs(_ns); + + // Until the query layer is prepared for cursors to survive renames, all cursors are killed when + // the name of a collection changes. Therefore, the CursorManager should be empty. This means it + // is safe to re-establish it with a new namespace by tearing down the old one and allocating a + // new manager associated with the new name. This is done in order to ensure that the + // 'globalCursorIdCache' maintains the correct mapping from cursor id "prefix" (the high order + // bits) to namespace. + _cursorManager = std::make_unique<CursorManager>(_ns); +} + } // namespace mongo diff --git a/src/mongo/db/catalog/collection_impl.h b/src/mongo/db/catalog/collection_impl.h index b374eaa70fb..5d616d01791 100644 --- a/src/mongo/db/catalog/collection_impl.h +++ b/src/mongo/db/catalog/collection_impl.h @@ -80,6 +80,8 @@ public: return _ns; } + void setNs(NamespaceString nss) final; + OptionalCollectionUUID uuid() const { return _uuid; } @@ -101,7 +103,7 @@ public: } CursorManager* getCursorManager() const final { - return &_cursorManager; + return _cursorManager.get(); } bool requiresIdIndex() const final; @@ -390,7 +392,7 @@ private: int _magic; - const NamespaceString _ns; + NamespaceString _ns; OptionalCollectionUUID _uuid; CollectionCatalogEntry* const _details; RecordStore* const _recordStore; @@ -415,10 +417,7 @@ private: ValidationAction _validationAction; ValidationLevel _validationLevel; - // this is mutable because read only users of the Collection class - // use it keep state. This seems valid as const correctness of Collection - // should be about the data. - mutable CursorManager _cursorManager; + std::unique_ptr<CursorManager> _cursorManager; // Notifier object for awaitData. Threads polling a capped collection for new data can wait // on this object until notified of the arrival of new data. @@ -430,7 +429,5 @@ private: boost::optional<Timestamp> _minVisibleSnapshot; Collection* _this; - - friend class NamespaceDetails; }; } // namespace mongo diff --git a/src/mongo/db/catalog/collection_info_cache.h b/src/mongo/db/catalog/collection_info_cache.h index 28bd6832e91..2426b16490d 100644 --- a/src/mongo/db/catalog/collection_info_cache.h +++ b/src/mongo/db/catalog/collection_info_cache.h @@ -70,6 +70,8 @@ public: virtual void notifyOfQuery(OperationContext* opCtx, const std::set<std::string>& indexesUsed) = 0; + + virtual void setNs(NamespaceString ns) = 0; }; @@ -159,6 +161,10 @@ public: return this->_impl().notifyOfQuery(opCtx, indexesUsed); } + inline void setNs(NamespaceString ns) { + this->_impl().setNs(std::move(ns)); + } + std::unique_ptr<Impl> _pimpl; // This structure exists to give us a customization point to decide how to force users of this diff --git a/src/mongo/db/catalog/collection_info_cache_impl.cpp b/src/mongo/db/catalog/collection_info_cache_impl.cpp index 17dde1830fc..d05615dd713 100644 --- a/src/mongo/db/catalog/collection_info_cache_impl.cpp +++ b/src/mongo/db/catalog/collection_info_cache_impl.cpp @@ -71,7 +71,7 @@ CollectionInfoCacheImpl::~CollectionInfoCacheImpl() { // Necessary because the collection cache will not explicitly get updated upon database drop. if (_hasTTLIndex) { TTLCollectionCache& ttlCollectionCache = TTLCollectionCache::get(getGlobalServiceContext()); - ttlCollectionCache.unregisterCollection(_ns); + ttlCollectionCache.unregisterCollection(_collection->ns()); } } @@ -254,4 +254,19 @@ void CollectionInfoCacheImpl::rebuildIndexData(OperationContext* opCtx) { CollectionIndexUsageMap CollectionInfoCacheImpl::getIndexUsageStats() const { return _indexUsageTracker.getUsageStats(); } + +void CollectionInfoCacheImpl::setNs(NamespaceString ns) { + auto oldNs = _ns; + _ns = std::move(ns); + + _planCache->setNs(_ns); + + // Update the TTL collection cache. + if (_hasTTLIndex) { + auto& ttlCollectionCache = TTLCollectionCache::get(getGlobalServiceContext()); + ttlCollectionCache.unregisterCollection(oldNs); + ttlCollectionCache.registerCollection(_ns); + } +} + } // namespace mongo diff --git a/src/mongo/db/catalog/collection_info_cache_impl.h b/src/mongo/db/catalog/collection_info_cache_impl.h index 5723603f39e..6900ba9c091 100644 --- a/src/mongo/db/catalog/collection_info_cache_impl.h +++ b/src/mongo/db/catalog/collection_info_cache_impl.h @@ -110,6 +110,8 @@ public: */ void notifyOfQuery(OperationContext* opCtx, const std::set<std::string>& indexesUsed); + void setNs(NamespaceString ns) override; + private: void computeIndexKeys(OperationContext* opCtx); void updatePlanCacheIndexEntries(OperationContext* opCtx); @@ -121,7 +123,8 @@ private: void rebuildIndexData(OperationContext* opCtx); Collection* _collection; // not owned - const NamespaceString _ns; + + NamespaceString _ns; // --- index keys cache bool _keysComputed; diff --git a/src/mongo/db/catalog/collection_mock.h b/src/mongo/db/catalog/collection_mock.h index e74c40b6196..471dba32623 100644 --- a/src/mongo/db/catalog/collection_mock.h +++ b/src/mongo/db/catalog/collection_mock.h @@ -59,12 +59,17 @@ private: std::abort(); } - const NamespaceString _ns; + NamespaceString _ns; public: const NamespaceString& ns() const { return _ns; } + + void setNs(NamespaceString nss) final { + _ns = std::move(nss); + } + bool ok() const { std::abort(); } diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp index 0ced8f869b1..9f63f793c2d 100644 --- a/src/mongo/db/catalog/database_impl.cpp +++ b/src/mongo/db/catalog/database_impl.cpp @@ -159,6 +159,36 @@ public: Collection* const _coll; }; +class DatabaseImpl::RenameCollectionChange final : public RecoveryUnit::Change { +public: + RenameCollectionChange(DatabaseImpl* db, + Collection* coll, + NamespaceString fromNs, + NamespaceString toNs) + : _db(db), _coll(coll), _fromNs(std::move(fromNs)), _toNs(std::move(toNs)) {} + + void commit(boost::optional<Timestamp> commitTime) override { + // Ban reading from this collection on committed reads on snapshots before now. + if (commitTime) { + _coll->setMinimumVisibleSnapshot(commitTime.get()); + } + } + + void rollback() override { + auto it = _db->_collections.find(_toNs.ns()); + invariant(it != _db->_collections.end()); + invariant(it->second == _coll); + _db->_collections[_fromNs.ns()] = _coll; + _db->_collections.erase(it); + _coll->setNs(_fromNs); + } + + DatabaseImpl* const _db; + Collection* const _coll; + const NamespaceString _fromNs; + const NamespaceString _toNs; +}; + DatabaseImpl::~DatabaseImpl() { for (CollectionMap::const_iterator i = _collections.begin(); i != _collections.end(); ++i) delete i->second; @@ -524,7 +554,8 @@ Status DatabaseImpl::dropCollectionEvenIfSystem(OperationContext* opCtx, if (!status.isOK()) { return status; } - opObserver->onDropCollection(opCtx, fullns, uuid); + opObserver->onDropCollection( + opCtx, fullns, uuid, OpObserver::CollectionDropType::kOnePhase); return Status::OK(); } @@ -569,24 +600,24 @@ Status DatabaseImpl::dropCollectionEvenIfSystem(OperationContext* opCtx, // Log oplog entry for collection drop and proceed to complete rest of two phase drop // process. - dropOpTime = opObserver->onDropCollection(opCtx, fullns, uuid); + dropOpTime = opObserver->onDropCollection( + opCtx, fullns, uuid, OpObserver::CollectionDropType::kTwoPhase); - // Drop collection immediately if OpObserver did not write entry to oplog. + // The OpObserver should have written an entry to the oplog with a particular op time. // After writing the oplog entry, all errors are fatal. See getNextOpTime() comments in // oplog.cpp. if (dropOpTime.isNull()) { log() << "dropCollection: " << fullns << " (" << uuidString - << ") - no drop optime available for pending-drop. " - << "Dropping collection immediately."; - fassert(40462, _finishDropCollection(opCtx, fullns, collection)); - return Status::OK(); + << ") - expected oplog entry to be written"; + fassertFailed(40462); } } else { // If we are provided with a valid 'dropOpTime', it means we are dropping this collection // in the context of applying an oplog entry on a secondary. // OpObserver::onDropCollection() should be returning a null OpTime because we should not be // writing to the oplog. - auto opTime = opObserver->onDropCollection(opCtx, fullns, uuid); + auto opTime = opObserver->onDropCollection( + opCtx, fullns, uuid, OpObserver::CollectionDropType::kTwoPhase); if (!opTime.isNull()) { severe() << "dropCollection: " << fullns << " (" << uuidString << ") - unexpected oplog entry written to the oplog with optime " << opTime; @@ -678,36 +709,49 @@ Status DatabaseImpl::renameCollection(OperationContext* opCtx, BackgroundOperation::assertNoBgOpInProgForNs(fromNS); BackgroundOperation::assertNoBgOpInProgForNs(toNS); - NamespaceString fromNSS(fromNS); - NamespaceString toNSS(toNS); - { // remove anything cached - Collection* coll = getCollection(opCtx, fromNS); + const NamespaceString fromNSS(fromNS); + const NamespaceString toNSS(toNS); - if (!coll) - return Status(ErrorCodes::NamespaceNotFound, "collection not found to rename"); + invariant(fromNSS.db() == _name); + invariant(toNSS.db() == _name); + if (getCollection(opCtx, toNSS)) { + return Status(ErrorCodes::NamespaceExists, + str::stream() << "Cannot rename '" << fromNS << "' to '" << toNS + << "' because the destination namespace already exists"); + } - string clearCacheReason = str::stream() << "renamed collection '" << fromNS << "' to '" - << toNS << "'"; - IndexCatalog::IndexIterator ii = coll->getIndexCatalog()->getIndexIterator(opCtx, true); + Collection* collToRename = getCollection(opCtx, fromNSS); + if (!collToRename) { + return Status(ErrorCodes::NamespaceNotFound, "collection not found to rename"); + } - while (ii.more()) { - IndexDescriptor* desc = ii.next(); - _clearCollectionCache( - opCtx, desc->indexNamespace(), clearCacheReason, /*collectionGoingAway*/ true); - } + string clearCacheReason = str::stream() << "renamed collection '" << fromNS << "' to '" << toNS + << "'"; - _clearCollectionCache(opCtx, fromNS, clearCacheReason, /*collectionGoingAway*/ true); - _clearCollectionCache(opCtx, toNS, clearCacheReason, /*collectionGoingAway*/ false); + // Notify the cursor manager that it should kill all the cursors in the source and target + // collections. This is currently necessary since the query layer is not prepared for cursors to + // survive collection renames. + auto sourceManager = collToRename->getCursorManager(); + invariant(sourceManager); + sourceManager->invalidateAll(opCtx, /*collectionGoingAway*/ true, clearCacheReason); - Top::get(opCtx->getServiceContext()).collectionDropped(fromNS.toString()); + log() << "renameCollection: renaming collection " << collToRename->uuid()->toString() + << " from " << fromNS << " to " << toNS; - log() << "renameCollection: renaming collection " << coll->uuid()->toString() << " from " - << fromNS << " to " << toNS; - } + Top::get(opCtx->getServiceContext()).collectionDropped(fromNS.toString()); Status s = _dbEntry->renameCollection(opCtx, fromNS, toNS, stayTemp); - opCtx->recoveryUnit()->registerChange(new AddCollectionChange(opCtx, this, toNS)); - _collections[toNS] = _getOrCreateCollectionInstance(opCtx, toNSS); + // Make 'toNS' map to the collection instead of 'fromNS'. + _collections.erase(fromNS); + _collections[toNS] = collToRename; + + // Update Collection's ns. + collToRename->setNs(toNSS); + + // Register a Change which, on rollback, will reinstall the Collection* in the collections map + // so that it is associated with 'fromNS', not 'toNS'. + opCtx->recoveryUnit()->registerChange( + new RenameCollectionChange(this, collToRename, fromNSS, toNSS)); return s; } diff --git a/src/mongo/db/catalog/database_impl.h b/src/mongo/db/catalog/database_impl.h index 38213e06055..5a2e95070f7 100644 --- a/src/mongo/db/catalog/database_impl.h +++ b/src/mongo/db/catalog/database_impl.h @@ -50,7 +50,6 @@ namespace mongo { class Collection; class DatabaseCatalogEntry; class IndexCatalog; -class NamespaceDetails; class OperationContext; class PseudoRandom; @@ -202,6 +201,11 @@ public: Collection* getOrCreateCollection(OperationContext* opCtx, const NamespaceString& nss) final; + /** + * Renames the fully qualified namespace 'fromNS' to the fully qualified namespace 'toNS'. + * Illegal to call unless both 'fromNS' and 'toNS' are within this database. Returns an error if + * 'toNS' already exists or 'fromNS' does not exist. + */ Status renameCollection(OperationContext* opCtx, StringData fromNS, StringData toNS, @@ -272,6 +276,7 @@ private: class AddCollectionChange; class RemoveCollectionChange; + class RenameCollectionChange; const std::string _name; // "dbname" @@ -300,7 +305,6 @@ private: Database* _this; // Pointer to wrapper, for external caller compatibility. friend class Collection; - friend class NamespaceDetails; friend class IndexCatalog; }; diff --git a/src/mongo/db/catalog/drop_database_test.cpp b/src/mongo/db/catalog/drop_database_test.cpp index 89a99fb4bd2..642c759fc5d 100644 --- a/src/mongo/db/catalog/drop_database_test.cpp +++ b/src/mongo/db/catalog/drop_database_test.cpp @@ -69,12 +69,14 @@ public: void onDropDatabase(OperationContext* opCtx, const std::string& dbName) override; repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) override; + OptionalCollectionUUID uuid, + CollectionDropType dropType) override; std::set<std::string> droppedDatabaseNames; std::set<NamespaceString> droppedCollectionNames; Database* db = nullptr; bool onDropCollectionThrowsException = false; + const repl::OpTime dropOpTime = {Timestamp(Seconds(100), 1U), 1LL}; }; void OpObserverMock::onDropDatabase(OperationContext* opCtx, const std::string& dbName) { @@ -86,9 +88,11 @@ void OpObserverMock::onDropDatabase(OperationContext* opCtx, const std::string& repl::OpTime OpObserverMock::onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { ASSERT_TRUE(opCtx->lockState()->inAWriteUnitOfWork()); - auto opTime = OpObserverNoop::onDropCollection(opCtx, collectionName, uuid); + auto opTime = OpObserverNoop::onDropCollection(opCtx, collectionName, uuid, dropType); + invariant(opTime.isNull()); // Do not update 'droppedCollectionNames' if OpObserverNoop::onDropCollection() throws. droppedCollectionNames.insert(collectionName); @@ -100,7 +104,7 @@ repl::OpTime OpObserverMock::onDropCollection(OperationContext* opCtx, uassert( ErrorCodes::OperationFailed, "onDropCollection() failed", !onDropCollectionThrowsException); - OpObserver::Times::get(opCtx).reservedOpTimes.push_back(opTime); + OpObserver::Times::get(opCtx).reservedOpTimes.push_back(dropOpTime); return {}; } diff --git a/src/mongo/db/catalog/index_catalog.h b/src/mongo/db/catalog/index_catalog.h index 7fde59471a9..5186645ac32 100644 --- a/src/mongo/db/catalog/index_catalog.h +++ b/src/mongo/db/catalog/index_catalog.h @@ -395,6 +395,8 @@ public: virtual void prepareInsertDeleteOptions(OperationContext* opCtx, const IndexDescriptor* desc, InsertDeleteOptions* options) const = 0; + + virtual void setNs(NamespaceString ns) = 0; }; } // namespace mongo diff --git a/src/mongo/db/catalog/index_catalog_entry.h b/src/mongo/db/catalog/index_catalog_entry.h index 25e423802e5..3b43a07ee1c 100644 --- a/src/mongo/db/catalog/index_catalog_entry.h +++ b/src/mongo/db/catalog/index_catalog_entry.h @@ -143,6 +143,8 @@ public: virtual boost::optional<Timestamp> getMinimumVisibleSnapshot() = 0; virtual void setMinimumVisibleSnapshot(const Timestamp name) = 0; + + virtual void setNs(NamespaceString ns) = 0; }; class IndexCatalogEntryContainer { diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.cpp b/src/mongo/db/catalog/index_catalog_entry_impl.cpp index 548ceb77442..997b0b4164b 100644 --- a/src/mongo/db/catalog/index_catalog_entry_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_entry_impl.cpp @@ -101,9 +101,8 @@ IndexCatalogEntryImpl::IndexCatalogEntryImpl(OperationContext* const opCtx, _indexTracksPathLevelMultikeyInfo = !_indexMultikeyPaths.empty(); } - if (BSONElement collationElement = _descriptor->getInfoElement("collation")) { - invariant(collationElement.isABSONObj()); - BSONObj collation = collationElement.Obj(); + const BSONObj& collation = _descriptor->collation(); + if (!collation.isEmpty()) { auto statusWithCollator = CollatorFactoryInterface::get(opCtx->getServiceContext())->makeFromBSON(collation); @@ -113,9 +112,9 @@ IndexCatalogEntryImpl::IndexCatalogEntryImpl(OperationContext* const opCtx, _collator = std::move(statusWithCollator.getValue()); } - if (BSONElement filterElement = _descriptor->getInfoElement("partialFilterExpression")) { - invariant(filterElement.isABSONObj()); - BSONObj filter = filterElement.Obj(); + if (_descriptor->isPartial()) { + const BSONObj& filter = _descriptor->partialFilterExpression(); + boost::intrusive_ptr<ExpressionContext> expCtx( new ExpressionContext(opCtx, _collator.get())); @@ -345,6 +344,11 @@ void IndexCatalogEntryImpl::setIndexKeyStringWithLongTypeBitsExistsOnDisk(Operat _collection->setIndexKeyStringWithLongTypeBitsExistsOnDisk(opCtx); } +void IndexCatalogEntryImpl::setNs(NamespaceString ns) { + _ns = ns.toString(); + _descriptor->setNs(std::move(ns)); +} + // ---- bool IndexCatalogEntryImpl::_catalogIsReady(OperationContext* opCtx) const { diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.h b/src/mongo/db/catalog/index_catalog_entry_impl.h index abfa17a4de5..a4d800a8e5c 100644 --- a/src/mongo/db/catalog/index_catalog_entry_impl.h +++ b/src/mongo/db/catalog/index_catalog_entry_impl.h @@ -72,6 +72,8 @@ public: return _ns; } + void setNs(NamespaceString ns) final; + void init(std::unique_ptr<IndexAccessMethod> accessMethod) final; IndexDescriptor* descriptor() final { diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp index 3d19793dbbf..c4ea8d730fc 100644 --- a/src/mongo/db/catalog/index_catalog_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_impl.cpp @@ -1427,4 +1427,17 @@ StatusWith<BSONObj> IndexCatalogImpl::_fixIndexSpec(OperationContext* opCtx, return b.obj(); } + +void IndexCatalogImpl::setNs(NamespaceString ns) { + for (auto&& ice : _entries) { + ice->setNs(ns); + } + + std::vector<BSONObj> newUnfinishedIndexes; + for (auto&& indexSpec : _unfinishedIndexes) { + newUnfinishedIndexes.push_back(IndexDescriptor::renameNsInIndexSpec(indexSpec, ns)); + } + _unfinishedIndexes.swap(newUnfinishedIndexes); +} + } // namespace mongo diff --git a/src/mongo/db/catalog/index_catalog_impl.h b/src/mongo/db/catalog/index_catalog_impl.h index f1c833ea01b..0baaf1fb3b6 100644 --- a/src/mongo/db/catalog/index_catalog_impl.h +++ b/src/mongo/db/catalog/index_catalog_impl.h @@ -342,6 +342,8 @@ public: const IndexDescriptor* desc, InsertDeleteOptions* options) const override; + void setNs(NamespaceString ns) override; + private: static const BSONObj _idObj; // { _id : 1 } diff --git a/src/mongo/db/catalog/rename_collection_test.cpp b/src/mongo/db/catalog/rename_collection_test.cpp index 7ed933b58d5..ddd0bdc85c4 100644 --- a/src/mongo/db/catalog/rename_collection_test.cpp +++ b/src/mongo/db/catalog/rename_collection_test.cpp @@ -36,6 +36,7 @@ #include "mongo/db/catalog/collection_catalog_entry.h" #include "mongo/db/catalog/collection_options.h" +#include "mongo/db/catalog/database_holder.h" #include "mongo/db/catalog/multi_index_block.h" #include "mongo/db/catalog/rename_collection.h" #include "mongo/db/catalog/uuid_catalog.h" @@ -95,7 +96,8 @@ public: repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) override; + OptionalCollectionUUID uuid, + CollectionDropType dropType) override; void onRenameCollection(OperationContext* opCtx, const NamespaceString& fromCollection, @@ -127,6 +129,8 @@ public: OptionalCollectionUUID onRenameCollectionDropTarget; repl::OpTime renameOpTime = {Timestamp(Seconds(100), 1U), 1LL}; + repl::OpTime dropOpTime = {Timestamp(Seconds(100), 1U), 1LL}; + private: /** * Pushes 'operationName' into 'oplogEntries' if we can write to the oplog for this namespace. @@ -176,10 +180,16 @@ void OpObserverMock::onCreateCollection(OperationContext* opCtx, repl::OpTime OpObserverMock::onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { _logOp(opCtx, collectionName, "drop"); - OpObserver::Times::get(opCtx).reservedOpTimes.push_back( - OpObserverNoop::onDropCollection(opCtx, collectionName, uuid)); + // If the oplog is not disabled for this namespace, then we need to reserve an op time for the + // drop. + if (!repl::ReplicationCoordinator::get(opCtx)->isOplogDisabledFor(opCtx, collectionName)) { + OpObserver::Times::get(opCtx).reservedOpTimes.push_back(dropOpTime); + } + auto noopOptime = OpObserverNoop::onDropCollection(opCtx, collectionName, uuid, dropType); + invariant(noopOptime.isNull()); return {}; } @@ -434,6 +444,19 @@ void _insertDocument(OperationContext* opCtx, const NamespaceString& nss, const }); } +/** + * Retrieves the pointer to a collection associated with the given namespace string from the + * catalog. The caller must hold the appropriate locks from the lock manager. + */ +Collection* _getCollection_inlock(OperationContext* opCtx, const NamespaceString& nss) { + invariant(opCtx->lockState()->isCollectionLockedForMode(nss.ns(), MODE_IS)); + auto* db = DatabaseHolder::getDatabaseHolder().get(opCtx, nss.db()); + if (!db) { + return nullptr; + } + return db->getCollection(opCtx, nss.ns()); +} + TEST_F(RenameCollectionTest, RenameCollectionReturnsNamespaceNotFoundIfDatabaseDoesNotExist) { ASSERT_FALSE(AutoGetDb(_opCtx.get(), _sourceNss.db(), MODE_X).getDb()); ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, @@ -1098,4 +1121,115 @@ TEST_F(RenameCollectionTest, ASSERT_TRUE(_opObserver->onInsertsIsGlobalWriteLockExclusive); } +TEST_F(RenameCollectionTest, CollectionPointerRemainsValidThroughRename) { + _createCollection(_opCtx.get(), _sourceNss); + Lock::GlobalWrite globalWrite(_opCtx.get()); + + // Get a pointer to the source collection, and ensure that it reports the expected namespace + // string. + Collection* sourceColl = _getCollection_inlock(_opCtx.get(), _sourceNss); + ASSERT(sourceColl); + + ASSERT_OK(renameCollection(_opCtx.get(), _sourceNss, _targetNss, {})); + + // Retrieve the pointer associated with the target namespace, and ensure that its the same + // pointer (i.e. the renamed collection has the very same Collection instance). + Collection* targetColl = _getCollection_inlock(_opCtx.get(), _targetNss); + ASSERT(targetColl); + ASSERT_EQ(targetColl, sourceColl); + + // Verify that the Collection reports that its namespace is now the target namespace. + ASSERT_EQ(targetColl->ns(), _targetNss); +} + +TEST_F(RenameCollectionTest, CollectionCatalogEntryPointerRemainsValidThroughRename) { + _createCollection(_opCtx.get(), _sourceNss); + Lock::GlobalWrite globalWrite(_opCtx.get()); + + // Get a pointer to the source collection, and ensure that it reports the expected namespace + // string. + Collection* sourceColl = _getCollection_inlock(_opCtx.get(), _sourceNss); + ASSERT(sourceColl); + auto* sourceCatalogEntry = sourceColl->getCatalogEntry(); + ASSERT(sourceCatalogEntry); + ASSERT_EQ(sourceCatalogEntry->ns(), _sourceNss); + + ASSERT_OK(renameCollection(_opCtx.get(), _sourceNss, _targetNss, {})); + + // Verify that the CollectionCatalogEntry reports that its namespace is now the target + // namespace. + ASSERT_EQ(sourceCatalogEntry->ns(), _targetNss); +} + +TEST_F(RenameCollectionTest, CatalogPointersRenameValidThroughRenameAfterDroppingTarget) { + _createCollection(_opCtx.get(), _sourceNss); + _createCollection(_opCtx.get(), _targetNss); + Lock::GlobalWrite globalWrite(_opCtx.get()); + + Collection* sourceColl = _getCollection_inlock(_opCtx.get(), _sourceNss); + ASSERT(sourceColl); + auto* sourceCatalogEntry = sourceColl->getCatalogEntry(); + ASSERT(sourceCatalogEntry); + + RenameCollectionOptions options; + options.dropTarget = true; + ASSERT_OK(renameCollection(_opCtx.get(), _sourceNss, _targetNss, options)); + + // The same catalog pointers should now report that they are associated with the target + // namespace. + ASSERT_EQ(sourceColl->ns(), _targetNss); + ASSERT_EQ(sourceCatalogEntry->ns(), _targetNss); +} + +TEST_F(RenameCollectionTest, CatalogPointersRenameValidThroughRenameForApplyOps) { + _createCollection(_opCtx.get(), _sourceNss); + Collection* sourceColl = AutoGetCollectionForRead(_opCtx.get(), _sourceNss).getCollection(); + ASSERT(sourceColl); + + auto uuidDoc = BSON("ui" << UUID::gen()); + auto cmd = BSON("renameCollection" << _sourceNss.ns() << "to" << _targetNss.ns()); + ASSERT_OK(renameCollectionForApplyOps( + _opCtx.get(), _sourceNss.db().toString(), uuidDoc["ui"], cmd, {})); + ASSERT_FALSE(_collectionExists(_opCtx.get(), _sourceNss)); + + Collection* targetColl = AutoGetCollectionForRead(_opCtx.get(), _targetNss).getCollection(); + ASSERT(targetColl); + ASSERT_EQ(targetColl, sourceColl); + ASSERT_EQ(targetColl->ns(), _targetNss); +} + +TEST_F(RenameCollectionTest, RenameAcrossDatabasesDoesNotPreserveCatalogPointers) { + _createCollection(_opCtx.get(), _sourceNss); + Lock::GlobalWrite globalWrite(_opCtx.get()); + + // Get a pointer to the source collection, and ensure that it reports the expected namespace + // string. + Collection* sourceColl = _getCollection_inlock(_opCtx.get(), _sourceNss); + ASSERT(sourceColl); + auto* sourceCatalogEntry = sourceColl->getCatalogEntry(); + ASSERT(sourceCatalogEntry); + + ASSERT_OK(renameCollection(_opCtx.get(), _sourceNss, _targetNssDifferentDb, {})); + + // Verify that the CollectionCatalogEntry reports that its namespace is now the target + // namespace. + Collection* targetColl = _getCollection_inlock(_opCtx.get(), _targetNssDifferentDb); + ASSERT(targetColl); + ASSERT_NE(targetColl, sourceColl); + auto* targetCatalogEntry = targetColl->getCatalogEntry(); + ASSERT(targetCatalogEntry); + ASSERT_NE(targetCatalogEntry, sourceCatalogEntry); +} + +TEST_F(RenameCollectionTest, UUIDCatalogMappingRemainsIntactThroughRename) { + _createCollection(_opCtx.get(), _sourceNss); + Lock::GlobalWrite globalWrite(_opCtx.get()); + auto& uuidCatalog = UUIDCatalog::get(_opCtx.get()); + Collection* sourceColl = _getCollection_inlock(_opCtx.get(), _sourceNss); + ASSERT(sourceColl); + ASSERT_EQ(sourceColl, uuidCatalog.lookupCollectionByUUID(*sourceColl->uuid())); + ASSERT_OK(renameCollection(_opCtx.get(), _sourceNss, _targetNss, {})); + ASSERT_EQ(sourceColl, uuidCatalog.lookupCollectionByUUID(*sourceColl->uuid())); +} + } // namespace diff --git a/src/mongo/db/catalog/uuid_catalog.cpp b/src/mongo/db/catalog/uuid_catalog.cpp index 632568350a3..556251a2602 100644 --- a/src/mongo/db/catalog/uuid_catalog.cpp +++ b/src/mongo/db/catalog/uuid_catalog.cpp @@ -76,12 +76,23 @@ void UUIDCatalogObserver::onCollMod(OperationContext* opCtx, repl::OpTime UUIDCatalogObserver::onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { if (!uuid) return {}; - UUIDCatalog& catalog = UUIDCatalog::get(opCtx); - catalog.onDropCollection(opCtx, uuid.get()); + + // Replicated drops are two-phase, meaning that the collection is first renamed into a "drop + // pending" state and reaped later. This op observer is only called for the rename phase, which + // means the UUID mapping is still valid. + // + // On the other hand, if the drop is not replicated, it takes effect immediately. In this case, + // the UUID mapping must be removed from the UUID catalog. + if (dropType == CollectionDropType::kOnePhase) { + UUIDCatalog& catalog = UUIDCatalog::get(opCtx); + catalog.onDropCollection(opCtx, uuid.get()); + } + return {}; } diff --git a/src/mongo/db/catalog/uuid_catalog.h b/src/mongo/db/catalog/uuid_catalog.h index f5e6fc0e284..9fd0b518aa8 100644 --- a/src/mongo/db/catalog/uuid_catalog.h +++ b/src/mongo/db/catalog/uuid_catalog.h @@ -86,7 +86,8 @@ public: void onDropDatabase(OperationContext* opCtx, const std::string& dbName) override {} repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) override; + OptionalCollectionUUID uuid, + CollectionDropType dropType) override; void onDropIndex(OperationContext* opCtx, const NamespaceString& nss, OptionalCollectionUUID uuid, diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript index 52c15bb7802..8cfc52999db 100644 --- a/src/mongo/db/commands/SConscript +++ b/src/mongo/db/commands/SConscript @@ -446,6 +446,7 @@ env.CppUnitTest( ], LIBDEPS=[ "$BUILD_DIR/mongo/db/auth/authmocks", + "$BUILD_DIR/mongo/db/repl/drop_pending_collection_reaper", "$BUILD_DIR/mongo/db/repl/replmocks", "$BUILD_DIR/mongo/db/repl/storage_interface_impl", "$BUILD_DIR/mongo/db/serveronly", diff --git a/src/mongo/db/commands/mr_test.cpp b/src/mongo/db/commands/mr_test.cpp index e7ef4571dfc..6ff0a8bd5a9 100644 --- a/src/mongo/db/commands/mr_test.cpp +++ b/src/mongo/db/commands/mr_test.cpp @@ -46,6 +46,7 @@ #include "mongo/db/json.h" #include "mongo/db/op_observer_noop.h" #include "mongo/db/op_observer_registry.h" +#include "mongo/db/repl/drop_pending_collection_reaper.h" #include "mongo/db/repl/replication_coordinator_mock.h" #include "mongo/db/repl/storage_interface_impl.h" #include "mongo/db/service_context_d_test_fixture.h" @@ -298,12 +299,19 @@ public: const BSONObj& idIndex, const OplogSlot& createOpTime) override; + repl::OpTime onDropCollection(OperationContext* opCtx, + const NamespaceString& collectionName, + OptionalCollectionUUID uuid, + CollectionDropType dropType) override; + // Hook for onInserts. Defaults to a no-op function but may be overridden to inject exceptions // while mapReduce inserts its results into the temporary output collection. std::function<void()> onInsertsFn = [] {}; // Holds namespaces of temporary collections created by mapReduce. std::vector<NamespaceString> tempNamespaces; + + const repl::OpTime dropOpTime = {Timestamp(Seconds(100), 1U), 1LL}; }; void MapReduceOpObserver::onInserts(OperationContext* opCtx, @@ -327,6 +335,18 @@ void MapReduceOpObserver::onCreateCollection(OperationContext*, tempNamespaces.push_back(collectionName); } +repl::OpTime MapReduceOpObserver::onDropCollection(OperationContext* opCtx, + const NamespaceString& collectionName, + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { + // If the oplog is not disabled for this namespace, then we need to reserve an op time for the + // drop. + if (!repl::ReplicationCoordinator::get(opCtx)->isOplogDisabledFor(opCtx, collectionName)) { + OpObserver::Times::get(opCtx).reservedOpTimes.push_back(dropOpTime); + } + return {}; +} + /** * Test fixture for MapReduceCommand. */ @@ -382,6 +402,10 @@ void MapReduceCommandTest::setUp() { repl::ReplicationCoordinator::set(service, std::make_unique<repl::ReplicationCoordinatorMock>(service)); + repl::DropPendingCollectionReaper::set( + service, + stdx::make_unique<repl::DropPendingCollectionReaper>(repl::StorageInterface::get(service))); + // Set up an OpObserver to track the temporary collections mapReduce creates. auto opObserver = std::make_unique<MapReduceOpObserver>(); _opObserver = opObserver.get(); diff --git a/src/mongo/db/free_mon/free_mon_op_observer.cpp b/src/mongo/db/free_mon/free_mon_op_observer.cpp index 0a5986eea88..467960b3c26 100644 --- a/src/mongo/db/free_mon/free_mon_op_observer.cpp +++ b/src/mongo/db/free_mon/free_mon_op_observer.cpp @@ -57,7 +57,8 @@ FreeMonOpObserver::~FreeMonOpObserver() = default; repl::OpTime FreeMonOpObserver::onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { if (collectionName == NamespaceString::kServerConfigurationNamespace) { auto controller = FreeMonController::get(opCtx->getServiceContext()); diff --git a/src/mongo/db/free_mon/free_mon_op_observer.h b/src/mongo/db/free_mon/free_mon_op_observer.h index 8a162bb25c0..535b61cacca 100644 --- a/src/mongo/db/free_mon/free_mon_op_observer.h +++ b/src/mongo/db/free_mon/free_mon_op_observer.h @@ -96,7 +96,8 @@ public: repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) final; + OptionalCollectionUUID uuid, + CollectionDropType dropType) final; void onDropIndex(OperationContext* opCtx, const NamespaceString& nss, diff --git a/src/mongo/db/index/index_descriptor.cpp b/src/mongo/db/index/index_descriptor.cpp index ad2dca4237e..bf94a32542d 100644 --- a/src/mongo/db/index/index_descriptor.cpp +++ b/src/mongo/db/index/index_descriptor.cpp @@ -172,4 +172,25 @@ bool IndexDescriptor::areIndexOptionsEquivalent(const IndexDescriptor* other) co rhs.second); }); } + +void IndexDescriptor::setNs(NamespaceString ns) { + _parentNS = ns.toString(); + _indexNamespace = makeIndexNamespace(_parentNS, _indexName); + + // Construct a new infoObj with the namespace field replaced. + _infoObj = renameNsInIndexSpec(_infoObj, ns); } + +BSONObj IndexDescriptor::renameNsInIndexSpec(BSONObj spec, const NamespaceString& newNs) { + BSONObjBuilder builder; + for (auto&& elt : spec) { + if (elt.fieldNameStringData() == kNamespaceFieldName) { + builder.append(kNamespaceFieldName, newNs.ns()); + } else { + builder.append(elt); + } + } + return builder.obj(); +} + +} // namespace mongo diff --git a/src/mongo/db/index/index_descriptor.h b/src/mongo/db/index/index_descriptor.h index ac441696a38..c2c30cddbb5 100644 --- a/src/mongo/db/index/index_descriptor.h +++ b/src/mongo/db/index/index_descriptor.h @@ -88,6 +88,12 @@ public: static constexpr StringData kWeightsFieldName = "weights"_sd; /** + * Given a BSONObj representing an index spec, returns a new owned BSONObj which is identical to + * 'spec' after replacing the 'ns' field with the value of 'newNs'. + */ + static BSONObj renameNsInIndexSpec(BSONObj spec, const NamespaceString& newNs); + + /** * OnDiskIndexData is a pointer to the memory mapped per-index data. * infoObj is a copy of the index-describing BSONObj contained in the OnDiskIndexData. */ @@ -111,6 +117,16 @@ public: BSONElement e = _infoObj[IndexDescriptor::kIndexVersionFieldName]; fassert(50942, e.isNumber()); _version = static_cast<IndexVersion>(e.numberInt()); + + if (BSONElement filterElement = _infoObj[kPartialFilterExprFieldName]) { + invariant(filterElement.isABSONObj()); + _partialFilterExpression = filterElement.Obj().getOwned(); + } + + if (BSONElement collationElement = _infoObj[kCollationFieldName]) { + invariant(collationElement.isABSONObj()); + _collation = collationElement.Obj().getOwned(); + } } @@ -233,20 +249,6 @@ public: return _isIdIndex; } - // - // Properties that are Index-specific. - // - - // Allow access to arbitrary fields in the per-index info object. Some indices stash - // index-specific data there. - BSONElement getInfoElement(const std::string& name) const { - return _infoObj[name]; - } - - // - // "Internals" of accessing the index, used by IndexAccessMethod(s). - // - // Return a (rather compact) std::string representation. std::string toString() const { return _infoObj.toString(); @@ -265,6 +267,16 @@ public: bool areIndexOptionsEquivalent(const IndexDescriptor* other) const; + void setNs(NamespaceString ns); + + const BSONObj& collation() const { + return _collation; + } + + const BSONObj& partialFilterExpression() const { + return _partialFilterExpression; + } + static bool isIdIndexPattern(const BSONObj& pattern) { BSONObjIterator i(pattern); BSONElement e = i.next(); @@ -290,7 +302,7 @@ private: IndexType _indexType; // The BSONObj describing the index. Accessed through the various members above. - const BSONObj _infoObj; + BSONObj _infoObj; // --- cached data from _infoObj @@ -305,6 +317,8 @@ private: bool _unique; bool _partial; IndexVersion _version; + BSONObj _collation; + BSONObj _partialFilterExpression; // only used by IndexCatalogEntryContainer to do caching for perf // users not allowed to touch, and not part of API diff --git a/src/mongo/db/op_observer.h b/src/mongo/db/op_observer.h index fac5943f45d..52abe0618b5 100644 --- a/src/mongo/db/op_observer.h +++ b/src/mongo/db/op_observer.h @@ -80,6 +80,15 @@ struct TTLCollModInfo { */ class OpObserver { public: + enum class CollectionDropType { + // The collection is being dropped immediately, in one step. + kOnePhase, + + // The collection is being dropped in two phases, by renaming to a drop pending collection + // which is registered to be reaped later. + kTwoPhase, + }; + virtual ~OpObserver() = default; virtual void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, @@ -178,10 +187,13 @@ public: * This function logs an oplog entry when a 'drop' command on a collection is executed. * Returns the optime of the oplog entry successfully written to the oplog. * Returns a null optime if an oplog entry was not written for this operation. + * + * 'dropType' describes whether the collection drop is one-phase or two-phase. */ virtual repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) = 0; + OptionalCollectionUUID uuid, + CollectionDropType dropType) = 0; /** * This function logs an oplog entry when an index is dropped. The namespace of the index, diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp index b370f600453..a84ffeb1bdf 100644 --- a/src/mongo/db/op_observer_impl.cpp +++ b/src/mongo/db/op_observer_impl.cpp @@ -736,7 +736,8 @@ void OpObserverImpl::onDropDatabase(OperationContext* opCtx, const std::string& repl::OpTime OpObserverImpl::onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { const auto cmdNss = collectionName.getCommandNS(); const auto cmdObj = BSON("drop" << collectionName.coll()); diff --git a/src/mongo/db/op_observer_impl.h b/src/mongo/db/op_observer_impl.h index 1e678a9d841..c42d15371b1 100644 --- a/src/mongo/db/op_observer_impl.h +++ b/src/mongo/db/op_observer_impl.h @@ -82,7 +82,8 @@ public: void onDropDatabase(OperationContext* opCtx, const std::string& dbName) final; repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) final; + OptionalCollectionUUID uuid, + CollectionDropType dropType) final; void onDropIndex(OperationContext* opCtx, const NamespaceString& nss, OptionalCollectionUUID uuid, diff --git a/src/mongo/db/op_observer_impl_test.cpp b/src/mongo/db/op_observer_impl_test.cpp index bfe3a4dcf09..f10b89a8f71 100644 --- a/src/mongo/db/op_observer_impl_test.cpp +++ b/src/mongo/db/op_observer_impl_test.cpp @@ -217,7 +217,8 @@ TEST_F(OpObserverTest, OnDropCollectionReturnsDropOpTime) { { AutoGetDb autoDb(opCtx.get(), nss.db(), MODE_X); WriteUnitOfWork wunit(opCtx.get()); - opObserver.onDropCollection(opCtx.get(), nss, uuid); + opObserver.onDropCollection( + opCtx.get(), nss, uuid, OpObserver::CollectionDropType::kTwoPhase); dropOpTime = OpObserver::Times::get(opCtx.get()).reservedOpTimes.front(); wunit.commit(); } diff --git a/src/mongo/db/op_observer_noop.h b/src/mongo/db/op_observer_noop.h index 5b0afe0533f..350d6df41bf 100644 --- a/src/mongo/db/op_observer_noop.h +++ b/src/mongo/db/op_observer_noop.h @@ -77,7 +77,8 @@ public: void onDropDatabase(OperationContext* opCtx, const std::string& dbName) override {} repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) override { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) override { return {}; } void onDropIndex(OperationContext* opCtx, diff --git a/src/mongo/db/op_observer_registry.h b/src/mongo/db/op_observer_registry.h index 14fb9d325e5..5c8ab409ab7 100644 --- a/src/mongo/db/op_observer_registry.h +++ b/src/mongo/db/op_observer_registry.h @@ -144,10 +144,11 @@ public: repl::OpTime onDropCollection(OperationContext* const opCtx, const NamespaceString& collectionName, - const OptionalCollectionUUID uuid) override { + const OptionalCollectionUUID uuid, + const CollectionDropType dropType) override { ReservedTimes times{opCtx}; for (auto& observer : this->_observers) { - auto time = observer->onDropCollection(opCtx, collectionName, uuid); + auto time = observer->onDropCollection(opCtx, collectionName, uuid, dropType); invariant(time.isNull()); } return _getOpTimeToReturn(times.get().reservedOpTimes); diff --git a/src/mongo/db/op_observer_registry_test.cpp b/src/mongo/db/op_observer_registry_test.cpp index 271ebd478db..e7281598601 100644 --- a/src/mongo/db/op_observer_registry_test.cpp +++ b/src/mongo/db/op_observer_registry_test.cpp @@ -56,7 +56,8 @@ struct TestObserver : public OpObserverNoop { } repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { drops++; OpObserver::Times::get(opCtx).reservedOpTimes.push_back(opTime); return {}; @@ -169,7 +170,10 @@ TEST_F(OpObserverRegistryTest, OnDropCollectionObserverResultReturnsRightTime) { OperationContextNoop opCtx; registry.addObserver(std::move(unique1)); registry.addObserver(std::make_unique<OpObserverNoop>()); - auto op = [&]() -> repl::OpTime { return registry.onDropCollection(&opCtx, testNss, {}); }; + auto op = [&]() -> repl::OpTime { + return registry.onDropCollection( + &opCtx, testNss, {}, OpObserver::CollectionDropType::kOnePhase); + }; checkConsistentOpTime(op); } @@ -189,7 +193,10 @@ DEATH_TEST_F(OpObserverRegistryTest, OnDropCollectionReturnsInconsistentTime, "i OperationContextNoop opCtx; registry.addObserver(std::move(unique1)); registry.addObserver(std::move(unique2)); - auto op = [&]() -> repl::OpTime { return registry.onDropCollection(&opCtx, testNss, {}); }; + auto op = [&]() -> repl::OpTime { + return registry.onDropCollection( + &opCtx, testNss, {}, OpObserver::CollectionDropType::kOnePhase); + }; checkInconsistentOpTime(op); } diff --git a/src/mongo/db/query/plan_cache.h b/src/mongo/db/query/plan_cache.h index 6e9c4606c92..d88e269bebd 100644 --- a/src/mongo/db/query/plan_cache.h +++ b/src/mongo/db/query/plan_cache.h @@ -468,6 +468,10 @@ public: const std::function<BSONObj(const PlanCacheEntry&)>& serializationFunc, const std::function<bool(const BSONObj&)>& filterFunc) const; + void setNs(NamespaceString ns) { + _ns = ns.toString(); + } + private: struct NewEntryState { bool shouldBeCreated = false; diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index a187fc04080..cdb85f50644 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -338,9 +338,10 @@ env.CppUnitTest( 'replication_recovery_test.cpp', ], LIBDEPS=[ + 'drop_pending_collection_reaper', 'oplog_interface_local', - 'replmocks', 'replication_recovery', + 'replmocks', 'storage_interface_impl', '$BUILD_DIR/mongo/db/auth/authmocks', '$BUILD_DIR/mongo/db/service_context_d_test_fixture', @@ -1627,6 +1628,7 @@ env.Library( 'mock_repl_coord_server_fixture.cpp', ], LIBDEPS=[ + 'drop_pending_collection_reaper', 'oplog', 'oplog_entry', 'replmocks', diff --git a/src/mongo/db/repl/mock_repl_coord_server_fixture.cpp b/src/mongo/db/repl/mock_repl_coord_server_fixture.cpp index f64f44ad02f..d565511db57 100644 --- a/src/mongo/db/repl/mock_repl_coord_server_fixture.cpp +++ b/src/mongo/db/repl/mock_repl_coord_server_fixture.cpp @@ -35,6 +35,7 @@ #include "mongo/db/db_raii.h" #include "mongo/db/dbdirectclient.h" #include "mongo/db/operation_context.h" +#include "mongo/db/repl/drop_pending_collection_reaper.h" #include "mongo/db/repl/mock_repl_coord_server_fixture.h" #include "mongo/db/repl/oplog.h" #include "mongo/db/repl/oplog_entry.h" @@ -86,6 +87,10 @@ void MockReplCoordServerFixture::setUp() { repl::setOplogCollectionName(service); repl::acquireOplogCollectionForLogging(opCtx()); + + repl::DropPendingCollectionReaper::set( + service, + stdx::make_unique<repl::DropPendingCollectionReaper>(repl::StorageInterface::get(service))); } void MockReplCoordServerFixture::tearDown() { diff --git a/src/mongo/db/repl/replication_recovery_test.cpp b/src/mongo/db/repl/replication_recovery_test.cpp index 48bb3ac9761..b056a6a6857 100644 --- a/src/mongo/db/repl/replication_recovery_test.cpp +++ b/src/mongo/db/repl/replication_recovery_test.cpp @@ -33,7 +33,10 @@ #include "mongo/db/client.h" #include "mongo/db/db_raii.h" #include "mongo/db/namespace_string.h" +#include "mongo/db/op_observer_noop.h" +#include "mongo/db/op_observer_registry.h" #include "mongo/db/operation_context.h" +#include "mongo/db/repl/drop_pending_collection_reaper.h" #include "mongo/db/repl/oplog_entry.h" #include "mongo/db/repl/oplog_interface_local.h" #include "mongo/db/repl/replication_consistency_markers_mock.h" @@ -97,6 +100,23 @@ private: bool _supportsRecoveryTimestamp = true; }; +class ReplicationRecoveryTestObObserver : public OpObserverNoop { +public: + repl::OpTime onDropCollection(OperationContext* opCtx, + const NamespaceString& collectionName, + OptionalCollectionUUID uuid, + const CollectionDropType dropType) override { + // If the oplog is not disabled for this namespace, then we need to reserve an op time for + // the drop. + if (!repl::ReplicationCoordinator::get(opCtx)->isOplogDisabledFor(opCtx, collectionName)) { + OpObserver::Times::get(opCtx).reservedOpTimes.push_back(dropOpTime); + } + return {}; + } + + const repl::OpTime dropOpTime = {Timestamp(Seconds(100), 1U), 1LL}; +}; + class ReplicationRecoveryTest : public ServiceContextMongoDTest { protected: OperationContext* getOperationContext() { @@ -149,6 +169,12 @@ private: getOperationContext(), testNs, generateOptionsWithUuid())); MongoDSessionCatalog::onStepUp(_opCtx.get()); + + auto observerRegistry = checked_cast<OpObserverRegistry*>(service->getOpObserver()); + observerRegistry->addObserver(std::make_unique<ReplicationRecoveryTestObObserver>()); + + repl::DropPendingCollectionReaper::set( + service, stdx::make_unique<repl::DropPendingCollectionReaper>(_storageInterface.get())); } void tearDown() override { diff --git a/src/mongo/db/repl/rollback_impl_test.cpp b/src/mongo/db/repl/rollback_impl_test.cpp index c888ee07949..56a3a2a2c48 100644 --- a/src/mongo/db/repl/rollback_impl_test.cpp +++ b/src/mongo/db/repl/rollback_impl_test.cpp @@ -1199,8 +1199,12 @@ DEATH_TEST_F(RollbackImplTest, _insertDocAndGenerateOplogEntry(obj, uuid, nss); // Drop the collection (immediately; not a two-phase drop), so that the namespace can no longer - // be found. - ASSERT_OK(_storageInterface->dropCollection(_opCtx.get(), nss)); + // be found. We enforce that the storage interface drops the collection immediately with an + // unreplicated writes block, since unreplicated collection drops are not two-phase. + { + repl::UnreplicatedWritesBlock uwb(_opCtx.get()); + ASSERT_OK(_storageInterface->dropCollection(_opCtx.get(), nss)); + } auto status = _rollback->runRollback(_opCtx.get()); unittest::log() << "mongod did not crash when expected; status: " << status; diff --git a/src/mongo/db/repl/rollback_test_fixture.cpp b/src/mongo/db/repl/rollback_test_fixture.cpp index 4b1fa5f93b4..fefed34d53f 100644 --- a/src/mongo/db/repl/rollback_test_fixture.cpp +++ b/src/mongo/db/repl/rollback_test_fixture.cpp @@ -38,6 +38,8 @@ #include "mongo/db/catalog/database_holder.h" #include "mongo/db/client.h" #include "mongo/db/db_raii.h" +#include "mongo/db/op_observer_noop.h" +#include "mongo/db/op_observer_registry.h" #include "mongo/db/repl/oplog.h" #include "mongo/db/repl/replication_consistency_markers_mock.h" #include "mongo/db/repl/replication_coordinator.h" @@ -48,7 +50,6 @@ #include "mongo/db/session_catalog.h" #include "mongo/logger/log_component.h" #include "mongo/logger/logger.h" - #include "mongo/stdx/memory.h" #include "mongo/util/mongoutils/str.h" @@ -67,6 +68,23 @@ ReplSettings createReplSettings() { return settings; } +class RollbackTestOpObserver : public OpObserverNoop { +public: + repl::OpTime onDropCollection(OperationContext* opCtx, + const NamespaceString& collectionName, + OptionalCollectionUUID uuid, + const CollectionDropType dropType) override { + // If the oplog is not disabled for this namespace, then we need to reserve an op time for + // the drop. + if (!repl::ReplicationCoordinator::get(opCtx)->isOplogDisabledFor(opCtx, collectionName)) { + OpObserver::Times::get(opCtx).reservedOpTimes.push_back(dropOpTime); + } + return {}; + } + + const repl::OpTime dropOpTime = {Timestamp(Seconds(100), 1U), 1LL}; +}; + } // namespace void RollbackTest::setUp() { @@ -94,6 +112,9 @@ void RollbackTest::setUp() { // Increase rollback log component verbosity for unit tests. mongo::logger::globalLogDomain()->setMinimumLoggedSeverity( logger::LogComponent::kReplicationRollback, logger::LogSeverity::Debug(2)); + + auto observerRegistry = checked_cast<OpObserverRegistry*>(serviceContext->getOpObserver()); + observerRegistry->addObserver(std::make_unique<RollbackTestOpObserver>()); } RollbackTest::ReplicationCoordinatorRollbackMock::ReplicationCoordinatorRollbackMock( diff --git a/src/mongo/db/s/config_server_op_observer.cpp b/src/mongo/db/s/config_server_op_observer.cpp index eefbe2e3991..cdf45de44f5 100644 --- a/src/mongo/db/s/config_server_op_observer.cpp +++ b/src/mongo/db/s/config_server_op_observer.cpp @@ -65,7 +65,8 @@ void ConfigServerOpObserver::onDelete(OperationContext* opCtx, repl::OpTime ConfigServerOpObserver::onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { if (collectionName == VersionType::ConfigNS) { if (!repl::ReplicationCoordinator::get(opCtx)->getMemberState().rollback()) { uasserted(40303, "cannot drop config.version document while in --configsvr mode"); diff --git a/src/mongo/db/s/config_server_op_observer.h b/src/mongo/db/s/config_server_op_observer.h index 0f0e11ba04f..458d9627955 100644 --- a/src/mongo/db/s/config_server_op_observer.h +++ b/src/mongo/db/s/config_server_op_observer.h @@ -96,7 +96,8 @@ public: repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) override; + OptionalCollectionUUID uuid, + CollectionDropType dropType) override; void onDropIndex(OperationContext* opCtx, const NamespaceString& nss, diff --git a/src/mongo/db/s/shard_server_op_observer.cpp b/src/mongo/db/s/shard_server_op_observer.cpp index dd50e93ab58..4330da209bc 100644 --- a/src/mongo/db/s/shard_server_op_observer.cpp +++ b/src/mongo/db/s/shard_server_op_observer.cpp @@ -391,7 +391,8 @@ void ShardServerOpObserver::onDelete(OperationContext* opCtx, repl::OpTime ShardServerOpObserver::onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { if (collectionName == NamespaceString::kServerConfigurationNamespace) { // Dropping system collections is not allowed for end users invariant(!opCtx->writesAreReplicated()); diff --git a/src/mongo/db/s/shard_server_op_observer.h b/src/mongo/db/s/shard_server_op_observer.h index 88a9677389f..61e0f0dfca0 100644 --- a/src/mongo/db/s/shard_server_op_observer.h +++ b/src/mongo/db/s/shard_server_op_observer.h @@ -97,7 +97,8 @@ public: repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) override; + OptionalCollectionUUID uuid, + CollectionDropType dropType) override; void onDropIndex(OperationContext* opCtx, const NamespaceString& nss, diff --git a/src/mongo/db/storage/kv/kv_database_catalog_entry_base.cpp b/src/mongo/db/storage/kv/kv_database_catalog_entry_base.cpp index 88713c1d224..a1214f0c6bb 100644 --- a/src/mongo/db/storage/kv/kv_database_catalog_entry_base.cpp +++ b/src/mongo/db/storage/kv/kv_database_catalog_entry_base.cpp @@ -117,6 +117,32 @@ public: const bool _dropOnCommit; }; +class KVDatabaseCatalogEntryBase::RenameCollectionChange final : public RecoveryUnit::Change { +public: + RenameCollectionChange(KVDatabaseCatalogEntryBase* dce, + KVCollectionCatalogEntry* coll, + NamespaceString fromNs, + NamespaceString toNs) + : _dce(dce), _coll(coll), _fromNs(std::move(fromNs)), _toNs(std::move(toNs)) {} + + void commit(boost::optional<Timestamp>) override {} + + void rollback() override { + auto it = _dce->_collections.find(_toNs.ns()); + invariant(it != _dce->_collections.end()); + invariant(it->second == _coll); + _dce->_collections[_fromNs.ns()] = _coll; + _dce->_collections.erase(it); + _coll->setNs(_fromNs); + } + +private: + KVDatabaseCatalogEntryBase* const _dce; + KVCollectionCatalogEntry* const _coll; + const NamespaceString _fromNs; + const NamespaceString _toNs; +}; + KVDatabaseCatalogEntryBase::KVDatabaseCatalogEntryBase(StringData db, KVStorageEngine* engine) : DatabaseCatalogEntry(db), _engine(engine) {} @@ -309,31 +335,27 @@ Status KVDatabaseCatalogEntryBase::renameCollection(OperationContext* opCtx, return status; const std::string identTo = _engine->getCatalog()->getCollectionIdent(toNS); - invariant(identFrom == identTo); - BSONCollectionCatalogEntry::MetaData md = _engine->getCatalog()->getMetaData(opCtx, toNS); - - opCtx->recoveryUnit()->registerChange( - new AddCollectionChange(opCtx, this, toNS, identTo, false)); - - auto rs = - _engine->getEngine()->getGroupedRecordStore(opCtx, toNS, identTo, md.options, md.prefix); - // Add the destination collection to _collections before erasing the source collection. This // is to ensure that _collections doesn't erroneously appear empty during listDatabases if // a database consists of a single collection and that collection gets renamed (see // SERVER-34531). There is no locking to prevent listDatabases from looking into // _collections as a rename is taking place. - _collections[toNS.toString()] = new KVCollectionCatalogEntry( - _engine->getEngine(), _engine->getCatalog(), toNS, identTo, std::move(rs)); - - const CollectionMap::iterator itFrom = _collections.find(fromNS.toString()); + auto itFrom = _collections.find(fromNS.toString()); invariant(itFrom != _collections.end()); - opCtx->recoveryUnit()->registerChange( - new RemoveCollectionChange(opCtx, this, fromNS, identFrom, itFrom->second, false)); + auto* collectionCatalogEntry = itFrom->second; + invariant(collectionCatalogEntry); + _collections[toNS.toString()] = collectionCatalogEntry; _collections.erase(itFrom); + collectionCatalogEntry->setNs(NamespaceString{toNS}); + + // Register a Change which, on rollback, will reinstall the collection catalog entry in the + // collections map so that it is associated with 'fromNS', not 'toNS'. + opCtx->recoveryUnit()->registerChange(new RenameCollectionChange( + this, collectionCatalogEntry, NamespaceString{fromNS}, NamespaceString{toNS})); + return Status::OK(); } diff --git a/src/mongo/db/storage/kv/kv_database_catalog_entry_base.h b/src/mongo/db/storage/kv/kv_database_catalog_entry_base.h index b245480ba31..cdce94f2209 100644 --- a/src/mongo/db/storage/kv/kv_database_catalog_entry_base.h +++ b/src/mongo/db/storage/kv/kv_database_catalog_entry_base.h @@ -90,6 +90,7 @@ public: protected: class AddCollectionChange; class RemoveCollectionChange; + class RenameCollectionChange; typedef std::map<std::string, KVCollectionCatalogEntry*> CollectionMap; diff --git a/src/mongo/db/storage/record_store.h b/src/mongo/db/storage/record_store.h index 5d75ac672c7..77742b69077 100644 --- a/src/mongo/db/storage/record_store.h +++ b/src/mongo/db/storage/record_store.h @@ -37,6 +37,7 @@ #include "mongo/base/owned_pointer_vector.h" #include "mongo/bson/mutable/damage_vector.h" #include "mongo/db/exec/collection_scan_common.h" +#include "mongo/db/namespace_string.h" #include "mongo/db/record_id.h" #include "mongo/db/storage/record_data.h" @@ -47,7 +48,6 @@ class Collection; struct CompactOptions; struct CompactStats; class MAdvise; -class NamespaceDetails; class OperationContext; class RecordStoreCompactAdaptor; @@ -250,6 +250,10 @@ public: return _ns; } + void setNs(NamespaceString ns) { + _ns = ns.ns(); + } + virtual const std::string& getIdent() const = 0; /** diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp index 1de88805a67..2e518d69816 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp @@ -209,7 +209,7 @@ StatusWith<std::string> WiredTigerIndex::generateCreateString(const std::string& // Raise an error about unrecognized fields that may be introduced in newer versions of // this storage engine. // Ensure that 'configString' field is a string. Raise an error if this is not the case. - BSONElement storageEngineElement = desc.getInfoElement("storageEngine"); + BSONElement storageEngineElement = desc.infoObj()["storageEngine"]; if (storageEngineElement.isABSONObj()) { BSONObj storageEngine = storageEngineElement.Obj(); StatusWith<std::string> parseStatus = diff --git a/src/mongo/db/transaction_participant_retryable_writes_test.cpp b/src/mongo/db/transaction_participant_retryable_writes_test.cpp index 56af96bf0ab..51bfc5188d2 100644 --- a/src/mongo/db/transaction_participant_retryable_writes_test.cpp +++ b/src/mongo/db/transaction_participant_retryable_writes_test.cpp @@ -121,6 +121,20 @@ public: onTransactionCommitFn = [this](boost::optional<OplogSlot> commitOplogEntryOpTime, boost::optional<Timestamp> commitTimestamp) { transactionCommitted = true; }; + + repl::OpTime onDropCollection(OperationContext* opCtx, + const NamespaceString& collectionName, + OptionalCollectionUUID uuid, + const CollectionDropType dropType) override { + // If the oplog is not disabled for this namespace, then we need to reserve an op time for + // the drop. + if (!repl::ReplicationCoordinator::get(opCtx)->isOplogDisabledFor(opCtx, collectionName)) { + OpObserver::Times::get(opCtx).reservedOpTimes.push_back(dropOpTime); + } + return {}; + } + + const repl::OpTime dropOpTime = {Timestamp(Seconds(100), 1U), 1LL}; }; class TransactionParticipantRetryableWritesTest : public MockReplCoordServerFixture { diff --git a/src/mongo/db/transaction_participant_test.cpp b/src/mongo/db/transaction_participant_test.cpp index 620680f03aa..f2c64b5afc5 100644 --- a/src/mongo/db/transaction_participant_test.cpp +++ b/src/mongo/db/transaction_participant_test.cpp @@ -111,6 +111,13 @@ public: bool onTransactionAbortThrowsException = false; bool transactionAborted = false; stdx::function<void()> onTransactionAbortFn = []() {}; + + repl::OpTime onDropCollection(OperationContext* opCtx, + const NamespaceString& collectionName, + OptionalCollectionUUID uuid, + CollectionDropType dropType) override; + + const repl::OpTime dropOpTime = {Timestamp(Seconds(100), 1U), 1LL}; }; void OpObserverMock::onTransactionPrepare(OperationContext* opCtx, const OplogSlot& prepareOpTime) { @@ -155,6 +162,18 @@ void OpObserverMock::onTransactionAbort(OperationContext* opCtx, onTransactionAbortFn(); } +repl::OpTime OpObserverMock::onDropCollection(OperationContext* opCtx, + const NamespaceString& collectionName, + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { + // If the oplog is not disabled for this namespace, then we need to reserve an op time for the + // drop. + if (!repl::ReplicationCoordinator::get(opCtx)->isOplogDisabledFor(opCtx, collectionName)) { + OpObserver::Times::get(opCtx).reservedOpTimes.push_back(dropOpTime); + } + return {}; +} + // When this class is in scope, makes the system behave as if we're in a DBDirectClient class DirectClientSetter { public: |