From 2cdc2a96e1c8779658fe0eab459dcc38cf01c54d Mon Sep 17 00:00:00 2001 From: David Storch Date: Thu, 4 Oct 2018 18:27:48 -0400 Subject: SERVER-37443 Make catalog objects survive collection rename. This change only applies to collection renames within the same database. Rename across databases requires copying the data, and the resulting collection will have a new UUID. --- src/mongo/db/catalog/collection.h | 6 + src/mongo/db/catalog/collection_catalog_entry.h | 4 + src/mongo/db/catalog/collection_impl.cpp | 21 ++- src/mongo/db/catalog/collection_impl.h | 13 +- src/mongo/db/catalog/collection_info_cache.h | 6 + .../db/catalog/collection_info_cache_impl.cpp | 17 ++- src/mongo/db/catalog/collection_info_cache_impl.h | 5 +- src/mongo/db/catalog/collection_mock.h | 7 +- src/mongo/db/catalog/database_impl.cpp | 104 ++++++++++----- src/mongo/db/catalog/database_impl.h | 8 +- src/mongo/db/catalog/drop_database_test.cpp | 12 +- src/mongo/db/catalog/index_catalog.h | 2 + src/mongo/db/catalog/index_catalog_entry.h | 2 + src/mongo/db/catalog/index_catalog_entry_impl.cpp | 16 ++- src/mongo/db/catalog/index_catalog_entry_impl.h | 2 + src/mongo/db/catalog/index_catalog_impl.cpp | 13 ++ src/mongo/db/catalog/index_catalog_impl.h | 2 + src/mongo/db/catalog/rename_collection_test.cpp | 142 ++++++++++++++++++++- src/mongo/db/catalog/uuid_catalog.cpp | 17 ++- src/mongo/db/catalog/uuid_catalog.h | 3 +- src/mongo/db/commands/SConscript | 1 + src/mongo/db/commands/mr_test.cpp | 24 ++++ src/mongo/db/free_mon/free_mon_op_observer.cpp | 3 +- src/mongo/db/free_mon/free_mon_op_observer.h | 3 +- src/mongo/db/index/index_descriptor.cpp | 21 +++ src/mongo/db/index/index_descriptor.h | 44 ++++--- src/mongo/db/op_observer.h | 14 +- src/mongo/db/op_observer_impl.cpp | 3 +- src/mongo/db/op_observer_impl.h | 3 +- src/mongo/db/op_observer_impl_test.cpp | 3 +- src/mongo/db/op_observer_noop.h | 3 +- src/mongo/db/op_observer_registry.h | 5 +- src/mongo/db/op_observer_registry_test.cpp | 13 +- src/mongo/db/query/plan_cache.h | 4 + src/mongo/db/repl/SConscript | 4 +- .../db/repl/mock_repl_coord_server_fixture.cpp | 5 + src/mongo/db/repl/replication_recovery_test.cpp | 26 ++++ src/mongo/db/repl/rollback_impl_test.cpp | 8 +- src/mongo/db/repl/rollback_test_fixture.cpp | 23 +++- src/mongo/db/s/config_server_op_observer.cpp | 3 +- src/mongo/db/s/config_server_op_observer.h | 3 +- src/mongo/db/s/shard_server_op_observer.cpp | 3 +- src/mongo/db/s/shard_server_op_observer.h | 3 +- .../storage/kv/kv_database_catalog_entry_base.cpp | 52 +++++--- .../db/storage/kv/kv_database_catalog_entry_base.h | 1 + src/mongo/db/storage/record_store.h | 6 +- .../db/storage/wiredtiger/wiredtiger_index.cpp | 2 +- ...ansaction_participant_retryable_writes_test.cpp | 14 ++ src/mongo/db/transaction_participant_test.cpp | 19 +++ 49 files changed, 602 insertions(+), 116 deletions(-) (limited to 'src/mongo') diff --git a/src/mongo/db/catalog/collection.h b/src/mongo/db/catalog/collection.h index a990135de69..ea37ea27943 100644 --- a/src/mongo/db/catalog/collection.h +++ b/src/mongo/db/catalog/collection.h @@ -215,6 +215,8 @@ public: virtual const CollectionInfoCache* infoCache() const = 0; virtual const NamespaceString& ns() const = 0; + virtual void setNs(NamespaceString) = 0; + virtual OptionalCollectionUUID uuid() const = 0; virtual const IndexCatalog* getIndexCatalog() const = 0; @@ -397,6 +399,10 @@ public: return this->_impl().ns(); } + inline void setNs(NamespaceString nss) { + this->_impl().setNs(std::move(nss)); + } + inline OptionalCollectionUUID uuid() const { return this->_impl().uuid(); } diff --git a/src/mongo/db/catalog/collection_catalog_entry.h b/src/mongo/db/catalog/collection_catalog_entry.h index c84e587f1cd..729adb86bb9 100644 --- a/src/mongo/db/catalog/collection_catalog_entry.h +++ b/src/mongo/db/catalog/collection_catalog_entry.h @@ -55,6 +55,10 @@ public: return _ns; } + void setNs(NamespaceString ns) { + _ns = std::move(ns); + } + // ------- indexes ---------- virtual CollectionOptions getCollectionOptions(OperationContext* opCtx) const = 0; diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp index e0259d1d4ab..c2d8c76a754 100644 --- a/src/mongo/db/catalog/collection_impl.cpp +++ b/src/mongo/db/catalog/collection_impl.cpp @@ -168,7 +168,7 @@ CollectionImpl::CollectionImpl(Collection* _this_init, parseValidationAction(_details->getCollectionOptions(opCtx).validationAction))), _validationLevel(uassertStatusOK( parseValidationLevel(_details->getCollectionOptions(opCtx).validationLevel))), - _cursorManager(_ns), + _cursorManager(std::make_unique(_ns)), _cappedNotifier(_recordStore->isCapped() ? stdx::make_unique() : nullptr), _this(_this_init) {} @@ -792,7 +792,7 @@ Status CollectionImpl::truncate(OperationContext* opCtx) { // 2) drop indexes _indexCatalog->dropAllIndexes(opCtx, true); - _cursorManager.invalidateAll(opCtx, false, "collection truncated"); + _cursorManager->invalidateAll(opCtx, false, "collection truncated"); // 3) truncate record store auto status = _recordStore->truncate(opCtx); @@ -815,7 +815,7 @@ void CollectionImpl::cappedTruncateAfter(OperationContext* opCtx, RecordId end, BackgroundOperation::assertNoBgOpInProgForNs(ns()); invariant(_indexCatalog->numIndexesInProgress(opCtx) == 0); - _cursorManager.invalidateAll(opCtx, false, "capped collection truncated"); + _cursorManager->invalidateAll(opCtx, false, "capped collection truncated"); _recordStore->cappedTruncateAfter(opCtx, end, inclusive); } @@ -1293,4 +1293,19 @@ std::unique_ptr CollectionImpl::createMultiIndexBlock(Operation return std::make_unique(opCtx, _this); } +void CollectionImpl::setNs(NamespaceString nss) { + _ns = std::move(nss); + _indexCatalog->setNs(_ns); + _infoCache.setNs(_ns); + _recordStore->setNs(_ns); + + // Until the query layer is prepared for cursors to survive renames, all cursors are killed when + // the name of a collection changes. Therefore, the CursorManager should be empty. This means it + // is safe to re-establish it with a new namespace by tearing down the old one and allocating a + // new manager associated with the new name. This is done in order to ensure that the + // 'globalCursorIdCache' maintains the correct mapping from cursor id "prefix" (the high order + // bits) to namespace. + _cursorManager = std::make_unique(_ns); +} + } // namespace mongo diff --git a/src/mongo/db/catalog/collection_impl.h b/src/mongo/db/catalog/collection_impl.h index b374eaa70fb..5d616d01791 100644 --- a/src/mongo/db/catalog/collection_impl.h +++ b/src/mongo/db/catalog/collection_impl.h @@ -80,6 +80,8 @@ public: return _ns; } + void setNs(NamespaceString nss) final; + OptionalCollectionUUID uuid() const { return _uuid; } @@ -101,7 +103,7 @@ public: } CursorManager* getCursorManager() const final { - return &_cursorManager; + return _cursorManager.get(); } bool requiresIdIndex() const final; @@ -390,7 +392,7 @@ private: int _magic; - const NamespaceString _ns; + NamespaceString _ns; OptionalCollectionUUID _uuid; CollectionCatalogEntry* const _details; RecordStore* const _recordStore; @@ -415,10 +417,7 @@ private: ValidationAction _validationAction; ValidationLevel _validationLevel; - // this is mutable because read only users of the Collection class - // use it keep state. This seems valid as const correctness of Collection - // should be about the data. - mutable CursorManager _cursorManager; + std::unique_ptr _cursorManager; // Notifier object for awaitData. Threads polling a capped collection for new data can wait // on this object until notified of the arrival of new data. @@ -430,7 +429,5 @@ private: boost::optional _minVisibleSnapshot; Collection* _this; - - friend class NamespaceDetails; }; } // namespace mongo diff --git a/src/mongo/db/catalog/collection_info_cache.h b/src/mongo/db/catalog/collection_info_cache.h index 28bd6832e91..2426b16490d 100644 --- a/src/mongo/db/catalog/collection_info_cache.h +++ b/src/mongo/db/catalog/collection_info_cache.h @@ -70,6 +70,8 @@ public: virtual void notifyOfQuery(OperationContext* opCtx, const std::set& indexesUsed) = 0; + + virtual void setNs(NamespaceString ns) = 0; }; @@ -159,6 +161,10 @@ public: return this->_impl().notifyOfQuery(opCtx, indexesUsed); } + inline void setNs(NamespaceString ns) { + this->_impl().setNs(std::move(ns)); + } + std::unique_ptr _pimpl; // This structure exists to give us a customization point to decide how to force users of this diff --git a/src/mongo/db/catalog/collection_info_cache_impl.cpp b/src/mongo/db/catalog/collection_info_cache_impl.cpp index 17dde1830fc..d05615dd713 100644 --- a/src/mongo/db/catalog/collection_info_cache_impl.cpp +++ b/src/mongo/db/catalog/collection_info_cache_impl.cpp @@ -71,7 +71,7 @@ CollectionInfoCacheImpl::~CollectionInfoCacheImpl() { // Necessary because the collection cache will not explicitly get updated upon database drop. if (_hasTTLIndex) { TTLCollectionCache& ttlCollectionCache = TTLCollectionCache::get(getGlobalServiceContext()); - ttlCollectionCache.unregisterCollection(_ns); + ttlCollectionCache.unregisterCollection(_collection->ns()); } } @@ -254,4 +254,19 @@ void CollectionInfoCacheImpl::rebuildIndexData(OperationContext* opCtx) { CollectionIndexUsageMap CollectionInfoCacheImpl::getIndexUsageStats() const { return _indexUsageTracker.getUsageStats(); } + +void CollectionInfoCacheImpl::setNs(NamespaceString ns) { + auto oldNs = _ns; + _ns = std::move(ns); + + _planCache->setNs(_ns); + + // Update the TTL collection cache. + if (_hasTTLIndex) { + auto& ttlCollectionCache = TTLCollectionCache::get(getGlobalServiceContext()); + ttlCollectionCache.unregisterCollection(oldNs); + ttlCollectionCache.registerCollection(_ns); + } +} + } // namespace mongo diff --git a/src/mongo/db/catalog/collection_info_cache_impl.h b/src/mongo/db/catalog/collection_info_cache_impl.h index 5723603f39e..6900ba9c091 100644 --- a/src/mongo/db/catalog/collection_info_cache_impl.h +++ b/src/mongo/db/catalog/collection_info_cache_impl.h @@ -110,6 +110,8 @@ public: */ void notifyOfQuery(OperationContext* opCtx, const std::set& indexesUsed); + void setNs(NamespaceString ns) override; + private: void computeIndexKeys(OperationContext* opCtx); void updatePlanCacheIndexEntries(OperationContext* opCtx); @@ -121,7 +123,8 @@ private: void rebuildIndexData(OperationContext* opCtx); Collection* _collection; // not owned - const NamespaceString _ns; + + NamespaceString _ns; // --- index keys cache bool _keysComputed; diff --git a/src/mongo/db/catalog/collection_mock.h b/src/mongo/db/catalog/collection_mock.h index e74c40b6196..471dba32623 100644 --- a/src/mongo/db/catalog/collection_mock.h +++ b/src/mongo/db/catalog/collection_mock.h @@ -59,12 +59,17 @@ private: std::abort(); } - const NamespaceString _ns; + NamespaceString _ns; public: const NamespaceString& ns() const { return _ns; } + + void setNs(NamespaceString nss) final { + _ns = std::move(nss); + } + bool ok() const { std::abort(); } diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp index 0ced8f869b1..9f63f793c2d 100644 --- a/src/mongo/db/catalog/database_impl.cpp +++ b/src/mongo/db/catalog/database_impl.cpp @@ -159,6 +159,36 @@ public: Collection* const _coll; }; +class DatabaseImpl::RenameCollectionChange final : public RecoveryUnit::Change { +public: + RenameCollectionChange(DatabaseImpl* db, + Collection* coll, + NamespaceString fromNs, + NamespaceString toNs) + : _db(db), _coll(coll), _fromNs(std::move(fromNs)), _toNs(std::move(toNs)) {} + + void commit(boost::optional commitTime) override { + // Ban reading from this collection on committed reads on snapshots before now. + if (commitTime) { + _coll->setMinimumVisibleSnapshot(commitTime.get()); + } + } + + void rollback() override { + auto it = _db->_collections.find(_toNs.ns()); + invariant(it != _db->_collections.end()); + invariant(it->second == _coll); + _db->_collections[_fromNs.ns()] = _coll; + _db->_collections.erase(it); + _coll->setNs(_fromNs); + } + + DatabaseImpl* const _db; + Collection* const _coll; + const NamespaceString _fromNs; + const NamespaceString _toNs; +}; + DatabaseImpl::~DatabaseImpl() { for (CollectionMap::const_iterator i = _collections.begin(); i != _collections.end(); ++i) delete i->second; @@ -524,7 +554,8 @@ Status DatabaseImpl::dropCollectionEvenIfSystem(OperationContext* opCtx, if (!status.isOK()) { return status; } - opObserver->onDropCollection(opCtx, fullns, uuid); + opObserver->onDropCollection( + opCtx, fullns, uuid, OpObserver::CollectionDropType::kOnePhase); return Status::OK(); } @@ -569,24 +600,24 @@ Status DatabaseImpl::dropCollectionEvenIfSystem(OperationContext* opCtx, // Log oplog entry for collection drop and proceed to complete rest of two phase drop // process. - dropOpTime = opObserver->onDropCollection(opCtx, fullns, uuid); + dropOpTime = opObserver->onDropCollection( + opCtx, fullns, uuid, OpObserver::CollectionDropType::kTwoPhase); - // Drop collection immediately if OpObserver did not write entry to oplog. + // The OpObserver should have written an entry to the oplog with a particular op time. // After writing the oplog entry, all errors are fatal. See getNextOpTime() comments in // oplog.cpp. if (dropOpTime.isNull()) { log() << "dropCollection: " << fullns << " (" << uuidString - << ") - no drop optime available for pending-drop. " - << "Dropping collection immediately."; - fassert(40462, _finishDropCollection(opCtx, fullns, collection)); - return Status::OK(); + << ") - expected oplog entry to be written"; + fassertFailed(40462); } } else { // If we are provided with a valid 'dropOpTime', it means we are dropping this collection // in the context of applying an oplog entry on a secondary. // OpObserver::onDropCollection() should be returning a null OpTime because we should not be // writing to the oplog. - auto opTime = opObserver->onDropCollection(opCtx, fullns, uuid); + auto opTime = opObserver->onDropCollection( + opCtx, fullns, uuid, OpObserver::CollectionDropType::kTwoPhase); if (!opTime.isNull()) { severe() << "dropCollection: " << fullns << " (" << uuidString << ") - unexpected oplog entry written to the oplog with optime " << opTime; @@ -678,36 +709,49 @@ Status DatabaseImpl::renameCollection(OperationContext* opCtx, BackgroundOperation::assertNoBgOpInProgForNs(fromNS); BackgroundOperation::assertNoBgOpInProgForNs(toNS); - NamespaceString fromNSS(fromNS); - NamespaceString toNSS(toNS); - { // remove anything cached - Collection* coll = getCollection(opCtx, fromNS); + const NamespaceString fromNSS(fromNS); + const NamespaceString toNSS(toNS); - if (!coll) - return Status(ErrorCodes::NamespaceNotFound, "collection not found to rename"); + invariant(fromNSS.db() == _name); + invariant(toNSS.db() == _name); + if (getCollection(opCtx, toNSS)) { + return Status(ErrorCodes::NamespaceExists, + str::stream() << "Cannot rename '" << fromNS << "' to '" << toNS + << "' because the destination namespace already exists"); + } - string clearCacheReason = str::stream() << "renamed collection '" << fromNS << "' to '" - << toNS << "'"; - IndexCatalog::IndexIterator ii = coll->getIndexCatalog()->getIndexIterator(opCtx, true); + Collection* collToRename = getCollection(opCtx, fromNSS); + if (!collToRename) { + return Status(ErrorCodes::NamespaceNotFound, "collection not found to rename"); + } - while (ii.more()) { - IndexDescriptor* desc = ii.next(); - _clearCollectionCache( - opCtx, desc->indexNamespace(), clearCacheReason, /*collectionGoingAway*/ true); - } + string clearCacheReason = str::stream() << "renamed collection '" << fromNS << "' to '" << toNS + << "'"; - _clearCollectionCache(opCtx, fromNS, clearCacheReason, /*collectionGoingAway*/ true); - _clearCollectionCache(opCtx, toNS, clearCacheReason, /*collectionGoingAway*/ false); + // Notify the cursor manager that it should kill all the cursors in the source and target + // collections. This is currently necessary since the query layer is not prepared for cursors to + // survive collection renames. + auto sourceManager = collToRename->getCursorManager(); + invariant(sourceManager); + sourceManager->invalidateAll(opCtx, /*collectionGoingAway*/ true, clearCacheReason); - Top::get(opCtx->getServiceContext()).collectionDropped(fromNS.toString()); + log() << "renameCollection: renaming collection " << collToRename->uuid()->toString() + << " from " << fromNS << " to " << toNS; - log() << "renameCollection: renaming collection " << coll->uuid()->toString() << " from " - << fromNS << " to " << toNS; - } + Top::get(opCtx->getServiceContext()).collectionDropped(fromNS.toString()); Status s = _dbEntry->renameCollection(opCtx, fromNS, toNS, stayTemp); - opCtx->recoveryUnit()->registerChange(new AddCollectionChange(opCtx, this, toNS)); - _collections[toNS] = _getOrCreateCollectionInstance(opCtx, toNSS); + // Make 'toNS' map to the collection instead of 'fromNS'. + _collections.erase(fromNS); + _collections[toNS] = collToRename; + + // Update Collection's ns. + collToRename->setNs(toNSS); + + // Register a Change which, on rollback, will reinstall the Collection* in the collections map + // so that it is associated with 'fromNS', not 'toNS'. + opCtx->recoveryUnit()->registerChange( + new RenameCollectionChange(this, collToRename, fromNSS, toNSS)); return s; } diff --git a/src/mongo/db/catalog/database_impl.h b/src/mongo/db/catalog/database_impl.h index 38213e06055..5a2e95070f7 100644 --- a/src/mongo/db/catalog/database_impl.h +++ b/src/mongo/db/catalog/database_impl.h @@ -50,7 +50,6 @@ namespace mongo { class Collection; class DatabaseCatalogEntry; class IndexCatalog; -class NamespaceDetails; class OperationContext; class PseudoRandom; @@ -202,6 +201,11 @@ public: Collection* getOrCreateCollection(OperationContext* opCtx, const NamespaceString& nss) final; + /** + * Renames the fully qualified namespace 'fromNS' to the fully qualified namespace 'toNS'. + * Illegal to call unless both 'fromNS' and 'toNS' are within this database. Returns an error if + * 'toNS' already exists or 'fromNS' does not exist. + */ Status renameCollection(OperationContext* opCtx, StringData fromNS, StringData toNS, @@ -272,6 +276,7 @@ private: class AddCollectionChange; class RemoveCollectionChange; + class RenameCollectionChange; const std::string _name; // "dbname" @@ -300,7 +305,6 @@ private: Database* _this; // Pointer to wrapper, for external caller compatibility. friend class Collection; - friend class NamespaceDetails; friend class IndexCatalog; }; diff --git a/src/mongo/db/catalog/drop_database_test.cpp b/src/mongo/db/catalog/drop_database_test.cpp index 89a99fb4bd2..642c759fc5d 100644 --- a/src/mongo/db/catalog/drop_database_test.cpp +++ b/src/mongo/db/catalog/drop_database_test.cpp @@ -69,12 +69,14 @@ public: void onDropDatabase(OperationContext* opCtx, const std::string& dbName) override; repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) override; + OptionalCollectionUUID uuid, + CollectionDropType dropType) override; std::set droppedDatabaseNames; std::set droppedCollectionNames; Database* db = nullptr; bool onDropCollectionThrowsException = false; + const repl::OpTime dropOpTime = {Timestamp(Seconds(100), 1U), 1LL}; }; void OpObserverMock::onDropDatabase(OperationContext* opCtx, const std::string& dbName) { @@ -86,9 +88,11 @@ void OpObserverMock::onDropDatabase(OperationContext* opCtx, const std::string& repl::OpTime OpObserverMock::onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { ASSERT_TRUE(opCtx->lockState()->inAWriteUnitOfWork()); - auto opTime = OpObserverNoop::onDropCollection(opCtx, collectionName, uuid); + auto opTime = OpObserverNoop::onDropCollection(opCtx, collectionName, uuid, dropType); + invariant(opTime.isNull()); // Do not update 'droppedCollectionNames' if OpObserverNoop::onDropCollection() throws. droppedCollectionNames.insert(collectionName); @@ -100,7 +104,7 @@ repl::OpTime OpObserverMock::onDropCollection(OperationContext* opCtx, uassert( ErrorCodes::OperationFailed, "onDropCollection() failed", !onDropCollectionThrowsException); - OpObserver::Times::get(opCtx).reservedOpTimes.push_back(opTime); + OpObserver::Times::get(opCtx).reservedOpTimes.push_back(dropOpTime); return {}; } diff --git a/src/mongo/db/catalog/index_catalog.h b/src/mongo/db/catalog/index_catalog.h index 7fde59471a9..5186645ac32 100644 --- a/src/mongo/db/catalog/index_catalog.h +++ b/src/mongo/db/catalog/index_catalog.h @@ -395,6 +395,8 @@ public: virtual void prepareInsertDeleteOptions(OperationContext* opCtx, const IndexDescriptor* desc, InsertDeleteOptions* options) const = 0; + + virtual void setNs(NamespaceString ns) = 0; }; } // namespace mongo diff --git a/src/mongo/db/catalog/index_catalog_entry.h b/src/mongo/db/catalog/index_catalog_entry.h index 25e423802e5..3b43a07ee1c 100644 --- a/src/mongo/db/catalog/index_catalog_entry.h +++ b/src/mongo/db/catalog/index_catalog_entry.h @@ -143,6 +143,8 @@ public: virtual boost::optional getMinimumVisibleSnapshot() = 0; virtual void setMinimumVisibleSnapshot(const Timestamp name) = 0; + + virtual void setNs(NamespaceString ns) = 0; }; class IndexCatalogEntryContainer { diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.cpp b/src/mongo/db/catalog/index_catalog_entry_impl.cpp index 548ceb77442..997b0b4164b 100644 --- a/src/mongo/db/catalog/index_catalog_entry_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_entry_impl.cpp @@ -101,9 +101,8 @@ IndexCatalogEntryImpl::IndexCatalogEntryImpl(OperationContext* const opCtx, _indexTracksPathLevelMultikeyInfo = !_indexMultikeyPaths.empty(); } - if (BSONElement collationElement = _descriptor->getInfoElement("collation")) { - invariant(collationElement.isABSONObj()); - BSONObj collation = collationElement.Obj(); + const BSONObj& collation = _descriptor->collation(); + if (!collation.isEmpty()) { auto statusWithCollator = CollatorFactoryInterface::get(opCtx->getServiceContext())->makeFromBSON(collation); @@ -113,9 +112,9 @@ IndexCatalogEntryImpl::IndexCatalogEntryImpl(OperationContext* const opCtx, _collator = std::move(statusWithCollator.getValue()); } - if (BSONElement filterElement = _descriptor->getInfoElement("partialFilterExpression")) { - invariant(filterElement.isABSONObj()); - BSONObj filter = filterElement.Obj(); + if (_descriptor->isPartial()) { + const BSONObj& filter = _descriptor->partialFilterExpression(); + boost::intrusive_ptr expCtx( new ExpressionContext(opCtx, _collator.get())); @@ -345,6 +344,11 @@ void IndexCatalogEntryImpl::setIndexKeyStringWithLongTypeBitsExistsOnDisk(Operat _collection->setIndexKeyStringWithLongTypeBitsExistsOnDisk(opCtx); } +void IndexCatalogEntryImpl::setNs(NamespaceString ns) { + _ns = ns.toString(); + _descriptor->setNs(std::move(ns)); +} + // ---- bool IndexCatalogEntryImpl::_catalogIsReady(OperationContext* opCtx) const { diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.h b/src/mongo/db/catalog/index_catalog_entry_impl.h index abfa17a4de5..a4d800a8e5c 100644 --- a/src/mongo/db/catalog/index_catalog_entry_impl.h +++ b/src/mongo/db/catalog/index_catalog_entry_impl.h @@ -72,6 +72,8 @@ public: return _ns; } + void setNs(NamespaceString ns) final; + void init(std::unique_ptr accessMethod) final; IndexDescriptor* descriptor() final { diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp index 3d19793dbbf..c4ea8d730fc 100644 --- a/src/mongo/db/catalog/index_catalog_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_impl.cpp @@ -1427,4 +1427,17 @@ StatusWith IndexCatalogImpl::_fixIndexSpec(OperationContext* opCtx, return b.obj(); } + +void IndexCatalogImpl::setNs(NamespaceString ns) { + for (auto&& ice : _entries) { + ice->setNs(ns); + } + + std::vector newUnfinishedIndexes; + for (auto&& indexSpec : _unfinishedIndexes) { + newUnfinishedIndexes.push_back(IndexDescriptor::renameNsInIndexSpec(indexSpec, ns)); + } + _unfinishedIndexes.swap(newUnfinishedIndexes); +} + } // namespace mongo diff --git a/src/mongo/db/catalog/index_catalog_impl.h b/src/mongo/db/catalog/index_catalog_impl.h index f1c833ea01b..0baaf1fb3b6 100644 --- a/src/mongo/db/catalog/index_catalog_impl.h +++ b/src/mongo/db/catalog/index_catalog_impl.h @@ -342,6 +342,8 @@ public: const IndexDescriptor* desc, InsertDeleteOptions* options) const override; + void setNs(NamespaceString ns) override; + private: static const BSONObj _idObj; // { _id : 1 } diff --git a/src/mongo/db/catalog/rename_collection_test.cpp b/src/mongo/db/catalog/rename_collection_test.cpp index 7ed933b58d5..ddd0bdc85c4 100644 --- a/src/mongo/db/catalog/rename_collection_test.cpp +++ b/src/mongo/db/catalog/rename_collection_test.cpp @@ -36,6 +36,7 @@ #include "mongo/db/catalog/collection_catalog_entry.h" #include "mongo/db/catalog/collection_options.h" +#include "mongo/db/catalog/database_holder.h" #include "mongo/db/catalog/multi_index_block.h" #include "mongo/db/catalog/rename_collection.h" #include "mongo/db/catalog/uuid_catalog.h" @@ -95,7 +96,8 @@ public: repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) override; + OptionalCollectionUUID uuid, + CollectionDropType dropType) override; void onRenameCollection(OperationContext* opCtx, const NamespaceString& fromCollection, @@ -127,6 +129,8 @@ public: OptionalCollectionUUID onRenameCollectionDropTarget; repl::OpTime renameOpTime = {Timestamp(Seconds(100), 1U), 1LL}; + repl::OpTime dropOpTime = {Timestamp(Seconds(100), 1U), 1LL}; + private: /** * Pushes 'operationName' into 'oplogEntries' if we can write to the oplog for this namespace. @@ -176,10 +180,16 @@ void OpObserverMock::onCreateCollection(OperationContext* opCtx, repl::OpTime OpObserverMock::onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { _logOp(opCtx, collectionName, "drop"); - OpObserver::Times::get(opCtx).reservedOpTimes.push_back( - OpObserverNoop::onDropCollection(opCtx, collectionName, uuid)); + // If the oplog is not disabled for this namespace, then we need to reserve an op time for the + // drop. + if (!repl::ReplicationCoordinator::get(opCtx)->isOplogDisabledFor(opCtx, collectionName)) { + OpObserver::Times::get(opCtx).reservedOpTimes.push_back(dropOpTime); + } + auto noopOptime = OpObserverNoop::onDropCollection(opCtx, collectionName, uuid, dropType); + invariant(noopOptime.isNull()); return {}; } @@ -434,6 +444,19 @@ void _insertDocument(OperationContext* opCtx, const NamespaceString& nss, const }); } +/** + * Retrieves the pointer to a collection associated with the given namespace string from the + * catalog. The caller must hold the appropriate locks from the lock manager. + */ +Collection* _getCollection_inlock(OperationContext* opCtx, const NamespaceString& nss) { + invariant(opCtx->lockState()->isCollectionLockedForMode(nss.ns(), MODE_IS)); + auto* db = DatabaseHolder::getDatabaseHolder().get(opCtx, nss.db()); + if (!db) { + return nullptr; + } + return db->getCollection(opCtx, nss.ns()); +} + TEST_F(RenameCollectionTest, RenameCollectionReturnsNamespaceNotFoundIfDatabaseDoesNotExist) { ASSERT_FALSE(AutoGetDb(_opCtx.get(), _sourceNss.db(), MODE_X).getDb()); ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, @@ -1098,4 +1121,115 @@ TEST_F(RenameCollectionTest, ASSERT_TRUE(_opObserver->onInsertsIsGlobalWriteLockExclusive); } +TEST_F(RenameCollectionTest, CollectionPointerRemainsValidThroughRename) { + _createCollection(_opCtx.get(), _sourceNss); + Lock::GlobalWrite globalWrite(_opCtx.get()); + + // Get a pointer to the source collection, and ensure that it reports the expected namespace + // string. + Collection* sourceColl = _getCollection_inlock(_opCtx.get(), _sourceNss); + ASSERT(sourceColl); + + ASSERT_OK(renameCollection(_opCtx.get(), _sourceNss, _targetNss, {})); + + // Retrieve the pointer associated with the target namespace, and ensure that its the same + // pointer (i.e. the renamed collection has the very same Collection instance). + Collection* targetColl = _getCollection_inlock(_opCtx.get(), _targetNss); + ASSERT(targetColl); + ASSERT_EQ(targetColl, sourceColl); + + // Verify that the Collection reports that its namespace is now the target namespace. + ASSERT_EQ(targetColl->ns(), _targetNss); +} + +TEST_F(RenameCollectionTest, CollectionCatalogEntryPointerRemainsValidThroughRename) { + _createCollection(_opCtx.get(), _sourceNss); + Lock::GlobalWrite globalWrite(_opCtx.get()); + + // Get a pointer to the source collection, and ensure that it reports the expected namespace + // string. + Collection* sourceColl = _getCollection_inlock(_opCtx.get(), _sourceNss); + ASSERT(sourceColl); + auto* sourceCatalogEntry = sourceColl->getCatalogEntry(); + ASSERT(sourceCatalogEntry); + ASSERT_EQ(sourceCatalogEntry->ns(), _sourceNss); + + ASSERT_OK(renameCollection(_opCtx.get(), _sourceNss, _targetNss, {})); + + // Verify that the CollectionCatalogEntry reports that its namespace is now the target + // namespace. + ASSERT_EQ(sourceCatalogEntry->ns(), _targetNss); +} + +TEST_F(RenameCollectionTest, CatalogPointersRenameValidThroughRenameAfterDroppingTarget) { + _createCollection(_opCtx.get(), _sourceNss); + _createCollection(_opCtx.get(), _targetNss); + Lock::GlobalWrite globalWrite(_opCtx.get()); + + Collection* sourceColl = _getCollection_inlock(_opCtx.get(), _sourceNss); + ASSERT(sourceColl); + auto* sourceCatalogEntry = sourceColl->getCatalogEntry(); + ASSERT(sourceCatalogEntry); + + RenameCollectionOptions options; + options.dropTarget = true; + ASSERT_OK(renameCollection(_opCtx.get(), _sourceNss, _targetNss, options)); + + // The same catalog pointers should now report that they are associated with the target + // namespace. + ASSERT_EQ(sourceColl->ns(), _targetNss); + ASSERT_EQ(sourceCatalogEntry->ns(), _targetNss); +} + +TEST_F(RenameCollectionTest, CatalogPointersRenameValidThroughRenameForApplyOps) { + _createCollection(_opCtx.get(), _sourceNss); + Collection* sourceColl = AutoGetCollectionForRead(_opCtx.get(), _sourceNss).getCollection(); + ASSERT(sourceColl); + + auto uuidDoc = BSON("ui" << UUID::gen()); + auto cmd = BSON("renameCollection" << _sourceNss.ns() << "to" << _targetNss.ns()); + ASSERT_OK(renameCollectionForApplyOps( + _opCtx.get(), _sourceNss.db().toString(), uuidDoc["ui"], cmd, {})); + ASSERT_FALSE(_collectionExists(_opCtx.get(), _sourceNss)); + + Collection* targetColl = AutoGetCollectionForRead(_opCtx.get(), _targetNss).getCollection(); + ASSERT(targetColl); + ASSERT_EQ(targetColl, sourceColl); + ASSERT_EQ(targetColl->ns(), _targetNss); +} + +TEST_F(RenameCollectionTest, RenameAcrossDatabasesDoesNotPreserveCatalogPointers) { + _createCollection(_opCtx.get(), _sourceNss); + Lock::GlobalWrite globalWrite(_opCtx.get()); + + // Get a pointer to the source collection, and ensure that it reports the expected namespace + // string. + Collection* sourceColl = _getCollection_inlock(_opCtx.get(), _sourceNss); + ASSERT(sourceColl); + auto* sourceCatalogEntry = sourceColl->getCatalogEntry(); + ASSERT(sourceCatalogEntry); + + ASSERT_OK(renameCollection(_opCtx.get(), _sourceNss, _targetNssDifferentDb, {})); + + // Verify that the CollectionCatalogEntry reports that its namespace is now the target + // namespace. + Collection* targetColl = _getCollection_inlock(_opCtx.get(), _targetNssDifferentDb); + ASSERT(targetColl); + ASSERT_NE(targetColl, sourceColl); + auto* targetCatalogEntry = targetColl->getCatalogEntry(); + ASSERT(targetCatalogEntry); + ASSERT_NE(targetCatalogEntry, sourceCatalogEntry); +} + +TEST_F(RenameCollectionTest, UUIDCatalogMappingRemainsIntactThroughRename) { + _createCollection(_opCtx.get(), _sourceNss); + Lock::GlobalWrite globalWrite(_opCtx.get()); + auto& uuidCatalog = UUIDCatalog::get(_opCtx.get()); + Collection* sourceColl = _getCollection_inlock(_opCtx.get(), _sourceNss); + ASSERT(sourceColl); + ASSERT_EQ(sourceColl, uuidCatalog.lookupCollectionByUUID(*sourceColl->uuid())); + ASSERT_OK(renameCollection(_opCtx.get(), _sourceNss, _targetNss, {})); + ASSERT_EQ(sourceColl, uuidCatalog.lookupCollectionByUUID(*sourceColl->uuid())); +} + } // namespace diff --git a/src/mongo/db/catalog/uuid_catalog.cpp b/src/mongo/db/catalog/uuid_catalog.cpp index 632568350a3..556251a2602 100644 --- a/src/mongo/db/catalog/uuid_catalog.cpp +++ b/src/mongo/db/catalog/uuid_catalog.cpp @@ -76,12 +76,23 @@ void UUIDCatalogObserver::onCollMod(OperationContext* opCtx, repl::OpTime UUIDCatalogObserver::onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { if (!uuid) return {}; - UUIDCatalog& catalog = UUIDCatalog::get(opCtx); - catalog.onDropCollection(opCtx, uuid.get()); + + // Replicated drops are two-phase, meaning that the collection is first renamed into a "drop + // pending" state and reaped later. This op observer is only called for the rename phase, which + // means the UUID mapping is still valid. + // + // On the other hand, if the drop is not replicated, it takes effect immediately. In this case, + // the UUID mapping must be removed from the UUID catalog. + if (dropType == CollectionDropType::kOnePhase) { + UUIDCatalog& catalog = UUIDCatalog::get(opCtx); + catalog.onDropCollection(opCtx, uuid.get()); + } + return {}; } diff --git a/src/mongo/db/catalog/uuid_catalog.h b/src/mongo/db/catalog/uuid_catalog.h index f5e6fc0e284..9fd0b518aa8 100644 --- a/src/mongo/db/catalog/uuid_catalog.h +++ b/src/mongo/db/catalog/uuid_catalog.h @@ -86,7 +86,8 @@ public: void onDropDatabase(OperationContext* opCtx, const std::string& dbName) override {} repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) override; + OptionalCollectionUUID uuid, + CollectionDropType dropType) override; void onDropIndex(OperationContext* opCtx, const NamespaceString& nss, OptionalCollectionUUID uuid, diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript index 52c15bb7802..8cfc52999db 100644 --- a/src/mongo/db/commands/SConscript +++ b/src/mongo/db/commands/SConscript @@ -446,6 +446,7 @@ env.CppUnitTest( ], LIBDEPS=[ "$BUILD_DIR/mongo/db/auth/authmocks", + "$BUILD_DIR/mongo/db/repl/drop_pending_collection_reaper", "$BUILD_DIR/mongo/db/repl/replmocks", "$BUILD_DIR/mongo/db/repl/storage_interface_impl", "$BUILD_DIR/mongo/db/serveronly", diff --git a/src/mongo/db/commands/mr_test.cpp b/src/mongo/db/commands/mr_test.cpp index e7ef4571dfc..6ff0a8bd5a9 100644 --- a/src/mongo/db/commands/mr_test.cpp +++ b/src/mongo/db/commands/mr_test.cpp @@ -46,6 +46,7 @@ #include "mongo/db/json.h" #include "mongo/db/op_observer_noop.h" #include "mongo/db/op_observer_registry.h" +#include "mongo/db/repl/drop_pending_collection_reaper.h" #include "mongo/db/repl/replication_coordinator_mock.h" #include "mongo/db/repl/storage_interface_impl.h" #include "mongo/db/service_context_d_test_fixture.h" @@ -298,12 +299,19 @@ public: const BSONObj& idIndex, const OplogSlot& createOpTime) override; + repl::OpTime onDropCollection(OperationContext* opCtx, + const NamespaceString& collectionName, + OptionalCollectionUUID uuid, + CollectionDropType dropType) override; + // Hook for onInserts. Defaults to a no-op function but may be overridden to inject exceptions // while mapReduce inserts its results into the temporary output collection. std::function onInsertsFn = [] {}; // Holds namespaces of temporary collections created by mapReduce. std::vector tempNamespaces; + + const repl::OpTime dropOpTime = {Timestamp(Seconds(100), 1U), 1LL}; }; void MapReduceOpObserver::onInserts(OperationContext* opCtx, @@ -327,6 +335,18 @@ void MapReduceOpObserver::onCreateCollection(OperationContext*, tempNamespaces.push_back(collectionName); } +repl::OpTime MapReduceOpObserver::onDropCollection(OperationContext* opCtx, + const NamespaceString& collectionName, + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { + // If the oplog is not disabled for this namespace, then we need to reserve an op time for the + // drop. + if (!repl::ReplicationCoordinator::get(opCtx)->isOplogDisabledFor(opCtx, collectionName)) { + OpObserver::Times::get(opCtx).reservedOpTimes.push_back(dropOpTime); + } + return {}; +} + /** * Test fixture for MapReduceCommand. */ @@ -382,6 +402,10 @@ void MapReduceCommandTest::setUp() { repl::ReplicationCoordinator::set(service, std::make_unique(service)); + repl::DropPendingCollectionReaper::set( + service, + stdx::make_unique(repl::StorageInterface::get(service))); + // Set up an OpObserver to track the temporary collections mapReduce creates. auto opObserver = std::make_unique(); _opObserver = opObserver.get(); diff --git a/src/mongo/db/free_mon/free_mon_op_observer.cpp b/src/mongo/db/free_mon/free_mon_op_observer.cpp index 0a5986eea88..467960b3c26 100644 --- a/src/mongo/db/free_mon/free_mon_op_observer.cpp +++ b/src/mongo/db/free_mon/free_mon_op_observer.cpp @@ -57,7 +57,8 @@ FreeMonOpObserver::~FreeMonOpObserver() = default; repl::OpTime FreeMonOpObserver::onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { if (collectionName == NamespaceString::kServerConfigurationNamespace) { auto controller = FreeMonController::get(opCtx->getServiceContext()); diff --git a/src/mongo/db/free_mon/free_mon_op_observer.h b/src/mongo/db/free_mon/free_mon_op_observer.h index 8a162bb25c0..535b61cacca 100644 --- a/src/mongo/db/free_mon/free_mon_op_observer.h +++ b/src/mongo/db/free_mon/free_mon_op_observer.h @@ -96,7 +96,8 @@ public: repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) final; + OptionalCollectionUUID uuid, + CollectionDropType dropType) final; void onDropIndex(OperationContext* opCtx, const NamespaceString& nss, diff --git a/src/mongo/db/index/index_descriptor.cpp b/src/mongo/db/index/index_descriptor.cpp index ad2dca4237e..bf94a32542d 100644 --- a/src/mongo/db/index/index_descriptor.cpp +++ b/src/mongo/db/index/index_descriptor.cpp @@ -172,4 +172,25 @@ bool IndexDescriptor::areIndexOptionsEquivalent(const IndexDescriptor* other) co rhs.second); }); } + +void IndexDescriptor::setNs(NamespaceString ns) { + _parentNS = ns.toString(); + _indexNamespace = makeIndexNamespace(_parentNS, _indexName); + + // Construct a new infoObj with the namespace field replaced. + _infoObj = renameNsInIndexSpec(_infoObj, ns); } + +BSONObj IndexDescriptor::renameNsInIndexSpec(BSONObj spec, const NamespaceString& newNs) { + BSONObjBuilder builder; + for (auto&& elt : spec) { + if (elt.fieldNameStringData() == kNamespaceFieldName) { + builder.append(kNamespaceFieldName, newNs.ns()); + } else { + builder.append(elt); + } + } + return builder.obj(); +} + +} // namespace mongo diff --git a/src/mongo/db/index/index_descriptor.h b/src/mongo/db/index/index_descriptor.h index ac441696a38..c2c30cddbb5 100644 --- a/src/mongo/db/index/index_descriptor.h +++ b/src/mongo/db/index/index_descriptor.h @@ -87,6 +87,12 @@ public: static constexpr StringData kUniqueFieldName = "unique"_sd; static constexpr StringData kWeightsFieldName = "weights"_sd; + /** + * Given a BSONObj representing an index spec, returns a new owned BSONObj which is identical to + * 'spec' after replacing the 'ns' field with the value of 'newNs'. + */ + static BSONObj renameNsInIndexSpec(BSONObj spec, const NamespaceString& newNs); + /** * OnDiskIndexData is a pointer to the memory mapped per-index data. * infoObj is a copy of the index-describing BSONObj contained in the OnDiskIndexData. @@ -111,6 +117,16 @@ public: BSONElement e = _infoObj[IndexDescriptor::kIndexVersionFieldName]; fassert(50942, e.isNumber()); _version = static_cast(e.numberInt()); + + if (BSONElement filterElement = _infoObj[kPartialFilterExprFieldName]) { + invariant(filterElement.isABSONObj()); + _partialFilterExpression = filterElement.Obj().getOwned(); + } + + if (BSONElement collationElement = _infoObj[kCollationFieldName]) { + invariant(collationElement.isABSONObj()); + _collation = collationElement.Obj().getOwned(); + } } @@ -233,20 +249,6 @@ public: return _isIdIndex; } - // - // Properties that are Index-specific. - // - - // Allow access to arbitrary fields in the per-index info object. Some indices stash - // index-specific data there. - BSONElement getInfoElement(const std::string& name) const { - return _infoObj[name]; - } - - // - // "Internals" of accessing the index, used by IndexAccessMethod(s). - // - // Return a (rather compact) std::string representation. std::string toString() const { return _infoObj.toString(); @@ -265,6 +267,16 @@ public: bool areIndexOptionsEquivalent(const IndexDescriptor* other) const; + void setNs(NamespaceString ns); + + const BSONObj& collation() const { + return _collation; + } + + const BSONObj& partialFilterExpression() const { + return _partialFilterExpression; + } + static bool isIdIndexPattern(const BSONObj& pattern) { BSONObjIterator i(pattern); BSONElement e = i.next(); @@ -290,7 +302,7 @@ private: IndexType _indexType; // The BSONObj describing the index. Accessed through the various members above. - const BSONObj _infoObj; + BSONObj _infoObj; // --- cached data from _infoObj @@ -305,6 +317,8 @@ private: bool _unique; bool _partial; IndexVersion _version; + BSONObj _collation; + BSONObj _partialFilterExpression; // only used by IndexCatalogEntryContainer to do caching for perf // users not allowed to touch, and not part of API diff --git a/src/mongo/db/op_observer.h b/src/mongo/db/op_observer.h index fac5943f45d..52abe0618b5 100644 --- a/src/mongo/db/op_observer.h +++ b/src/mongo/db/op_observer.h @@ -80,6 +80,15 @@ struct TTLCollModInfo { */ class OpObserver { public: + enum class CollectionDropType { + // The collection is being dropped immediately, in one step. + kOnePhase, + + // The collection is being dropped in two phases, by renaming to a drop pending collection + // which is registered to be reaped later. + kTwoPhase, + }; + virtual ~OpObserver() = default; virtual void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, @@ -178,10 +187,13 @@ public: * This function logs an oplog entry when a 'drop' command on a collection is executed. * Returns the optime of the oplog entry successfully written to the oplog. * Returns a null optime if an oplog entry was not written for this operation. + * + * 'dropType' describes whether the collection drop is one-phase or two-phase. */ virtual repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) = 0; + OptionalCollectionUUID uuid, + CollectionDropType dropType) = 0; /** * This function logs an oplog entry when an index is dropped. The namespace of the index, diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp index b370f600453..a84ffeb1bdf 100644 --- a/src/mongo/db/op_observer_impl.cpp +++ b/src/mongo/db/op_observer_impl.cpp @@ -736,7 +736,8 @@ void OpObserverImpl::onDropDatabase(OperationContext* opCtx, const std::string& repl::OpTime OpObserverImpl::onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { const auto cmdNss = collectionName.getCommandNS(); const auto cmdObj = BSON("drop" << collectionName.coll()); diff --git a/src/mongo/db/op_observer_impl.h b/src/mongo/db/op_observer_impl.h index 1e678a9d841..c42d15371b1 100644 --- a/src/mongo/db/op_observer_impl.h +++ b/src/mongo/db/op_observer_impl.h @@ -82,7 +82,8 @@ public: void onDropDatabase(OperationContext* opCtx, const std::string& dbName) final; repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) final; + OptionalCollectionUUID uuid, + CollectionDropType dropType) final; void onDropIndex(OperationContext* opCtx, const NamespaceString& nss, OptionalCollectionUUID uuid, diff --git a/src/mongo/db/op_observer_impl_test.cpp b/src/mongo/db/op_observer_impl_test.cpp index bfe3a4dcf09..f10b89a8f71 100644 --- a/src/mongo/db/op_observer_impl_test.cpp +++ b/src/mongo/db/op_observer_impl_test.cpp @@ -217,7 +217,8 @@ TEST_F(OpObserverTest, OnDropCollectionReturnsDropOpTime) { { AutoGetDb autoDb(opCtx.get(), nss.db(), MODE_X); WriteUnitOfWork wunit(opCtx.get()); - opObserver.onDropCollection(opCtx.get(), nss, uuid); + opObserver.onDropCollection( + opCtx.get(), nss, uuid, OpObserver::CollectionDropType::kTwoPhase); dropOpTime = OpObserver::Times::get(opCtx.get()).reservedOpTimes.front(); wunit.commit(); } diff --git a/src/mongo/db/op_observer_noop.h b/src/mongo/db/op_observer_noop.h index 5b0afe0533f..350d6df41bf 100644 --- a/src/mongo/db/op_observer_noop.h +++ b/src/mongo/db/op_observer_noop.h @@ -77,7 +77,8 @@ public: void onDropDatabase(OperationContext* opCtx, const std::string& dbName) override {} repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) override { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) override { return {}; } void onDropIndex(OperationContext* opCtx, diff --git a/src/mongo/db/op_observer_registry.h b/src/mongo/db/op_observer_registry.h index 14fb9d325e5..5c8ab409ab7 100644 --- a/src/mongo/db/op_observer_registry.h +++ b/src/mongo/db/op_observer_registry.h @@ -144,10 +144,11 @@ public: repl::OpTime onDropCollection(OperationContext* const opCtx, const NamespaceString& collectionName, - const OptionalCollectionUUID uuid) override { + const OptionalCollectionUUID uuid, + const CollectionDropType dropType) override { ReservedTimes times{opCtx}; for (auto& observer : this->_observers) { - auto time = observer->onDropCollection(opCtx, collectionName, uuid); + auto time = observer->onDropCollection(opCtx, collectionName, uuid, dropType); invariant(time.isNull()); } return _getOpTimeToReturn(times.get().reservedOpTimes); diff --git a/src/mongo/db/op_observer_registry_test.cpp b/src/mongo/db/op_observer_registry_test.cpp index 271ebd478db..e7281598601 100644 --- a/src/mongo/db/op_observer_registry_test.cpp +++ b/src/mongo/db/op_observer_registry_test.cpp @@ -56,7 +56,8 @@ struct TestObserver : public OpObserverNoop { } repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { drops++; OpObserver::Times::get(opCtx).reservedOpTimes.push_back(opTime); return {}; @@ -169,7 +170,10 @@ TEST_F(OpObserverRegistryTest, OnDropCollectionObserverResultReturnsRightTime) { OperationContextNoop opCtx; registry.addObserver(std::move(unique1)); registry.addObserver(std::make_unique()); - auto op = [&]() -> repl::OpTime { return registry.onDropCollection(&opCtx, testNss, {}); }; + auto op = [&]() -> repl::OpTime { + return registry.onDropCollection( + &opCtx, testNss, {}, OpObserver::CollectionDropType::kOnePhase); + }; checkConsistentOpTime(op); } @@ -189,7 +193,10 @@ DEATH_TEST_F(OpObserverRegistryTest, OnDropCollectionReturnsInconsistentTime, "i OperationContextNoop opCtx; registry.addObserver(std::move(unique1)); registry.addObserver(std::move(unique2)); - auto op = [&]() -> repl::OpTime { return registry.onDropCollection(&opCtx, testNss, {}); }; + auto op = [&]() -> repl::OpTime { + return registry.onDropCollection( + &opCtx, testNss, {}, OpObserver::CollectionDropType::kOnePhase); + }; checkInconsistentOpTime(op); } diff --git a/src/mongo/db/query/plan_cache.h b/src/mongo/db/query/plan_cache.h index 6e9c4606c92..d88e269bebd 100644 --- a/src/mongo/db/query/plan_cache.h +++ b/src/mongo/db/query/plan_cache.h @@ -468,6 +468,10 @@ public: const std::function& serializationFunc, const std::function& filterFunc) const; + void setNs(NamespaceString ns) { + _ns = ns.toString(); + } + private: struct NewEntryState { bool shouldBeCreated = false; diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index a187fc04080..cdb85f50644 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -338,9 +338,10 @@ env.CppUnitTest( 'replication_recovery_test.cpp', ], LIBDEPS=[ + 'drop_pending_collection_reaper', 'oplog_interface_local', - 'replmocks', 'replication_recovery', + 'replmocks', 'storage_interface_impl', '$BUILD_DIR/mongo/db/auth/authmocks', '$BUILD_DIR/mongo/db/service_context_d_test_fixture', @@ -1627,6 +1628,7 @@ env.Library( 'mock_repl_coord_server_fixture.cpp', ], LIBDEPS=[ + 'drop_pending_collection_reaper', 'oplog', 'oplog_entry', 'replmocks', diff --git a/src/mongo/db/repl/mock_repl_coord_server_fixture.cpp b/src/mongo/db/repl/mock_repl_coord_server_fixture.cpp index f64f44ad02f..d565511db57 100644 --- a/src/mongo/db/repl/mock_repl_coord_server_fixture.cpp +++ b/src/mongo/db/repl/mock_repl_coord_server_fixture.cpp @@ -35,6 +35,7 @@ #include "mongo/db/db_raii.h" #include "mongo/db/dbdirectclient.h" #include "mongo/db/operation_context.h" +#include "mongo/db/repl/drop_pending_collection_reaper.h" #include "mongo/db/repl/mock_repl_coord_server_fixture.h" #include "mongo/db/repl/oplog.h" #include "mongo/db/repl/oplog_entry.h" @@ -86,6 +87,10 @@ void MockReplCoordServerFixture::setUp() { repl::setOplogCollectionName(service); repl::acquireOplogCollectionForLogging(opCtx()); + + repl::DropPendingCollectionReaper::set( + service, + stdx::make_unique(repl::StorageInterface::get(service))); } void MockReplCoordServerFixture::tearDown() { diff --git a/src/mongo/db/repl/replication_recovery_test.cpp b/src/mongo/db/repl/replication_recovery_test.cpp index 48bb3ac9761..b056a6a6857 100644 --- a/src/mongo/db/repl/replication_recovery_test.cpp +++ b/src/mongo/db/repl/replication_recovery_test.cpp @@ -33,7 +33,10 @@ #include "mongo/db/client.h" #include "mongo/db/db_raii.h" #include "mongo/db/namespace_string.h" +#include "mongo/db/op_observer_noop.h" +#include "mongo/db/op_observer_registry.h" #include "mongo/db/operation_context.h" +#include "mongo/db/repl/drop_pending_collection_reaper.h" #include "mongo/db/repl/oplog_entry.h" #include "mongo/db/repl/oplog_interface_local.h" #include "mongo/db/repl/replication_consistency_markers_mock.h" @@ -97,6 +100,23 @@ private: bool _supportsRecoveryTimestamp = true; }; +class ReplicationRecoveryTestObObserver : public OpObserverNoop { +public: + repl::OpTime onDropCollection(OperationContext* opCtx, + const NamespaceString& collectionName, + OptionalCollectionUUID uuid, + const CollectionDropType dropType) override { + // If the oplog is not disabled for this namespace, then we need to reserve an op time for + // the drop. + if (!repl::ReplicationCoordinator::get(opCtx)->isOplogDisabledFor(opCtx, collectionName)) { + OpObserver::Times::get(opCtx).reservedOpTimes.push_back(dropOpTime); + } + return {}; + } + + const repl::OpTime dropOpTime = {Timestamp(Seconds(100), 1U), 1LL}; +}; + class ReplicationRecoveryTest : public ServiceContextMongoDTest { protected: OperationContext* getOperationContext() { @@ -149,6 +169,12 @@ private: getOperationContext(), testNs, generateOptionsWithUuid())); MongoDSessionCatalog::onStepUp(_opCtx.get()); + + auto observerRegistry = checked_cast(service->getOpObserver()); + observerRegistry->addObserver(std::make_unique()); + + repl::DropPendingCollectionReaper::set( + service, stdx::make_unique(_storageInterface.get())); } void tearDown() override { diff --git a/src/mongo/db/repl/rollback_impl_test.cpp b/src/mongo/db/repl/rollback_impl_test.cpp index c888ee07949..56a3a2a2c48 100644 --- a/src/mongo/db/repl/rollback_impl_test.cpp +++ b/src/mongo/db/repl/rollback_impl_test.cpp @@ -1199,8 +1199,12 @@ DEATH_TEST_F(RollbackImplTest, _insertDocAndGenerateOplogEntry(obj, uuid, nss); // Drop the collection (immediately; not a two-phase drop), so that the namespace can no longer - // be found. - ASSERT_OK(_storageInterface->dropCollection(_opCtx.get(), nss)); + // be found. We enforce that the storage interface drops the collection immediately with an + // unreplicated writes block, since unreplicated collection drops are not two-phase. + { + repl::UnreplicatedWritesBlock uwb(_opCtx.get()); + ASSERT_OK(_storageInterface->dropCollection(_opCtx.get(), nss)); + } auto status = _rollback->runRollback(_opCtx.get()); unittest::log() << "mongod did not crash when expected; status: " << status; diff --git a/src/mongo/db/repl/rollback_test_fixture.cpp b/src/mongo/db/repl/rollback_test_fixture.cpp index 4b1fa5f93b4..fefed34d53f 100644 --- a/src/mongo/db/repl/rollback_test_fixture.cpp +++ b/src/mongo/db/repl/rollback_test_fixture.cpp @@ -38,6 +38,8 @@ #include "mongo/db/catalog/database_holder.h" #include "mongo/db/client.h" #include "mongo/db/db_raii.h" +#include "mongo/db/op_observer_noop.h" +#include "mongo/db/op_observer_registry.h" #include "mongo/db/repl/oplog.h" #include "mongo/db/repl/replication_consistency_markers_mock.h" #include "mongo/db/repl/replication_coordinator.h" @@ -48,7 +50,6 @@ #include "mongo/db/session_catalog.h" #include "mongo/logger/log_component.h" #include "mongo/logger/logger.h" - #include "mongo/stdx/memory.h" #include "mongo/util/mongoutils/str.h" @@ -67,6 +68,23 @@ ReplSettings createReplSettings() { return settings; } +class RollbackTestOpObserver : public OpObserverNoop { +public: + repl::OpTime onDropCollection(OperationContext* opCtx, + const NamespaceString& collectionName, + OptionalCollectionUUID uuid, + const CollectionDropType dropType) override { + // If the oplog is not disabled for this namespace, then we need to reserve an op time for + // the drop. + if (!repl::ReplicationCoordinator::get(opCtx)->isOplogDisabledFor(opCtx, collectionName)) { + OpObserver::Times::get(opCtx).reservedOpTimes.push_back(dropOpTime); + } + return {}; + } + + const repl::OpTime dropOpTime = {Timestamp(Seconds(100), 1U), 1LL}; +}; + } // namespace void RollbackTest::setUp() { @@ -94,6 +112,9 @@ void RollbackTest::setUp() { // Increase rollback log component verbosity for unit tests. mongo::logger::globalLogDomain()->setMinimumLoggedSeverity( logger::LogComponent::kReplicationRollback, logger::LogSeverity::Debug(2)); + + auto observerRegistry = checked_cast(serviceContext->getOpObserver()); + observerRegistry->addObserver(std::make_unique()); } RollbackTest::ReplicationCoordinatorRollbackMock::ReplicationCoordinatorRollbackMock( diff --git a/src/mongo/db/s/config_server_op_observer.cpp b/src/mongo/db/s/config_server_op_observer.cpp index eefbe2e3991..cdf45de44f5 100644 --- a/src/mongo/db/s/config_server_op_observer.cpp +++ b/src/mongo/db/s/config_server_op_observer.cpp @@ -65,7 +65,8 @@ void ConfigServerOpObserver::onDelete(OperationContext* opCtx, repl::OpTime ConfigServerOpObserver::onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { if (collectionName == VersionType::ConfigNS) { if (!repl::ReplicationCoordinator::get(opCtx)->getMemberState().rollback()) { uasserted(40303, "cannot drop config.version document while in --configsvr mode"); diff --git a/src/mongo/db/s/config_server_op_observer.h b/src/mongo/db/s/config_server_op_observer.h index 0f0e11ba04f..458d9627955 100644 --- a/src/mongo/db/s/config_server_op_observer.h +++ b/src/mongo/db/s/config_server_op_observer.h @@ -96,7 +96,8 @@ public: repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) override; + OptionalCollectionUUID uuid, + CollectionDropType dropType) override; void onDropIndex(OperationContext* opCtx, const NamespaceString& nss, diff --git a/src/mongo/db/s/shard_server_op_observer.cpp b/src/mongo/db/s/shard_server_op_observer.cpp index dd50e93ab58..4330da209bc 100644 --- a/src/mongo/db/s/shard_server_op_observer.cpp +++ b/src/mongo/db/s/shard_server_op_observer.cpp @@ -391,7 +391,8 @@ void ShardServerOpObserver::onDelete(OperationContext* opCtx, repl::OpTime ShardServerOpObserver::onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) { + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { if (collectionName == NamespaceString::kServerConfigurationNamespace) { // Dropping system collections is not allowed for end users invariant(!opCtx->writesAreReplicated()); diff --git a/src/mongo/db/s/shard_server_op_observer.h b/src/mongo/db/s/shard_server_op_observer.h index 88a9677389f..61e0f0dfca0 100644 --- a/src/mongo/db/s/shard_server_op_observer.h +++ b/src/mongo/db/s/shard_server_op_observer.h @@ -97,7 +97,8 @@ public: repl::OpTime onDropCollection(OperationContext* opCtx, const NamespaceString& collectionName, - OptionalCollectionUUID uuid) override; + OptionalCollectionUUID uuid, + CollectionDropType dropType) override; void onDropIndex(OperationContext* opCtx, const NamespaceString& nss, diff --git a/src/mongo/db/storage/kv/kv_database_catalog_entry_base.cpp b/src/mongo/db/storage/kv/kv_database_catalog_entry_base.cpp index 88713c1d224..a1214f0c6bb 100644 --- a/src/mongo/db/storage/kv/kv_database_catalog_entry_base.cpp +++ b/src/mongo/db/storage/kv/kv_database_catalog_entry_base.cpp @@ -117,6 +117,32 @@ public: const bool _dropOnCommit; }; +class KVDatabaseCatalogEntryBase::RenameCollectionChange final : public RecoveryUnit::Change { +public: + RenameCollectionChange(KVDatabaseCatalogEntryBase* dce, + KVCollectionCatalogEntry* coll, + NamespaceString fromNs, + NamespaceString toNs) + : _dce(dce), _coll(coll), _fromNs(std::move(fromNs)), _toNs(std::move(toNs)) {} + + void commit(boost::optional) override {} + + void rollback() override { + auto it = _dce->_collections.find(_toNs.ns()); + invariant(it != _dce->_collections.end()); + invariant(it->second == _coll); + _dce->_collections[_fromNs.ns()] = _coll; + _dce->_collections.erase(it); + _coll->setNs(_fromNs); + } + +private: + KVDatabaseCatalogEntryBase* const _dce; + KVCollectionCatalogEntry* const _coll; + const NamespaceString _fromNs; + const NamespaceString _toNs; +}; + KVDatabaseCatalogEntryBase::KVDatabaseCatalogEntryBase(StringData db, KVStorageEngine* engine) : DatabaseCatalogEntry(db), _engine(engine) {} @@ -309,31 +335,27 @@ Status KVDatabaseCatalogEntryBase::renameCollection(OperationContext* opCtx, return status; const std::string identTo = _engine->getCatalog()->getCollectionIdent(toNS); - invariant(identFrom == identTo); - BSONCollectionCatalogEntry::MetaData md = _engine->getCatalog()->getMetaData(opCtx, toNS); - - opCtx->recoveryUnit()->registerChange( - new AddCollectionChange(opCtx, this, toNS, identTo, false)); - - auto rs = - _engine->getEngine()->getGroupedRecordStore(opCtx, toNS, identTo, md.options, md.prefix); - // Add the destination collection to _collections before erasing the source collection. This // is to ensure that _collections doesn't erroneously appear empty during listDatabases if // a database consists of a single collection and that collection gets renamed (see // SERVER-34531). There is no locking to prevent listDatabases from looking into // _collections as a rename is taking place. - _collections[toNS.toString()] = new KVCollectionCatalogEntry( - _engine->getEngine(), _engine->getCatalog(), toNS, identTo, std::move(rs)); - - const CollectionMap::iterator itFrom = _collections.find(fromNS.toString()); + auto itFrom = _collections.find(fromNS.toString()); invariant(itFrom != _collections.end()); - opCtx->recoveryUnit()->registerChange( - new RemoveCollectionChange(opCtx, this, fromNS, identFrom, itFrom->second, false)); + auto* collectionCatalogEntry = itFrom->second; + invariant(collectionCatalogEntry); + _collections[toNS.toString()] = collectionCatalogEntry; _collections.erase(itFrom); + collectionCatalogEntry->setNs(NamespaceString{toNS}); + + // Register a Change which, on rollback, will reinstall the collection catalog entry in the + // collections map so that it is associated with 'fromNS', not 'toNS'. + opCtx->recoveryUnit()->registerChange(new RenameCollectionChange( + this, collectionCatalogEntry, NamespaceString{fromNS}, NamespaceString{toNS})); + return Status::OK(); } diff --git a/src/mongo/db/storage/kv/kv_database_catalog_entry_base.h b/src/mongo/db/storage/kv/kv_database_catalog_entry_base.h index b245480ba31..cdce94f2209 100644 --- a/src/mongo/db/storage/kv/kv_database_catalog_entry_base.h +++ b/src/mongo/db/storage/kv/kv_database_catalog_entry_base.h @@ -90,6 +90,7 @@ public: protected: class AddCollectionChange; class RemoveCollectionChange; + class RenameCollectionChange; typedef std::map CollectionMap; diff --git a/src/mongo/db/storage/record_store.h b/src/mongo/db/storage/record_store.h index 5d75ac672c7..77742b69077 100644 --- a/src/mongo/db/storage/record_store.h +++ b/src/mongo/db/storage/record_store.h @@ -37,6 +37,7 @@ #include "mongo/base/owned_pointer_vector.h" #include "mongo/bson/mutable/damage_vector.h" #include "mongo/db/exec/collection_scan_common.h" +#include "mongo/db/namespace_string.h" #include "mongo/db/record_id.h" #include "mongo/db/storage/record_data.h" @@ -47,7 +48,6 @@ class Collection; struct CompactOptions; struct CompactStats; class MAdvise; -class NamespaceDetails; class OperationContext; class RecordStoreCompactAdaptor; @@ -250,6 +250,10 @@ public: return _ns; } + void setNs(NamespaceString ns) { + _ns = ns.ns(); + } + virtual const std::string& getIdent() const = 0; /** diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp index 1de88805a67..2e518d69816 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp @@ -209,7 +209,7 @@ StatusWith WiredTigerIndex::generateCreateString(const std::string& // Raise an error about unrecognized fields that may be introduced in newer versions of // this storage engine. // Ensure that 'configString' field is a string. Raise an error if this is not the case. - BSONElement storageEngineElement = desc.getInfoElement("storageEngine"); + BSONElement storageEngineElement = desc.infoObj()["storageEngine"]; if (storageEngineElement.isABSONObj()) { BSONObj storageEngine = storageEngineElement.Obj(); StatusWith parseStatus = diff --git a/src/mongo/db/transaction_participant_retryable_writes_test.cpp b/src/mongo/db/transaction_participant_retryable_writes_test.cpp index 56af96bf0ab..51bfc5188d2 100644 --- a/src/mongo/db/transaction_participant_retryable_writes_test.cpp +++ b/src/mongo/db/transaction_participant_retryable_writes_test.cpp @@ -121,6 +121,20 @@ public: onTransactionCommitFn = [this](boost::optional commitOplogEntryOpTime, boost::optional commitTimestamp) { transactionCommitted = true; }; + + repl::OpTime onDropCollection(OperationContext* opCtx, + const NamespaceString& collectionName, + OptionalCollectionUUID uuid, + const CollectionDropType dropType) override { + // If the oplog is not disabled for this namespace, then we need to reserve an op time for + // the drop. + if (!repl::ReplicationCoordinator::get(opCtx)->isOplogDisabledFor(opCtx, collectionName)) { + OpObserver::Times::get(opCtx).reservedOpTimes.push_back(dropOpTime); + } + return {}; + } + + const repl::OpTime dropOpTime = {Timestamp(Seconds(100), 1U), 1LL}; }; class TransactionParticipantRetryableWritesTest : public MockReplCoordServerFixture { diff --git a/src/mongo/db/transaction_participant_test.cpp b/src/mongo/db/transaction_participant_test.cpp index 620680f03aa..f2c64b5afc5 100644 --- a/src/mongo/db/transaction_participant_test.cpp +++ b/src/mongo/db/transaction_participant_test.cpp @@ -111,6 +111,13 @@ public: bool onTransactionAbortThrowsException = false; bool transactionAborted = false; stdx::function onTransactionAbortFn = []() {}; + + repl::OpTime onDropCollection(OperationContext* opCtx, + const NamespaceString& collectionName, + OptionalCollectionUUID uuid, + CollectionDropType dropType) override; + + const repl::OpTime dropOpTime = {Timestamp(Seconds(100), 1U), 1LL}; }; void OpObserverMock::onTransactionPrepare(OperationContext* opCtx, const OplogSlot& prepareOpTime) { @@ -155,6 +162,18 @@ void OpObserverMock::onTransactionAbort(OperationContext* opCtx, onTransactionAbortFn(); } +repl::OpTime OpObserverMock::onDropCollection(OperationContext* opCtx, + const NamespaceString& collectionName, + OptionalCollectionUUID uuid, + const CollectionDropType dropType) { + // If the oplog is not disabled for this namespace, then we need to reserve an op time for the + // drop. + if (!repl::ReplicationCoordinator::get(opCtx)->isOplogDisabledFor(opCtx, collectionName)) { + OpObserver::Times::get(opCtx).reservedOpTimes.push_back(dropOpTime); + } + return {}; +} + // When this class is in scope, makes the system behave as if we're in a DBDirectClient class DirectClientSetter { public: -- cgit v1.2.1