diff options
28 files changed, 850 insertions, 383 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index 599cd7912cd..8c3a4adb825 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -506,7 +506,6 @@ env.Library( target='service_context', source=[ 'client.cpp', - 'multi_key_path_tracker.cpp', 'operation_context.cpp', 'operation_context_group.cpp', 'service_context.cpp', @@ -515,6 +514,7 @@ env.Library( ], LIBDEPS=[ '$BUILD_DIR/mongo/db/logical_session_id', + '$BUILD_DIR/mongo/db/multi_key_path_tracker', '$BUILD_DIR/mongo/db/storage/write_unit_of_work', '$BUILD_DIR/mongo/transport/transport_layer_common', '$BUILD_DIR/mongo/util/clock_sources', @@ -540,6 +540,16 @@ env.CppUnitTest( ) env.Library( + target='multi_key_path_tracker', + source=[ + 'multi_key_path_tracker.cpp', + ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/base', + ], +) + +env.Library( target='lasterror', source=[ "lasterror.cpp", diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript index 30c58486dde..d678985f150 100644 --- a/src/mongo/db/catalog/SConscript +++ b/src/mongo/db/catalog/SConscript @@ -273,6 +273,7 @@ env.Library( "collection_info_cache_impl.cpp", "database_holder_impl.cpp", "database_impl.cpp", + "index_build_block.cpp", "index_catalog_entry_impl.cpp", "index_catalog_impl.cpp", "index_consistency.cpp", @@ -309,7 +310,8 @@ env.Library( '$BUILD_DIR/mongo/db/views/views_mongod', ], LIBDEPS_PRIVATE=[ - "$BUILD_DIR/mongo/db/commands/server_status_core", + '$BUILD_DIR/mongo/db/commands/server_status_core', + '$BUILD_DIR/mongo/db/index/index_build_interceptor', '$BUILD_DIR/mongo/db/logical_clock', '$BUILD_DIR/mongo/db/repl/repl_settings', '$BUILD_DIR/mongo/db/storage/storage_engine_common', diff --git a/src/mongo/db/catalog/collection.h b/src/mongo/db/catalog/collection.h index 79a987efdef..831321f00ac 100644 --- a/src/mongo/db/catalog/collection.h +++ b/src/mongo/db/catalog/collection.h @@ -338,6 +338,8 @@ public: OperationContext* opCtx, PlanExecutor::YieldPolicy yieldPolicy, ScanDirection 
scanDirection) = 0; + + virtual void indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) = 0; }; public: @@ -734,6 +736,10 @@ public: return this->_impl().makePlanExecutor(opCtx, yieldPolicy, scanDirection); } + inline void indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) { + return this->_impl().indexBuildSuccess(opCtx, index); + } + private: inline DatabaseCatalogEntry* dbce() const { return this->_impl().dbce(); diff --git a/src/mongo/db/catalog/collection_compact.cpp b/src/mongo/db/catalog/collection_compact.cpp index 5cd68625468..9eec0b1e8cb 100644 --- a/src/mongo/db/catalog/collection_compact.cpp +++ b/src/mongo/db/catalog/collection_compact.cpp @@ -67,13 +67,15 @@ StatusWith<CompactStats> compactCollection(OperationContext* opCtx, return StatusWith<CompactStats>(status); // Compact all indexes (not including unfinished indexes) - IndexCatalog::IndexIterator ii(indexCatalog->getIndexIterator(opCtx, false)); - while (ii.more()) { - IndexDescriptor* descriptor = ii.next(); - IndexAccessMethod* index = indexCatalog->getIndex(descriptor); + std::unique_ptr<IndexCatalog::IndexIterator> ii( + indexCatalog->getIndexIterator(opCtx, false)); + while (ii->more()) { + IndexCatalogEntry* entry = ii->next(); + IndexDescriptor* descriptor = entry->descriptor(); + IndexAccessMethod* iam = entry->accessMethod(); LOG(1) << "compacting index: " << descriptor->toString(); - Status status = index->compact(opCtx); + Status status = iam->compact(opCtx); if (!status.isOK()) { error() << "failed to compact index: " << descriptor->toString(); return status; @@ -89,9 +91,10 @@ StatusWith<CompactStats> compactCollection(OperationContext* opCtx, std::vector<BSONObj> indexSpecs; { - IndexCatalog::IndexIterator ii(indexCatalog->getIndexIterator(opCtx, false)); - while (ii.more()) { - IndexDescriptor* descriptor = ii.next(); + std::unique_ptr<IndexCatalog::IndexIterator> ii( + indexCatalog->getIndexIterator(opCtx, false)); + while (ii->more()) { + 
IndexDescriptor* descriptor = ii->next()->descriptor(); // Compact always creates the new index in the foreground. const BSONObj spec = diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp index 745f0203c1b..992707c4f73 100644 --- a/src/mongo/db/catalog/collection_impl.cpp +++ b/src/mongo/db/catalog/collection_impl.cpp @@ -660,11 +660,12 @@ RecordId CollectionImpl::updateDocument(OperationContext* opCtx, // newDoc. OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets; if (indexesAffected) { - IndexCatalog::IndexIterator ii = _indexCatalog->getIndexIterator(opCtx, true); - while (ii.more()) { - IndexDescriptor* descriptor = ii.next(); - IndexCatalogEntry* entry = ii.catalogEntry(descriptor); - IndexAccessMethod* iam = ii.accessMethod(descriptor); + std::unique_ptr<IndexCatalog::IndexIterator> ii = + _indexCatalog->getIndexIterator(opCtx, true); + while (ii->more()) { + IndexCatalogEntry* entry = ii->next(); + IndexDescriptor* descriptor = entry->descriptor(); + IndexAccessMethod* iam = entry->accessMethod(); InsertDeleteOptions options; _indexCatalog->prepareInsertDeleteOptions(opCtx, descriptor, &options); @@ -687,10 +688,12 @@ RecordId CollectionImpl::updateDocument(OperationContext* opCtx, // Update each index with each respective UpdateTicket. 
if (indexesAffected) { - IndexCatalog::IndexIterator ii = _indexCatalog->getIndexIterator(opCtx, true); - while (ii.more()) { - IndexDescriptor* descriptor = ii.next(); - IndexAccessMethod* iam = ii.accessMethod(descriptor); + std::unique_ptr<IndexCatalog::IndexIterator> ii = + _indexCatalog->getIndexIterator(opCtx, true); + while (ii->more()) { + IndexCatalogEntry* entry = ii->next(); + IndexDescriptor* descriptor = entry->descriptor(); + IndexAccessMethod* iam = entry->accessMethod(); int64_t keysInserted; int64_t keysDeleted; @@ -764,19 +767,20 @@ uint64_t CollectionImpl::dataSize(OperationContext* opCtx) const { uint64_t CollectionImpl::getIndexSize(OperationContext* opCtx, BSONObjBuilder* details, int scale) { IndexCatalog* idxCatalog = getIndexCatalog(); - IndexCatalog::IndexIterator ii = idxCatalog->getIndexIterator(opCtx, true); + std::unique_ptr<IndexCatalog::IndexIterator> ii = idxCatalog->getIndexIterator(opCtx, true); uint64_t totalSize = 0; - while (ii.more()) { - IndexDescriptor* d = ii.next(); - IndexAccessMethod* iam = idxCatalog->getIndex(d); + while (ii->more()) { + IndexCatalogEntry* entry = ii->next(); + IndexDescriptor* descriptor = entry->descriptor(); + IndexAccessMethod* iam = entry->accessMethod(); long long ds = iam->getSpaceUsedBytes(opCtx); totalSize += ds; if (details) { - details->appendNumber(d->indexName(), ds / scale); + details->appendNumber(descriptor->indexName(), ds / scale); } } @@ -798,9 +802,10 @@ Status CollectionImpl::truncate(OperationContext* opCtx) { // 1) store index specs vector<BSONObj> indexSpecs; { - IndexCatalog::IndexIterator ii = _indexCatalog->getIndexIterator(opCtx, false); - while (ii.more()) { - const IndexDescriptor* idx = ii.next(); + std::unique_ptr<IndexCatalog::IndexIterator> ii = + _indexCatalog->getIndexIterator(opCtx, false); + while (ii->more()) { + const IndexDescriptor* idx = ii->next()->descriptor(); indexSpecs.push_back(idx->infoObj().getOwned()); } } @@ -1031,14 +1036,16 @@ void 
_validateIndexes(OperationContext* opCtx, ValidateResultsMap* indexNsResultsMap, ValidateResults* results) { - IndexCatalog::IndexIterator i = indexCatalog->getIndexIterator(opCtx, false); + std::unique_ptr<IndexCatalog::IndexIterator> it = indexCatalog->getIndexIterator(opCtx, false); // Validate Indexes. - while (i.more()) { + while (it->more()) { opCtx->checkForInterrupt(); - const IndexDescriptor* descriptor = i.next(); + IndexCatalogEntry* entry = it->next(); + IndexDescriptor* descriptor = entry->descriptor(); + IndexAccessMethod* iam = entry->accessMethod(); + log(LogComponent::kIndex) << "validating index " << descriptor->indexNamespace() << endl; - IndexAccessMethod* iam = indexCatalog->getIndex(descriptor); ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexNamespace()]; bool checkCounts = false; int64_t numTraversedKeys; @@ -1095,9 +1102,10 @@ void _validateIndexKeyCount(OperationContext* opCtx, RecordStoreValidateAdaptor* indexValidator, ValidateResultsMap* indexNsResultsMap) { - IndexCatalog::IndexIterator indexIterator = indexCatalog->getIndexIterator(opCtx, false); - while (indexIterator.more()) { - IndexDescriptor* descriptor = indexIterator.next(); + std::unique_ptr<IndexCatalog::IndexIterator> indexIterator = + indexCatalog->getIndexIterator(opCtx, false); + while (indexIterator->more()) { + IndexDescriptor* descriptor = indexIterator->next()->descriptor(); ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexNamespace()]; if (curIndexResults.valid) { @@ -1287,10 +1295,11 @@ Status CollectionImpl::touch(OperationContext* opCtx, if (touchIndexes) { Timer t; - IndexCatalog::IndexIterator ii = _indexCatalog->getIndexIterator(opCtx, false); - while (ii.more()) { - const IndexDescriptor* desc = ii.next(); - const IndexAccessMethod* iam = _indexCatalog->getIndex(desc); + std::unique_ptr<IndexCatalog::IndexIterator> ii = + _indexCatalog->getIndexIterator(opCtx, false); + while (ii->more()) { + IndexCatalogEntry* 
entry = ii->next(); + IndexAccessMethod* iam = entry->accessMethod(); Status status = iam->touch(opCtx); if (!status.isOK()) return status; @@ -1326,4 +1335,8 @@ void CollectionImpl::setNs(NamespaceString nss) { _cursorManager = std::make_unique<CursorManager>(_ns); } +void CollectionImpl::indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) { + _details->indexBuildSuccess(opCtx, index->descriptor()->indexName()); + _indexCatalog->indexBuildSuccess(opCtx, index); +} } // namespace mongo diff --git a/src/mongo/db/catalog/collection_impl.h b/src/mongo/db/catalog/collection_impl.h index 69c768f4118..e33f77d91c9 100644 --- a/src/mongo/db/catalog/collection_impl.h +++ b/src/mongo/db/catalog/collection_impl.h @@ -365,6 +365,8 @@ public: PlanExecutor::YieldPolicy yieldPolicy, ScanDirection scanDirection) final; + void indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) final; + private: inline DatabaseCatalogEntry* dbce() const final { return this->_dbce; diff --git a/src/mongo/db/catalog/collection_info_cache_impl.cpp b/src/mongo/db/catalog/collection_info_cache_impl.cpp index 9223cba45b8..55a1d838642 100644 --- a/src/mongo/db/catalog/collection_info_cache_impl.cpp +++ b/src/mongo/db/catalog/collection_info_cache_impl.cpp @@ -88,14 +88,16 @@ void CollectionInfoCacheImpl::computeIndexKeys(OperationContext* opCtx) { bool hadTTLIndex = _hasTTLIndex; _hasTTLIndex = false; - IndexCatalog::IndexIterator i = _collection->getIndexCatalog()->getIndexIterator(opCtx, true); - while (i.more()) { - IndexDescriptor* descriptor = i.next(); + std::unique_ptr<IndexCatalog::IndexIterator> it = + _collection->getIndexCatalog()->getIndexIterator(opCtx, true); + while (it->more()) { + IndexCatalogEntry* entry = it->next(); + IndexDescriptor* descriptor = entry->descriptor(); + IndexAccessMethod* iam = entry->accessMethod(); if (descriptor->getAccessMethodName() == IndexNames::WILDCARD) { // Obtain the projection used by the $** index's key generator. 
- const auto* pathProj = - static_cast<WildcardAccessMethod*>(i.accessMethod(descriptor))->getProjectionExec(); + const auto* pathProj = static_cast<WildcardAccessMethod*>(iam)->getProjectionExec(); // If the projection is an exclusion, then we must check the new document's keys on all // updates, since we do not exhaustively know the set of paths to be indexed. if (pathProj->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection) { @@ -142,7 +144,6 @@ void CollectionInfoCacheImpl::computeIndexKeys(OperationContext* opCtx) { } // handle partial indexes - const IndexCatalogEntry* entry = i.catalogEntry(descriptor); const MatchExpression* filter = entry->getFilterExpression(); if (filter) { stdx::unordered_set<std::string> paths; @@ -199,11 +200,10 @@ void CollectionInfoCacheImpl::updatePlanCacheIndexEntries(OperationContext* opCt // TODO We shouldn't need to include unfinished indexes, but we must here because the index // catalog may be in an inconsistent state. SERVER-18346. 
const bool includeUnfinishedIndexes = true; - IndexCatalog::IndexIterator ii = + std::unique_ptr<IndexCatalog::IndexIterator> ii = _collection->getIndexCatalog()->getIndexIterator(opCtx, includeUnfinishedIndexes); - while (ii.more()) { - const IndexDescriptor* desc = ii.next(); - const IndexCatalogEntry* ice = ii.catalogEntry(desc); + while (ii->more()) { + const IndexCatalogEntry* ice = ii->next(); indexEntries.emplace_back(indexEntryFromIndexCatalogEntry(opCtx, *ice)); } @@ -215,10 +215,10 @@ void CollectionInfoCacheImpl::init(OperationContext* opCtx) { invariant(opCtx->lockState()->isCollectionLockedForMode(_collection->ns().ns(), MODE_X)); const bool includeUnfinishedIndexes = false; - IndexCatalog::IndexIterator ii = + std::unique_ptr<IndexCatalog::IndexIterator> ii = _collection->getIndexCatalog()->getIndexIterator(opCtx, includeUnfinishedIndexes); - while (ii.more()) { - const IndexDescriptor* desc = ii.next(); + while (ii->more()) { + const IndexDescriptor* desc = ii->next()->descriptor(); _indexUsageTracker.registerIndex(desc->indexName(), desc->keyPattern()); } diff --git a/src/mongo/db/catalog/collection_mock.h b/src/mongo/db/catalog/collection_mock.h index e8b4e0013f9..35c0a9f8b55 100644 --- a/src/mongo/db/catalog/collection_mock.h +++ b/src/mongo/db/catalog/collection_mock.h @@ -290,5 +290,9 @@ public: OptionalCollectionUUID uuid() const { std::abort(); } + + void indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) { + std::abort(); + } }; } // namespace mongo diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp index 92bb0636f57..bcf513d3304 100644 --- a/src/mongo/db/catalog/database_impl.cpp +++ b/src/mongo/db/catalog/database_impl.cpp @@ -578,8 +578,8 @@ Status DatabaseImpl::dropCollectionEvenIfSystem(OperationContext* opCtx, // Determine which index names are too long. Since we don't have the collection drop optime // at this time, use the maximum optime to check the index names. 
auto longDpns = fullns.makeDropPendingNamespace(repl::OpTime::max()); - while (indexIter.more()) { - auto index = indexIter.next(); + while (indexIter->more()) { + auto index = indexIter->next()->descriptor(); auto status = longDpns.checkLengthForRename(index->indexName().size()); if (!status.isOK()) { indexesToDrop.push_back(index); diff --git a/src/mongo/db/catalog/index_build_block.cpp b/src/mongo/db/catalog/index_build_block.cpp new file mode 100644 index 00000000000..016890de388 --- /dev/null +++ b/src/mongo/db/catalog/index_build_block.cpp @@ -0,0 +1,178 @@ +/** + * Copyright (C) 2018-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. 
If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex + +#include "mongo/platform/basic.h" + +#include "mongo/db/catalog/index_catalog_impl.h" + +#include <vector> + +#include "mongo/db/catalog/collection.h" +#include "mongo/db/catalog/collection_catalog_entry.h" +#include "mongo/db/catalog_raii.h" +#include "mongo/db/db_raii.h" +#include "mongo/db/logical_clock.h" +#include "mongo/db/operation_context.h" +#include "mongo/util/assert_util.h" +#include "mongo/util/log.h" + +namespace mongo { +IndexCatalogImpl::IndexBuildBlock::IndexBuildBlock(OperationContext* opCtx, + Collection* collection, + IndexCatalogImpl* catalog, + const BSONObj& spec) + : _collection(collection), + _catalog(catalog), + _ns(_collection->ns().ns()), + _spec(spec.getOwned()), + _entry(nullptr), + _opCtx(opCtx) { + invariant(collection); +} + +Status IndexCatalogImpl::IndexBuildBlock::init() { + // Being in a WUOW means all timestamping responsibility can be pushed up to the caller. + invariant(_opCtx->lockState()->inAWriteUnitOfWork()); + + // need this first for names, etc... + BSONObj keyPattern = _spec.getObjectField("key"); + auto descriptor = stdx::make_unique<IndexDescriptor>( + _collection, IndexNames::findPluginName(keyPattern), _spec); + + _indexName = descriptor->indexName(); + _indexNamespace = descriptor->indexNamespace(); + + bool isBackgroundIndex = _spec["background"].trueValue(); + bool isBackgroundSecondaryBuild = false; + if (auto replCoord = repl::ReplicationCoordinator::get(_opCtx)) { + isBackgroundSecondaryBuild = + replCoord->getReplicationMode() == repl::ReplicationCoordinator::Mode::modeReplSet && + replCoord->getMemberState().secondary() && isBackgroundIndex; + } + + // Setup on-disk structures. 
+ Status status = _collection->getCatalogEntry()->prepareForIndexBuild( + _opCtx, descriptor.get(), isBackgroundSecondaryBuild); + if (!status.isOK()) + return status; + + auto* const descriptorPtr = descriptor.get(); + const bool initFromDisk = false; + const bool isReadyIndex = false; + _entry = _catalog->_setupInMemoryStructures( + _opCtx, std::move(descriptor), initFromDisk, isReadyIndex); + + if (isBackgroundIndex) { + _indexBuildInterceptor = stdx::make_unique<IndexBuildInterceptor>(); + _indexBuildInterceptor->ensureSideWritesCollectionExists(_opCtx); + _entry->setIndexBuildInterceptor(_indexBuildInterceptor.get()); + + _opCtx->recoveryUnit()->onCommit( + [ opCtx = _opCtx, entry = _entry, collection = _collection ]( + boost::optional<Timestamp> commitTime) { + // This will prevent the unfinished index from being visible on index iterators. + if (commitTime) { + entry->setMinimumVisibleSnapshot(commitTime.get()); + collection->setMinimumVisibleSnapshot(commitTime.get()); + } + }); + } + + // Register this index with the CollectionInfoCache to regenerate the cache. This way, updates + // occurring while an index is being built in the background will be aware of whether or not + // they need to modify any indexes. + _collection->infoCache()->addedIndex(_opCtx, descriptorPtr); + + return Status::OK(); +} + +IndexCatalogImpl::IndexBuildBlock::~IndexBuildBlock() { + // Don't need to call fail() here, as rollback will clean everything up for us. +} + +void IndexCatalogImpl::IndexBuildBlock::fail() { + // Being in a WUOW means all timestamping responsibility can be pushed up to the caller. 
+ invariant(_opCtx->lockState()->inAWriteUnitOfWork()); + fassert(17204, _collection->ok()); // defensive + + NamespaceString ns(_indexNamespace); + invariant(_opCtx->lockState()->isDbLockedForMode(ns.db(), MODE_X)); + + if (_entry) { + invariant(_catalog->_dropIndex(_opCtx, _entry).isOK()); + if (_indexBuildInterceptor) { + _indexBuildInterceptor->removeSideWritesCollection(_opCtx); + _entry->setIndexBuildInterceptor(nullptr); + } + } else { + _catalog->_deleteIndexFromDisk(_opCtx, _indexName, _indexNamespace); + } +} + +void IndexCatalogImpl::IndexBuildBlock::success() { + // Being in a WUOW means all timestamping responsibility can be pushed up to the caller. + invariant(_opCtx->lockState()->inAWriteUnitOfWork()); + + fassert(17207, _collection->ok()); + NamespaceString ns(_indexNamespace); + invariant(_opCtx->lockState()->isDbLockedForMode(ns.db(), MODE_X)); + + _collection->indexBuildSuccess(_opCtx, _entry); + + OperationContext* opCtx = _opCtx; + LOG(2) << "marking index " << _indexName << " as ready in snapshot id " + << opCtx->recoveryUnit()->getSnapshotId(); + _opCtx->recoveryUnit()->onCommit( + [ opCtx, entry = _entry, collection = _collection ](boost::optional<Timestamp> commitTime) { + // Note: this runs after the WUOW commits but before we release our X lock on the + // collection. This means that any snapshot created after this must include the full + // index, and no one can try to read this index before we set the visibility. + if (!commitTime) { + // The end of background index builds on secondaries does not get a commit + // timestamp. We use the cluster time since it's guaranteed to be greater than the + // time of the index build. It is possible the cluster time could be in the future, + // and we will need to do another write to reach the minimum visible snapshot. 
+ commitTime = LogicalClock::getClusterTimeForReplicaSet(opCtx).asTimestamp(); + } + entry->setMinimumVisibleSnapshot(commitTime.get()); + // We must also set the minimum visible snapshot on the collection like during init(). + // This prevents reads in the past from reading inconsistent metadata. We should be + // able to remove this when the catalog is versioned. + collection->setMinimumVisibleSnapshot(commitTime.get()); + }); + + _entry->setIsReady(true); + if (_indexBuildInterceptor) { + _indexBuildInterceptor->removeSideWritesCollection(_opCtx); + _entry->setIndexBuildInterceptor(nullptr); + } +} +} // namespace mongo diff --git a/src/mongo/db/catalog/index_catalog.cpp b/src/mongo/db/catalog/index_catalog.cpp index ab704d555e4..7f68b7ef77c 100644 --- a/src/mongo/db/catalog/index_catalog.cpp +++ b/src/mongo/db/catalog/index_catalog.cpp @@ -37,69 +37,66 @@ namespace mongo { +using IndexIterator = IndexCatalog::IndexIterator; +using ReadyIndexesIterator = IndexCatalog::ReadyIndexesIterator; +using AllIndexesIterator = IndexCatalog::AllIndexesIterator; -IndexCatalog::IndexIterator::IndexIterator(OperationContext* opCtx, - IndexCatalogEntryContainer::const_iterator beginIterator, - IndexCatalogEntryContainer::const_iterator endIterator, - bool includeUnfinishedIndexes) - : _includeUnfinishedIndexes(includeUnfinishedIndexes), - _opCtx(opCtx), - _iterator(beginIterator), - _endIterator(endIterator), - _start(true), - _prev(nullptr), - _next(nullptr) {} - -bool IndexCatalog::IndexIterator::more() { +bool IndexIterator::more() { if (_start) { - _advance(); + _next = _advance(); _start = false; } return _next != nullptr; } -IndexDescriptor* IndexCatalog::IndexIterator::next() { +IndexCatalogEntry* IndexIterator::next() { if (!more()) return nullptr; _prev = _next; - _advance(); - return _prev->descriptor(); -} - -IndexAccessMethod* IndexCatalog::IndexIterator::accessMethod(const IndexDescriptor* desc) { - invariant(desc == _prev->descriptor()); - return 
_prev->accessMethod(); -} - -IndexCatalogEntry* IndexCatalog::IndexIterator::catalogEntry(const IndexDescriptor* desc) { - invariant(desc == _prev->descriptor()); + _next = _advance(); return _prev; } -void IndexCatalog::IndexIterator::_advance() { - _next = nullptr; +ReadyIndexesIterator::ReadyIndexesIterator(OperationContext* const opCtx, + IndexCatalogEntryContainer::const_iterator beginIterator, + IndexCatalogEntryContainer::const_iterator endIterator) + : _opCtx(opCtx), _iterator(beginIterator), _endIterator(endIterator) {} +IndexCatalogEntry* ReadyIndexesIterator::_advance() { while (_iterator != _endIterator) { IndexCatalogEntry* entry = _iterator->get(); ++_iterator; - if (!_includeUnfinishedIndexes) { - if (auto minSnapshot = entry->getMinimumVisibleSnapshot()) { - if (auto mySnapshot = _opCtx->recoveryUnit()->getPointInTimeReadTimestamp()) { - if (mySnapshot < minSnapshot) { - // This index isn't finished in my snapshot. - continue; - } + if (auto minSnapshot = entry->getMinimumVisibleSnapshot()) { + if (auto mySnapshot = _opCtx->recoveryUnit()->getPointInTimeReadTimestamp()) { + if (mySnapshot < minSnapshot) { + // This index isn't finished in my snapshot. + continue; } } - - if (!entry->isReady(_opCtx)) - continue; } - _next = entry; - return; + return entry; } + + return nullptr; +} + +AllIndexesIterator::AllIndexesIterator( + OperationContext* const opCtx, std::unique_ptr<std::vector<IndexCatalogEntry*>> ownedContainer) + : _opCtx(opCtx), _ownedContainer(std::move(ownedContainer)) { + // Explicitly order calls onto the ownedContainer with respect to its move. 
+ _iterator = _ownedContainer->begin(); + _endIterator = _ownedContainer->end(); } +IndexCatalogEntry* AllIndexesIterator::_advance() { + if (_iterator == _endIterator) { + return nullptr; + } + + IndexCatalogEntry* entry = *_iterator; + ++_iterator; + return entry; +} } // namespace mongo diff --git a/src/mongo/db/catalog/index_catalog.h b/src/mongo/db/catalog/index_catalog.h index 73667f5fa36..eadb7a56733 100644 --- a/src/mongo/db/catalog/index_catalog.h +++ b/src/mongo/db/catalog/index_catalog.h @@ -78,47 +78,54 @@ class IndexCatalog { public: class IndexIterator { public: - explicit IndexIterator(OperationContext* const opCtx, - IndexCatalogEntryContainer::const_iterator beginIterator, - IndexCatalogEntryContainer::const_iterator endIterator, - const bool includeUnfinishedIndexes); - - public: - inline ~IndexIterator() = default; - - inline IndexIterator(const IndexIterator& copy) = default; - inline IndexIterator& operator=(const IndexIterator& copy) = default; - - inline IndexIterator(IndexIterator&& copy) = default; - inline IndexIterator& operator=(IndexIterator&& copy) = default; - + virtual ~IndexIterator() = default; bool more(); + IndexCatalogEntry* next(); - IndexDescriptor* next(); - + protected: /** - * Returns the access method for the last return IndexDescriptor. + * Advances the underlying iterator and returns the next index entry. Returns nullptr when + * the iterator is exhausted. */ - IndexAccessMethod* accessMethod(const IndexDescriptor* const desc); - - /** - * Returns the IndexCatalogEntry for the last return IndexDescriptor. 
- */ - IndexCatalogEntry* catalogEntry(const IndexDescriptor* const desc); + virtual IndexCatalogEntry* _advance() = 0; private: - void _advance(); + bool _start = true; + IndexCatalogEntry* _prev = nullptr; + IndexCatalogEntry* _next = nullptr; + }; + + class ReadyIndexesIterator : public IndexIterator { + public: + ReadyIndexesIterator(OperationContext* const opCtx, + IndexCatalogEntryContainer::const_iterator beginIterator, + IndexCatalogEntryContainer::const_iterator endIterator); - bool _includeUnfinishedIndexes; + private: + IndexCatalogEntry* _advance() override; OperationContext* const _opCtx; IndexCatalogEntryContainer::const_iterator _iterator; IndexCatalogEntryContainer::const_iterator _endIterator; + }; - bool _start; // only true before we've called next() or more() + class AllIndexesIterator : public IndexIterator { + public: + /** + * `ownedContainer` is the container that the begin and end iterators point into. This + * iterator takes ownership of it so that it outlives the iteration. It must not be null, + * because the constructor dereferences it unconditionally. + */ + AllIndexesIterator(OperationContext* const opCtx, + std::unique_ptr<std::vector<IndexCatalogEntry*>> ownedContainer); - IndexCatalogEntry* _prev; - IndexCatalogEntry* _next; + private: + IndexCatalogEntry* _advance() override; + + OperationContext* const _opCtx; + std::vector<IndexCatalogEntry*>::const_iterator _iterator; + std::vector<IndexCatalogEntry*>::const_iterator _endIterator; + std::unique_ptr<std::vector<IndexCatalogEntry*>> _ownedContainer; }; /** @@ -170,7 +177,6 @@ public: virtual const BSONObj& getSpec() const = 0; }; -public: IndexCatalog() = default; virtual ~IndexCatalog() = default; @@ -291,8 +297,8 @@ public: /** * Returns an iterator for the index descriptors in this IndexCatalog. 
*/ - virtual IndexIterator getIndexIterator(OperationContext* const opCtx, - const bool includeUnfinishedIndexes) const = 0; + virtual std::unique_ptr<IndexIterator> getIndexIterator( + OperationContext* const opCtx, const bool includeUnfinishedIndexes) const = 0; // ---- index set modifiers ------ @@ -396,6 +402,8 @@ public: InsertDeleteOptions* options) const = 0; virtual void setNs(NamespaceString ns) = 0; + + virtual void indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) = 0; }; } // namespace mongo diff --git a/src/mongo/db/catalog/index_catalog_entry.h b/src/mongo/db/catalog/index_catalog_entry.h index 3b43a07ee1c..eb54f065e7b 100644 --- a/src/mongo/db/catalog/index_catalog_entry.h +++ b/src/mongo/db/catalog/index_catalog_entry.h @@ -50,6 +50,7 @@ class CollectionCatalogEntry; class CollectionInfoCache; class HeadManager; class IndexAccessMethod; +class IndexBuildInterceptor; class IndexDescriptor; class MatchExpression; class OperationContext; @@ -74,6 +75,12 @@ public: virtual const IndexAccessMethod* accessMethod() const = 0; + virtual bool isBuilding() const = 0; + + virtual IndexBuildInterceptor* indexBuildInterceptor() = 0; + + virtual void setIndexBuildInterceptor(IndexBuildInterceptor* interceptor) = 0; + virtual const Ordering& ordering() const = 0; virtual const MatchExpression* getFilterExpression() const = 0; diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.h b/src/mongo/db/catalog/index_catalog_entry_impl.h index a4d800a8e5c..a5a53f2dff2 100644 --- a/src/mongo/db/catalog/index_catalog_entry_impl.h +++ b/src/mongo/db/catalog/index_catalog_entry_impl.h @@ -90,6 +90,18 @@ public: return _accessMethod.get(); } + bool isBuilding() const final { + return _indexBuildInterceptor != nullptr; + } + + IndexBuildInterceptor* indexBuildInterceptor() final { + return _indexBuildInterceptor; + } + + void setIndexBuildInterceptor(IndexBuildInterceptor* interceptor) final { + _indexBuildInterceptor = interceptor; + } + const Ordering& 
ordering() const final { return _ordering; } @@ -201,6 +213,8 @@ private: std::unique_ptr<IndexAccessMethod> _accessMethod; + IndexBuildInterceptor* _indexBuildInterceptor = nullptr; // not owned here + // Owned here. std::unique_ptr<HeadManager> _headManager; std::unique_ptr<CollatorInterface> _collator; diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp index 3e2ce124cd5..837ccfd7b4a 100644 --- a/src/mongo/db/catalog/index_catalog_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_impl.cpp @@ -119,8 +119,9 @@ Status IndexCatalogImpl::init(OperationContext* opCtx) { auto descriptor = stdx::make_unique<IndexDescriptor>(_collection, _getAccessMethodName(keyPattern), spec); const bool initFromDisk = true; + const bool isReadyIndex = true; IndexCatalogEntry* entry = - _setupInMemoryStructures(opCtx, std::move(descriptor), initFromDisk); + _setupInMemoryStructures(opCtx, std::move(descriptor), initFromDisk, isReadyIndex); fassert(17340, entry->isReady(opCtx)); } @@ -137,7 +138,10 @@ Status IndexCatalogImpl::init(OperationContext* opCtx) { } IndexCatalogEntry* IndexCatalogImpl::_setupInMemoryStructures( - OperationContext* opCtx, std::unique_ptr<IndexDescriptor> descriptor, bool initFromDisk) { + OperationContext* opCtx, + std::unique_ptr<IndexDescriptor> descriptor, + bool initFromDisk, + bool isReadyIndex) { Status status = _isSpecOk(opCtx, descriptor->infoObj()); if (!status.isOK() && status != ErrorCodes::IndexAlreadyExists) { severe() << "Found an invalid index " << descriptor->infoObj() << " on the " @@ -156,20 +160,26 @@ IndexCatalogEntry* IndexCatalogImpl::_setupInMemoryStructures( entry->init(std::move(accessMethod)); IndexCatalogEntry* save = entry.get(); - _entries.add(entry.release()); + if (isReadyIndex) { + _readyIndexes.add(entry.release()); + } else { + _buildingIndexes.add(entry.release()); + } if (!initFromDisk) { - opCtx->recoveryUnit()->onRollback([ this, opCtx, descriptor = descriptorPtr ] { - // 
Need to preserve indexName as descriptor no longer exists after remove(). - const std::string indexName = descriptor->indexName(); - _entries.remove(descriptor); - _collection->infoCache()->droppedIndex(opCtx, indexName); - }); + opCtx->recoveryUnit()->onRollback( + [ this, opCtx, isReadyIndex, descriptor = descriptorPtr ] { + // Need to preserve indexName as descriptor no longer exists after remove(). + const std::string indexName = descriptor->indexName(); + if (isReadyIndex) { + _readyIndexes.remove(descriptor); + } else { + _buildingIndexes.remove(descriptor); + } + _collection->infoCache()->droppedIndex(opCtx, indexName); + }); } - invariant(save == _entries.find(descriptorPtr)); - invariant(save == _entries.find(descriptorPtr->indexName())); - return save; } @@ -195,10 +205,31 @@ Status IndexCatalogImpl::checkUnfinished() const { << _collection->ns().ns()); } -IndexCatalog::IndexIterator IndexCatalogImpl::getIndexIterator( +std::unique_ptr<IndexCatalog::IndexIterator> IndexCatalogImpl::getIndexIterator( OperationContext* const opCtx, const bool includeUnfinishedIndexes) const { - return IndexIterator(opCtx, _entries.begin(), _entries.end(), includeUnfinishedIndexes); -}; + if (!includeUnfinishedIndexes) { + // If the caller only wants the ready indexes, we return an iterator over the catalog's + // ready indexes vector. When the user advances this iterator, it will filter out any + // indexes that were not ready at the OperationContext's read timestamp. + return std::make_unique<ReadyIndexesIterator>( + opCtx, _readyIndexes.begin(), _readyIndexes.end()); + } + + // If the caller wants all indexes, for simplicity of implementation, we copy the pointers to + // a new vector. The vector's ownership is passed to the iterator. The query code path from an + // external client is not expected to hit this case so the cost isn't paid by the important + // code path. 
+ auto allIndexes = std::make_unique<std::vector<IndexCatalogEntry*>>(); + for (auto it = _readyIndexes.begin(); it != _readyIndexes.end(); ++it) { + allIndexes->push_back(it->get()); + } + + for (auto it = _buildingIndexes.begin(); it != _buildingIndexes.end(); ++it) { + allIndexes->push_back(it->get()); + } + + return std::make_unique<AllIndexesIterator>(opCtx, std::move(allIndexes)); +} string IndexCatalogImpl::_getAccessMethodName(const BSONObj& keyPattern) const { string pluginName = IndexNames::findPluginName(keyPattern); @@ -272,7 +303,7 @@ StatusWith<BSONObj> IndexCatalogImpl::createIndexOnEmptyCollection(OperationCont invariant(entry); IndexDescriptor* descriptor = entry->descriptor(); invariant(descriptor); - invariant(entry == _entries.find(descriptor)); + invariant(entry == _buildingIndexes.find(descriptor)); status = entry->accessMethod()->initializeAsEmpty(opCtx); if (!status.isOK()) @@ -285,131 +316,6 @@ StatusWith<BSONObj> IndexCatalogImpl::createIndexOnEmptyCollection(OperationCont return spec; } -IndexCatalogImpl::IndexBuildBlock::IndexBuildBlock(OperationContext* opCtx, - Collection* collection, - IndexCatalogImpl* catalog, - const BSONObj& spec) - : _collection(collection), - _catalog(catalog), - _ns(_collection->ns().ns()), - _spec(spec.getOwned()), - _entry(nullptr), - _opCtx(opCtx) { - invariant(collection); -} - -Status IndexCatalogImpl::IndexBuildBlock::init() { - // Being in a WUOW means all timestamping responsibility can be pushed up to the caller. - invariant(_opCtx->lockState()->inAWriteUnitOfWork()); - - // need this first for names, etc... 
- BSONObj keyPattern = _spec.getObjectField("key"); - auto descriptor = stdx::make_unique<IndexDescriptor>( - _collection, IndexNames::findPluginName(keyPattern), _spec); - - _indexName = descriptor->indexName(); - _indexNamespace = descriptor->indexNamespace(); - - bool isBackgroundIndex = _spec["background"].trueValue(); - bool isBackgroundSecondaryBuild = false; - if (auto replCoord = repl::ReplicationCoordinator::get(_opCtx)) { - isBackgroundSecondaryBuild = - replCoord->getReplicationMode() == repl::ReplicationCoordinator::Mode::modeReplSet && - replCoord->getMemberState().secondary() && isBackgroundIndex; - } - - // Setup on-disk structures. - Status status = _collection->getCatalogEntry()->prepareForIndexBuild( - _opCtx, descriptor.get(), isBackgroundSecondaryBuild); - if (!status.isOK()) - return status; - - auto* const descriptorPtr = descriptor.get(); - const bool initFromDisk = false; - _entry = _catalog->_setupInMemoryStructures(_opCtx, std::move(descriptor), initFromDisk); - - if (isBackgroundIndex) { - _opCtx->recoveryUnit()->onCommit( - [ opCtx = _opCtx, entry = _entry, collection = _collection ]( - boost::optional<Timestamp> commitTime) { - // This will prevent the unfinished index from being visible on index iterators. - if (commitTime) { - entry->setMinimumVisibleSnapshot(commitTime.get()); - collection->setMinimumVisibleSnapshot(commitTime.get()); - } - }); - } - - // Register this index with the CollectionInfoCache to regenerate the cache. This way, updates - // occurring while an index is being build in the background will be aware of whether or not - // they need to modify any indexes. - _collection->infoCache()->addedIndex(_opCtx, descriptorPtr); - - return Status::OK(); -} - -IndexCatalogImpl::IndexBuildBlock::~IndexBuildBlock() { - // Don't need to call fail() here, as rollback will clean everything up for us. 
-} - -void IndexCatalogImpl::IndexBuildBlock::fail() { - // Being in a WUOW means all timestamping responsibility can be pushed up to the caller. - invariant(_opCtx->lockState()->inAWriteUnitOfWork()); - fassert(17204, _collection->ok()); // defensive - - NamespaceString ns(_indexNamespace); - invariant(_opCtx->lockState()->isDbLockedForMode(ns.db(), MODE_X)); - - IndexCatalogEntry* entry = _catalog->_entries.find(_indexName); - invariant(entry == _entry); - - if (entry) { - _catalog->_dropIndex(_opCtx, entry).transitional_ignore(); - } else { - _catalog->_deleteIndexFromDisk(_opCtx, _indexName, _indexNamespace); - } -} - -void IndexCatalogImpl::IndexBuildBlock::success() { - // Being in a WUOW means all timestamping responsibility can be pushed up to the caller. - invariant(_opCtx->lockState()->inAWriteUnitOfWork()); - - fassert(17207, _collection->ok()); - NamespaceString ns(_indexNamespace); - invariant(_opCtx->lockState()->isDbLockedForMode(ns.db(), MODE_X)); - - _collection->getCatalogEntry()->indexBuildSuccess(_opCtx, _indexName); - - IndexDescriptor* desc = _catalog->findIndexByName(_opCtx, _indexName, true); - fassert(17330, desc); - IndexCatalogEntry* entry = _catalog->_entries.find(desc); - fassert(17331, entry && entry == _entry); - - OperationContext* opCtx = _opCtx; - LOG(2) << "marking index " << _indexName << " as ready in snapshot id " - << opCtx->recoveryUnit()->getSnapshotId(); - _opCtx->recoveryUnit()->onCommit( - [ opCtx, entry, collection = _collection ](boost::optional<Timestamp> commitTime) { - // Note: this runs after the WUOW commits but before we release our X lock on the - // collection. This means that any snapshot created after this must include the full - // index, and no one can try to read this index before we set the visibility. - if (!commitTime) { - // The end of background index builds on secondaries does not get a commit - // timestamp. 
We use the cluster time since it's guaranteed to be greater than the - // time of the index build. It is possible the cluster time could be in the future, - // and we will need to do another write to reach the minimum visible snapshot. - commitTime = LogicalClock::getClusterTimeForReplicaSet(opCtx).asTimestamp(); - } - entry->setMinimumVisibleSnapshot(commitTime.get()); - // We must also set the minimum visible snapshot on the collection like during init(). - // This prevents reads in the past from reading inconsistent metadata. We should be - // able to remove this when the catalog is versioned. - collection->setMinimumVisibleSnapshot(commitTime.get()); - }); - - entry->setIsReady(true); -} - namespace { // While technically recursive, only current possible with 2 levels. Status _checkValidFilterExpressions(MatchExpression* expression, int level = 0) { @@ -519,6 +425,7 @@ Status IndexCatalogImpl::_isSpecOk(OperationContext* opCtx, const BSONObj& spec) // Drop pending collections are internal to the server and will not be exported to another // storage engine. The indexes contained in these collections are not subject to the same // namespace length constraints as the ones in created by users. + // // Index names do not limit the maximum allowable length of the target namespace under FCV 4.2 // and above. 
const auto checkIndexNamespace = @@ -830,13 +737,14 @@ void IndexCatalogImpl::dropAllIndexes(OperationContext* opCtx, bool haveIdIndex = false; + invariant(_buildingIndexes.size() == 0); vector<string> indexNamesToDrop; { int seen = 0; - IndexIterator ii = getIndexIterator(opCtx, true); - while (ii.more()) { + std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, true); + while (ii->more()) { seen++; - IndexDescriptor* desc = ii.next(); + IndexDescriptor* desc = ii->next()->descriptor(); if (desc->isIdIndex() && includingIdIndex == false) { haveIdIndex = true; continue; @@ -851,7 +759,7 @@ void IndexCatalogImpl::dropAllIndexes(OperationContext* opCtx, IndexDescriptor* desc = findIndexByName(opCtx, indexName, true); invariant(desc); LOG(1) << "\t dropAllIndexes dropping: " << desc->toString(); - IndexCatalogEntry* entry = _entries.find(desc); + IndexCatalogEntry* entry = _readyIndexes.find(desc); invariant(entry); // If the onDrop function creates an oplog entry, it should run first so that the drop is @@ -859,7 +767,7 @@ void IndexCatalogImpl::dropAllIndexes(OperationContext* opCtx, if (onDropFn) { onDropFn(desc); } - _dropIndex(opCtx, entry).transitional_ignore(); + invariant(_dropIndex(opCtx, entry).isOK()); } // verify state is sane post cleaning @@ -871,19 +779,19 @@ void IndexCatalogImpl::dropAllIndexes(OperationContext* opCtx, fassert(17324, numIndexesTotal(opCtx) == 1); fassert(17325, numIndexesReady(opCtx) == 1); fassert(17326, numIndexesInCollectionCatalogEntry == 1); - fassert(17336, _entries.size() == 1); + fassert(17336, _readyIndexes.size() == 1); } else { - if (numIndexesTotal(opCtx) || numIndexesInCollectionCatalogEntry || _entries.size()) { + if (numIndexesTotal(opCtx) || numIndexesInCollectionCatalogEntry || _readyIndexes.size()) { error() << "About to fassert - " << " numIndexesTotal(): " << numIndexesTotal(opCtx) << " numSystemIndexesEntries: " << numIndexesInCollectionCatalogEntry - << " _entries.size(): " << _entries.size() + << " 
_readyIndexes.size(): " << _readyIndexes.size() << " indexNamesToDrop: " << indexNamesToDrop.size() << " haveIdIndex: " << haveIdIndex; } fassert(17327, numIndexesTotal(opCtx) == 0); fassert(17328, numIndexesInCollectionCatalogEntry == 0); - fassert(17337, _entries.size() == 0); + fassert(17337, _readyIndexes.size() == 0); } } @@ -893,7 +801,10 @@ void IndexCatalogImpl::dropAllIndexes(OperationContext* opCtx, bool includingIdI Status IndexCatalogImpl::dropIndex(OperationContext* opCtx, IndexDescriptor* desc) { invariant(opCtx->lockState()->isCollectionLockedForMode(_collection->ns().toString(), MODE_X)); - IndexCatalogEntry* entry = _entries.find(desc); + BackgroundOperation::assertNoBgOpInProgForNs(_collection->ns().ns()); + invariant(_buildingIndexes.size() == 0); + + IndexCatalogEntry* entry = _readyIndexes.find(desc); if (!entry) return Status(ErrorCodes::InternalError, "cannot find index to delete"); @@ -901,8 +812,6 @@ Status IndexCatalogImpl::dropIndex(OperationContext* opCtx, IndexDescriptor* des if (!entry->isReady(opCtx)) return Status(ErrorCodes::InternalError, "cannot delete not ready index"); - BackgroundOperation::assertNoBgOpInProgForNs(_collection->ns().ns()); - return _dropIndex(opCtx, entry); } @@ -919,8 +828,8 @@ public: // Ban reading from this collection on committed reads on snapshots before now. if (!commitTime) { // This is called when we refresh the index catalog entry, which does not always have - // a commit timestamp. We use the cluster time since it's guaranteed to be greater than - // the time of the index removal. It is possible the cluster time could be in the + // a commit timestamp. We use the cluster time since it's guaranteed to be greater + // than the time of the index removal. It is possible the cluster time could be in the // future, and we will need to do another write to reach the minimum visible snapshot. 
commitTime = LogicalClock::getClusterTimeForReplicaSet(_opCtx).asTimestamp(); } @@ -957,8 +866,9 @@ Status IndexCatalogImpl::_dropIndex(OperationContext* opCtx, IndexCatalogEntry* string indexNamespace = entry->descriptor()->indexNamespace(); // If any cursors could be using this index, invalidate them. Note that we do not use indexes - // until they are ready, so we do not need to invalidate anything if the index fails while it is - // being built. + // until they are ready, so we do not need to invalidate anything if the index fails while it + // is being built. + // // TODO only kill cursors that are actually using the index rather than everything on this // collection. if (entry->isReady(opCtx)) { @@ -969,16 +879,22 @@ Status IndexCatalogImpl::_dropIndex(OperationContext* opCtx, IndexCatalogEntry* // --------- START REAL WORK ---------- audit::logDropIndex(&cc(), indexName, _collection->ns().ns()); - invariant(_entries.release(entry->descriptor()) == entry); - opCtx->recoveryUnit()->registerChange( - new IndexRemoveChange(opCtx, _collection, &_entries, entry)); + auto released = _readyIndexes.release(entry->descriptor()); + if (released) { + invariant(released == entry); + opCtx->recoveryUnit()->registerChange( + new IndexRemoveChange(opCtx, _collection, &_readyIndexes, entry)); + } else { + invariant(_buildingIndexes.release(entry->descriptor()) == entry); + opCtx->recoveryUnit()->registerChange( + new IndexRemoveChange(opCtx, _collection, &_buildingIndexes, entry)); + } _collection->infoCache()->droppedIndex(opCtx, indexName); entry = nullptr; _deleteIndexFromDisk(opCtx, indexName, indexNamespace); _checkMagic(); - return Status::OK(); } @@ -1009,14 +925,14 @@ vector<BSONObj> IndexCatalogImpl::getAndClearUnfinishedIndexes(OperationContext* } bool IndexCatalogImpl::isMultikey(OperationContext* opCtx, const IndexDescriptor* idx) { - IndexCatalogEntry* entry = _entries.find(idx); + IndexCatalogEntry* entry = _readyIndexes.find(idx); invariant(entry); return 
entry->isMultikey(opCtx); } MultikeyPaths IndexCatalogImpl::getMultikeyPaths(OperationContext* opCtx, const IndexDescriptor* idx) { - IndexCatalogEntry* entry = _entries.find(idx); + IndexCatalogEntry* entry = _readyIndexes.find(idx); invariant(entry); return entry->getMultikeyPaths(opCtx); } @@ -1024,20 +940,20 @@ MultikeyPaths IndexCatalogImpl::getMultikeyPaths(OperationContext* opCtx, // --------------------------- bool IndexCatalogImpl::haveAnyIndexes() const { - return _entries.size() != 0; + return _readyIndexes.size() > 0 || _buildingIndexes.size() > 0; } int IndexCatalogImpl::numIndexesTotal(OperationContext* opCtx) const { - int count = _entries.size() + _unfinishedIndexes.size(); + int count = _readyIndexes.size() + _buildingIndexes.size() + _unfinishedIndexes.size(); dassert(_collection->getCatalogEntry()->getTotalIndexCount(opCtx) == count); return count; } int IndexCatalogImpl::numIndexesReady(OperationContext* opCtx) const { std::vector<IndexDescriptor*> itIndexes; - IndexIterator ii = getIndexIterator(opCtx, /*includeUnfinished*/ false); - while (ii.more()) { - itIndexes.push_back(ii.next()); + std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, /*includeUnfinished*/ false); + while (ii->more()) { + itIndexes.push_back(ii->next()->descriptor()); } DEV { std::vector<std::string> completedIndexes; @@ -1069,9 +985,9 @@ bool IndexCatalogImpl::haveIdIndex(OperationContext* opCtx) const { } IndexDescriptor* IndexCatalogImpl::findIdIndex(OperationContext* opCtx) const { - IndexIterator ii = getIndexIterator(opCtx, false); - while (ii.more()) { - IndexDescriptor* desc = ii.next(); + std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, false); + while (ii->more()) { + IndexDescriptor* desc = ii->next()->descriptor(); if (desc->isIdIndex()) return desc; } @@ -1081,9 +997,9 @@ IndexDescriptor* IndexCatalogImpl::findIdIndex(OperationContext* opCtx) const { IndexDescriptor* IndexCatalogImpl::findIndexByName(OperationContext* opCtx, StringData name, 
bool includeUnfinishedIndexes) const { - IndexIterator ii = getIndexIterator(opCtx, includeUnfinishedIndexes); - while (ii.more()) { - IndexDescriptor* desc = ii.next(); + std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, includeUnfinishedIndexes); + while (ii->more()) { + IndexDescriptor* desc = ii->next()->descriptor(); if (desc->indexName() == name) return desc; } @@ -1095,9 +1011,9 @@ IndexDescriptor* IndexCatalogImpl::findIndexByKeyPatternAndCollationSpec( const BSONObj& key, const BSONObj& collationSpec, bool includeUnfinishedIndexes) const { - IndexIterator ii = getIndexIterator(opCtx, includeUnfinishedIndexes); - while (ii.more()) { - IndexDescriptor* desc = ii.next(); + std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, includeUnfinishedIndexes); + while (ii->more()) { + IndexDescriptor* desc = ii->next()->descriptor(); if (SimpleBSONObjComparator::kInstance.evaluate(desc->keyPattern() == key) && SimpleBSONObjComparator::kInstance.evaluate( desc->infoObj().getObjectField("collation") == collationSpec)) { @@ -1112,9 +1028,9 @@ void IndexCatalogImpl::findIndexesByKeyPattern(OperationContext* opCtx, bool includeUnfinishedIndexes, std::vector<IndexDescriptor*>* matches) const { invariant(matches); - IndexIterator ii = getIndexIterator(opCtx, includeUnfinishedIndexes); - while (ii.more()) { - IndexDescriptor* desc = ii.next(); + std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, includeUnfinishedIndexes); + while (ii->more()) { + IndexDescriptor* desc = ii->next()->descriptor(); if (SimpleBSONObjComparator::kInstance.evaluate(desc->keyPattern() == key)) { matches->push_back(desc); } @@ -1126,9 +1042,9 @@ IndexDescriptor* IndexCatalogImpl::findShardKeyPrefixedIndex(OperationContext* o bool requireSingleKey) const { IndexDescriptor* best = nullptr; - IndexIterator ii = getIndexIterator(opCtx, false); - while (ii.more()) { - IndexDescriptor* desc = ii.next(); + std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, false); + while 
(ii->more()) { + IndexDescriptor* desc = ii->next()->descriptor(); bool hasSimpleCollation = desc->infoObj().getObjectField("collation").isEmpty(); if (desc->isPartial()) @@ -1151,9 +1067,9 @@ void IndexCatalogImpl::findIndexByType(OperationContext* opCtx, const string& type, vector<IndexDescriptor*>& matches, bool includeUnfinishedIndexes) const { - IndexIterator ii = getIndexIterator(opCtx, includeUnfinishedIndexes); - while (ii.more()) { - IndexDescriptor* desc = ii.next(); + std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, includeUnfinishedIndexes); + while (ii->more()) { + IndexDescriptor* desc = ii->next()->descriptor(); if (IndexNames::findPluginName(desc->keyPattern()) == type) { matches.push_back(desc); } @@ -1161,7 +1077,11 @@ void IndexCatalogImpl::findIndexByType(OperationContext* opCtx, } IndexAccessMethod* IndexCatalogImpl::getIndex(const IndexDescriptor* desc) { - IndexCatalogEntry* entry = _entries.find(desc); + IndexCatalogEntry* entry = _readyIndexes.find(desc); + if (!entry) { + entry = _buildingIndexes.find(desc); + } + massert(17334, "cannot find index entry", entry); return entry->accessMethod(); } @@ -1171,7 +1091,11 @@ const IndexAccessMethod* IndexCatalogImpl::getIndex(const IndexDescriptor* desc) } const IndexCatalogEntry* IndexCatalogImpl::getEntry(const IndexDescriptor* desc) const { - const IndexCatalogEntry* entry = _entries.find(desc); + const IndexCatalogEntry* entry = _readyIndexes.find(desc); + if (!entry) { + entry = _buildingIndexes.find(desc); + } + massert(17357, "cannot find index entry", entry); return entry; } @@ -1181,6 +1105,7 @@ const IndexDescriptor* IndexCatalogImpl::refreshEntry(OperationContext* opCtx, const IndexDescriptor* oldDesc) { invariant(opCtx->lockState()->isCollectionLockedForMode(_collection->ns().ns(), MODE_X)); invariant(!BackgroundOperation::inProgForNs(_collection->ns())); + invariant(_buildingIndexes.size() == 0); const std::string indexName = oldDesc->indexName(); 
invariant(_collection->getCatalogEntry()->isIndexReady(opCtx, indexName)); @@ -1194,9 +1119,10 @@ const IndexDescriptor* IndexCatalogImpl::refreshEntry(OperationContext* opCtx, // Delete the IndexCatalogEntry that owns this descriptor. After deletion, 'oldDesc' is // invalid and should not be dereferenced. - IndexCatalogEntry* oldEntry = _entries.release(oldDesc); + IndexCatalogEntry* oldEntry = _readyIndexes.release(oldDesc); + invariant(oldEntry); opCtx->recoveryUnit()->registerChange( - new IndexRemoveChange(opCtx, _collection, &_entries, oldEntry)); + new IndexRemoveChange(opCtx, _collection, &_readyIndexes, oldEntry)); // Ask the CollectionCatalogEntry for the new index spec. BSONObj spec = _collection->getCatalogEntry()->getIndexSpec(opCtx, indexName).getOwned(); @@ -1206,8 +1132,9 @@ const IndexDescriptor* IndexCatalogImpl::refreshEntry(OperationContext* opCtx, auto newDesc = stdx::make_unique<IndexDescriptor>(_collection, _getAccessMethodName(keyPattern), spec); const bool initFromDisk = false; + const bool isReadyIndex = true; const IndexCatalogEntry* newEntry = - _setupInMemoryStructures(opCtx, std::move(newDesc), initFromDisk); + _setupInMemoryStructures(opCtx, std::move(newDesc), initFromDisk, isReadyIndex); invariant(newEntry->isReady(opCtx)); // Return the new descriptor. 
@@ -1232,14 +1159,30 @@ Status IndexCatalogImpl::_indexFilteredRecords(OperationContext* opCtx, return status; } - InsertResult result; - Status status = index->accessMethod()->insert( - opCtx, *bsonRecord.docPtr, bsonRecord.id, options, &result); - if (!status.isOK()) - return status; + Status status = Status::OK(); + const bool hybridBuildsEnabled = false; + if (hybridBuildsEnabled && index->isBuilding()) { + int64_t inserted; + status = index->indexBuildInterceptor()->sideWrite(opCtx, + index->accessMethod(), + bsonRecord.docPtr, + bsonRecord.id, + IndexBuildInterceptor::Op::kInsert, + &inserted); + if (keysInsertedOut) { + *keysInsertedOut += inserted; + } + } else { + InsertResult result; + status = index->accessMethod()->insert( + opCtx, *bsonRecord.docPtr, bsonRecord.id, options, &result); + if (keysInsertedOut) { + *keysInsertedOut += result.numInserted; + } + } - if (keysInsertedOut) { - *keysInsertedOut += result.numInserted; + if (!status.isOK()) { + return status; } } return Status::OK(); @@ -1268,6 +1211,18 @@ Status IndexCatalogImpl::_unindexRecord(OperationContext* opCtx, const RecordId& loc, bool logIfError, int64_t* keysDeletedOut) { + const bool hybridBuildsEnabled = false; + if (hybridBuildsEnabled && index->isBuilding()) { + int64_t removed; + auto status = index->indexBuildInterceptor()->sideWrite( + opCtx, index->accessMethod(), &obj, loc, IndexBuildInterceptor::Op::kDelete, &removed); + if (status.isOK() && keysDeletedOut) { + *keysDeletedOut += removed; + } + + return status; + } + InsertDeleteOptions options; prepareInsertDeleteOptions(opCtx, index->descriptor(), &options); options.logIfError = logIfError; @@ -1275,6 +1230,7 @@ Status IndexCatalogImpl::_unindexRecord(OperationContext* opCtx, // On WiredTiger, we do blind unindexing of records for efficiency. However, when duplicates // are allowed in unique indexes, WiredTiger does not do blind unindexing, and instead confirms // that the recordid matches the element we are removing. 
+ // // We need to disable blind-deletes for in-progress indexes, in order to force recordid-matching // for unindex operations, since initial sync can build an index over a collection with // duplicates. See SERVER-17487 for more details. @@ -1295,7 +1251,6 @@ Status IndexCatalogImpl::_unindexRecord(OperationContext* opCtx, return Status::OK(); } - Status IndexCatalogImpl::indexRecords(OperationContext* opCtx, const std::vector<BsonRecord>& bsonRecords, int64_t* keysInsertedOut) { @@ -1303,9 +1258,14 @@ Status IndexCatalogImpl::indexRecords(OperationContext* opCtx, *keysInsertedOut = 0; } - for (IndexCatalogEntryContainer::const_iterator i = _entries.begin(); i != _entries.end(); - ++i) { - Status s = _indexRecords(opCtx, i->get(), bsonRecords, keysInsertedOut); + for (auto&& it : _readyIndexes) { + Status s = _indexRecords(opCtx, it.get(), bsonRecords, keysInsertedOut); + if (!s.isOK()) + return s; + } + + for (auto&& it : _buildingIndexes) { + Status s = _indexRecords(opCtx, it.get(), bsonRecords, keysInsertedOut); if (!s.isOK()) return s; } @@ -1322,13 +1282,23 @@ void IndexCatalogImpl::unindexRecord(OperationContext* opCtx, *keysDeletedOut = 0; } - for (IndexCatalogEntryContainer::const_iterator i = _entries.begin(); i != _entries.end(); - ++i) { - IndexCatalogEntry* entry = i->get(); + for (IndexCatalogEntryContainer::const_iterator it = _readyIndexes.begin(); + it != _readyIndexes.end(); + ++it) { + IndexCatalogEntry* entry = it->get(); + + bool logIfError = !noWarn; + invariant(_unindexRecord(opCtx, entry, obj, loc, logIfError, keysDeletedOut)); + } + + for (IndexCatalogEntryContainer::const_iterator it = _buildingIndexes.begin(); + it != _buildingIndexes.end(); + ++it) { + IndexCatalogEntry* entry = it->get(); // If it's a background index, we DO NOT want to log anything. bool logIfError = entry->isReady(opCtx) ? 
!noWarn : false; - _unindexRecord(opCtx, entry, obj, loc, logIfError, keysDeletedOut).transitional_ignore(); + invariant(_unindexRecord(opCtx, entry, obj, loc, logIfError, keysDeletedOut)); } } @@ -1338,10 +1308,10 @@ std::unique_ptr<IndexCatalog::IndexBuildBlockInterface> IndexCatalogImpl::create } std::string::size_type IndexCatalogImpl::getLongestIndexNameLength(OperationContext* opCtx) const { - IndexCatalog::IndexIterator it = getIndexIterator(opCtx, true); + std::unique_ptr<IndexIterator> it = getIndexIterator(opCtx, true); std::string::size_type longestIndexNameLength = 0; - while (it.more()) { - auto thisLength = it.next()->indexName().length(); + while (it->more()) { + auto thisLength = it->next()->descriptor()->indexName().length(); if (thisLength > longestIndexNameLength) longestIndexNameLength = thisLength; } @@ -1377,6 +1347,15 @@ void IndexCatalogImpl::prepareInsertDeleteOptions(OperationContext* opCtx, } } +void IndexCatalogImpl::indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) { + invariant(_buildingIndexes.release(index->descriptor())); + _readyIndexes.add(index); + opCtx->recoveryUnit()->onRollback([this, index]() { + invariant(_readyIndexes.release(index->descriptor())); + _buildingIndexes.add(index); + }); +} + StatusWith<BSONObj> IndexCatalogImpl::_fixIndexSpec(OperationContext* opCtx, Collection* collection, const BSONObj& spec) const { @@ -1429,7 +1408,11 @@ StatusWith<BSONObj> IndexCatalogImpl::_fixIndexSpec(OperationContext* opCtx, } void IndexCatalogImpl::setNs(NamespaceString ns) { - for (auto&& ice : _entries) { + for (auto&& ice : _readyIndexes) { + ice->setNs(ns); + } + + for (auto&& ice : _buildingIndexes) { ice->setNs(ns); } @@ -1439,5 +1422,4 @@ void IndexCatalogImpl::setNs(NamespaceString ns) { } _unfinishedIndexes.swap(newUnfinishedIndexes); } - } // namespace mongo diff --git a/src/mongo/db/catalog/index_catalog_impl.h b/src/mongo/db/catalog/index_catalog_impl.h index b606098b31c..f37e145c5b9 100644 --- 
a/src/mongo/db/catalog/index_catalog_impl.h +++ b/src/mongo/db/catalog/index_catalog_impl.h @@ -35,6 +35,7 @@ #include "mongo/db/catalog/index_catalog.h" #include "mongo/db/catalog/index_catalog_entry.h" +#include "mongo/db/index/index_build_interceptor.h" #include "mongo/db/index/multikey_paths.h" #include "mongo/db/jsobj.h" #include "mongo/db/operation_context.h" @@ -174,8 +175,8 @@ public: Status checkUnfinished() const override; using IndexIterator = IndexCatalog::IndexIterator; - IndexIterator getIndexIterator(OperationContext* const opCtx, - const bool includeUnfinishedIndexes) const override; + std::unique_ptr<IndexIterator> getIndexIterator( + OperationContext* const opCtx, const bool includeUnfinishedIndexes) const override; // ---- index set modifiers ------ @@ -235,14 +236,6 @@ public: // --- these probably become private? - - /** - * disk creation order - * 1) collection's NamespaceDetails - * a) info + head - * b) _indexBuildsInProgress++ - * 2) indexes entry in .ns file - */ class IndexBuildBlock : public IndexCatalog::IndexBuildBlockInterface { MONGO_DISALLOW_COPYING(IndexBuildBlock); @@ -296,6 +289,7 @@ public: IndexCatalogEntry* _entry; OperationContext* _opCtx; + std::unique_ptr<IndexBuildInterceptor> _indexBuildInterceptor; }; // ----- data modifiers ------ @@ -343,6 +337,8 @@ public: void setNs(NamespaceString ns) override; + void indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) override; + private: static const BSONObj _idObj; // { _id : 1 } @@ -387,9 +383,12 @@ private: // descriptor ownership passes to _setupInMemoryStructures // initFromDisk: Avoids registering a change to undo this operation when set to true. // You must set this flag if calling this function outside of a UnitOfWork. + // isReadyIndex: The index will be directly available for query usage without needing to + // complete the IndexBuildBlock process. 
IndexCatalogEntry* _setupInMemoryStructures(OperationContext* opCtx, std::unique_ptr<IndexDescriptor> descriptor, - bool initFromDisk); + bool initFromDisk, + bool isReadyIndex); // Apply a set of transformations to the user-provided index object 'spec' to make it // conform to the standard for insertion. This function adds the 'v' field if it didn't @@ -407,7 +406,8 @@ private: Collection* const _collection; const int _maxNumIndexesAllowed; - IndexCatalogEntryContainer _entries; + IndexCatalogEntryContainer _readyIndexes; + IndexCatalogEntryContainer _buildingIndexes; // These are the index specs of indexes that were "leftover". // "Leftover" means they were unfinished when a mongod shut down. diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp index 78a6e39d0f6..e4e0169ea91 100644 --- a/src/mongo/db/catalog/index_consistency.cpp +++ b/src/mongo/db/catalog/index_consistency.cpp @@ -75,12 +75,13 @@ IndexConsistency::IndexConsistency(OperationContext* opCtx, Milliseconds(internalQueryExecYieldPeriodMS.load())) { IndexCatalog* indexCatalog = _collection->getIndexCatalog(); - IndexCatalog::IndexIterator indexIterator = indexCatalog->getIndexIterator(_opCtx, false); + std::unique_ptr<IndexCatalog::IndexIterator> indexIterator = + indexCatalog->getIndexIterator(_opCtx, false); int indexNumber = 0; - while (indexIterator.more()) { + while (indexIterator->more()) { - const IndexDescriptor* descriptor = indexIterator.next(); + const IndexDescriptor* descriptor = indexIterator->next()->descriptor(); std::string indexNs = descriptor->indexNamespace(); _indexNumber[descriptor->indexNamespace()] = indexNumber; diff --git a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp index 3e748e52c0c..826cbbe4ebf 100644 --- a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp +++ b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp @@ 
-80,10 +80,11 @@ Status RecordStoreValidateAdaptor::validate(const RecordId& recordId, return status; } - IndexCatalog::IndexIterator i = _indexCatalog->getIndexIterator(_opCtx, false); + std::unique_ptr<IndexCatalog::IndexIterator> it = + _indexCatalog->getIndexIterator(_opCtx, false); - while (i.more()) { - const IndexDescriptor* descriptor = i.next(); + while (it->more()) { + const IndexDescriptor* descriptor = it->next()->descriptor(); const std::string indexNs = descriptor->indexNamespace(); int indexNumber = _indexConsistency->getIndexNumber(indexNs); ValidateResults curRecordResults; diff --git a/src/mongo/db/catalog/rename_collection.cpp b/src/mongo/db/catalog/rename_collection.cpp index 59744b074db..80e46f644de 100644 --- a/src/mongo/db/catalog/rename_collection.cpp +++ b/src/mongo/db/catalog/rename_collection.cpp @@ -306,8 +306,8 @@ Status renameCollectionCommon(OperationContext* opCtx, // Determine which index names are too long. Since we don't have the collection // rename optime at this time, use the maximum optime to check the index names. 
auto longDpns = target.makeDropPendingNamespace(repl::OpTime::max()); - while (indexIter.more()) { - auto index = indexIter.next(); + while (indexIter->more()) { + auto index = indexIter->next()->descriptor(); auto status = longDpns.checkLengthForRename(index->indexName().size()); if (!status.isOK()) { indexesToDrop.push_back(index); @@ -435,10 +435,10 @@ Status renameCollectionCommon(OperationContext* opCtx, indexer.allowInterruption(); std::vector<BSONObj> indexesToCopy; - IndexCatalog::IndexIterator sourceIndIt = + std::unique_ptr<IndexCatalog::IndexIterator> sourceIndIt = sourceColl->getIndexCatalog()->getIndexIterator(opCtx, true); - while (sourceIndIt.more()) { - auto descriptor = sourceIndIt.next(); + while (sourceIndIt->more()) { + auto descriptor = sourceIndIt->next()->descriptor(); if (descriptor->isIdIndex()) { continue; } diff --git a/src/mongo/db/commands/mr.cpp b/src/mongo/db/commands/mr.cpp index 9b039f98e96..3d529332bbc 100644 --- a/src/mongo/db/commands/mr.cpp +++ b/src/mongo/db/commands/mr.cpp @@ -530,11 +530,11 @@ void State::prepTempCollection() { if (finalColl) { finalOptions = finalColl->getCatalogEntry()->getCollectionOptions(_opCtx); - IndexCatalog::IndexIterator ii = + std::unique_ptr<IndexCatalog::IndexIterator> ii = finalColl->getIndexCatalog()->getIndexIterator(_opCtx, true); // Iterate over finalColl's indexes. - while (ii.more()) { - IndexDescriptor* currIndex = ii.next(); + while (ii->more()) { + IndexDescriptor* currIndex = ii->next()->descriptor(); BSONObjBuilder b; b.append("ns", _config.tempNamespace.ns()); @@ -1114,11 +1114,11 @@ void State::finalReduce(OperationContext* opCtx, CurOp* curOp, ProgressMeterHold assertCollectionNotNull(_config.incLong, autoIncColl); bool foundIndex = false; - IndexCatalog::IndexIterator ii = + std::unique_ptr<IndexCatalog::IndexIterator> ii = autoIncColl.getCollection()->getIndexCatalog()->getIndexIterator(_opCtx, true); // Iterate over incColl's indexes. 
- while (ii.more()) { - IndexDescriptor* currIndex = ii.next(); + while (ii->more()) { + IndexDescriptor* currIndex = ii->next()->descriptor(); BSONObj x = currIndex->infoObj(); if (sortKey.woCompare(x["key"].embeddedObject()) == 0) { foundIndex = true; diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript index e3b4f5fe49a..1b2f9b7f934 100644 --- a/src/mongo/db/index/SConscript +++ b/src/mongo/db/index/SConscript @@ -144,3 +144,18 @@ env.Library( 'key_generator', ], ) + +env.Library( + target="index_build_interceptor", + source=[ + "index_build_interceptor.cpp", + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/multi_key_path_tracker', + '$BUILD_DIR/mongo/db/s/sharding_api_d', + 'index_access_methods', + ], +) diff --git a/src/mongo/db/index/index_build_interceptor.cpp b/src/mongo/db/index/index_build_interceptor.cpp new file mode 100644 index 00000000000..6af48b542c1 --- /dev/null +++ b/src/mongo/db/index/index_build_interceptor.cpp @@ -0,0 +1,146 @@ +/** + * Copyright (C) 2018-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. 
You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex + +#include "mongo/platform/basic.h" + +#include "mongo/db/index/index_build_interceptor.h" + +#include "mongo/bson/bsonobj.h" +#include "mongo/db/catalog_raii.h" +#include "mongo/db/db_raii.h" +#include "mongo/db/index/index_access_method.h" +#include "mongo/db/multi_key_path_tracker.h" +#include "mongo/db/operation_context.h" +#include "mongo/db/s/operation_sharding_state.h" +#include "mongo/util/log.h" +#include "mongo/util/uuid.h" + +namespace mongo { + +namespace { +const bool makeCollections = false; +} + +NamespaceString IndexBuildInterceptor::makeTempSideWritesNs() { + return NamespaceString("local.system.sideWrites-" + UUID::gen().toString()); +} + +void IndexBuildInterceptor::ensureSideWritesCollectionExists(OperationContext* opCtx) { + if (!makeCollections) { + return; + } + + // TODO SERVER-38027 Consider pushing this higher into the createIndexes command logic. 
+ OperationShardingState::get(opCtx).setAllowImplicitCollectionCreation(BSONElement()); + + AutoGetOrCreateDb local(opCtx, "local", LockMode::MODE_X); + CollectionOptions options; + options.setNoIdIndex(); + options.temp = true; + + local.getDb()->createCollection(opCtx, _sideWritesNs.ns(), options); +} + +void IndexBuildInterceptor::removeSideWritesCollection(OperationContext* opCtx) { + if (!makeCollections) { + return; + } + + AutoGetDb local(opCtx, "local", LockMode::MODE_X); + fassert(50994, local.getDb()->dropCollectionEvenIfSystem(opCtx, _sideWritesNs, repl::OpTime())); +} + +Status IndexBuildInterceptor::sideWrite(OperationContext* opCtx, + IndexAccessMethod* indexAccessMethod, + const BSONObj* obj, + RecordId loc, + Op op, + int64_t* numKeysOut) { + *numKeysOut = 0; + BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); + BSONObjSet multikeyMetadataKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); + MultikeyPaths multikeyPaths; + + indexAccessMethod->getKeys(*obj, + IndexAccessMethod::GetKeysMode::kEnforceConstraints, + &keys, + &multikeyMetadataKeys, + &multikeyPaths); + // Maintain parity with IndexAccessMethods handling of key counting. Only include + // `multikeyMetadataKeys` when inserting. + *numKeysOut = keys.size() + (op == Op::kInsert ? multikeyMetadataKeys.size() : 0); + + if (_multikeyPaths) { + MultikeyPathTracker::mergeMultikeyPaths(&_multikeyPaths.get(), multikeyPaths); + } else { + // `mergeMultikeyPaths` is sensitive to the two inputs having the same multikey + // "shape". Initialize `_multikeyPaths` with the right shape from the first result. + _multikeyPaths = multikeyPaths; + } + + AutoGetCollection coll(opCtx, _sideWritesNs, LockMode::MODE_IX); + invariant(coll.getCollection()); + + std::vector<InsertStatement> toInsert; + for (const auto& key : keys) { + // Documents inserted into this table must be consumed in insert-order. 
Today, we can rely + // on storage engines to return documents in insert-order, but with clustered indexes, + // that may no longer be true. + // + // Additionally, these writes should be timestamped with the same timestamps that the + // other writes making up this operation are given. When index builds can cope with + // replication rollbacks, side table writes associated with a CUD operation should + // remain/rollback along with the corresponding oplog entry. + toInsert.emplace_back(BSON( + "op" << (op == Op::kInsert ? "i" : "d") << "key" << key << "recordId" << loc.repr())); + } + + if (op == Op::kInsert) { + // Wildcard indexes write multikey path information, typically part of the catalog + // document, to the index itself. Multikey information is never deleted, so we only need + // to add this data on the insert path. + for (const auto& key : multikeyMetadataKeys) { + toInsert.emplace_back(BSON("op" + << "i" + << "key" + << key + << "recordId" + << static_cast<int64_t>( + RecordId::ReservedId::kWildcardMultikeyMetadataId))); + } + } + + OpDebug* const opDebug = nullptr; + const bool fromMigrate = false; + return coll.getCollection()->insertDocuments( + opCtx, toInsert.begin(), toInsert.end(), opDebug, fromMigrate); +} +} // namespace mongo diff --git a/src/mongo/db/index/index_build_interceptor.h b/src/mongo/db/index/index_build_interceptor.h new file mode 100644 index 00000000000..13ae79d10d2 --- /dev/null +++ b/src/mongo/db/index/index_build_interceptor.h @@ -0,0 +1,74 @@ +/** + * Copyright (C) 2018-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. 
+ * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/index/multikey_paths.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/record_id.h" +#include "mongo/platform/atomic_word.h" + +namespace mongo { + +class BSONObj; +class IndexAccessMethod; +class OperationContext; + +class IndexBuildInterceptor { +public: + enum class Op { kInsert, kDelete }; + + IndexBuildInterceptor() : _sideWritesNs(makeTempSideWritesNs()) {} + IndexBuildInterceptor(NamespaceString sideWritesNs) : _sideWritesNs(sideWritesNs) {} + + static NamespaceString makeTempSideWritesNs(); + + void ensureSideWritesCollectionExists(OperationContext* opCtx); + void removeSideWritesCollection(OperationContext* opCtx); + + /** + * Client writes that are concurrent with an index build will have their index updates written + * to a temporary table. After the index table scan is complete, these updates will be applied + * to the underlying index table. + * + * On success, `numKeysOut` if non-null will contain the number of keys added or removed. 
+ */ + Status sideWrite(OperationContext* opCtx, + IndexAccessMethod* indexAccessMethod, + const BSONObj* obj, + RecordId loc, + Op op, + int64_t* numKeysOut); + +private: + NamespaceString _sideWritesNs; + boost::optional<MultikeyPaths> _multikeyPaths; +}; + +} // namespace mongo diff --git a/src/mongo/db/multi_key_path_tracker.cpp b/src/mongo/db/multi_key_path_tracker.cpp index c3961e8ad98..99aa17cd60f 100644 --- a/src/mongo/db/multi_key_path_tracker.cpp +++ b/src/mongo/db/multi_key_path_tracker.cpp @@ -32,6 +32,8 @@ #include "mongo/db/multi_key_path_tracker.h" +#include "mongo/util/assert_util.h" + namespace mongo { const OperationContext::Decoration<MultikeyPathTracker> MultikeyPathTracker::get = diff --git a/src/mongo/db/pipeline/process_interface_standalone.cpp b/src/mongo/db/pipeline/process_interface_standalone.cpp index efbd610506e..b3eda6e6e76 100644 --- a/src/mongo/db/pipeline/process_interface_standalone.cpp +++ b/src/mongo/db/pipeline/process_interface_standalone.cpp @@ -448,9 +448,9 @@ bool MongoInterfaceStandalone::uniqueKeyIsSupportedByIndex( } auto indexIterator = collection->getIndexCatalog()->getIndexIterator(opCtx, false); - while (indexIterator.more()) { - IndexDescriptor* descriptor = indexIterator.next(); - if (supportsUniqueKey(expCtx, indexIterator.catalogEntry(descriptor), uniqueKeyPaths)) { + while (indexIterator->more()) { + IndexCatalogEntry* entry = indexIterator->next(); + if (supportsUniqueKey(expCtx, entry, uniqueKeyPaths)) { return true; } } diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 14c2c137c88..72d667cfc64 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -187,10 +187,10 @@ void fillOutPlannerParams(OperationContext* opCtx, QueryPlannerParams* plannerParams) { invariant(canonicalQuery); // If it's not NULL, we may have indices. 
Access the catalog and fill out IndexEntry(s) - IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(opCtx, false); - while (ii.more()) { - const IndexDescriptor* desc = ii.next(); - IndexCatalogEntry* ice = ii.catalogEntry(desc); + std::unique_ptr<IndexCatalog::IndexIterator> ii = + collection->getIndexCatalog()->getIndexIterator(opCtx, false); + while (ii->more()) { + IndexCatalogEntry* ice = ii->next(); plannerParams->indices.push_back( indexEntryFromIndexCatalogEntry(opCtx, *ice, canonicalQuery)); } @@ -1491,18 +1491,19 @@ QueryPlannerParams fillOutPlannerParamsForDistinct(OperationContext* opCtx, QueryPlannerParams plannerParams; plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN | plannerOptions; - IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(opCtx, false); + std::unique_ptr<IndexCatalog::IndexIterator> ii = + collection->getIndexCatalog()->getIndexIterator(opCtx, false); auto query = parsedDistinct.getQuery()->getQueryRequest().getFilter(); - while (ii.more()) { - const IndexDescriptor* desc = ii.next(); - IndexCatalogEntry* ice = ii.catalogEntry(desc); + while (ii->more()) { + IndexCatalogEntry* ice = ii->next(); + const IndexDescriptor* desc = ice->descriptor(); if (desc->keyPattern().hasField(parsedDistinct.getKey())) { plannerParams.indices.push_back( indexEntryFromIndexCatalogEntry(opCtx, *ice, parsedDistinct.getQuery())); } else if (desc->getIndexType() == IndexType::INDEX_WILDCARD && !query.isEmpty()) { // Check whether the $** projection captures the field over which we are distinct-ing. 
const auto* proj = - static_cast<WildcardAccessMethod*>(ii.accessMethod(desc))->getProjectionExec(); + static_cast<WildcardAccessMethod*>(ice->accessMethod())->getProjectionExec(); if (proj->applyProjectionToOneField(parsedDistinct.getKey())) { plannerParams.indices.push_back( indexEntryFromIndexCatalogEntry(opCtx, *ice, parsedDistinct.getQuery())); diff --git a/src/mongo/db/stats/storage_stats.cpp b/src/mongo/db/stats/storage_stats.cpp index 92f1054eaae..45db0f8439c 100644 --- a/src/mongo/db/stats/storage_stats.cpp +++ b/src/mongo/db/stats/storage_stats.cpp @@ -91,10 +91,11 @@ Status appendCollectionStorageStats(OperationContext* opCtx, BSONObjBuilder indexDetails; - IndexCatalog::IndexIterator i = indexCatalog->getIndexIterator(opCtx, false); - while (i.more()) { - const IndexDescriptor* descriptor = i.next(); - IndexAccessMethod* iam = indexCatalog->getIndex(descriptor); + std::unique_ptr<IndexCatalog::IndexIterator> it = indexCatalog->getIndexIterator(opCtx, false); + while (it->more()) { + IndexCatalogEntry* entry = it->next(); + IndexDescriptor* descriptor = entry->descriptor(); + IndexAccessMethod* iam = entry->accessMethod(); invariant(iam); BSONObjBuilder bob; diff --git a/src/mongo/dbtests/indexcatalogtests.cpp b/src/mongo/dbtests/indexcatalogtests.cpp index a0b17941ccb..7e82884edda 100644 --- a/src/mongo/dbtests/indexcatalogtests.cpp +++ b/src/mongo/dbtests/indexcatalogtests.cpp @@ -86,11 +86,11 @@ public: ASSERT_TRUE(_catalog->numIndexesReady(&opCtx) == numFinishedIndexesStart + 2); - IndexCatalog::IndexIterator ii = _catalog->getIndexIterator(&opCtx, false); + std::unique_ptr<IndexCatalog::IndexIterator> ii = _catalog->getIndexIterator(&opCtx, false); int indexesIterated = 0; bool foundIndex = false; - while (ii.more()) { - IndexDescriptor* indexDesc = ii.next(); + while (ii->more()) { + IndexDescriptor* indexDesc = ii->next()->descriptor(); indexesIterated++; BSONObjIterator boit(indexDesc->infoObj()); while (boit.more() && !foundIndex) { |