author      Daniel Gottlieb <daniel.gottlieb@mongodb.com>    2018-11-12 16:21:17 -0500
committer   Daniel Gottlieb <daniel.gottlieb@mongodb.com>    2018-11-12 22:18:53 -0500
commit      a5ce10b0982c7a0378ba92f1c7d3e02d49d0b18a (patch)
tree        6113b3c963340471b4ab926d57d94b25077b31dc
parent      1a6ca6d1399d56656e3edd0f92fdc494cf491178 (diff)
download    mongo-a5ce10b0982c7a0378ba92f1c7d3e02d49d0b18a.tar.gz
SERVER-37263: Write updates to background building indexes into a temp table.
- IndexIterator returns IndexCatalogEntry*
- Split out ready from building indexes in the IndexCatalog.
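
For orientation, here is a minimal caller-side sketch of the iterator change (illustration only, not part of the patch; opCtx and indexCatalog are assumed to be an OperationContext* and an IndexCatalog* already in scope):

    // Old pattern: value-type iterator; next() returned an IndexDescriptor*, and the
    // access method had to be looked up through the catalog.
    IndexCatalog::IndexIterator ii = indexCatalog->getIndexIterator(opCtx, false);
    while (ii.more()) {
        IndexDescriptor* descriptor = ii.next();
        IndexAccessMethod* iam = indexCatalog->getIndex(descriptor);
        // ...
    }

    // New pattern: getIndexIterator() returns a std::unique_ptr<IndexIterator> and
    // next() yields the IndexCatalogEntry*, which exposes both pieces directly.
    std::unique_ptr<IndexCatalog::IndexIterator> it =
        indexCatalog->getIndexIterator(opCtx, /*includeUnfinishedIndexes=*/false);
    while (it->more()) {
        IndexCatalogEntry* entry = it->next();
        IndexDescriptor* descriptor = entry->descriptor();
        IndexAccessMethod* iam = entry->accessMethod();
        // ...
    }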
-rw-r--r--  src/mongo/db/SConscript                                       12
-rw-r--r--  src/mongo/db/catalog/SConscript                                4
-rw-r--r--  src/mongo/db/catalog/collection.h                              6
-rw-r--r--  src/mongo/db/catalog/collection_compact.cpp                   19
-rw-r--r--  src/mongo/db/catalog/collection_impl.cpp                      69
-rw-r--r--  src/mongo/db/catalog/collection_impl.h                         2
-rw-r--r--  src/mongo/db/catalog/collection_info_cache_impl.cpp           26
-rw-r--r--  src/mongo/db/catalog/collection_mock.h                         4
-rw-r--r--  src/mongo/db/catalog/database_impl.cpp                         4
-rw-r--r--  src/mongo/db/catalog/index_build_block.cpp                   178
-rw-r--r--  src/mongo/db/catalog/index_catalog.cpp                        77
-rw-r--r--  src/mongo/db/catalog/index_catalog.h                          70
-rw-r--r--  src/mongo/db/catalog/index_catalog_entry.h                     7
-rw-r--r--  src/mongo/db/catalog/index_catalog_entry_impl.h               14
-rw-r--r--  src/mongo/db/catalog/index_catalog_impl.cpp                  404
-rw-r--r--  src/mongo/db/catalog/index_catalog_impl.h                     24
-rw-r--r--  src/mongo/db/catalog/index_consistency.cpp                     7
-rw-r--r--  src/mongo/db/catalog/private/record_store_validate_adaptor.cpp 7
-rw-r--r--  src/mongo/db/catalog/rename_collection.cpp                    10
-rw-r--r--  src/mongo/db/commands/mr.cpp                                  12
-rw-r--r--  src/mongo/db/index/SConscript                                 15
-rw-r--r--  src/mongo/db/index/index_build_interceptor.cpp               146
-rw-r--r--  src/mongo/db/index/index_build_interceptor.h                  74
-rw-r--r--  src/mongo/db/multi_key_path_tracker.cpp                        2
-rw-r--r--  src/mongo/db/pipeline/process_interface_standalone.cpp         6
-rw-r--r--  src/mongo/db/query/get_executor.cpp                           19
-rw-r--r--  src/mongo/db/stats/storage_stats.cpp                           9
-rw-r--r--  src/mongo/dbtests/indexcatalogtests.cpp                        6
28 files changed, 850 insertions, 383 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index 599cd7912cd..8c3a4adb825 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -506,7 +506,6 @@ env.Library(
target='service_context',
source=[
'client.cpp',
- 'multi_key_path_tracker.cpp',
'operation_context.cpp',
'operation_context_group.cpp',
'service_context.cpp',
@@ -515,6 +514,7 @@ env.Library(
],
LIBDEPS=[
'$BUILD_DIR/mongo/db/logical_session_id',
+ '$BUILD_DIR/mongo/db/multi_key_path_tracker',
'$BUILD_DIR/mongo/db/storage/write_unit_of_work',
'$BUILD_DIR/mongo/transport/transport_layer_common',
'$BUILD_DIR/mongo/util/clock_sources',
@@ -540,6 +540,16 @@ env.CppUnitTest(
)
env.Library(
+ target='multi_key_path_tracker',
+ source=[
+ 'multi_key_path_tracker.cpp',
+ ],
+ LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/base',
+ ],
+)
+
+env.Library(
target='lasterror',
source=[
"lasterror.cpp",
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript
index 30c58486dde..d678985f150 100644
--- a/src/mongo/db/catalog/SConscript
+++ b/src/mongo/db/catalog/SConscript
@@ -273,6 +273,7 @@ env.Library(
"collection_info_cache_impl.cpp",
"database_holder_impl.cpp",
"database_impl.cpp",
+ "index_build_block.cpp",
"index_catalog_entry_impl.cpp",
"index_catalog_impl.cpp",
"index_consistency.cpp",
@@ -309,7 +310,8 @@ env.Library(
'$BUILD_DIR/mongo/db/views/views_mongod',
],
LIBDEPS_PRIVATE=[
- "$BUILD_DIR/mongo/db/commands/server_status_core",
+ '$BUILD_DIR/mongo/db/commands/server_status_core',
+ '$BUILD_DIR/mongo/db/index/index_build_interceptor',
'$BUILD_DIR/mongo/db/logical_clock',
'$BUILD_DIR/mongo/db/repl/repl_settings',
'$BUILD_DIR/mongo/db/storage/storage_engine_common',
diff --git a/src/mongo/db/catalog/collection.h b/src/mongo/db/catalog/collection.h
index 79a987efdef..831321f00ac 100644
--- a/src/mongo/db/catalog/collection.h
+++ b/src/mongo/db/catalog/collection.h
@@ -338,6 +338,8 @@ public:
OperationContext* opCtx,
PlanExecutor::YieldPolicy yieldPolicy,
ScanDirection scanDirection) = 0;
+
+ virtual void indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) = 0;
};
public:
@@ -734,6 +736,10 @@ public:
return this->_impl().makePlanExecutor(opCtx, yieldPolicy, scanDirection);
}
+ inline void indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) {
+ return this->_impl().indexBuildSuccess(opCtx, index);
+ }
+
private:
inline DatabaseCatalogEntry* dbce() const {
return this->_impl().dbce();
diff --git a/src/mongo/db/catalog/collection_compact.cpp b/src/mongo/db/catalog/collection_compact.cpp
index 5cd68625468..9eec0b1e8cb 100644
--- a/src/mongo/db/catalog/collection_compact.cpp
+++ b/src/mongo/db/catalog/collection_compact.cpp
@@ -67,13 +67,15 @@ StatusWith<CompactStats> compactCollection(OperationContext* opCtx,
return StatusWith<CompactStats>(status);
// Compact all indexes (not including unfinished indexes)
- IndexCatalog::IndexIterator ii(indexCatalog->getIndexIterator(opCtx, false));
- while (ii.more()) {
- IndexDescriptor* descriptor = ii.next();
- IndexAccessMethod* index = indexCatalog->getIndex(descriptor);
+ std::unique_ptr<IndexCatalog::IndexIterator> ii(
+ indexCatalog->getIndexIterator(opCtx, false));
+ while (ii->more()) {
+ IndexCatalogEntry* entry = ii->next();
+ IndexDescriptor* descriptor = entry->descriptor();
+ IndexAccessMethod* iam = entry->accessMethod();
LOG(1) << "compacting index: " << descriptor->toString();
- Status status = index->compact(opCtx);
+ Status status = iam->compact(opCtx);
if (!status.isOK()) {
error() << "failed to compact index: " << descriptor->toString();
return status;
@@ -89,9 +91,10 @@ StatusWith<CompactStats> compactCollection(OperationContext* opCtx,
std::vector<BSONObj> indexSpecs;
{
- IndexCatalog::IndexIterator ii(indexCatalog->getIndexIterator(opCtx, false));
- while (ii.more()) {
- IndexDescriptor* descriptor = ii.next();
+ std::unique_ptr<IndexCatalog::IndexIterator> ii(
+ indexCatalog->getIndexIterator(opCtx, false));
+ while (ii->more()) {
+ IndexDescriptor* descriptor = ii->next()->descriptor();
// Compact always creates the new index in the foreground.
const BSONObj spec =
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index 745f0203c1b..992707c4f73 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -660,11 +660,12 @@ RecordId CollectionImpl::updateDocument(OperationContext* opCtx,
// newDoc.
OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
if (indexesAffected) {
- IndexCatalog::IndexIterator ii = _indexCatalog->getIndexIterator(opCtx, true);
- while (ii.more()) {
- IndexDescriptor* descriptor = ii.next();
- IndexCatalogEntry* entry = ii.catalogEntry(descriptor);
- IndexAccessMethod* iam = ii.accessMethod(descriptor);
+ std::unique_ptr<IndexCatalog::IndexIterator> ii =
+ _indexCatalog->getIndexIterator(opCtx, true);
+ while (ii->more()) {
+ IndexCatalogEntry* entry = ii->next();
+ IndexDescriptor* descriptor = entry->descriptor();
+ IndexAccessMethod* iam = entry->accessMethod();
InsertDeleteOptions options;
_indexCatalog->prepareInsertDeleteOptions(opCtx, descriptor, &options);
@@ -687,10 +688,12 @@ RecordId CollectionImpl::updateDocument(OperationContext* opCtx,
// Update each index with each respective UpdateTicket.
if (indexesAffected) {
- IndexCatalog::IndexIterator ii = _indexCatalog->getIndexIterator(opCtx, true);
- while (ii.more()) {
- IndexDescriptor* descriptor = ii.next();
- IndexAccessMethod* iam = ii.accessMethod(descriptor);
+ std::unique_ptr<IndexCatalog::IndexIterator> ii =
+ _indexCatalog->getIndexIterator(opCtx, true);
+ while (ii->more()) {
+ IndexCatalogEntry* entry = ii->next();
+ IndexDescriptor* descriptor = entry->descriptor();
+ IndexAccessMethod* iam = entry->accessMethod();
int64_t keysInserted;
int64_t keysDeleted;
@@ -764,19 +767,20 @@ uint64_t CollectionImpl::dataSize(OperationContext* opCtx) const {
uint64_t CollectionImpl::getIndexSize(OperationContext* opCtx, BSONObjBuilder* details, int scale) {
IndexCatalog* idxCatalog = getIndexCatalog();
- IndexCatalog::IndexIterator ii = idxCatalog->getIndexIterator(opCtx, true);
+ std::unique_ptr<IndexCatalog::IndexIterator> ii = idxCatalog->getIndexIterator(opCtx, true);
uint64_t totalSize = 0;
- while (ii.more()) {
- IndexDescriptor* d = ii.next();
- IndexAccessMethod* iam = idxCatalog->getIndex(d);
+ while (ii->more()) {
+ IndexCatalogEntry* entry = ii->next();
+ IndexDescriptor* descriptor = entry->descriptor();
+ IndexAccessMethod* iam = entry->accessMethod();
long long ds = iam->getSpaceUsedBytes(opCtx);
totalSize += ds;
if (details) {
- details->appendNumber(d->indexName(), ds / scale);
+ details->appendNumber(descriptor->indexName(), ds / scale);
}
}
@@ -798,9 +802,10 @@ Status CollectionImpl::truncate(OperationContext* opCtx) {
// 1) store index specs
vector<BSONObj> indexSpecs;
{
- IndexCatalog::IndexIterator ii = _indexCatalog->getIndexIterator(opCtx, false);
- while (ii.more()) {
- const IndexDescriptor* idx = ii.next();
+ std::unique_ptr<IndexCatalog::IndexIterator> ii =
+ _indexCatalog->getIndexIterator(opCtx, false);
+ while (ii->more()) {
+ const IndexDescriptor* idx = ii->next()->descriptor();
indexSpecs.push_back(idx->infoObj().getOwned());
}
}
@@ -1031,14 +1036,16 @@ void _validateIndexes(OperationContext* opCtx,
ValidateResultsMap* indexNsResultsMap,
ValidateResults* results) {
- IndexCatalog::IndexIterator i = indexCatalog->getIndexIterator(opCtx, false);
+ std::unique_ptr<IndexCatalog::IndexIterator> it = indexCatalog->getIndexIterator(opCtx, false);
// Validate Indexes.
- while (i.more()) {
+ while (it->more()) {
opCtx->checkForInterrupt();
- const IndexDescriptor* descriptor = i.next();
+ IndexCatalogEntry* entry = it->next();
+ IndexDescriptor* descriptor = entry->descriptor();
+ IndexAccessMethod* iam = entry->accessMethod();
+
log(LogComponent::kIndex) << "validating index " << descriptor->indexNamespace() << endl;
- IndexAccessMethod* iam = indexCatalog->getIndex(descriptor);
ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexNamespace()];
bool checkCounts = false;
int64_t numTraversedKeys;
@@ -1095,9 +1102,10 @@ void _validateIndexKeyCount(OperationContext* opCtx,
RecordStoreValidateAdaptor* indexValidator,
ValidateResultsMap* indexNsResultsMap) {
- IndexCatalog::IndexIterator indexIterator = indexCatalog->getIndexIterator(opCtx, false);
- while (indexIterator.more()) {
- IndexDescriptor* descriptor = indexIterator.next();
+ std::unique_ptr<IndexCatalog::IndexIterator> indexIterator =
+ indexCatalog->getIndexIterator(opCtx, false);
+ while (indexIterator->more()) {
+ IndexDescriptor* descriptor = indexIterator->next()->descriptor();
ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexNamespace()];
if (curIndexResults.valid) {
@@ -1287,10 +1295,11 @@ Status CollectionImpl::touch(OperationContext* opCtx,
if (touchIndexes) {
Timer t;
- IndexCatalog::IndexIterator ii = _indexCatalog->getIndexIterator(opCtx, false);
- while (ii.more()) {
- const IndexDescriptor* desc = ii.next();
- const IndexAccessMethod* iam = _indexCatalog->getIndex(desc);
+ std::unique_ptr<IndexCatalog::IndexIterator> ii =
+ _indexCatalog->getIndexIterator(opCtx, false);
+ while (ii->more()) {
+ IndexCatalogEntry* entry = ii->next();
+ IndexAccessMethod* iam = entry->accessMethod();
Status status = iam->touch(opCtx);
if (!status.isOK())
return status;
@@ -1326,4 +1335,8 @@ void CollectionImpl::setNs(NamespaceString nss) {
_cursorManager = std::make_unique<CursorManager>(_ns);
}
+void CollectionImpl::indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) {
+ _details->indexBuildSuccess(opCtx, index->descriptor()->indexName());
+ _indexCatalog->indexBuildSuccess(opCtx, index);
+}
} // namespace mongo
diff --git a/src/mongo/db/catalog/collection_impl.h b/src/mongo/db/catalog/collection_impl.h
index 69c768f4118..e33f77d91c9 100644
--- a/src/mongo/db/catalog/collection_impl.h
+++ b/src/mongo/db/catalog/collection_impl.h
@@ -365,6 +365,8 @@ public:
PlanExecutor::YieldPolicy yieldPolicy,
ScanDirection scanDirection) final;
+ void indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) final;
+
private:
inline DatabaseCatalogEntry* dbce() const final {
return this->_dbce;
diff --git a/src/mongo/db/catalog/collection_info_cache_impl.cpp b/src/mongo/db/catalog/collection_info_cache_impl.cpp
index 9223cba45b8..55a1d838642 100644
--- a/src/mongo/db/catalog/collection_info_cache_impl.cpp
+++ b/src/mongo/db/catalog/collection_info_cache_impl.cpp
@@ -88,14 +88,16 @@ void CollectionInfoCacheImpl::computeIndexKeys(OperationContext* opCtx) {
bool hadTTLIndex = _hasTTLIndex;
_hasTTLIndex = false;
- IndexCatalog::IndexIterator i = _collection->getIndexCatalog()->getIndexIterator(opCtx, true);
- while (i.more()) {
- IndexDescriptor* descriptor = i.next();
+ std::unique_ptr<IndexCatalog::IndexIterator> it =
+ _collection->getIndexCatalog()->getIndexIterator(opCtx, true);
+ while (it->more()) {
+ IndexCatalogEntry* entry = it->next();
+ IndexDescriptor* descriptor = entry->descriptor();
+ IndexAccessMethod* iam = entry->accessMethod();
if (descriptor->getAccessMethodName() == IndexNames::WILDCARD) {
// Obtain the projection used by the $** index's key generator.
- const auto* pathProj =
- static_cast<WildcardAccessMethod*>(i.accessMethod(descriptor))->getProjectionExec();
+ const auto* pathProj = static_cast<WildcardAccessMethod*>(iam)->getProjectionExec();
// If the projection is an exclusion, then we must check the new document's keys on all
// updates, since we do not exhaustively know the set of paths to be indexed.
if (pathProj->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection) {
@@ -142,7 +144,6 @@ void CollectionInfoCacheImpl::computeIndexKeys(OperationContext* opCtx) {
}
// handle partial indexes
- const IndexCatalogEntry* entry = i.catalogEntry(descriptor);
const MatchExpression* filter = entry->getFilterExpression();
if (filter) {
stdx::unordered_set<std::string> paths;
@@ -199,11 +200,10 @@ void CollectionInfoCacheImpl::updatePlanCacheIndexEntries(OperationContext* opCt
// TODO We shouldn't need to include unfinished indexes, but we must here because the index
// catalog may be in an inconsistent state. SERVER-18346.
const bool includeUnfinishedIndexes = true;
- IndexCatalog::IndexIterator ii =
+ std::unique_ptr<IndexCatalog::IndexIterator> ii =
_collection->getIndexCatalog()->getIndexIterator(opCtx, includeUnfinishedIndexes);
- while (ii.more()) {
- const IndexDescriptor* desc = ii.next();
- const IndexCatalogEntry* ice = ii.catalogEntry(desc);
+ while (ii->more()) {
+ const IndexCatalogEntry* ice = ii->next();
indexEntries.emplace_back(indexEntryFromIndexCatalogEntry(opCtx, *ice));
}
@@ -215,10 +215,10 @@ void CollectionInfoCacheImpl::init(OperationContext* opCtx) {
invariant(opCtx->lockState()->isCollectionLockedForMode(_collection->ns().ns(), MODE_X));
const bool includeUnfinishedIndexes = false;
- IndexCatalog::IndexIterator ii =
+ std::unique_ptr<IndexCatalog::IndexIterator> ii =
_collection->getIndexCatalog()->getIndexIterator(opCtx, includeUnfinishedIndexes);
- while (ii.more()) {
- const IndexDescriptor* desc = ii.next();
+ while (ii->more()) {
+ const IndexDescriptor* desc = ii->next()->descriptor();
_indexUsageTracker.registerIndex(desc->indexName(), desc->keyPattern());
}
diff --git a/src/mongo/db/catalog/collection_mock.h b/src/mongo/db/catalog/collection_mock.h
index e8b4e0013f9..35c0a9f8b55 100644
--- a/src/mongo/db/catalog/collection_mock.h
+++ b/src/mongo/db/catalog/collection_mock.h
@@ -290,5 +290,9 @@ public:
OptionalCollectionUUID uuid() const {
std::abort();
}
+
+ void indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) {
+ std::abort();
+ }
};
} // namespace mongo
diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp
index 92bb0636f57..bcf513d3304 100644
--- a/src/mongo/db/catalog/database_impl.cpp
+++ b/src/mongo/db/catalog/database_impl.cpp
@@ -578,8 +578,8 @@ Status DatabaseImpl::dropCollectionEvenIfSystem(OperationContext* opCtx,
// Determine which index names are too long. Since we don't have the collection drop optime
// at this time, use the maximum optime to check the index names.
auto longDpns = fullns.makeDropPendingNamespace(repl::OpTime::max());
- while (indexIter.more()) {
- auto index = indexIter.next();
+ while (indexIter->more()) {
+ auto index = indexIter->next()->descriptor();
auto status = longDpns.checkLengthForRename(index->indexName().size());
if (!status.isOK()) {
indexesToDrop.push_back(index);
diff --git a/src/mongo/db/catalog/index_build_block.cpp b/src/mongo/db/catalog/index_build_block.cpp
new file mode 100644
index 00000000000..016890de388
--- /dev/null
+++ b/src/mongo/db/catalog/index_build_block.cpp
@@ -0,0 +1,178 @@
+/**
+ * Copyright (C) 2018-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/catalog/index_catalog_impl.h"
+
+#include <vector>
+
+#include "mongo/db/catalog/collection.h"
+#include "mongo/db/catalog/collection_catalog_entry.h"
+#include "mongo/db/catalog_raii.h"
+#include "mongo/db/db_raii.h"
+#include "mongo/db/logical_clock.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/util/assert_util.h"
+#include "mongo/util/log.h"
+
+namespace mongo {
+IndexCatalogImpl::IndexBuildBlock::IndexBuildBlock(OperationContext* opCtx,
+ Collection* collection,
+ IndexCatalogImpl* catalog,
+ const BSONObj& spec)
+ : _collection(collection),
+ _catalog(catalog),
+ _ns(_collection->ns().ns()),
+ _spec(spec.getOwned()),
+ _entry(nullptr),
+ _opCtx(opCtx) {
+ invariant(collection);
+}
+
+Status IndexCatalogImpl::IndexBuildBlock::init() {
+ // Being in a WUOW means all timestamping responsibility can be pushed up to the caller.
+ invariant(_opCtx->lockState()->inAWriteUnitOfWork());
+
+ // need this first for names, etc...
+ BSONObj keyPattern = _spec.getObjectField("key");
+ auto descriptor = stdx::make_unique<IndexDescriptor>(
+ _collection, IndexNames::findPluginName(keyPattern), _spec);
+
+ _indexName = descriptor->indexName();
+ _indexNamespace = descriptor->indexNamespace();
+
+ bool isBackgroundIndex = _spec["background"].trueValue();
+ bool isBackgroundSecondaryBuild = false;
+ if (auto replCoord = repl::ReplicationCoordinator::get(_opCtx)) {
+ isBackgroundSecondaryBuild =
+ replCoord->getReplicationMode() == repl::ReplicationCoordinator::Mode::modeReplSet &&
+ replCoord->getMemberState().secondary() && isBackgroundIndex;
+ }
+
+ // Setup on-disk structures.
+ Status status = _collection->getCatalogEntry()->prepareForIndexBuild(
+ _opCtx, descriptor.get(), isBackgroundSecondaryBuild);
+ if (!status.isOK())
+ return status;
+
+ auto* const descriptorPtr = descriptor.get();
+ const bool initFromDisk = false;
+ const bool isReadyIndex = false;
+ _entry = _catalog->_setupInMemoryStructures(
+ _opCtx, std::move(descriptor), initFromDisk, isReadyIndex);
+
+ if (isBackgroundIndex) {
+ _indexBuildInterceptor = stdx::make_unique<IndexBuildInterceptor>();
+ _indexBuildInterceptor->ensureSideWritesCollectionExists(_opCtx);
+ _entry->setIndexBuildInterceptor(_indexBuildInterceptor.get());
+
+ _opCtx->recoveryUnit()->onCommit(
+ [ opCtx = _opCtx, entry = _entry, collection = _collection ](
+ boost::optional<Timestamp> commitTime) {
+ // This will prevent the unfinished index from being visible on index iterators.
+ if (commitTime) {
+ entry->setMinimumVisibleSnapshot(commitTime.get());
+ collection->setMinimumVisibleSnapshot(commitTime.get());
+ }
+ });
+ }
+
+ // Register this index with the CollectionInfoCache to regenerate the cache. This way, updates
+ // occurring while an index is being built in the background will be aware of whether or not
+ // they need to modify any indexes.
+ _collection->infoCache()->addedIndex(_opCtx, descriptorPtr);
+
+ return Status::OK();
+}
+
+IndexCatalogImpl::IndexBuildBlock::~IndexBuildBlock() {
+ // Don't need to call fail() here, as rollback will clean everything up for us.
+}
+
+void IndexCatalogImpl::IndexBuildBlock::fail() {
+ // Being in a WUOW means all timestamping responsibility can be pushed up to the caller.
+ invariant(_opCtx->lockState()->inAWriteUnitOfWork());
+ fassert(17204, _collection->ok()); // defensive
+
+ NamespaceString ns(_indexNamespace);
+ invariant(_opCtx->lockState()->isDbLockedForMode(ns.db(), MODE_X));
+
+ if (_entry) {
+ invariant(_catalog->_dropIndex(_opCtx, _entry).isOK());
+ if (_indexBuildInterceptor) {
+ _indexBuildInterceptor->removeSideWritesCollection(_opCtx);
+ _entry->setIndexBuildInterceptor(nullptr);
+ }
+ } else {
+ _catalog->_deleteIndexFromDisk(_opCtx, _indexName, _indexNamespace);
+ }
+}
+
+void IndexCatalogImpl::IndexBuildBlock::success() {
+ // Being in a WUOW means all timestamping responsibility can be pushed up to the caller.
+ invariant(_opCtx->lockState()->inAWriteUnitOfWork());
+
+ fassert(17207, _collection->ok());
+ NamespaceString ns(_indexNamespace);
+ invariant(_opCtx->lockState()->isDbLockedForMode(ns.db(), MODE_X));
+
+ _collection->indexBuildSuccess(_opCtx, _entry);
+
+ OperationContext* opCtx = _opCtx;
+ LOG(2) << "marking index " << _indexName << " as ready in snapshot id "
+ << opCtx->recoveryUnit()->getSnapshotId();
+ _opCtx->recoveryUnit()->onCommit(
+ [ opCtx, entry = _entry, collection = _collection ](boost::optional<Timestamp> commitTime) {
+ // Note: this runs after the WUOW commits but before we release our X lock on the
+ // collection. This means that any snapshot created after this must include the full
+ // index, and no one can try to read this index before we set the visibility.
+ if (!commitTime) {
+ // The end of background index builds on secondaries does not get a commit
+ // timestamp. We use the cluster time since it's guaranteed to be greater than the
+ // time of the index build. It is possible the cluster time could be in the future,
+ // and we will need to do another write to reach the minimum visible snapshot.
+ commitTime = LogicalClock::getClusterTimeForReplicaSet(opCtx).asTimestamp();
+ }
+ entry->setMinimumVisibleSnapshot(commitTime.get());
+ // We must also set the minimum visible snapshot on the collection like during init().
+ // This prevents reads in the past from reading inconsistent metadata. We should be
+ // able to remove this when the catalog is versioned.
+ collection->setMinimumVisibleSnapshot(commitTime.get());
+ });
+
+ _entry->setIsReady(true);
+ if (_indexBuildInterceptor) {
+ _indexBuildInterceptor->removeSideWritesCollection(_opCtx);
+ _entry->setIndexBuildInterceptor(nullptr);
+ }
+}
+} // namespace mongo
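
For readers following the relocated IndexBuildBlock, a rough lifecycle sketch under stated assumptions (the caller, the single write unit of work, and the collection/indexCatalog/spec variables are hypothetical; real callers drive this across several units of work):

    WriteUnitOfWork wuow(opCtx);
    IndexCatalogImpl::IndexBuildBlock indexBuildBlock(opCtx, collection, indexCatalog, spec);
    // init() writes the on-disk metadata, registers the in-memory entry as a
    // "building" index, and, for background builds, attaches an IndexBuildInterceptor
    // backed by a side-writes table.
    Status status = indexBuildBlock.init();
    if (!status.isOK()) {
        return status;  // rolling back the WUOW undoes init(); the destructor does not call fail()
    }
    // ... populate the index ...
    // success() hands the entry back through Collection::indexBuildSuccess(), marks it
    // ready, and removes the interceptor's side-writes collection.
    indexBuildBlock.success();
    wuow.commit();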
diff --git a/src/mongo/db/catalog/index_catalog.cpp b/src/mongo/db/catalog/index_catalog.cpp
index ab704d555e4..7f68b7ef77c 100644
--- a/src/mongo/db/catalog/index_catalog.cpp
+++ b/src/mongo/db/catalog/index_catalog.cpp
@@ -37,69 +37,66 @@
namespace mongo {
+using IndexIterator = IndexCatalog::IndexIterator;
+using ReadyIndexesIterator = IndexCatalog::ReadyIndexesIterator;
+using AllIndexesIterator = IndexCatalog::AllIndexesIterator;
-IndexCatalog::IndexIterator::IndexIterator(OperationContext* opCtx,
- IndexCatalogEntryContainer::const_iterator beginIterator,
- IndexCatalogEntryContainer::const_iterator endIterator,
- bool includeUnfinishedIndexes)
- : _includeUnfinishedIndexes(includeUnfinishedIndexes),
- _opCtx(opCtx),
- _iterator(beginIterator),
- _endIterator(endIterator),
- _start(true),
- _prev(nullptr),
- _next(nullptr) {}
-
-bool IndexCatalog::IndexIterator::more() {
+bool IndexIterator::more() {
if (_start) {
- _advance();
+ _next = _advance();
_start = false;
}
return _next != nullptr;
}
-IndexDescriptor* IndexCatalog::IndexIterator::next() {
+IndexCatalogEntry* IndexIterator::next() {
if (!more())
return nullptr;
_prev = _next;
- _advance();
- return _prev->descriptor();
-}
-
-IndexAccessMethod* IndexCatalog::IndexIterator::accessMethod(const IndexDescriptor* desc) {
- invariant(desc == _prev->descriptor());
- return _prev->accessMethod();
-}
-
-IndexCatalogEntry* IndexCatalog::IndexIterator::catalogEntry(const IndexDescriptor* desc) {
- invariant(desc == _prev->descriptor());
+ _next = _advance();
return _prev;
}
-void IndexCatalog::IndexIterator::_advance() {
- _next = nullptr;
+ReadyIndexesIterator::ReadyIndexesIterator(OperationContext* const opCtx,
+ IndexCatalogEntryContainer::const_iterator beginIterator,
+ IndexCatalogEntryContainer::const_iterator endIterator)
+ : _opCtx(opCtx), _iterator(beginIterator), _endIterator(endIterator) {}
+IndexCatalogEntry* ReadyIndexesIterator::_advance() {
while (_iterator != _endIterator) {
IndexCatalogEntry* entry = _iterator->get();
++_iterator;
- if (!_includeUnfinishedIndexes) {
- if (auto minSnapshot = entry->getMinimumVisibleSnapshot()) {
- if (auto mySnapshot = _opCtx->recoveryUnit()->getPointInTimeReadTimestamp()) {
- if (mySnapshot < minSnapshot) {
- // This index isn't finished in my snapshot.
- continue;
- }
+ if (auto minSnapshot = entry->getMinimumVisibleSnapshot()) {
+ if (auto mySnapshot = _opCtx->recoveryUnit()->getPointInTimeReadTimestamp()) {
+ if (mySnapshot < minSnapshot) {
+ // This index isn't finished in my snapshot.
+ continue;
}
}
-
- if (!entry->isReady(_opCtx))
- continue;
}
- _next = entry;
- return;
+ return entry;
}
+
+ return nullptr;
+}
+
+AllIndexesIterator::AllIndexesIterator(
+ OperationContext* const opCtx, std::unique_ptr<std::vector<IndexCatalogEntry*>> ownedContainer)
+ : _opCtx(opCtx), _ownedContainer(std::move(ownedContainer)) {
+ // Explicitly order calls onto the ownedContainer with respect to its move.
+ _iterator = _ownedContainer->begin();
+ _endIterator = _ownedContainer->end();
}
+IndexCatalogEntry* AllIndexesIterator::_advance() {
+ if (_iterator == _endIterator) {
+ return nullptr;
+ }
+
+ IndexCatalogEntry* entry = *_iterator;
+ ++_iterator;
+ return entry;
+}
} // namespace mongo
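
The refactor above turns IndexIterator into a small template-method base: more() and next() keep the _start/_prev/_next bookkeeping, and concrete iterators only implement _advance(). As an illustration, a new iterator (this subclass is hypothetical, not part of the patch) would look like:

    class SingleEntryIterator : public IndexCatalog::IndexIterator {
    public:
        explicit SingleEntryIterator(IndexCatalogEntry* entry) : _entry(entry) {}

    private:
        IndexCatalogEntry* _advance() override {
            // Yield the entry once, then report exhaustion with nullptr.
            IndexCatalogEntry* next = _entry;
            _entry = nullptr;
            return next;
        }

        IndexCatalogEntry* _entry;
    };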
diff --git a/src/mongo/db/catalog/index_catalog.h b/src/mongo/db/catalog/index_catalog.h
index 73667f5fa36..eadb7a56733 100644
--- a/src/mongo/db/catalog/index_catalog.h
+++ b/src/mongo/db/catalog/index_catalog.h
@@ -78,47 +78,54 @@ class IndexCatalog {
public:
class IndexIterator {
public:
- explicit IndexIterator(OperationContext* const opCtx,
- IndexCatalogEntryContainer::const_iterator beginIterator,
- IndexCatalogEntryContainer::const_iterator endIterator,
- const bool includeUnfinishedIndexes);
-
- public:
- inline ~IndexIterator() = default;
-
- inline IndexIterator(const IndexIterator& copy) = default;
- inline IndexIterator& operator=(const IndexIterator& copy) = default;
-
- inline IndexIterator(IndexIterator&& copy) = default;
- inline IndexIterator& operator=(IndexIterator&& copy) = default;
-
+ virtual ~IndexIterator() = default;
bool more();
+ IndexCatalogEntry* next();
- IndexDescriptor* next();
-
+ protected:
/**
- * Returns the access method for the last return IndexDescriptor.
+ * Advances the underlying iterator and returns the next index entry. Returns nullptr when
+ * the iterator is exhausted.
*/
- IndexAccessMethod* accessMethod(const IndexDescriptor* const desc);
-
- /**
- * Returns the IndexCatalogEntry for the last return IndexDescriptor.
- */
- IndexCatalogEntry* catalogEntry(const IndexDescriptor* const desc);
+ virtual IndexCatalogEntry* _advance() = 0;
private:
- void _advance();
+ bool _start = true;
+ IndexCatalogEntry* _prev = nullptr;
+ IndexCatalogEntry* _next = nullptr;
+ };
+
+ class ReadyIndexesIterator : public IndexIterator {
+ public:
+ ReadyIndexesIterator(OperationContext* const opCtx,
+ IndexCatalogEntryContainer::const_iterator beginIterator,
+ IndexCatalogEntryContainer::const_iterator endIterator);
- bool _includeUnfinishedIndexes;
+ private:
+ IndexCatalogEntry* _advance() override;
OperationContext* const _opCtx;
IndexCatalogEntryContainer::const_iterator _iterator;
IndexCatalogEntryContainer::const_iterator _endIterator;
+ };
- bool _start; // only true before we've called next() or more()
+ class AllIndexesIterator : public IndexIterator {
+ public:
+ /**
+ * `ownedContainer` is a container whose lifetime the begin and end iterators depend
+ * on. If the caller will keep control of the container for the entire iterator lifetime,
+ * it should pass in a null value.
+ */
+ AllIndexesIterator(OperationContext* const opCtx,
+ std::unique_ptr<std::vector<IndexCatalogEntry*>> ownedContainer);
- IndexCatalogEntry* _prev;
- IndexCatalogEntry* _next;
+ private:
+ IndexCatalogEntry* _advance() override;
+
+ OperationContext* const _opCtx;
+ std::vector<IndexCatalogEntry*>::const_iterator _iterator;
+ std::vector<IndexCatalogEntry*>::const_iterator _endIterator;
+ std::unique_ptr<std::vector<IndexCatalogEntry*>> _ownedContainer;
};
/**
@@ -170,7 +177,6 @@ public:
virtual const BSONObj& getSpec() const = 0;
};
-public:
IndexCatalog() = default;
virtual ~IndexCatalog() = default;
@@ -291,8 +297,8 @@ public:
/**
* Returns an iterator for the index descriptors in this IndexCatalog.
*/
- virtual IndexIterator getIndexIterator(OperationContext* const opCtx,
- const bool includeUnfinishedIndexes) const = 0;
+ virtual std::unique_ptr<IndexIterator> getIndexIterator(
+ OperationContext* const opCtx, const bool includeUnfinishedIndexes) const = 0;
// ---- index set modifiers ------
@@ -396,6 +402,8 @@ public:
InsertDeleteOptions* options) const = 0;
virtual void setNs(NamespaceString ns) = 0;
+
+ virtual void indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) = 0;
};
} // namespace mongo
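
Regarding the ownedContainer contract documented on AllIndexesIterator, the intended usage mirrors IndexCatalogImpl::getIndexIterator() later in this patch; in this sketch, readyIndexes and buildingIndexes stand in for the catalog's two IndexCatalogEntryContainer members:

    auto allIndexes = std::make_unique<std::vector<IndexCatalogEntry*>>();
    for (auto it = readyIndexes.begin(); it != readyIndexes.end(); ++it) {
        allIndexes->push_back(it->get());
    }
    for (auto it = buildingIndexes.begin(); it != buildingIndexes.end(); ++it) {
        allIndexes->push_back(it->get());
    }
    // Passing ownership of the vector guarantees the backing storage outlives the iterator.
    auto iter = std::make_unique<IndexCatalog::AllIndexesIterator>(opCtx, std::move(allIndexes));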
diff --git a/src/mongo/db/catalog/index_catalog_entry.h b/src/mongo/db/catalog/index_catalog_entry.h
index 3b43a07ee1c..eb54f065e7b 100644
--- a/src/mongo/db/catalog/index_catalog_entry.h
+++ b/src/mongo/db/catalog/index_catalog_entry.h
@@ -50,6 +50,7 @@ class CollectionCatalogEntry;
class CollectionInfoCache;
class HeadManager;
class IndexAccessMethod;
+class IndexBuildInterceptor;
class IndexDescriptor;
class MatchExpression;
class OperationContext;
@@ -74,6 +75,12 @@ public:
virtual const IndexAccessMethod* accessMethod() const = 0;
+ virtual bool isBuilding() const = 0;
+
+ virtual IndexBuildInterceptor* indexBuildInterceptor() = 0;
+
+ virtual void setIndexBuildInterceptor(IndexBuildInterceptor* interceptor) = 0;
+
virtual const Ordering& ordering() const = 0;
virtual const MatchExpression* getFilterExpression() const = 0;
diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.h b/src/mongo/db/catalog/index_catalog_entry_impl.h
index a4d800a8e5c..a5a53f2dff2 100644
--- a/src/mongo/db/catalog/index_catalog_entry_impl.h
+++ b/src/mongo/db/catalog/index_catalog_entry_impl.h
@@ -90,6 +90,18 @@ public:
return _accessMethod.get();
}
+ bool isBuilding() const final {
+ return _indexBuildInterceptor != nullptr;
+ }
+
+ IndexBuildInterceptor* indexBuildInterceptor() final {
+ return _indexBuildInterceptor;
+ }
+
+ void setIndexBuildInterceptor(IndexBuildInterceptor* interceptor) final {
+ _indexBuildInterceptor = interceptor;
+ }
+
const Ordering& ordering() const final {
return _ordering;
}
@@ -201,6 +213,8 @@ private:
std::unique_ptr<IndexAccessMethod> _accessMethod;
+ IndexBuildInterceptor* _indexBuildInterceptor = nullptr; // not owned here
+
// Owned here.
std::unique_ptr<HeadManager> _headManager;
std::unique_ptr<CollatorInterface> _collator;
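
With isBuilding() and the interceptor accessors in place on the entry, the write path later in this patch dispatches between the index proper and the interceptor's temp table. A condensed sketch of that dispatch (variable names follow the _indexFilteredRecords() change below; the surrounding loop, options setup, and key counters are omitted, and in this commit the branch is still gated off by a hard-coded hybridBuildsEnabled = false):

    Status status = Status::OK();
    if (index->isBuilding()) {
        // Divert the write into the interceptor's side-writes (temp) table.
        int64_t inserted = 0;
        status = index->indexBuildInterceptor()->sideWrite(opCtx,
                                                           index->accessMethod(),
                                                           bsonRecord.docPtr,
                                                           bsonRecord.id,
                                                           IndexBuildInterceptor::Op::kInsert,
                                                           &inserted);
    } else {
        // Ready index: write directly through the access method.
        InsertResult result;
        status = index->accessMethod()->insert(
            opCtx, *bsonRecord.docPtr, bsonRecord.id, options, &result);
    }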
diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp
index 3e2ce124cd5..837ccfd7b4a 100644
--- a/src/mongo/db/catalog/index_catalog_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_impl.cpp
@@ -119,8 +119,9 @@ Status IndexCatalogImpl::init(OperationContext* opCtx) {
auto descriptor =
stdx::make_unique<IndexDescriptor>(_collection, _getAccessMethodName(keyPattern), spec);
const bool initFromDisk = true;
+ const bool isReadyIndex = true;
IndexCatalogEntry* entry =
- _setupInMemoryStructures(opCtx, std::move(descriptor), initFromDisk);
+ _setupInMemoryStructures(opCtx, std::move(descriptor), initFromDisk, isReadyIndex);
fassert(17340, entry->isReady(opCtx));
}
@@ -137,7 +138,10 @@ Status IndexCatalogImpl::init(OperationContext* opCtx) {
}
IndexCatalogEntry* IndexCatalogImpl::_setupInMemoryStructures(
- OperationContext* opCtx, std::unique_ptr<IndexDescriptor> descriptor, bool initFromDisk) {
+ OperationContext* opCtx,
+ std::unique_ptr<IndexDescriptor> descriptor,
+ bool initFromDisk,
+ bool isReadyIndex) {
Status status = _isSpecOk(opCtx, descriptor->infoObj());
if (!status.isOK() && status != ErrorCodes::IndexAlreadyExists) {
severe() << "Found an invalid index " << descriptor->infoObj() << " on the "
@@ -156,20 +160,26 @@ IndexCatalogEntry* IndexCatalogImpl::_setupInMemoryStructures(
entry->init(std::move(accessMethod));
IndexCatalogEntry* save = entry.get();
- _entries.add(entry.release());
+ if (isReadyIndex) {
+ _readyIndexes.add(entry.release());
+ } else {
+ _buildingIndexes.add(entry.release());
+ }
if (!initFromDisk) {
- opCtx->recoveryUnit()->onRollback([ this, opCtx, descriptor = descriptorPtr ] {
- // Need to preserve indexName as descriptor no longer exists after remove().
- const std::string indexName = descriptor->indexName();
- _entries.remove(descriptor);
- _collection->infoCache()->droppedIndex(opCtx, indexName);
- });
+ opCtx->recoveryUnit()->onRollback(
+ [ this, opCtx, isReadyIndex, descriptor = descriptorPtr ] {
+ // Need to preserve indexName as descriptor no longer exists after remove().
+ const std::string indexName = descriptor->indexName();
+ if (isReadyIndex) {
+ _readyIndexes.remove(descriptor);
+ } else {
+ _buildingIndexes.remove(descriptor);
+ }
+ _collection->infoCache()->droppedIndex(opCtx, indexName);
+ });
}
- invariant(save == _entries.find(descriptorPtr));
- invariant(save == _entries.find(descriptorPtr->indexName()));
-
return save;
}
@@ -195,10 +205,31 @@ Status IndexCatalogImpl::checkUnfinished() const {
<< _collection->ns().ns());
}
-IndexCatalog::IndexIterator IndexCatalogImpl::getIndexIterator(
+std::unique_ptr<IndexCatalog::IndexIterator> IndexCatalogImpl::getIndexIterator(
OperationContext* const opCtx, const bool includeUnfinishedIndexes) const {
- return IndexIterator(opCtx, _entries.begin(), _entries.end(), includeUnfinishedIndexes);
-};
+ if (!includeUnfinishedIndexes) {
+ // If the caller only wants the ready indexes, we return an iterator over the catalog's
+ // ready indexes vector. When the user advances this iterator, it will filter out any
+ // indexes that were not ready at the OperationContext's read timestamp.
+ return std::make_unique<ReadyIndexesIterator>(
+ opCtx, _readyIndexes.begin(), _readyIndexes.end());
+ }
+
+ // If the caller wants all indexes, for simplicity of implementation, we copy the pointers to
+ // a new vector. The vector's ownership is passed to the iterator. The query code path from an
+ // external client is not expected to hit this case so the cost isn't paid by the important
+ // code path.
+ auto allIndexes = std::make_unique<std::vector<IndexCatalogEntry*>>();
+ for (auto it = _readyIndexes.begin(); it != _readyIndexes.end(); ++it) {
+ allIndexes->push_back(it->get());
+ }
+
+ for (auto it = _buildingIndexes.begin(); it != _buildingIndexes.end(); ++it) {
+ allIndexes->push_back(it->get());
+ }
+
+ return std::make_unique<AllIndexesIterator>(opCtx, std::move(allIndexes));
+}
string IndexCatalogImpl::_getAccessMethodName(const BSONObj& keyPattern) const {
string pluginName = IndexNames::findPluginName(keyPattern);
@@ -272,7 +303,7 @@ StatusWith<BSONObj> IndexCatalogImpl::createIndexOnEmptyCollection(OperationCont
invariant(entry);
IndexDescriptor* descriptor = entry->descriptor();
invariant(descriptor);
- invariant(entry == _entries.find(descriptor));
+ invariant(entry == _buildingIndexes.find(descriptor));
status = entry->accessMethod()->initializeAsEmpty(opCtx);
if (!status.isOK())
@@ -285,131 +316,6 @@ StatusWith<BSONObj> IndexCatalogImpl::createIndexOnEmptyCollection(OperationCont
return spec;
}
-IndexCatalogImpl::IndexBuildBlock::IndexBuildBlock(OperationContext* opCtx,
- Collection* collection,
- IndexCatalogImpl* catalog,
- const BSONObj& spec)
- : _collection(collection),
- _catalog(catalog),
- _ns(_collection->ns().ns()),
- _spec(spec.getOwned()),
- _entry(nullptr),
- _opCtx(opCtx) {
- invariant(collection);
-}
-
-Status IndexCatalogImpl::IndexBuildBlock::init() {
- // Being in a WUOW means all timestamping responsibility can be pushed up to the caller.
- invariant(_opCtx->lockState()->inAWriteUnitOfWork());
-
- // need this first for names, etc...
- BSONObj keyPattern = _spec.getObjectField("key");
- auto descriptor = stdx::make_unique<IndexDescriptor>(
- _collection, IndexNames::findPluginName(keyPattern), _spec);
-
- _indexName = descriptor->indexName();
- _indexNamespace = descriptor->indexNamespace();
-
- bool isBackgroundIndex = _spec["background"].trueValue();
- bool isBackgroundSecondaryBuild = false;
- if (auto replCoord = repl::ReplicationCoordinator::get(_opCtx)) {
- isBackgroundSecondaryBuild =
- replCoord->getReplicationMode() == repl::ReplicationCoordinator::Mode::modeReplSet &&
- replCoord->getMemberState().secondary() && isBackgroundIndex;
- }
-
- // Setup on-disk structures.
- Status status = _collection->getCatalogEntry()->prepareForIndexBuild(
- _opCtx, descriptor.get(), isBackgroundSecondaryBuild);
- if (!status.isOK())
- return status;
-
- auto* const descriptorPtr = descriptor.get();
- const bool initFromDisk = false;
- _entry = _catalog->_setupInMemoryStructures(_opCtx, std::move(descriptor), initFromDisk);
-
- if (isBackgroundIndex) {
- _opCtx->recoveryUnit()->onCommit(
- [ opCtx = _opCtx, entry = _entry, collection = _collection ](
- boost::optional<Timestamp> commitTime) {
- // This will prevent the unfinished index from being visible on index iterators.
- if (commitTime) {
- entry->setMinimumVisibleSnapshot(commitTime.get());
- collection->setMinimumVisibleSnapshot(commitTime.get());
- }
- });
- }
-
- // Register this index with the CollectionInfoCache to regenerate the cache. This way, updates
- // occurring while an index is being build in the background will be aware of whether or not
- // they need to modify any indexes.
- _collection->infoCache()->addedIndex(_opCtx, descriptorPtr);
-
- return Status::OK();
-}
-
-IndexCatalogImpl::IndexBuildBlock::~IndexBuildBlock() {
- // Don't need to call fail() here, as rollback will clean everything up for us.
-}
-
-void IndexCatalogImpl::IndexBuildBlock::fail() {
- // Being in a WUOW means all timestamping responsibility can be pushed up to the caller.
- invariant(_opCtx->lockState()->inAWriteUnitOfWork());
- fassert(17204, _collection->ok()); // defensive
-
- NamespaceString ns(_indexNamespace);
- invariant(_opCtx->lockState()->isDbLockedForMode(ns.db(), MODE_X));
-
- IndexCatalogEntry* entry = _catalog->_entries.find(_indexName);
- invariant(entry == _entry);
-
- if (entry) {
- _catalog->_dropIndex(_opCtx, entry).transitional_ignore();
- } else {
- _catalog->_deleteIndexFromDisk(_opCtx, _indexName, _indexNamespace);
- }
-}
-
-void IndexCatalogImpl::IndexBuildBlock::success() {
- // Being in a WUOW means all timestamping responsibility can be pushed up to the caller.
- invariant(_opCtx->lockState()->inAWriteUnitOfWork());
-
- fassert(17207, _collection->ok());
- NamespaceString ns(_indexNamespace);
- invariant(_opCtx->lockState()->isDbLockedForMode(ns.db(), MODE_X));
-
- _collection->getCatalogEntry()->indexBuildSuccess(_opCtx, _indexName);
-
- IndexDescriptor* desc = _catalog->findIndexByName(_opCtx, _indexName, true);
- fassert(17330, desc);
- IndexCatalogEntry* entry = _catalog->_entries.find(desc);
- fassert(17331, entry && entry == _entry);
-
- OperationContext* opCtx = _opCtx;
- LOG(2) << "marking index " << _indexName << " as ready in snapshot id "
- << opCtx->recoveryUnit()->getSnapshotId();
- _opCtx->recoveryUnit()->onCommit(
- [ opCtx, entry, collection = _collection ](boost::optional<Timestamp> commitTime) {
- // Note: this runs after the WUOW commits but before we release our X lock on the
- // collection. This means that any snapshot created after this must include the full
- // index, and no one can try to read this index before we set the visibility.
- if (!commitTime) {
- // The end of background index builds on secondaries does not get a commit
- // timestamp. We use the cluster time since it's guaranteed to be greater than the
- // time of the index build. It is possible the cluster time could be in the future,
- // and we will need to do another write to reach the minimum visible snapshot.
- commitTime = LogicalClock::getClusterTimeForReplicaSet(opCtx).asTimestamp();
- }
- entry->setMinimumVisibleSnapshot(commitTime.get());
- // We must also set the minimum visible snapshot on the collection like during init().
- // This prevents reads in the past from reading inconsistent metadata. We should be
- // able to remove this when the catalog is versioned.
- collection->setMinimumVisibleSnapshot(commitTime.get());
- });
-
- entry->setIsReady(true);
-}
-
namespace {
// While technically recursive, only current possible with 2 levels.
Status _checkValidFilterExpressions(MatchExpression* expression, int level = 0) {
@@ -519,6 +425,7 @@ Status IndexCatalogImpl::_isSpecOk(OperationContext* opCtx, const BSONObj& spec)
// Drop pending collections are internal to the server and will not be exported to another
// storage engine. The indexes contained in these collections are not subject to the same
// namespace length constraints as the ones created by users.
+ //
// Index names do not limit the maximum allowable length of the target namespace under FCV 4.2
// and above.
const auto checkIndexNamespace =
@@ -830,13 +737,14 @@ void IndexCatalogImpl::dropAllIndexes(OperationContext* opCtx,
bool haveIdIndex = false;
+ invariant(_buildingIndexes.size() == 0);
vector<string> indexNamesToDrop;
{
int seen = 0;
- IndexIterator ii = getIndexIterator(opCtx, true);
- while (ii.more()) {
+ std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, true);
+ while (ii->more()) {
seen++;
- IndexDescriptor* desc = ii.next();
+ IndexDescriptor* desc = ii->next()->descriptor();
if (desc->isIdIndex() && includingIdIndex == false) {
haveIdIndex = true;
continue;
@@ -851,7 +759,7 @@ void IndexCatalogImpl::dropAllIndexes(OperationContext* opCtx,
IndexDescriptor* desc = findIndexByName(opCtx, indexName, true);
invariant(desc);
LOG(1) << "\t dropAllIndexes dropping: " << desc->toString();
- IndexCatalogEntry* entry = _entries.find(desc);
+ IndexCatalogEntry* entry = _readyIndexes.find(desc);
invariant(entry);
// If the onDrop function creates an oplog entry, it should run first so that the drop is
@@ -859,7 +767,7 @@ void IndexCatalogImpl::dropAllIndexes(OperationContext* opCtx,
if (onDropFn) {
onDropFn(desc);
}
- _dropIndex(opCtx, entry).transitional_ignore();
+ invariant(_dropIndex(opCtx, entry).isOK());
}
// verify state is sane post cleaning
@@ -871,19 +779,19 @@ void IndexCatalogImpl::dropAllIndexes(OperationContext* opCtx,
fassert(17324, numIndexesTotal(opCtx) == 1);
fassert(17325, numIndexesReady(opCtx) == 1);
fassert(17326, numIndexesInCollectionCatalogEntry == 1);
- fassert(17336, _entries.size() == 1);
+ fassert(17336, _readyIndexes.size() == 1);
} else {
- if (numIndexesTotal(opCtx) || numIndexesInCollectionCatalogEntry || _entries.size()) {
+ if (numIndexesTotal(opCtx) || numIndexesInCollectionCatalogEntry || _readyIndexes.size()) {
error() << "About to fassert - "
<< " numIndexesTotal(): " << numIndexesTotal(opCtx)
<< " numSystemIndexesEntries: " << numIndexesInCollectionCatalogEntry
- << " _entries.size(): " << _entries.size()
+ << " _readyIndexes.size(): " << _readyIndexes.size()
<< " indexNamesToDrop: " << indexNamesToDrop.size()
<< " haveIdIndex: " << haveIdIndex;
}
fassert(17327, numIndexesTotal(opCtx) == 0);
fassert(17328, numIndexesInCollectionCatalogEntry == 0);
- fassert(17337, _entries.size() == 0);
+ fassert(17337, _readyIndexes.size() == 0);
}
}
@@ -893,7 +801,10 @@ void IndexCatalogImpl::dropAllIndexes(OperationContext* opCtx, bool includingIdI
Status IndexCatalogImpl::dropIndex(OperationContext* opCtx, IndexDescriptor* desc) {
invariant(opCtx->lockState()->isCollectionLockedForMode(_collection->ns().toString(), MODE_X));
- IndexCatalogEntry* entry = _entries.find(desc);
+ BackgroundOperation::assertNoBgOpInProgForNs(_collection->ns().ns());
+ invariant(_buildingIndexes.size() == 0);
+
+ IndexCatalogEntry* entry = _readyIndexes.find(desc);
if (!entry)
return Status(ErrorCodes::InternalError, "cannot find index to delete");
@@ -901,8 +812,6 @@ Status IndexCatalogImpl::dropIndex(OperationContext* opCtx, IndexDescriptor* des
if (!entry->isReady(opCtx))
return Status(ErrorCodes::InternalError, "cannot delete not ready index");
- BackgroundOperation::assertNoBgOpInProgForNs(_collection->ns().ns());
-
return _dropIndex(opCtx, entry);
}
@@ -919,8 +828,8 @@ public:
// Ban reading from this collection on committed reads on snapshots before now.
if (!commitTime) {
// This is called when we refresh the index catalog entry, which does not always have
- // a commit timestamp. We use the cluster time since it's guaranteed to be greater than
- // the time of the index removal. It is possible the cluster time could be in the
+ // a commit timestamp. We use the cluster time since it's guaranteed to be greater
+ // than the time of the index removal. It is possible the cluster time could be in the
// future, and we will need to do another write to reach the minimum visible snapshot.
commitTime = LogicalClock::getClusterTimeForReplicaSet(_opCtx).asTimestamp();
}
@@ -957,8 +866,9 @@ Status IndexCatalogImpl::_dropIndex(OperationContext* opCtx, IndexCatalogEntry*
string indexNamespace = entry->descriptor()->indexNamespace();
// If any cursors could be using this index, invalidate them. Note that we do not use indexes
- // until they are ready, so we do not need to invalidate anything if the index fails while it is
- // being built.
+ // until they are ready, so we do not need to invalidate anything if the index fails while it
+ // is being built.
+ //
// TODO only kill cursors that are actually using the index rather than everything on this
// collection.
if (entry->isReady(opCtx)) {
@@ -969,16 +879,22 @@ Status IndexCatalogImpl::_dropIndex(OperationContext* opCtx, IndexCatalogEntry*
// --------- START REAL WORK ----------
audit::logDropIndex(&cc(), indexName, _collection->ns().ns());
- invariant(_entries.release(entry->descriptor()) == entry);
- opCtx->recoveryUnit()->registerChange(
- new IndexRemoveChange(opCtx, _collection, &_entries, entry));
+ auto released = _readyIndexes.release(entry->descriptor());
+ if (released) {
+ invariant(released == entry);
+ opCtx->recoveryUnit()->registerChange(
+ new IndexRemoveChange(opCtx, _collection, &_readyIndexes, entry));
+ } else {
+ invariant(_buildingIndexes.release(entry->descriptor()) == entry);
+ opCtx->recoveryUnit()->registerChange(
+ new IndexRemoveChange(opCtx, _collection, &_buildingIndexes, entry));
+ }
_collection->infoCache()->droppedIndex(opCtx, indexName);
entry = nullptr;
_deleteIndexFromDisk(opCtx, indexName, indexNamespace);
_checkMagic();
-
return Status::OK();
}
@@ -1009,14 +925,14 @@ vector<BSONObj> IndexCatalogImpl::getAndClearUnfinishedIndexes(OperationContext*
}
bool IndexCatalogImpl::isMultikey(OperationContext* opCtx, const IndexDescriptor* idx) {
- IndexCatalogEntry* entry = _entries.find(idx);
+ IndexCatalogEntry* entry = _readyIndexes.find(idx);
invariant(entry);
return entry->isMultikey(opCtx);
}
MultikeyPaths IndexCatalogImpl::getMultikeyPaths(OperationContext* opCtx,
const IndexDescriptor* idx) {
- IndexCatalogEntry* entry = _entries.find(idx);
+ IndexCatalogEntry* entry = _readyIndexes.find(idx);
invariant(entry);
return entry->getMultikeyPaths(opCtx);
}
@@ -1024,20 +940,20 @@ MultikeyPaths IndexCatalogImpl::getMultikeyPaths(OperationContext* opCtx,
// ---------------------------
bool IndexCatalogImpl::haveAnyIndexes() const {
- return _entries.size() != 0;
+ return _readyIndexes.size() > 0 || _buildingIndexes.size() > 0;
}
int IndexCatalogImpl::numIndexesTotal(OperationContext* opCtx) const {
- int count = _entries.size() + _unfinishedIndexes.size();
+ int count = _readyIndexes.size() + _buildingIndexes.size() + _unfinishedIndexes.size();
dassert(_collection->getCatalogEntry()->getTotalIndexCount(opCtx) == count);
return count;
}
int IndexCatalogImpl::numIndexesReady(OperationContext* opCtx) const {
std::vector<IndexDescriptor*> itIndexes;
- IndexIterator ii = getIndexIterator(opCtx, /*includeUnfinished*/ false);
- while (ii.more()) {
- itIndexes.push_back(ii.next());
+ std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, /*includeUnfinished*/ false);
+ while (ii->more()) {
+ itIndexes.push_back(ii->next()->descriptor());
}
DEV {
std::vector<std::string> completedIndexes;
@@ -1069,9 +985,9 @@ bool IndexCatalogImpl::haveIdIndex(OperationContext* opCtx) const {
}
IndexDescriptor* IndexCatalogImpl::findIdIndex(OperationContext* opCtx) const {
- IndexIterator ii = getIndexIterator(opCtx, false);
- while (ii.more()) {
- IndexDescriptor* desc = ii.next();
+ std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, false);
+ while (ii->more()) {
+ IndexDescriptor* desc = ii->next()->descriptor();
if (desc->isIdIndex())
return desc;
}
@@ -1081,9 +997,9 @@ IndexDescriptor* IndexCatalogImpl::findIdIndex(OperationContext* opCtx) const {
IndexDescriptor* IndexCatalogImpl::findIndexByName(OperationContext* opCtx,
StringData name,
bool includeUnfinishedIndexes) const {
- IndexIterator ii = getIndexIterator(opCtx, includeUnfinishedIndexes);
- while (ii.more()) {
- IndexDescriptor* desc = ii.next();
+ std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, includeUnfinishedIndexes);
+ while (ii->more()) {
+ IndexDescriptor* desc = ii->next()->descriptor();
if (desc->indexName() == name)
return desc;
}
@@ -1095,9 +1011,9 @@ IndexDescriptor* IndexCatalogImpl::findIndexByKeyPatternAndCollationSpec(
const BSONObj& key,
const BSONObj& collationSpec,
bool includeUnfinishedIndexes) const {
- IndexIterator ii = getIndexIterator(opCtx, includeUnfinishedIndexes);
- while (ii.more()) {
- IndexDescriptor* desc = ii.next();
+ std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, includeUnfinishedIndexes);
+ while (ii->more()) {
+ IndexDescriptor* desc = ii->next()->descriptor();
if (SimpleBSONObjComparator::kInstance.evaluate(desc->keyPattern() == key) &&
SimpleBSONObjComparator::kInstance.evaluate(
desc->infoObj().getObjectField("collation") == collationSpec)) {
@@ -1112,9 +1028,9 @@ void IndexCatalogImpl::findIndexesByKeyPattern(OperationContext* opCtx,
bool includeUnfinishedIndexes,
std::vector<IndexDescriptor*>* matches) const {
invariant(matches);
- IndexIterator ii = getIndexIterator(opCtx, includeUnfinishedIndexes);
- while (ii.more()) {
- IndexDescriptor* desc = ii.next();
+ std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, includeUnfinishedIndexes);
+ while (ii->more()) {
+ IndexDescriptor* desc = ii->next()->descriptor();
if (SimpleBSONObjComparator::kInstance.evaluate(desc->keyPattern() == key)) {
matches->push_back(desc);
}
@@ -1126,9 +1042,9 @@ IndexDescriptor* IndexCatalogImpl::findShardKeyPrefixedIndex(OperationContext* o
bool requireSingleKey) const {
IndexDescriptor* best = nullptr;
- IndexIterator ii = getIndexIterator(opCtx, false);
- while (ii.more()) {
- IndexDescriptor* desc = ii.next();
+ std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, false);
+ while (ii->more()) {
+ IndexDescriptor* desc = ii->next()->descriptor();
bool hasSimpleCollation = desc->infoObj().getObjectField("collation").isEmpty();
if (desc->isPartial())
@@ -1151,9 +1067,9 @@ void IndexCatalogImpl::findIndexByType(OperationContext* opCtx,
const string& type,
vector<IndexDescriptor*>& matches,
bool includeUnfinishedIndexes) const {
- IndexIterator ii = getIndexIterator(opCtx, includeUnfinishedIndexes);
- while (ii.more()) {
- IndexDescriptor* desc = ii.next();
+ std::unique_ptr<IndexIterator> ii = getIndexIterator(opCtx, includeUnfinishedIndexes);
+ while (ii->more()) {
+ IndexDescriptor* desc = ii->next()->descriptor();
if (IndexNames::findPluginName(desc->keyPattern()) == type) {
matches.push_back(desc);
}
@@ -1161,7 +1077,11 @@ void IndexCatalogImpl::findIndexByType(OperationContext* opCtx,
}
IndexAccessMethod* IndexCatalogImpl::getIndex(const IndexDescriptor* desc) {
- IndexCatalogEntry* entry = _entries.find(desc);
+ IndexCatalogEntry* entry = _readyIndexes.find(desc);
+ if (!entry) {
+ entry = _buildingIndexes.find(desc);
+ }
+
massert(17334, "cannot find index entry", entry);
return entry->accessMethod();
}
@@ -1171,7 +1091,11 @@ const IndexAccessMethod* IndexCatalogImpl::getIndex(const IndexDescriptor* desc)
}
const IndexCatalogEntry* IndexCatalogImpl::getEntry(const IndexDescriptor* desc) const {
- const IndexCatalogEntry* entry = _entries.find(desc);
+ const IndexCatalogEntry* entry = _readyIndexes.find(desc);
+ if (!entry) {
+ entry = _buildingIndexes.find(desc);
+ }
+
massert(17357, "cannot find index entry", entry);
return entry;
}
@@ -1181,6 +1105,7 @@ const IndexDescriptor* IndexCatalogImpl::refreshEntry(OperationContext* opCtx,
const IndexDescriptor* oldDesc) {
invariant(opCtx->lockState()->isCollectionLockedForMode(_collection->ns().ns(), MODE_X));
invariant(!BackgroundOperation::inProgForNs(_collection->ns()));
+ invariant(_buildingIndexes.size() == 0);
const std::string indexName = oldDesc->indexName();
invariant(_collection->getCatalogEntry()->isIndexReady(opCtx, indexName));
@@ -1194,9 +1119,10 @@ const IndexDescriptor* IndexCatalogImpl::refreshEntry(OperationContext* opCtx,
// Delete the IndexCatalogEntry that owns this descriptor. After deletion, 'oldDesc' is
// invalid and should not be dereferenced.
- IndexCatalogEntry* oldEntry = _entries.release(oldDesc);
+ IndexCatalogEntry* oldEntry = _readyIndexes.release(oldDesc);
+ invariant(oldEntry);
opCtx->recoveryUnit()->registerChange(
- new IndexRemoveChange(opCtx, _collection, &_entries, oldEntry));
+ new IndexRemoveChange(opCtx, _collection, &_readyIndexes, oldEntry));
// Ask the CollectionCatalogEntry for the new index spec.
BSONObj spec = _collection->getCatalogEntry()->getIndexSpec(opCtx, indexName).getOwned();
@@ -1206,8 +1132,9 @@ const IndexDescriptor* IndexCatalogImpl::refreshEntry(OperationContext* opCtx,
auto newDesc =
stdx::make_unique<IndexDescriptor>(_collection, _getAccessMethodName(keyPattern), spec);
const bool initFromDisk = false;
+ const bool isReadyIndex = true;
const IndexCatalogEntry* newEntry =
- _setupInMemoryStructures(opCtx, std::move(newDesc), initFromDisk);
+ _setupInMemoryStructures(opCtx, std::move(newDesc), initFromDisk, isReadyIndex);
invariant(newEntry->isReady(opCtx));
// Return the new descriptor.
@@ -1232,14 +1159,30 @@ Status IndexCatalogImpl::_indexFilteredRecords(OperationContext* opCtx,
return status;
}
- InsertResult result;
- Status status = index->accessMethod()->insert(
- opCtx, *bsonRecord.docPtr, bsonRecord.id, options, &result);
- if (!status.isOK())
- return status;
+ Status status = Status::OK();
+ const bool hybridBuildsEnabled = false;
+ if (hybridBuildsEnabled && index->isBuilding()) {
+ int64_t inserted;
+ status = index->indexBuildInterceptor()->sideWrite(opCtx,
+ index->accessMethod(),
+ bsonRecord.docPtr,
+ bsonRecord.id,
+ IndexBuildInterceptor::Op::kInsert,
+ &inserted);
+ if (keysInsertedOut) {
+ *keysInsertedOut += inserted;
+ }
+ } else {
+ InsertResult result;
+ status = index->accessMethod()->insert(
+ opCtx, *bsonRecord.docPtr, bsonRecord.id, options, &result);
+ if (keysInsertedOut) {
+ *keysInsertedOut += result.numInserted;
+ }
+ }
- if (keysInsertedOut) {
- *keysInsertedOut += result.numInserted;
+ if (!status.isOK()) {
+ return status;
}
}
return Status::OK();
@@ -1268,6 +1211,18 @@ Status IndexCatalogImpl::_unindexRecord(OperationContext* opCtx,
const RecordId& loc,
bool logIfError,
int64_t* keysDeletedOut) {
+ const bool hybridBuildsEnabled = false;
+ if (hybridBuildsEnabled && index->isBuilding()) {
+ int64_t removed;
+ auto status = index->indexBuildInterceptor()->sideWrite(
+ opCtx, index->accessMethod(), &obj, loc, IndexBuildInterceptor::Op::kDelete, &removed);
+ if (status.isOK() && keysDeletedOut) {
+ *keysDeletedOut += removed;
+ }
+
+ return status;
+ }
+
InsertDeleteOptions options;
prepareInsertDeleteOptions(opCtx, index->descriptor(), &options);
options.logIfError = logIfError;
@@ -1275,6 +1230,7 @@ Status IndexCatalogImpl::_unindexRecord(OperationContext* opCtx,
// On WiredTiger, we do blind unindexing of records for efficiency. However, when duplicates
// are allowed in unique indexes, WiredTiger does not do blind unindexing, and instead confirms
// that the recordid matches the element we are removing.
+ //
// We need to disable blind-deletes for in-progress indexes, in order to force recordid-matching
// for unindex operations, since initial sync can build an index over a collection with
// duplicates. See SERVER-17487 for more details.
@@ -1295,7 +1251,6 @@ Status IndexCatalogImpl::_unindexRecord(OperationContext* opCtx,
return Status::OK();
}
-
Status IndexCatalogImpl::indexRecords(OperationContext* opCtx,
const std::vector<BsonRecord>& bsonRecords,
int64_t* keysInsertedOut) {
@@ -1303,9 +1258,14 @@ Status IndexCatalogImpl::indexRecords(OperationContext* opCtx,
*keysInsertedOut = 0;
}
- for (IndexCatalogEntryContainer::const_iterator i = _entries.begin(); i != _entries.end();
- ++i) {
- Status s = _indexRecords(opCtx, i->get(), bsonRecords, keysInsertedOut);
+ for (auto&& it : _readyIndexes) {
+ Status s = _indexRecords(opCtx, it.get(), bsonRecords, keysInsertedOut);
+ if (!s.isOK())
+ return s;
+ }
+
+ for (auto&& it : _buildingIndexes) {
+ Status s = _indexRecords(opCtx, it.get(), bsonRecords, keysInsertedOut);
if (!s.isOK())
return s;
}
@@ -1322,13 +1282,23 @@ void IndexCatalogImpl::unindexRecord(OperationContext* opCtx,
*keysDeletedOut = 0;
}
- for (IndexCatalogEntryContainer::const_iterator i = _entries.begin(); i != _entries.end();
- ++i) {
- IndexCatalogEntry* entry = i->get();
+ for (IndexCatalogEntryContainer::const_iterator it = _readyIndexes.begin();
+ it != _readyIndexes.end();
+ ++it) {
+ IndexCatalogEntry* entry = it->get();
+
+ bool logIfError = !noWarn;
+ invariant(_unindexRecord(opCtx, entry, obj, loc, logIfError, keysDeletedOut));
+ }
+
+ for (IndexCatalogEntryContainer::const_iterator it = _buildingIndexes.begin();
+ it != _buildingIndexes.end();
+ ++it) {
+ IndexCatalogEntry* entry = it->get();
// If it's a background index, we DO NOT want to log anything.
bool logIfError = entry->isReady(opCtx) ? !noWarn : false;
- _unindexRecord(opCtx, entry, obj, loc, logIfError, keysDeletedOut).transitional_ignore();
+ invariant(_unindexRecord(opCtx, entry, obj, loc, logIfError, keysDeletedOut));
}
}
@@ -1338,10 +1308,10 @@ std::unique_ptr<IndexCatalog::IndexBuildBlockInterface> IndexCatalogImpl::create
}
std::string::size_type IndexCatalogImpl::getLongestIndexNameLength(OperationContext* opCtx) const {
- IndexCatalog::IndexIterator it = getIndexIterator(opCtx, true);
+ std::unique_ptr<IndexIterator> it = getIndexIterator(opCtx, true);
std::string::size_type longestIndexNameLength = 0;
- while (it.more()) {
- auto thisLength = it.next()->indexName().length();
+ while (it->more()) {
+ auto thisLength = it->next()->descriptor()->indexName().length();
if (thisLength > longestIndexNameLength)
longestIndexNameLength = thisLength;
}
@@ -1377,6 +1347,15 @@ void IndexCatalogImpl::prepareInsertDeleteOptions(OperationContext* opCtx,
}
}
+void IndexCatalogImpl::indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) {
+ invariant(_buildingIndexes.release(index->descriptor()));
+ _readyIndexes.add(index);
+ opCtx->recoveryUnit()->onRollback([this, index]() {
+ invariant(_readyIndexes.release(index->descriptor()));
+ _buildingIndexes.add(index);
+ });
+}
+
StatusWith<BSONObj> IndexCatalogImpl::_fixIndexSpec(OperationContext* opCtx,
Collection* collection,
const BSONObj& spec) const {
@@ -1429,7 +1408,11 @@ StatusWith<BSONObj> IndexCatalogImpl::_fixIndexSpec(OperationContext* opCtx,
}
void IndexCatalogImpl::setNs(NamespaceString ns) {
- for (auto&& ice : _entries) {
+ for (auto&& ice : _readyIndexes) {
+ ice->setNs(ns);
+ }
+
+ for (auto&& ice : _buildingIndexes) {
ice->setNs(ns);
}
@@ -1439,5 +1422,4 @@ void IndexCatalogImpl::setNs(NamespaceString ns) {
}
_unfinishedIndexes.swap(newUnfinishedIndexes);
}
-
} // namespace mongo
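The _indexFilteredRecords and _unindexRecord hunks above follow one rule: while hybrid builds are enabled and an index is still in _buildingIndexes, per-document key updates are diverted into that build's side-writes table rather than written to the index itself. A minimal sketch of the insert-path routing, assuming opCtx, options and the catalog entry are already in hand (routeInsert is a hypothetical helper, not part of this patch):

    // Illustrative only, not part of the patch: how one document's keys are
    // routed once hybrid index builds are switched on.
    Status routeInsert(OperationContext* opCtx,
                       IndexCatalogEntry* entry,
                       const BSONObj& doc,
                       const RecordId& loc,
                       const InsertDeleteOptions& options,
                       int64_t* keysInsertedOut) {
        if (entry->isBuilding()) {
            // In-progress build: keys go into the interceptor's temp table and are
            // applied to the real index table after the collection scan completes.
            int64_t inserted = 0;
            Status status = entry->indexBuildInterceptor()->sideWrite(
                opCtx, entry->accessMethod(), &doc, loc,
                IndexBuildInterceptor::Op::kInsert, &inserted);
            if (status.isOK() && keysInsertedOut)
                *keysInsertedOut += inserted;
            return status;
        }

        // Ready index: write straight through the access method, as before.
        InsertResult result;
        Status status = entry->accessMethod()->insert(opCtx, doc, loc, options, &result);
        if (status.isOK() && keysInsertedOut)
            *keysInsertedOut += result.numInserted;
        return status;
    }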
diff --git a/src/mongo/db/catalog/index_catalog_impl.h b/src/mongo/db/catalog/index_catalog_impl.h
index b606098b31c..f37e145c5b9 100644
--- a/src/mongo/db/catalog/index_catalog_impl.h
+++ b/src/mongo/db/catalog/index_catalog_impl.h
@@ -35,6 +35,7 @@
#include "mongo/db/catalog/index_catalog.h"
#include "mongo/db/catalog/index_catalog_entry.h"
+#include "mongo/db/index/index_build_interceptor.h"
#include "mongo/db/index/multikey_paths.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/operation_context.h"
@@ -174,8 +175,8 @@ public:
Status checkUnfinished() const override;
using IndexIterator = IndexCatalog::IndexIterator;
- IndexIterator getIndexIterator(OperationContext* const opCtx,
- const bool includeUnfinishedIndexes) const override;
+ std::unique_ptr<IndexIterator> getIndexIterator(
+ OperationContext* const opCtx, const bool includeUnfinishedIndexes) const override;
// ---- index set modifiers ------
@@ -235,14 +236,6 @@ public:
// --- these probably become private?
-
- /**
- * disk creation order
- * 1) collection's NamespaceDetails
- * a) info + head
- * b) _indexBuildsInProgress++
- * 2) indexes entry in .ns file
- */
class IndexBuildBlock : public IndexCatalog::IndexBuildBlockInterface {
MONGO_DISALLOW_COPYING(IndexBuildBlock);
@@ -296,6 +289,7 @@ public:
IndexCatalogEntry* _entry;
OperationContext* _opCtx;
+ std::unique_ptr<IndexBuildInterceptor> _indexBuildInterceptor;
};
// ----- data modifiers ------
@@ -343,6 +337,8 @@ public:
void setNs(NamespaceString ns) override;
+ void indexBuildSuccess(OperationContext* opCtx, IndexCatalogEntry* index) override;
+
private:
static const BSONObj _idObj; // { _id : 1 }
@@ -387,9 +383,12 @@ private:
// descriptor ownership passes to _setupInMemoryStructures
// initFromDisk: Avoids registering a change to undo this operation when set to true.
// You must set this flag if calling this function outside of a UnitOfWork.
+ // isReadyIndex: The index will be directly available for query usage without needing to
+ // complete the IndexBuildBlock process.
IndexCatalogEntry* _setupInMemoryStructures(OperationContext* opCtx,
std::unique_ptr<IndexDescriptor> descriptor,
- bool initFromDisk);
+ bool initFromDisk,
+ bool isReadyIndex);
// Apply a set of transformations to the user-provided index object 'spec' to make it
// conform to the standard for insertion. This function adds the 'v' field if it didn't
@@ -407,7 +406,8 @@ private:
Collection* const _collection;
const int _maxNumIndexesAllowed;
- IndexCatalogEntryContainer _entries;
+ IndexCatalogEntryContainer _readyIndexes;
+ IndexCatalogEntryContainer _buildingIndexes;
// These are the index specs of indexes that were "leftover".
// "Leftover" means they were unfinished when a mongod shut down.
diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp
index 78a6e39d0f6..e4e0169ea91 100644
--- a/src/mongo/db/catalog/index_consistency.cpp
+++ b/src/mongo/db/catalog/index_consistency.cpp
@@ -75,12 +75,13 @@ IndexConsistency::IndexConsistency(OperationContext* opCtx,
Milliseconds(internalQueryExecYieldPeriodMS.load())) {
IndexCatalog* indexCatalog = _collection->getIndexCatalog();
- IndexCatalog::IndexIterator indexIterator = indexCatalog->getIndexIterator(_opCtx, false);
+ std::unique_ptr<IndexCatalog::IndexIterator> indexIterator =
+ indexCatalog->getIndexIterator(_opCtx, false);
int indexNumber = 0;
- while (indexIterator.more()) {
+ while (indexIterator->more()) {
- const IndexDescriptor* descriptor = indexIterator.next();
+ const IndexDescriptor* descriptor = indexIterator->next()->descriptor();
std::string indexNs = descriptor->indexNamespace();
_indexNumber[descriptor->indexNamespace()] = indexNumber;
diff --git a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp
index 3e748e52c0c..826cbbe4ebf 100644
--- a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp
+++ b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp
@@ -80,10 +80,11 @@ Status RecordStoreValidateAdaptor::validate(const RecordId& recordId,
return status;
}
- IndexCatalog::IndexIterator i = _indexCatalog->getIndexIterator(_opCtx, false);
+ std::unique_ptr<IndexCatalog::IndexIterator> it =
+ _indexCatalog->getIndexIterator(_opCtx, false);
- while (i.more()) {
- const IndexDescriptor* descriptor = i.next();
+ while (it->more()) {
+ const IndexDescriptor* descriptor = it->next()->descriptor();
const std::string indexNs = descriptor->indexNamespace();
int indexNumber = _indexConsistency->getIndexNumber(indexNs);
ValidateResults curRecordResults;
diff --git a/src/mongo/db/catalog/rename_collection.cpp b/src/mongo/db/catalog/rename_collection.cpp
index 59744b074db..80e46f644de 100644
--- a/src/mongo/db/catalog/rename_collection.cpp
+++ b/src/mongo/db/catalog/rename_collection.cpp
@@ -306,8 +306,8 @@ Status renameCollectionCommon(OperationContext* opCtx,
// Determine which index names are too long. Since we don't have the collection
// rename optime at this time, use the maximum optime to check the index names.
auto longDpns = target.makeDropPendingNamespace(repl::OpTime::max());
- while (indexIter.more()) {
- auto index = indexIter.next();
+ while (indexIter->more()) {
+ auto index = indexIter->next()->descriptor();
auto status = longDpns.checkLengthForRename(index->indexName().size());
if (!status.isOK()) {
indexesToDrop.push_back(index);
@@ -435,10 +435,10 @@ Status renameCollectionCommon(OperationContext* opCtx,
indexer.allowInterruption();
std::vector<BSONObj> indexesToCopy;
- IndexCatalog::IndexIterator sourceIndIt =
+ std::unique_ptr<IndexCatalog::IndexIterator> sourceIndIt =
sourceColl->getIndexCatalog()->getIndexIterator(opCtx, true);
- while (sourceIndIt.more()) {
- auto descriptor = sourceIndIt.next();
+ while (sourceIndIt->more()) {
+ auto descriptor = sourceIndIt->next()->descriptor();
if (descriptor->isIdIndex()) {
continue;
}
diff --git a/src/mongo/db/commands/mr.cpp b/src/mongo/db/commands/mr.cpp
index 9b039f98e96..3d529332bbc 100644
--- a/src/mongo/db/commands/mr.cpp
+++ b/src/mongo/db/commands/mr.cpp
@@ -530,11 +530,11 @@ void State::prepTempCollection() {
if (finalColl) {
finalOptions = finalColl->getCatalogEntry()->getCollectionOptions(_opCtx);
- IndexCatalog::IndexIterator ii =
+ std::unique_ptr<IndexCatalog::IndexIterator> ii =
finalColl->getIndexCatalog()->getIndexIterator(_opCtx, true);
// Iterate over finalColl's indexes.
- while (ii.more()) {
- IndexDescriptor* currIndex = ii.next();
+ while (ii->more()) {
+ IndexDescriptor* currIndex = ii->next()->descriptor();
BSONObjBuilder b;
b.append("ns", _config.tempNamespace.ns());
@@ -1114,11 +1114,11 @@ void State::finalReduce(OperationContext* opCtx, CurOp* curOp, ProgressMeterHold
assertCollectionNotNull(_config.incLong, autoIncColl);
bool foundIndex = false;
- IndexCatalog::IndexIterator ii =
+ std::unique_ptr<IndexCatalog::IndexIterator> ii =
autoIncColl.getCollection()->getIndexCatalog()->getIndexIterator(_opCtx, true);
// Iterate over incColl's indexes.
- while (ii.more()) {
- IndexDescriptor* currIndex = ii.next();
+ while (ii->more()) {
+ IndexDescriptor* currIndex = ii->next()->descriptor();
BSONObj x = currIndex->infoObj();
if (sortKey.woCompare(x["key"].embeddedObject()) == 0) {
foundIndex = true;
diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript
index e3b4f5fe49a..1b2f9b7f934 100644
--- a/src/mongo/db/index/SConscript
+++ b/src/mongo/db/index/SConscript
@@ -144,3 +144,18 @@ env.Library(
'key_generator',
],
)
+
+env.Library(
+ target="index_build_interceptor",
+ source=[
+ "index_build_interceptor.cpp",
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/base',
+ ],
+ LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/db/multi_key_path_tracker',
+ '$BUILD_DIR/mongo/db/s/sharding_api_d',
+ 'index_access_methods',
+ ],
+)
diff --git a/src/mongo/db/index/index_build_interceptor.cpp b/src/mongo/db/index/index_build_interceptor.cpp
new file mode 100644
index 00000000000..6af48b542c1
--- /dev/null
+++ b/src/mongo/db/index/index_build_interceptor.cpp
@@ -0,0 +1,146 @@
+/**
+ * Copyright (C) 2018-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/index/index_build_interceptor.h"
+
+#include "mongo/bson/bsonobj.h"
+#include "mongo/db/catalog_raii.h"
+#include "mongo/db/db_raii.h"
+#include "mongo/db/index/index_access_method.h"
+#include "mongo/db/multi_key_path_tracker.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/db/s/operation_sharding_state.h"
+#include "mongo/util/log.h"
+#include "mongo/util/uuid.h"
+
+namespace mongo {
+
+namespace {
+const bool makeCollections = false;
+}
+
+NamespaceString IndexBuildInterceptor::makeTempSideWritesNs() {
+ return NamespaceString("local.system.sideWrites-" + UUID::gen().toString());
+}
+
+void IndexBuildInterceptor::ensureSideWritesCollectionExists(OperationContext* opCtx) {
+ if (!makeCollections) {
+ return;
+ }
+
+ // TODO SERVER-38027 Consider pushing this higher into the createIndexes command logic.
+ OperationShardingState::get(opCtx).setAllowImplicitCollectionCreation(BSONElement());
+
+ AutoGetOrCreateDb local(opCtx, "local", LockMode::MODE_X);
+ CollectionOptions options;
+ options.setNoIdIndex();
+ options.temp = true;
+
+ local.getDb()->createCollection(opCtx, _sideWritesNs.ns(), options);
+}
+
+void IndexBuildInterceptor::removeSideWritesCollection(OperationContext* opCtx) {
+ if (!makeCollections) {
+ return;
+ }
+
+ AutoGetDb local(opCtx, "local", LockMode::MODE_X);
+ fassert(50994, local.getDb()->dropCollectionEvenIfSystem(opCtx, _sideWritesNs, repl::OpTime()));
+}
+
+Status IndexBuildInterceptor::sideWrite(OperationContext* opCtx,
+ IndexAccessMethod* indexAccessMethod,
+ const BSONObj* obj,
+ RecordId loc,
+ Op op,
+ int64_t* numKeysOut) {
+ *numKeysOut = 0;
+ BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
+ BSONObjSet multikeyMetadataKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
+ MultikeyPaths multikeyPaths;
+
+ indexAccessMethod->getKeys(*obj,
+ IndexAccessMethod::GetKeysMode::kEnforceConstraints,
+ &keys,
+ &multikeyMetadataKeys,
+ &multikeyPaths);
+ // Maintain parity with IndexAccessMethod's handling of key counting. Only include
+ // `multikeyMetadataKeys` when inserting.
+ *numKeysOut = keys.size() + (op == Op::kInsert ? multikeyMetadataKeys.size() : 0);
+
+ if (_multikeyPaths) {
+ MultikeyPathTracker::mergeMultikeyPaths(&_multikeyPaths.get(), multikeyPaths);
+ } else {
+ // `mergeMultikeyPaths` is sensitive to the two inputs having the same multikey
+ // "shape". Initialize `_multikeyPaths` with the right shape from the first result.
+ _multikeyPaths = multikeyPaths;
+ }
+
+ AutoGetCollection coll(opCtx, _sideWritesNs, LockMode::MODE_IX);
+ invariant(coll.getCollection());
+
+ std::vector<InsertStatement> toInsert;
+ for (const auto& key : keys) {
+ // Documents inserted into this table must be consumed in insert-order. Today, we can rely
+ // on storage engines to return documents in insert-order, but with clustered indexes,
+ // that may no longer be true.
+ //
+ // Additionally, these writes should be timestamped with the same timestamps that the
+ // other writes making up this operation are given. When index builds can cope with
+ // replication rollbacks, side table writes associated with a CUD operation should
+ // remain/rollback along with the corresponding oplog entry.
+ toInsert.emplace_back(BSON(
+ "op" << (op == Op::kInsert ? "i" : "d") << "key" << key << "recordId" << loc.repr()));
+ }
+
+ if (op == Op::kInsert) {
+ // Wildcard indexes write multikey path information, typically part of the catalog
+ // document, to the index itself. Multikey information is never deleted, so we only need
+ // to add this data on the insert path.
+ for (const auto& key : multikeyMetadataKeys) {
+ toInsert.emplace_back(BSON("op"
+ << "i"
+ << "key"
+ << key
+ << "recordId"
+ << static_cast<int64_t>(
+ RecordId::ReservedId::kWildcardMultikeyMetadataId)));
+ }
+ }
+
+ OpDebug* const opDebug = nullptr;
+ const bool fromMigrate = false;
+ return coll.getCollection()->insertDocuments(
+ opCtx, toInsert.begin(), toInsert.end(), opDebug, fromMigrate);
+}
+} // namespace mongo
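Each row sideWrite() inserts above is an ordinary BSON document, one per generated key. For reference, an insert whose single generated key is {"": 5} at RecordId 7 would produce an entry shaped roughly like the following (values are hypothetical):

    // Illustrative only: the shape of one side-writes document for an insert.
    // Index key BSONObjs typically carry empty field names, as produced by getKeys().
    BSONObj sideWriteEntry = BSON("op" << "i"                     // "i" for Op::kInsert, "d" for Op::kDelete
                                       << "key" << BSON("" << 5)  // the generated index key
                                       << "recordId" << 7LL);     // loc.repr() of the base document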
diff --git a/src/mongo/db/index/index_build_interceptor.h b/src/mongo/db/index/index_build_interceptor.h
new file mode 100644
index 00000000000..13ae79d10d2
--- /dev/null
+++ b/src/mongo/db/index/index_build_interceptor.h
@@ -0,0 +1,74 @@
+/**
+ * Copyright (C) 2018-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/index/multikey_paths.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/record_id.h"
+#include "mongo/platform/atomic_word.h"
+
+namespace mongo {
+
+class BSONObj;
+class IndexAccessMethod;
+class OperationContext;
+
+class IndexBuildInterceptor {
+public:
+ enum class Op { kInsert, kDelete };
+
+ IndexBuildInterceptor() : _sideWritesNs(makeTempSideWritesNs()) {}
+ IndexBuildInterceptor(NamespaceString sideWritesNs) : _sideWritesNs(sideWritesNs) {}
+
+ static NamespaceString makeTempSideWritesNs();
+
+ void ensureSideWritesCollectionExists(OperationContext* opCtx);
+ void removeSideWritesCollection(OperationContext* opCtx);
+
+ /**
+ * Client writes that are concurrent with an index build will have their index updates written
+ * to a temporary table. After the index table scan is complete, these updates will be applied
+ * to the underlying index table.
+ *
+ * On success, if `numKeysOut` is non-null, it will contain the number of keys added or removed.
+ */
+ Status sideWrite(OperationContext* opCtx,
+ IndexAccessMethod* indexAccessMethod,
+ const BSONObj* obj,
+ RecordId loc,
+ Op op,
+ int64_t* numKeysOut);
+
+private:
+ NamespaceString _sideWritesNs;
+ boost::optional<MultikeyPaths> _multikeyPaths;
+};
+
+} // namespace mongo
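A hedged sketch of how this public surface is meant to be driven around a background build, with opCtx, accessMethod, doc and loc assumed to be in scope (the real wiring lives in IndexBuildBlock elsewhere in this patch):

    IndexBuildInterceptor interceptor;                    // names a fresh local.system.sideWrites-<UUID> collection
    interceptor.ensureSideWritesCollectionExists(opCtx);  // still a no-op while makeCollections is false

    // While the collection scan runs, concurrent client writes are recorded in
    // the temp table rather than the index table:
    int64_t numKeys = 0;
    uassertStatusOK(interceptor.sideWrite(
        opCtx, accessMethod, &doc, loc, IndexBuildInterceptor::Op::kInsert, &numKeys));

    // On abort, or once the side writes have been applied, the temp table is dropped:
    interceptor.removeSideWritesCollection(opCtx);        // also gated on makeCollections for now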
diff --git a/src/mongo/db/multi_key_path_tracker.cpp b/src/mongo/db/multi_key_path_tracker.cpp
index c3961e8ad98..99aa17cd60f 100644
--- a/src/mongo/db/multi_key_path_tracker.cpp
+++ b/src/mongo/db/multi_key_path_tracker.cpp
@@ -32,6 +32,8 @@
#include "mongo/db/multi_key_path_tracker.h"
+#include "mongo/util/assert_util.h"
+
namespace mongo {
const OperationContext::Decoration<MultikeyPathTracker> MultikeyPathTracker::get =
diff --git a/src/mongo/db/pipeline/process_interface_standalone.cpp b/src/mongo/db/pipeline/process_interface_standalone.cpp
index efbd610506e..b3eda6e6e76 100644
--- a/src/mongo/db/pipeline/process_interface_standalone.cpp
+++ b/src/mongo/db/pipeline/process_interface_standalone.cpp
@@ -448,9 +448,9 @@ bool MongoInterfaceStandalone::uniqueKeyIsSupportedByIndex(
}
auto indexIterator = collection->getIndexCatalog()->getIndexIterator(opCtx, false);
- while (indexIterator.more()) {
- IndexDescriptor* descriptor = indexIterator.next();
- if (supportsUniqueKey(expCtx, indexIterator.catalogEntry(descriptor), uniqueKeyPaths)) {
+ while (indexIterator->more()) {
+ IndexCatalogEntry* entry = indexIterator->next();
+ if (supportsUniqueKey(expCtx, entry, uniqueKeyPaths)) {
return true;
}
}
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 14c2c137c88..72d667cfc64 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -187,10 +187,10 @@ void fillOutPlannerParams(OperationContext* opCtx,
QueryPlannerParams* plannerParams) {
invariant(canonicalQuery);
// If it's not NULL, we may have indices. Access the catalog and fill out IndexEntry(s)
- IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(opCtx, false);
- while (ii.more()) {
- const IndexDescriptor* desc = ii.next();
- IndexCatalogEntry* ice = ii.catalogEntry(desc);
+ std::unique_ptr<IndexCatalog::IndexIterator> ii =
+ collection->getIndexCatalog()->getIndexIterator(opCtx, false);
+ while (ii->more()) {
+ IndexCatalogEntry* ice = ii->next();
plannerParams->indices.push_back(
indexEntryFromIndexCatalogEntry(opCtx, *ice, canonicalQuery));
}
@@ -1491,18 +1491,19 @@ QueryPlannerParams fillOutPlannerParamsForDistinct(OperationContext* opCtx,
QueryPlannerParams plannerParams;
plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN | plannerOptions;
- IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(opCtx, false);
+ std::unique_ptr<IndexCatalog::IndexIterator> ii =
+ collection->getIndexCatalog()->getIndexIterator(opCtx, false);
auto query = parsedDistinct.getQuery()->getQueryRequest().getFilter();
- while (ii.more()) {
- const IndexDescriptor* desc = ii.next();
- IndexCatalogEntry* ice = ii.catalogEntry(desc);
+ while (ii->more()) {
+ IndexCatalogEntry* ice = ii->next();
+ const IndexDescriptor* desc = ice->descriptor();
if (desc->keyPattern().hasField(parsedDistinct.getKey())) {
plannerParams.indices.push_back(
indexEntryFromIndexCatalogEntry(opCtx, *ice, parsedDistinct.getQuery()));
} else if (desc->getIndexType() == IndexType::INDEX_WILDCARD && !query.isEmpty()) {
// Check whether the $** projection captures the field over which we are distinct-ing.
const auto* proj =
- static_cast<WildcardAccessMethod*>(ii.accessMethod(desc))->getProjectionExec();
+ static_cast<WildcardAccessMethod*>(ice->accessMethod())->getProjectionExec();
if (proj->applyProjectionToOneField(parsedDistinct.getKey())) {
plannerParams.indices.push_back(
indexEntryFromIndexCatalogEntry(opCtx, *ice, parsedDistinct.getQuery()));
diff --git a/src/mongo/db/stats/storage_stats.cpp b/src/mongo/db/stats/storage_stats.cpp
index 92f1054eaae..45db0f8439c 100644
--- a/src/mongo/db/stats/storage_stats.cpp
+++ b/src/mongo/db/stats/storage_stats.cpp
@@ -91,10 +91,11 @@ Status appendCollectionStorageStats(OperationContext* opCtx,
BSONObjBuilder indexDetails;
- IndexCatalog::IndexIterator i = indexCatalog->getIndexIterator(opCtx, false);
- while (i.more()) {
- const IndexDescriptor* descriptor = i.next();
- IndexAccessMethod* iam = indexCatalog->getIndex(descriptor);
+ std::unique_ptr<IndexCatalog::IndexIterator> it = indexCatalog->getIndexIterator(opCtx, false);
+ while (it->more()) {
+ IndexCatalogEntry* entry = it->next();
+ IndexDescriptor* descriptor = entry->descriptor();
+ IndexAccessMethod* iam = entry->accessMethod();
invariant(iam);
BSONObjBuilder bob;
diff --git a/src/mongo/dbtests/indexcatalogtests.cpp b/src/mongo/dbtests/indexcatalogtests.cpp
index a0b17941ccb..7e82884edda 100644
--- a/src/mongo/dbtests/indexcatalogtests.cpp
+++ b/src/mongo/dbtests/indexcatalogtests.cpp
@@ -86,11 +86,11 @@ public:
ASSERT_TRUE(_catalog->numIndexesReady(&opCtx) == numFinishedIndexesStart + 2);
- IndexCatalog::IndexIterator ii = _catalog->getIndexIterator(&opCtx, false);
+ std::unique_ptr<IndexCatalog::IndexIterator> ii = _catalog->getIndexIterator(&opCtx, false);
int indexesIterated = 0;
bool foundIndex = false;
- while (ii.more()) {
- IndexDescriptor* indexDesc = ii.next();
+ while (ii->more()) {
+ IndexDescriptor* indexDesc = ii->next()->descriptor();
indexesIterated++;
BSONObjIterator boit(indexDesc->infoObj());
while (boit.more() && !foundIndex) {