From 2fc36d0b12d1c44893a5f3c1fe0ac8fb0125a071 Mon Sep 17 00:00:00 2001 From: Samy Lanka Date: Fri, 31 Jul 2020 07:20:47 +0000 Subject: SERVER-48417 Reconstruct in-memory state when resuming index build --- jstests/noPassthrough/indexbg_restart_secondary.js | 22 ++- jstests/noPassthrough/libs/index_build.js | 16 +- .../resumable_index_build_drain_writes_phase.js | 3 +- src/mongo/db/catalog/index_build_block.cpp | 37 ++++- src/mongo/db/catalog/index_build_block.h | 9 ++ src/mongo/db/catalog/index_builds_manager.cpp | 5 +- src/mongo/db/catalog/index_builds_manager.h | 3 +- src/mongo/db/catalog/index_catalog.h | 6 +- src/mongo/db/catalog/index_catalog_impl.cpp | 12 +- src/mongo/db/catalog/index_catalog_impl.h | 6 +- src/mongo/db/catalog/index_catalog_noop.h | 6 +- src/mongo/db/catalog/multi_index_block.cpp | 43 +++-- src/mongo/db/catalog/multi_index_block.h | 14 +- src/mongo/db/index/duplicate_key_tracker.cpp | 14 ++ src/mongo/db/index/duplicate_key_tracker.h | 9 ++ src/mongo/db/index/index_build_interceptor.cpp | 55 +++++-- src/mongo/db/index/index_build_interceptor.h | 16 ++ src/mongo/db/index/skipped_record_tracker.cpp | 19 +++ src/mongo/db/index/skipped_record_tracker.h | 6 +- src/mongo/db/index_builds_coordinator.cpp | 173 +++++++++++++++++++-- src/mongo/db/index_builds_coordinator.h | 54 ++++++- src/mongo/db/index_builds_coordinator_mongod.cpp | 51 +++++- src/mongo/db/index_builds_coordinator_mongod.h | 23 +++ src/mongo/db/resumable_index_builds.idl | 2 +- src/mongo/db/storage/durable_catalog.h | 8 + src/mongo/db/storage/durable_catalog_impl.cpp | 23 +++ src/mongo/db/storage/durable_catalog_impl.h | 5 + src/mongo/db/storage/storage_engine.h | 14 +- src/mongo/db/storage/storage_engine_impl.cpp | 11 ++ src/mongo/db/storage/storage_engine_impl.h | 3 + src/mongo/db/storage/storage_engine_mock.h | 4 + .../embedded/index_builds_coordinator_embedded.cpp | 12 +- .../embedded/index_builds_coordinator_embedded.h | 8 + 33 files changed, 613 insertions(+), 79 deletions(-) diff --git a/jstests/noPassthrough/indexbg_restart_secondary.js b/jstests/noPassthrough/indexbg_restart_secondary.js index efd6d90d765..66c27d0e0c3 100644 --- a/jstests/noPassthrough/indexbg_restart_secondary.js +++ b/jstests/noPassthrough/indexbg_restart_secondary.js @@ -69,11 +69,15 @@ IndexBuildTest.waitForIndexBuildToStart(secondDB, coll.getName(), "y_1"); MongoRunner.stopMongod(second); -replTest.start( - second, - {setParameter: {"failpoint.hangAfterSettingUpIndexBuildUnlocked": tojson({mode: "alwaysOn"})}}, - /*restart=*/true, - /*wait=*/true); +replTest.start(second, + { + setParameter: { + "failpoint.hangAfterSettingUpIndexBuildUnlocked": tojson({mode: "alwaysOn"}), + "failpoint.hangAfterSettingUpResumableIndexBuild": tojson({mode: "alwaysOn"}) + } + }, + /*restart=*/true, + /*wait=*/true); // Make sure secondary comes back. try { @@ -90,7 +94,11 @@ try { assert.eq(size, secondDB.getCollection(collectionName).find({}).itcount()); // Verify that only the _id index is ready. 
- checkLog.containsJson(second, 4585201); + if (ResumableIndexBuildTest.resumableIndexBuildsEnabled(second)) { + checkLog.containsJson(second, 4841704); + } else { + checkLog.containsJson(second, 4585201); + } IndexBuildTest.assertIndexes(secondDB.getCollection(collectionName), 4, ["_id_"], @@ -99,6 +107,8 @@ try { } finally { assert.commandWorked(second.adminCommand( {configureFailPoint: 'hangAfterSettingUpIndexBuildUnlocked', mode: 'off'})); + assert.commandWorked(second.adminCommand( + {configureFailPoint: 'hangAfterSettingUpResumableIndexBuild', mode: 'off'})); // Let index build complete on primary, which replicates a commitIndexBuild to the secondary. IndexBuildTest.resumeIndexBuilds(primaryDB); diff --git a/jstests/noPassthrough/libs/index_build.js b/jstests/noPassthrough/libs/index_build.js index 98f14d50253..3f6a19dfa37 100644 --- a/jstests/noPassthrough/libs/index_build.js +++ b/jstests/noPassthrough/libs/index_build.js @@ -328,8 +328,14 @@ const ResumableIndexBuildTest = class { * insertIntoSideWritesTable will be inserted after the bulk load phase so that they are * inserted into the side writes table and processed during the drain writes phase. */ - static run( - rst, dbName, collName, indexSpec, failPointName, failPointData, insertIntoSideWritesTable) { + static run(rst, + dbName, + collName, + indexSpec, + failPointName, + failPointData, + insertIntoSideWritesTable, + postIndexBuildInserts = {}) { const primary = rst.getPrimary(); const coll = primary.getDB(dbName).getCollection(collName); const indexName = "resumable_index_build"; @@ -349,6 +355,12 @@ const ResumableIndexBuildTest = class { awaitInsertIntoSideWritesTable(); awaitCreateIndex(); + if (postIndexBuildInserts) { + assert.commandWorked(coll.insert(postIndexBuildInserts)); + } + + assert(coll.validate(), "Index validation failed"); + assert.commandWorked(coll.dropIndex(indexName)); } }; diff --git a/jstests/noPassthrough/resumable_index_build_drain_writes_phase.js b/jstests/noPassthrough/resumable_index_build_drain_writes_phase.js index 6a97d680927..c6f78592a0a 100644 --- a/jstests/noPassthrough/resumable_index_build_drain_writes_phase.js +++ b/jstests/noPassthrough/resumable_index_build_drain_writes_phase.js @@ -41,7 +41,8 @@ ResumableIndexBuildTest.run(rst, {a: 1}, "hangIndexBuildDuringDrainWritesPhase", {iteration: 1}, - [{a: 4}, {a: 5}]); + [{a: 4}, {a: 5}], + [{a: 6}, {a: 7}]); rst.stopSet(); })(); \ No newline at end of file diff --git a/src/mongo/db/catalog/index_build_block.cpp b/src/mongo/db/catalog/index_build_block.cpp index f17f541e57e..67d951a5aa1 100644 --- a/src/mongo/db/catalog/index_build_block.cpp +++ b/src/mongo/db/catalog/index_build_block.cpp @@ -71,6 +71,37 @@ void IndexBuildBlock::finalizeTemporaryTables(OperationContext* opCtx, } } +void IndexBuildBlock::_completeInit(OperationContext* opCtx, Collection* collection) { + // Register this index with the CollectionQueryInfo to regenerate the cache. This way, updates + // occurring while an index is being build in the background will be aware of whether or not + // they need to modify any indexes. 
+ CollectionQueryInfo::get(collection) + .addedIndex(opCtx, collection, _indexCatalogEntry->descriptor()); +} + +void IndexBuildBlock::initForResume(OperationContext* opCtx, + Collection* collection, + const IndexSorterInfo& resumeInfo) { + + _indexName = _spec.getStringField("name"); + auto descriptor = + _indexCatalog->findIndexByName(opCtx, _indexName, true /* includeUnfinishedIndexes */); + + _indexCatalogEntry = descriptor->getEntry(); + invariant(_indexCatalogEntry); + invariant(_method == IndexBuildMethod::kHybrid); + + _indexBuildInterceptor = + std::make_unique(opCtx, + _indexCatalogEntry, + resumeInfo.getSideWritesTable(), + resumeInfo.getDuplicateKeyTrackerTable(), + resumeInfo.getSkippedRecordTrackerTable()); + _indexCatalogEntry->setIndexBuildInterceptor(_indexBuildInterceptor.get()); + + _completeInit(opCtx, collection); +} + Status IndexBuildBlock::init(OperationContext* opCtx, Collection* collection) { // Being in a WUOW means all timestamping responsibility can be pushed up to the caller. invariant(opCtx->lockState()->inAWriteUnitOfWork()); @@ -118,11 +149,7 @@ Status IndexBuildBlock::init(OperationContext* opCtx, Collection* collection) { }); } - // Register this index with the CollectionQueryInfo to regenerate the cache. This way, updates - // occurring while an index is being build in the background will be aware of whether or not - // they need to modify any indexes. - CollectionQueryInfo::get(collection) - .addedIndex(opCtx, collection, _indexCatalogEntry->descriptor()); + _completeInit(opCtx, collection); return Status::OK(); } diff --git a/src/mongo/db/catalog/index_build_block.h b/src/mongo/db/catalog/index_build_block.h index 07d404c52c3..95f384cf170 100644 --- a/src/mongo/db/catalog/index_build_block.h +++ b/src/mongo/db/catalog/index_build_block.h @@ -69,6 +69,13 @@ public: */ Status init(OperationContext* opCtx, Collection* collection); + /** + * Makes sure that an entry for the index was created at startup in the IndexCatalog. + */ + void initForResume(OperationContext* opCtx, + Collection* collection, + const IndexSorterInfo& resumeInfo); + /** * Marks the state of the index as 'ready' and commits the index to disk. 
* @@ -107,6 +114,8 @@ public: } private: + void _completeInit(OperationContext* opCtx, Collection* collection); + IndexCatalog* const _indexCatalog; const NamespaceString _nss; diff --git a/src/mongo/db/catalog/index_builds_manager.cpp b/src/mongo/db/catalog/index_builds_manager.cpp index af81468dbda..c94ba7caf38 100644 --- a/src/mongo/db/catalog/index_builds_manager.cpp +++ b/src/mongo/db/catalog/index_builds_manager.cpp @@ -84,7 +84,8 @@ Status IndexBuildsManager::setUpIndexBuild(OperationContext* opCtx, const std::vector& specs, const UUID& buildUUID, OnInitFn onInit, - SetupOptions options) { + SetupOptions options, + const boost::optional& resumeInfo) { _registerIndexBuild(buildUUID); const auto& nss = collection->ns(); @@ -110,7 +111,7 @@ Status IndexBuildsManager::setUpIndexBuild(OperationContext* opCtx, std::vector indexes; try { indexes = writeConflictRetry(opCtx, "IndexBuildsManager::setUpIndexBuild", nss.ns(), [&]() { - return uassertStatusOK(builder->init(opCtx, collection, specs, onInit)); + return uassertStatusOK(builder->init(opCtx, collection, specs, onInit, resumeInfo)); }); } catch (const DBException& ex) { return ex.toStatus(); diff --git a/src/mongo/db/catalog/index_builds_manager.h b/src/mongo/db/catalog/index_builds_manager.h index e7fc7644d95..e4b407b0333 100644 --- a/src/mongo/db/catalog/index_builds_manager.h +++ b/src/mongo/db/catalog/index_builds_manager.h @@ -84,7 +84,8 @@ public: const std::vector& specs, const UUID& buildUUID, OnInitFn onInit, - SetupOptions options = {}); + SetupOptions options = {}, + const boost::optional& resumeInfo = boost::none); /** * Unregisters the builder associated with the given buildUUID from the _builders map. diff --git a/src/mongo/db/catalog/index_catalog.h b/src/mongo/db/catalog/index_catalog.h index 00362fb1f1a..c33e8f9969b 100644 --- a/src/mongo/db/catalog/index_catalog.h +++ b/src/mongo/db/catalog/index_catalog.h @@ -327,8 +327,10 @@ public: * IndexAlreadyExists if the index already exists; IndexBuildAlreadyInProgress if the index is * already being built. */ - virtual StatusWith prepareSpecForCreate(OperationContext* const opCtx, - const BSONObj& original) const = 0; + virtual StatusWith prepareSpecForCreate( + OperationContext* const opCtx, + const BSONObj& original, + const boost::optional& resumeInfo) const = 0; /** * Returns a copy of 'indexSpecsToBuild' that does not contain index specifications that already diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp index 6e715af9349..b4cb9408674 100644 --- a/src/mongo/db/catalog/index_catalog_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_impl.cpp @@ -308,8 +308,10 @@ void IndexCatalogImpl::_logInternalState(OperationContext* opCtx, namespace { std::string lastHaystackIndexLogged = ""; } -StatusWith IndexCatalogImpl::prepareSpecForCreate(OperationContext* opCtx, - const BSONObj& original) const { +StatusWith IndexCatalogImpl::prepareSpecForCreate( + OperationContext* opCtx, + const BSONObj& original, + const boost::optional& resumeInfo) const { auto swValidatedAndFixed = _validateAndFixIndexSpec(opCtx, original); if (!swValidatedAndFixed.isOK()) { return swValidatedAndFixed.getStatus().withContext( @@ -343,6 +345,12 @@ StatusWith IndexCatalogImpl::prepareSpecForCreate(OperationContext* opC return status; } + if (resumeInfo) { + // Don't check against unfinished indexes if this index is being resumed, since it will + // conflict with itself. 
+ return validatedSpec; + } + // Now we will check against all indexes, in-progress included. // // The index catalog cannot currently iterate over only in-progress indexes. So by previously diff --git a/src/mongo/db/catalog/index_catalog_impl.h b/src/mongo/db/catalog/index_catalog_impl.h index 2b5ff4f5f4a..85dc5ebe7d8 100644 --- a/src/mongo/db/catalog/index_catalog_impl.h +++ b/src/mongo/db/catalog/index_catalog_impl.h @@ -179,8 +179,10 @@ public: StatusWith createIndexOnEmptyCollection(OperationContext* opCtx, BSONObj spec) override; - StatusWith prepareSpecForCreate(OperationContext* opCtx, - const BSONObj& original) const override; + StatusWith prepareSpecForCreate( + OperationContext* opCtx, + const BSONObj& original, + const boost::optional& resumeInfo = boost::none) const override; std::vector removeExistingIndexes(OperationContext* const opCtx, const std::vector& indexSpecsToBuild, diff --git a/src/mongo/db/catalog/index_catalog_noop.h b/src/mongo/db/catalog/index_catalog_noop.h index 12b97c208f8..92b74cfc84e 100644 --- a/src/mongo/db/catalog/index_catalog_noop.h +++ b/src/mongo/db/catalog/index_catalog_noop.h @@ -140,8 +140,10 @@ public: return spec; } - StatusWith prepareSpecForCreate(OperationContext* const opCtx, - const BSONObj& original) const override { + StatusWith prepareSpecForCreate( + OperationContext* const opCtx, + const BSONObj& original, + const boost::optional& resumeInfo) const override { return original; } diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp index 4fb95caa7af..a13c7acab89 100644 --- a/src/mongo/db/catalog/multi_index_block.cpp +++ b/src/mongo/db/catalog/multi_index_block.cpp @@ -152,13 +152,15 @@ StatusWith> MultiIndexBlock::init(OperationContext* opCtx, const BSONObj& spec, OnInitFn onInit) { const auto indexes = std::vector(1, spec); - return init(opCtx, collection, indexes, onInit); + return init(opCtx, collection, indexes, onInit, boost::none); } -StatusWith> MultiIndexBlock::init(OperationContext* opCtx, - Collection* collection, - const std::vector& indexSpecs, - OnInitFn onInit) { +StatusWith> MultiIndexBlock::init( + OperationContext* opCtx, + Collection* collection, + const std::vector& indexSpecs, + OnInitFn onInit, + const boost::optional& resumeInfo) { invariant(opCtx->lockState()->isCollectionLockedForMode(collection->ns(), MODE_X), str::stream() << "Collection " << collection->ns() << " with UUID " << collection->uuid() << " is holding the incorrect lock"); @@ -170,6 +172,10 @@ StatusWith> MultiIndexBlock::init(OperationContext* opCtx, invariant(_indexes.empty()); + if (resumeInfo) { + _phase = resumeInfo->getPhase(); + } + // Guarantees that exceptions cannot be returned from index builder initialization except for // WriteConflictExceptions, which should be dealt with by the caller. 
try { @@ -209,7 +215,7 @@ StatusWith> MultiIndexBlock::init(OperationContext* opCtx, for (size_t i = 0; i < indexSpecs.size(); i++) { BSONObj info = indexSpecs[i]; StatusWith statusWithInfo = - collection->getIndexCatalog()->prepareSpecForCreate(opCtx, info); + collection->getIndexCatalog()->prepareSpecForCreate(opCtx, info, resumeInfo); Status status = statusWithInfo.getStatus(); if (!status.isOK()) { // If we were given two identical indexes to build, we will run into an error trying @@ -232,9 +238,22 @@ StatusWith> MultiIndexBlock::init(OperationContext* opCtx, IndexToBuild index; index.block = std::make_unique( collection->getIndexCatalog(), collection->ns(), info, _method, _buildUUID); - status = index.block->init(opCtx, collection); - if (!status.isOK()) - return status; + if (resumeInfo) { + auto resumeInfoIndexes = resumeInfo->getIndexes(); + // Find the resume information that corresponds to this spec. + auto indexResumeInfo = + std::find_if(resumeInfoIndexes.begin(), + resumeInfoIndexes.end(), + [&info](const IndexSorterInfo& indexInfo) { + return info.woCompare(indexInfo.getSpec()) == 0; + }); + + index.block->initForResume(opCtx, collection, *indexResumeInfo); + } else { + status = index.block->init(opCtx, collection); + if (!status.isOK()) + return status; + } auto indexCleanupGuard = makeGuard([opCtx, &index] { index.block->finalizeTemporaryTables( @@ -271,8 +290,10 @@ StatusWith> MultiIndexBlock::init(OperationContext* opCtx, index.filterExpression = index.block->getEntry()->getFilterExpression(); - // TODO SERVER-14888 Suppress this in cases we don't want to audit. - audit::logCreateIndex(opCtx->getClient(), &info, descriptor->indexName(), ns); + if (!resumeInfo) { + // TODO SERVER-14888 Suppress this in cases we don't want to audit. + audit::logCreateIndex(opCtx->getClient(), &info, descriptor->indexName(), ns); + } indexCleanupGuard.dismiss(); _indexes.push_back(std::move(index)); diff --git a/src/mongo/db/catalog/multi_index_block.h b/src/mongo/db/catalog/multi_index_block.h index 56fb44bffa3..11e67eecefe 100644 --- a/src/mongo/db/catalog/multi_index_block.h +++ b/src/mongo/db/catalog/multi_index_block.h @@ -109,14 +109,20 @@ public: * Requires holding an exclusive lock on the collection. 
*/ using OnInitFn = std::function& specs)>; - StatusWith> init(OperationContext* opCtx, - Collection* collection, - const std::vector& specs, - OnInitFn onInit); + StatusWith> init( + OperationContext* opCtx, + Collection* collection, + const std::vector& specs, + OnInitFn onInit, + const boost::optional& resumeInfo = boost::none); StatusWith> init(OperationContext* opCtx, Collection* collection, const BSONObj& spec, OnInitFn onInit); + StatusWith> initForResume(OperationContext* opCtx, + Collection* collection, + const std::vector& specs, + const ResumeIndexInfo& resumeInfo); /** * Not all index initializations need an OnInitFn, in particular index builds that do not need diff --git a/src/mongo/db/index/duplicate_key_tracker.cpp b/src/mongo/db/index/duplicate_key_tracker.cpp index 0f997415068..4d3d88bd075 100644 --- a/src/mongo/db/index/duplicate_key_tracker.cpp +++ b/src/mongo/db/index/duplicate_key_tracker.cpp @@ -55,6 +55,20 @@ DuplicateKeyTracker::DuplicateKeyTracker(OperationContext* opCtx, const IndexCat invariant(_indexCatalogEntry->descriptor()->unique()); } +DuplicateKeyTracker::DuplicateKeyTracker(OperationContext* opCtx, + const IndexCatalogEntry* entry, + StringData ident) + : _indexCatalogEntry(entry) { + _keyConstraintsTable = + opCtx->getServiceContext()->getStorageEngine()->makeTemporaryRecordStoreFromExistingIdent( + opCtx, ident); + + invariant(_indexCatalogEntry->descriptor()->unique(), + str::stream() << "Duplicate key tracker table exists on disk with ident: " << ident + << " but the index is not unique: " + << _indexCatalogEntry->descriptor()); +} + void DuplicateKeyTracker::finalizeTemporaryTable(OperationContext* opCtx, TemporaryRecordStore::FinalizationAction action) { _keyConstraintsTable->finalizeTemporaryTable(opCtx, action); diff --git a/src/mongo/db/index/duplicate_key_tracker.h b/src/mongo/db/index/duplicate_key_tracker.h index a30c5bc2487..1a384cc3d5c 100644 --- a/src/mongo/db/index/duplicate_key_tracker.h +++ b/src/mongo/db/index/duplicate_key_tracker.h @@ -57,6 +57,15 @@ public: */ DuplicateKeyTracker(OperationContext* opCtx, const IndexCatalogEntry* indexCatalogEntry); + /** + * Finds the temporary table associated with storing any duplicate key constraint violations for + * this index build. Only used when resuming an index build and the temporary table already + * exists on disk. finalizeTemporaryTable() must be called before destruction. + */ + DuplicateKeyTracker(OperationContext* opCtx, + const IndexCatalogEntry* indexCatalogEntry, + StringData ident); + /** * Deletes or keeps the temporary table for the duplicate key constraint violations. Must be * called before object destruction. diff --git a/src/mongo/db/index/index_build_interceptor.cpp b/src/mongo/db/index/index_build_interceptor.cpp index 1ab952d3640..f569fc00e46 100644 --- a/src/mongo/db/index/index_build_interceptor.cpp +++ b/src/mongo/db/index/index_build_interceptor.cpp @@ -83,25 +83,62 @@ bool IndexBuildInterceptor::typeCanFastpathMultikeyUpdates(IndexType indexType) return (indexType == INDEX_BTREE); } +void IndexBuildInterceptor::_initializeMultiKeyPaths(IndexCatalogEntry* entry) { + // `mergeMultikeyPaths` is sensitive to the two inputs having the same multikey + // "shape". Initialize `_multikeyPaths` with the right shape from the IndexCatalogEntry. 
+    auto indexType = entry->descriptor()->getIndexType();
+    if (typeCanFastpathMultikeyUpdates(indexType)) {
+        auto numFields = entry->descriptor()->getNumFields();
+        _multikeyPaths = MultikeyPaths{};
+        auto it = _multikeyPaths->begin();
+        _multikeyPaths->insert(it, numFields, {});
+    }
+}
+
 IndexBuildInterceptor::IndexBuildInterceptor(OperationContext* opCtx, IndexCatalogEntry* entry)
     : _indexCatalogEntry(entry),
       _sideWritesTable(
           opCtx->getServiceContext()->getStorageEngine()->makeTemporaryRecordStore(opCtx)),
-      _skippedRecordTracker(entry),
+      _skippedRecordTracker(opCtx, entry, boost::none),
       _sideWritesCounter(std::make_shared<AtomicWord<long long>>()) {
     if (entry->descriptor()->unique()) {
         _duplicateKeyTracker = std::make_unique<DuplicateKeyTracker>(opCtx, entry);
     }
-    // `mergeMultikeyPaths` is sensitive to the two inputs having the same multikey
-    // "shape". Initialize `_multikeyPaths` with the right shape from the IndexCatalogEntry.
-    auto indexType = entry->descriptor()->getIndexType();
-    if (typeCanFastpathMultikeyUpdates(indexType)) {
-        auto numFields = entry->descriptor()->getNumFields();
-        _multikeyPaths = MultikeyPaths{};
-        auto it = _multikeyPaths->begin();
-        _multikeyPaths->insert(it, numFields, {});
+
+    _initializeMultiKeyPaths(entry);
+}
+
+IndexBuildInterceptor::IndexBuildInterceptor(OperationContext* opCtx,
+                                             IndexCatalogEntry* entry,
+                                             StringData sideWritesIdent,
+                                             boost::optional<StringData> duplicateKeyTrackerIdent,
+                                             boost::optional<StringData> skippedRecordTrackerIdent)
+    : _indexCatalogEntry(entry),
+      _skippedRecordTracker(opCtx, entry, skippedRecordTrackerIdent),
+      _sideWritesCounter(std::make_shared<AtomicWord<long long>>()) {
+
+    _sideWritesTable =
+        opCtx->getServiceContext()->getStorageEngine()->makeTemporaryRecordStoreFromExistingIdent(
+            opCtx, sideWritesIdent);
+    auto finalizeTableOnFailure = makeGuard([&] {
+        _sideWritesTable->finalizeTemporaryTable(opCtx,
+                                                 TemporaryRecordStore::FinalizationAction::kDelete);
+    });
+
+    auto dupKeyTrackerIdentExists = duplicateKeyTrackerIdent ? true : false;
+    uassert(ErrorCodes::BadValue,
+            str::stream() << "Resume info must contain the duplicate key tracker ident ["
+                          << duplicateKeyTrackerIdent
+                          << "] if and only if the index is unique: " << entry->descriptor(),
+            entry->descriptor()->unique() == dupKeyTrackerIdentExists);
+    if (duplicateKeyTrackerIdent) {
+        _duplicateKeyTracker =
+            std::make_unique<DuplicateKeyTracker>(opCtx, entry, duplicateKeyTrackerIdent.get());
     }
+
+    _initializeMultiKeyPaths(entry);
+    finalizeTableOnFailure.dismiss();
 }
 
 void IndexBuildInterceptor::finalizeTemporaryTables(
diff --git a/src/mongo/db/index/index_build_interceptor.h b/src/mongo/db/index/index_build_interceptor.h
index f82669c3392..a9215367feb 100644
--- a/src/mongo/db/index/index_build_interceptor.h
+++ b/src/mongo/db/index/index_build_interceptor.h
@@ -73,6 +73,20 @@ public:
      */
     IndexBuildInterceptor(OperationContext* opCtx, IndexCatalogEntry* entry);
 
+    /**
+     * Finds the temporary table associated with storing writes during this index build. Only used
+     * when resuming an index build and the temporary table already exists on disk. Additionally,
+     * finds the temporary table associated with storing duplicate key constraint violations found
+     * during the build, if the index being built has uniqueness constraints.
+     *
+     * finalizeTemporaryTable() must be called before destruction.
+ */ + IndexBuildInterceptor(OperationContext* opCtx, + IndexCatalogEntry* entry, + StringData sideWritesIdent, + boost::optional duplicateKeyTrackerIdent, + boost::optional skippedRecordTrackerIdent); + /** * Deletes or keeps the temporary side writes and duplicate key constraint violations tables. * Must be called before object destruction. @@ -162,6 +176,8 @@ public: private: using SideWriteRecord = std::pair; + + void _initializeMultiKeyPaths(IndexCatalogEntry* entry); Status _applyWrite(OperationContext* opCtx, const Collection* coll, const BSONObj& doc, diff --git a/src/mongo/db/index/skipped_record_tracker.cpp b/src/mongo/db/index/skipped_record_tracker.cpp index 9fd39f158aa..0c32261e1d2 100644 --- a/src/mongo/db/index/skipped_record_tracker.cpp +++ b/src/mongo/db/index/skipped_record_tracker.cpp @@ -41,6 +41,25 @@ namespace { static constexpr StringData kRecordIdField = "recordId"_sd; } +SkippedRecordTracker::SkippedRecordTracker(IndexCatalogEntry* indexCatalogEntry) { + SkippedRecordTracker(nullptr, indexCatalogEntry, boost::none); +} + +SkippedRecordTracker::SkippedRecordTracker(OperationContext* opCtx, + IndexCatalogEntry* indexCatalogEntry, + boost::optional ident) + : _indexCatalogEntry(indexCatalogEntry) { + if (!ident) { + return; + } + + // Only initialize the table when resuming an index build if an ident already exists. Otherwise, + // lazily initialize table when we record the first document. + _skippedRecordsTable = + opCtx->getServiceContext()->getStorageEngine()->makeTemporaryRecordStoreFromExistingIdent( + opCtx, ident.get()); +} + void SkippedRecordTracker::finalizeTemporaryTable(OperationContext* opCtx, TemporaryRecordStore::FinalizationAction action) { if (_skippedRecordsTable) { diff --git a/src/mongo/db/index/skipped_record_tracker.h b/src/mongo/db/index/skipped_record_tracker.h index 16ea1a6106e..856ebed9197 100644 --- a/src/mongo/db/index/skipped_record_tracker.h +++ b/src/mongo/db/index/skipped_record_tracker.h @@ -46,8 +46,10 @@ class SkippedRecordTracker { SkippedRecordTracker(const SkippedRecordTracker&) = delete; public: - SkippedRecordTracker(IndexCatalogEntry* indexCatalogEntry) - : _indexCatalogEntry(indexCatalogEntry) {} + explicit SkippedRecordTracker(IndexCatalogEntry* indexCatalogEntry); + SkippedRecordTracker(OperationContext* opCtx, + IndexCatalogEntry* indexCatalogEntry, + boost::optional ident); /** * Records a RecordId that was unable to be indexed due to a key generation error. 
At the diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 8699084e704..b0e1daf3208 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -75,6 +75,7 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeCompletingAbort); MONGO_FAIL_POINT_DEFINE(failIndexBuildOnCommit); MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeAbortCleanUp); MONGO_FAIL_POINT_DEFINE(hangIndexBuildOnStepUp); +MONGO_FAIL_POINT_DEFINE(hangAfterSettingUpResumableIndexBuild); namespace { @@ -542,6 +543,89 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC return Status::OK(); } +Status IndexBuildsCoordinator::_setUpResumeIndexBuild(OperationContext* opCtx, + std::string dbName, + CollectionUUID collectionUUID, + const std::vector& specs, + const UUID& buildUUID, + const ResumeIndexInfo& resumeInfo) { + NamespaceStringOrUUID nssOrUuid{dbName, collectionUUID}; + + Lock::DBLock dbLock(opCtx, dbName, MODE_IX); + Lock::CollectionLock collLock(opCtx, nssOrUuid, MODE_X); + + auto& collectionCatalog = CollectionCatalog::get(opCtx->getServiceContext()); + auto collection = + collectionCatalog.lookupCollectionByUUID(opCtx, resumeInfo.getCollectionUUID()); + invariant(collection); + auto durableCatalog = DurableCatalog::get(opCtx); + + for (auto spec : specs) { + std::string indexName = spec.getStringField(IndexDescriptor::kIndexNameFieldName); + if (indexName.empty()) { + return Status(ErrorCodes::CannotCreateIndex, + str::stream() + << "Cannot create an index for a spec '" << spec + << "' without a non-empty string value for the 'name' field"); + } + + // Check that the information in the durable catalog matches the resume info. + uassert(4841702, + "Index not found in durable catalog while attempting to resume index build", + durableCatalog->isIndexPresent(opCtx, collection->getCatalogId(), indexName)); + + const auto durableBuildUUID = + durableCatalog->getIndexBuildUUID(opCtx, collection->getCatalogId(), indexName); + uassert(ErrorCodes::IndexNotFound, + str::stream() << "Cannot resume index build with a buildUUID: " << buildUUID + << " that did not match the buildUUID in the durable catalog: " + << durableBuildUUID, + durableBuildUUID == buildUUID); + + auto indexIdent = + durableCatalog->getIndexIdent(opCtx, collection->getCatalogId(), indexName); + uassert( + 4841703, + str::stream() << "No index ident found on disk that matches the index build to resume: " + << indexName, + indexIdent.size() > 0); + + uassertStatusOK(durableCatalog->checkMetaDataForIndex( + opCtx, collection->getCatalogId(), indexName, spec)); + } + + if (!collection->isInitialized()) { + collection->init(opCtx); + } + + auto protocol = IndexBuildProtocol::kTwoPhase; + auto replIndexBuildState = std::make_shared( + buildUUID, collection->uuid(), dbName, specs, protocol); + + Status status = [&]() { + stdx::unique_lock lk(_mutex); + return _registerIndexBuild(lk, replIndexBuildState); + }(); + if (!status.isOK()) { + return status; + } + + IndexBuildsManager::SetupOptions options; + options.protocol = protocol; + status = _indexBuildsManager.setUpIndexBuild( + opCtx, collection, specs, buildUUID, MultiIndexBlock::kNoopOnInitFn, options, resumeInfo); + if (!status.isOK()) { + LOGV2(4841705, + "Failed to resume index build", + "buildUUID"_attr = buildUUID, + logAttrs(collection->ns()), + "collectionUUID"_attr = collectionUUID, + "error"_attr = status); + } + + return status; +} + std::string 
IndexBuildsCoordinator::_indexBuildActionToString(IndexBuildAction action) { if (action == IndexBuildAction::kNoAction) { return "No action"; @@ -1325,14 +1409,58 @@ void IndexBuildsCoordinator::restartIndexBuildsForRecovery( OperationContext* opCtx, const IndexBuilds& buildsToRestart, const std::vector& buildsToResume) { + + stdx::unordered_set successfullyResumed; + + for (const auto& resumeInfo : buildsToResume) { + auto buildUUID = resumeInfo.getBuildUUID(); + auto collUUID = resumeInfo.getCollectionUUID(); + + boost::optional nss = + CollectionCatalog::get(opCtx).lookupNSSByUUID(opCtx, resumeInfo.getCollectionUUID()); + invariant(nss); + + std::vector indexSpecs; + indexSpecs.reserve(resumeInfo.getIndexes().size()); + + for (const auto& index : resumeInfo.getIndexes()) { + indexSpecs.push_back(index.getSpec()); + } + + LOGV2(4841700, + "Index build: resuming", + logAttrs(nss.get()), + "collectionUUID"_attr = collUUID, + "buildUUID"_attr = buildUUID, + "specs"_attr = indexSpecs); + + try { + // This spawns a new thread and returns immediately. These index builds will resume and + // wait for a commit or abort to be replicated. + MONGO_COMPILER_VARIABLE_UNUSED auto fut = uassertStatusOK(resumeIndexBuild( + opCtx, nss->db().toString(), collUUID, indexSpecs, buildUUID, resumeInfo)); + successfullyResumed.insert(buildUUID); + } catch (const DBException& e) { + LOGV2(4841701, + "Failed to resume index build, restarting instead", + "buildUUID"_attr = buildUUID, + "error"_attr = e); + } + } + for (auto& [buildUUID, build] : buildsToRestart) { + // Don't restart an index build that was already resumed. + if (successfullyResumed.contains(buildUUID)) { + continue; + } + boost::optional nss = CollectionCatalog::get(opCtx).lookupNSSByUUID(opCtx, build.collUUID); invariant(nss); LOGV2(20660, - "Restarting index build", - "collection"_attr = nss, + "Index build: restarting", + logAttrs(nss.get()), "collectionUUID"_attr = build.collUUID, "buildUUID"_attr = buildUUID); IndexBuildsCoordinator::IndexBuildOptions indexBuildOptions; @@ -1944,9 +2072,11 @@ Status IndexBuildsCoordinator::_setUpIndexBuild(OperationContext* opCtx, return Status::OK(); } -void IndexBuildsCoordinator::_runIndexBuild(OperationContext* opCtx, - const UUID& buildUUID, - const IndexBuildOptions& indexBuildOptions) noexcept { +void IndexBuildsCoordinator::_runIndexBuild( + OperationContext* opCtx, + const UUID& buildUUID, + const IndexBuildOptions& indexBuildOptions, + const boost::optional& resumeInfo) noexcept { { stdx::unique_lock lk(_mutex); while (_sleepForTest) { @@ -1987,7 +2117,7 @@ void IndexBuildsCoordinator::_runIndexBuild(OperationContext* opCtx, auto status = [&]() { try { - _runIndexBuildInner(opCtx, replState, indexBuildOptions); + _runIndexBuildInner(opCtx, replState, indexBuildOptions, resumeInfo); } catch (const DBException& ex) { return ex.toStatus(); } @@ -2089,9 +2219,11 @@ void IndexBuildsCoordinator::_cleanUpTwoPhaseAfterFailure( }); } -void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx, - std::shared_ptr replState, - const IndexBuildOptions& indexBuildOptions) { +void IndexBuildsCoordinator::_runIndexBuildInner( + OperationContext* opCtx, + std::shared_ptr replState, + const IndexBuildOptions& indexBuildOptions, + const boost::optional& resumeInfo) { // This Status stays unchanged unless we catch an exception in the following try-catch block. 
auto status = Status::OK(); try { @@ -2099,7 +2231,12 @@ void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx, hangAfterInitializingIndexBuild.pauseWhileSet(opCtx); } - _buildIndex(opCtx, replState, indexBuildOptions); + if (resumeInfo) { + _resumeIndexBuildFromPhase(opCtx, replState, indexBuildOptions, resumeInfo.get()); + } else { + _buildIndex(opCtx, replState, indexBuildOptions); + } + } catch (const DBException& ex) { status = ex.toStatus(); } @@ -2157,6 +2294,22 @@ void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx, uassertStatusOK(status); } +void IndexBuildsCoordinator::_resumeIndexBuildFromPhase( + OperationContext* opCtx, + std::shared_ptr replState, + const IndexBuildOptions& indexBuildOptions, + const ResumeIndexInfo& resumeInfo) { + if (MONGO_unlikely(hangAfterSettingUpResumableIndexBuild.shouldFail())) { + LOGV2(4841704, + "Hanging index build due to failpoint 'hangAfterSettingUpResumableIndexBuild'"); + hangAfterSettingUpResumableIndexBuild.pauseWhileSet(); + } + _insertKeysFromSideTablesWithoutBlockingWrites(opCtx, replState); + _signalPrimaryForCommitReadiness(opCtx, replState); + _insertKeysFromSideTablesBlockingWrites(opCtx, replState, indexBuildOptions); + _waitForNextIndexBuildActionAndCommit(opCtx, replState, indexBuildOptions); +} + void IndexBuildsCoordinator::_awaitLastOpTimeBeforeInterceptorsMajorityCommitted( OperationContext* opCtx, std::shared_ptr replState) { auto replCoord = repl::ReplicationCoordinator::get(opCtx); diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h index c78134998db..e5568b48ba2 100644 --- a/src/mongo/db/index_builds_coordinator.h +++ b/src/mongo/db/index_builds_coordinator.h @@ -132,7 +132,7 @@ public: * * A Future is returned that will complete when the index build commits or aborts. * - * On a successful index build, calling Future::get(), or Future::getNoThrows(), returns index + * On a successful index build, calling Future::get(), or Future::getNoThrow(), returns index * catalog statistics. * * Returns an error status if there are any errors setting up the index build. @@ -147,9 +147,28 @@ public: IndexBuildOptions indexBuildOptions) = 0; /** - * Resumes and restarts index builds for recovery. Anything that fails to resume will be - * started in a background thread. Each index build will wait for a replicated commit or abort, - * as in steady-state. + * Reconstructs the in-memory state of the index build. When successful, returns after the index + * build has been resumed from the phase it left off in. + * + * A Future is returned that will complete when the index build commits or aborts. + * + * On a successful index build, calling Future::get(), or Future::getNoThrow(), returns index + * catalog statistics. + * + * Returns an error status if there are any errors setting up the index build. + */ + virtual StatusWith> resumeIndexBuild( + OperationContext* opCtx, + std::string dbName, + CollectionUUID collectionUUID, + const std::vector& specs, + const UUID& buildUUID, + const ResumeIndexInfo& resumeInfo) = 0; + + /** + * Resumes and restarts index builds for recovery. Anything that fails to resume will be started + * in a background thread. Each index build will wait for a replicated commit or abort, as in + * steady-state. 
*/ void restartIndexBuildsForRecovery(OperationContext* opCtx, const IndexBuilds& buildsToRestart, @@ -534,6 +553,17 @@ protected: CollectionUUID collectionUUID, const std::vector& specs, const UUID& buildUUID); + /** + * Reconstructs the in-memory state of the index build so that it can be resumed from the phase + * it was in when the node cleanly shut down. + */ + Status _setUpResumeIndexBuild(OperationContext* opCtx, + std::string dbName, + CollectionUUID collectionUUID, + const std::vector& specs, + const UUID& buildUUID, + const ResumeIndexInfo& resumeInfo); + /** * Runs the index build on the caller thread. Handles unregistering the index build and setting * the index build's Promise with the outcome of the index build. @@ -541,7 +571,8 @@ protected: */ void _runIndexBuild(OperationContext* opCtx, const UUID& buildUUID, - const IndexBuildOptions& indexBuildOptions) noexcept; + const IndexBuildOptions& indexBuildOptions, + const boost::optional& resumeInfo) noexcept; /** * Acquires locks and runs index build. Throws on error. @@ -549,7 +580,18 @@ protected: */ void _runIndexBuildInner(OperationContext* opCtx, std::shared_ptr replState, - const IndexBuildOptions& indexBuildOptions); + const IndexBuildOptions& indexBuildOptions, + const boost::optional& resumeInfo); + + /** + * Resumes the index build from the phase that it was in when the node cleanly shut down. By the + * time this function is called, the in-memory state of the index build should already have been + * reconstructed. + */ + void _resumeIndexBuildFromPhase(OperationContext* opCtx, + std::shared_ptr replState, + const IndexBuildOptions& indexBuildOptions, + const ResumeIndexInfo& resumeInfo); /** * Cleans up a single-phase index build after a failure. diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp index 5a3f9061e90..c66627251b5 100644 --- a/src/mongo/db/index_builds_coordinator_mongod.cpp +++ b/src/mongo/db/index_builds_coordinator_mongod.cpp @@ -122,6 +122,38 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx, const UUID& buildUUID, IndexBuildProtocol protocol, IndexBuildOptions indexBuildOptions) { + return _startIndexBuild( + opCtx, dbName, collectionUUID, specs, buildUUID, protocol, indexBuildOptions, boost::none); +} + +StatusWith> +IndexBuildsCoordinatorMongod::resumeIndexBuild(OperationContext* opCtx, + std::string dbName, + CollectionUUID collectionUUID, + const std::vector& specs, + const UUID& buildUUID, + const ResumeIndexInfo& resumeInfo) { + IndexBuildsCoordinator::IndexBuildOptions indexBuildOptions; + indexBuildOptions.applicationMode = ApplicationMode::kStartupRepair; + return _startIndexBuild(opCtx, + dbName, + collectionUUID, + specs, + buildUUID, + IndexBuildProtocol::kTwoPhase, + indexBuildOptions, + resumeInfo); +} + +StatusWith> +IndexBuildsCoordinatorMongod::_startIndexBuild(OperationContext* opCtx, + std::string dbName, + CollectionUUID collectionUUID, + const std::vector& specs, + const UUID& buildUUID, + IndexBuildProtocol protocol, + IndexBuildOptions indexBuildOptions, + const boost::optional& resumeInfo) { const NamespaceStringOrUUID nssOrUuid{dbName, collectionUUID}; { @@ -178,11 +210,17 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx, }); if (indexBuildOptions.applicationMode == ApplicationMode::kStartupRepair) { - // Two phase index build recovery goes though a different set-up procedure because the - // original index will be dropped first. 
+ // Two phase index build recovery goes though a different set-up procedure because we will + // either resume the index build or the original index will be dropped first. invariant(protocol == IndexBuildProtocol::kTwoPhase); - auto status = - _setUpIndexBuildForTwoPhaseRecovery(opCtx, dbName, collectionUUID, specs, buildUUID); + auto status = Status::OK(); + if (resumeInfo) { + status = _setUpResumeIndexBuild( + opCtx, dbName, collectionUUID, specs, buildUUID, resumeInfo.get()); + } else { + status = _setUpIndexBuildForTwoPhaseRecovery( + opCtx, dbName, collectionUUID, specs, buildUUID); + } if (!status.isOK()) { return status; } @@ -247,7 +285,8 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx, startPromise = std::move(startPromise), startTimestamp, shardVersion, - dbVersion + dbVersion, + resumeInfo ](auto status) mutable noexcept { auto onScopeExitGuard = makeGuard([&] { stdx::unique_lock lk(_mutex); @@ -297,7 +336,7 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx, // Runs the remainder of the index build. Sets the promise result and cleans up the index // build. - _runIndexBuild(opCtx.get(), buildUUID, indexBuildOptions); + _runIndexBuild(opCtx.get(), buildUUID, indexBuildOptions, resumeInfo); // Do not exit with an incomplete future. invariant(replState->sharedPromise.getFuture().isReady()); diff --git a/src/mongo/db/index_builds_coordinator_mongod.h b/src/mongo/db/index_builds_coordinator_mongod.h index c9bf15b6b3a..a16cbb842e1 100644 --- a/src/mongo/db/index_builds_coordinator_mongod.h +++ b/src/mongo/db/index_builds_coordinator_mongod.h @@ -78,6 +78,19 @@ public: IndexBuildProtocol protocol, IndexBuildOptions indexBuildOptions) override; + /** + * Reconstructs the in-memory state of the index build, then passes the build off to an + * asynchronous thread to run. A Future is returned so that the user can await the asynchronous + * build result. + */ + StatusWith> resumeIndexBuild( + OperationContext* opCtx, + std::string dbName, + CollectionUUID collectionUUID, + const std::vector& specs, + const UUID& buildUUID, + const ResumeIndexInfo& resumeInfo) override; + Status voteCommitIndexBuild(OperationContext* opCtx, const UUID& buildUUID, const HostAndPort& hostAndPort) override; @@ -159,6 +172,16 @@ private: std::shared_ptr replState, const IndexBuildOptions& indexBuildOptions) override; + StatusWith> _startIndexBuild( + OperationContext* opCtx, + std::string dbName, + CollectionUUID collectionUUID, + const std::vector& specs, + const UUID& buildUUID, + IndexBuildProtocol protocol, + IndexBuildOptions indexBuildOptions, + const boost::optional& resumeInfo); + // Thread pool on which index builds are run. ThreadPool _threadPool; diff --git a/src/mongo/db/resumable_index_builds.idl b/src/mongo/db/resumable_index_builds.idl index 9f8fc4a6818..3e4fdeff29d 100644 --- a/src/mongo/db/resumable_index_builds.idl +++ b/src/mongo/db/resumable_index_builds.idl @@ -96,7 +96,7 @@ structs: optional: true spec: description: "The index specification" - type: object + type: object_owned ResumeIndexInfo: description: "Information needed to resume index builds" diff --git a/src/mongo/db/storage/durable_catalog.h b/src/mongo/db/storage/durable_catalog.h index b8b1df02095..2ee532cd484 100644 --- a/src/mongo/db/storage/durable_catalog.h +++ b/src/mongo/db/storage/durable_catalog.h @@ -93,6 +93,14 @@ public: RecordId id, BSONCollectionCatalogEntry::MetaData& md) = 0; + /** + * Checks that the metadata for the index exists and matches the given spec. 
+ */ + virtual Status checkMetaDataForIndex(OperationContext* opCtx, + RecordId catalogId, + const std::string& indexName, + const BSONObj& spec) = 0; + virtual std::vector getAllIdents(OperationContext* opCtx) const = 0; virtual bool isUserDataIdent(StringData ident) const = 0; diff --git a/src/mongo/db/storage/durable_catalog_impl.cpp b/src/mongo/db/storage/durable_catalog_impl.cpp index e239aa61f0c..eda5c179748 100644 --- a/src/mongo/db/storage/durable_catalog_impl.cpp +++ b/src/mongo/db/storage/durable_catalog_impl.cpp @@ -684,6 +684,29 @@ void DurableCatalogImpl::putMetaData(OperationContext* opCtx, fassert(28521, status); } +Status DurableCatalogImpl::checkMetaDataForIndex(OperationContext* opCtx, + RecordId catalogId, + const std::string& indexName, + const BSONObj& spec) { + auto md = getMetaData(opCtx, catalogId); + int offset = md.findIndexOffset(indexName); + if (offset < 0) { + return {ErrorCodes::IndexNotFound, + str::stream() << "Index [" << indexName + << "] not found in metadata for recordId: " << catalogId}; + } + + if (spec.woCompare(md.indexes[offset].spec)) { + return {ErrorCodes::BadValue, + str::stream() << "Spec for index [" << indexName + << "] does not match spec in the metadata for recordId: " << catalogId + << ". Spec: " << spec + << " metadata's spec: " << md.indexes[offset].spec}; + } + + return Status::OK(); +} + Status DurableCatalogImpl::_replaceEntry(OperationContext* opCtx, RecordId catalogId, const NamespaceString& toNss, diff --git a/src/mongo/db/storage/durable_catalog_impl.h b/src/mongo/db/storage/durable_catalog_impl.h index 5c674af4824..2b054b424eb 100644 --- a/src/mongo/db/storage/durable_catalog_impl.h +++ b/src/mongo/db/storage/durable_catalog_impl.h @@ -81,6 +81,11 @@ public: RecordId catalogId, BSONCollectionCatalogEntry::MetaData& md); + Status checkMetaDataForIndex(OperationContext* opCtx, + RecordId catalogId, + const std::string& indexName, + const BSONObj& spec); + std::vector getAllIdents(OperationContext* opCtx) const; bool isUserDataIdent(StringData ident) const; diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h index 6ce5eeb3add..085336a23c6 100644 --- a/src/mongo/db/storage/storage_engine.h +++ b/src/mongo/db/storage/storage_engine.h @@ -355,14 +355,20 @@ public: const NamespaceString& nss) = 0; /** - * Creates a temporary RecordStore on the storage engine. This record store will drop itself - * automatically when it goes out of scope. This means the TemporaryRecordStore should not exist - * any longer than the OperationContext used to create it. On startup, the storage engine will - * drop any un-dropped temporary record stores. + * Creates a temporary RecordStore on the storage engine. On startup after an unclean shutdown, + * the storage engine will drop any un-dropped temporary record stores. */ virtual std::unique_ptr makeTemporaryRecordStore( OperationContext* opCtx) = 0; + /** + * Creates a temporary RecordStore on the storage engine from an existing ident on disk. On + * startup after an unclean shutdown, the storage engine will drop any un-dropped temporary + * record stores. + */ + virtual std::unique_ptr makeTemporaryRecordStoreFromExistingIdent( + OperationContext* opCtx, StringData ident) = 0; + /** * This method will be called before there is a clean shutdown. Storage engines should * override this method if they have clean-up to do that is different from unclean shutdown. 
diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp index a4590dd850f..93742937d27 100644 --- a/src/mongo/db/storage/storage_engine_impl.cpp +++ b/src/mongo/db/storage/storage_engine_impl.cpp @@ -368,6 +368,11 @@ bool StorageEngineImpl::_handleInternalIdents( reconcileResult->indexBuildsToResume.push_back(resumeInfo); + // Once we have parsed the resume info, we can safely drop the internal ident. + // TODO SERVER-49846: revisit this logic since this could cause the side tables + // associated with the index build to be orphaned if resuming fails. + internalIdentsToDrop->insert(ident); + LOGV2(4916301, "Found unfinished index build to resume", "buildUUID"_attr = resumeInfo.getBuildUUID(), @@ -842,6 +847,12 @@ std::unique_ptr StorageEngineImpl::makeTemporaryRecordStor return std::make_unique(getEngine(), std::move(rs)); } +std::unique_ptr StorageEngineImpl::makeTemporaryRecordStoreFromExistingIdent( + OperationContext* opCtx, StringData ident) { + auto rs = _engine->getRecordStore(opCtx, "", ident, CollectionOptions()); + return std::make_unique(getEngine(), std::move(rs)); +} + void StorageEngineImpl::setJournalListener(JournalListener* jl) { _engine->setJournalListener(jl); } diff --git a/src/mongo/db/storage/storage_engine_impl.h b/src/mongo/db/storage/storage_engine_impl.h index 926d3900649..2e09d2ff82a 100644 --- a/src/mongo/db/storage/storage_engine_impl.h +++ b/src/mongo/db/storage/storage_engine_impl.h @@ -111,6 +111,9 @@ public: virtual std::unique_ptr makeTemporaryRecordStore( OperationContext* opCtx) override; + virtual std::unique_ptr makeTemporaryRecordStoreFromExistingIdent( + OperationContext* opCtx, StringData ident) override; + virtual void cleanShutdown() override; virtual void setStableTimestamp(Timestamp stableTimestamp, bool force = false) override; diff --git a/src/mongo/db/storage/storage_engine_mock.h b/src/mongo/db/storage/storage_engine_mock.h index a363c11f800..1b303b35985 100644 --- a/src/mongo/db/storage/storage_engine_mock.h +++ b/src/mongo/db/storage/storage_engine_mock.h @@ -93,6 +93,10 @@ public: std::unique_ptr makeTemporaryRecordStore(OperationContext* opCtx) final { return {}; } + std::unique_ptr makeTemporaryRecordStoreFromExistingIdent( + OperationContext* opCtx, StringData ident) final { + return {}; + } void cleanShutdown() final {} SnapshotManager* getSnapshotManager() const final { return nullptr; diff --git a/src/mongo/embedded/index_builds_coordinator_embedded.cpp b/src/mongo/embedded/index_builds_coordinator_embedded.cpp index b4d3fe88450..ffbcaccd0e6 100644 --- a/src/mongo/embedded/index_builds_coordinator_embedded.cpp +++ b/src/mongo/embedded/index_builds_coordinator_embedded.cpp @@ -69,12 +69,22 @@ IndexBuildsCoordinatorEmbedded::startIndexBuild(OperationContext* opCtx, if (!status.isOK()) { return status; } - _runIndexBuild(opCtx, buildUUID, indexBuildOptions); + _runIndexBuild(opCtx, buildUUID, indexBuildOptions, boost::none /* resumeInfo */); auto replState = invariant(_getIndexBuild(buildUUID)); return replState->sharedPromise.getFuture(); } +StatusWith> +IndexBuildsCoordinatorEmbedded::resumeIndexBuild(OperationContext* opCtx, + std::string dbName, + CollectionUUID collectionUUID, + const std::vector& specs, + const UUID& buildUUID, + const ResumeIndexInfo& resumeInfo) { + MONGO_UNREACHABLE; +} + void IndexBuildsCoordinatorEmbedded::_signalPrimaryForCommitReadiness( OperationContext* opCtx, std::shared_ptr replState) {} diff --git a/src/mongo/embedded/index_builds_coordinator_embedded.h 
b/src/mongo/embedded/index_builds_coordinator_embedded.h
index 84c9d30ded7..81b02ddada9 100644
--- a/src/mongo/embedded/index_builds_coordinator_embedded.h
+++ b/src/mongo/embedded/index_builds_coordinator_embedded.h
@@ -65,6 +65,14 @@ public:
                                    IndexBuildProtocol protocol,
                                    IndexBuildOptions indexBuildOptions) override;
 
+    StatusWith<SharedSemiFuture<ReplIndexBuildState::IndexCatalogStats>> resumeIndexBuild(
+        OperationContext* opCtx,
+        std::string dbName,
+        CollectionUUID collectionUUID,
+        const std::vector<BSONObj>& specs,
+        const UUID& buildUUID,
+        const ResumeIndexInfo& resumeInfo) override;
+
     void setSignalAndCancelVoteRequestCbkIfActive(WithLock ReplIndexBuildStateLk,
                                                   OperationContext* opCtx,
                                                   std::shared_ptr<ReplIndexBuildState> replState,
-- 
cgit v1.2.1
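
Note on using the updated test helper: the patched ResumableIndexBuildTest.run() above takes a trailing postIndexBuildInserts argument, which resumable_index_build_drain_writes_phase.js passes as [{a: 6}, {a: 7}]. A test for a different index build phase would be wired up the same way. The sketch below is illustrative only and is not part of this patch; the hangIndexBuildDuringCollectionScanPhaseBeforeInsertion failpoint name and its fieldsToMatch data are assumptions modeled on the drain-writes failpoint used above.

// Illustrative sketch (not in this patch): exercising the new postIndexBuildInserts
// argument of ResumableIndexBuildTest.run() during the collection scan phase.
(function() {
"use strict";

load("jstests/noPassthrough/libs/index_build.js");

const dbName = "test";
const collName = "resumable_index_build_sketch";

const rst = new ReplSetTest({nodes: 1});
rst.startSet();
rst.initiate();

const primary = rst.getPrimary();
if (!ResumableIndexBuildTest.resumableIndexBuildsEnabled(primary)) {
    jsTestLog("Skipping test because resumable index builds are not enabled");
    rst.stopSet();
    return;
}

const coll = primary.getDB(dbName).getCollection(collName);
assert.commandWorked(coll.insert([{a: 1}, {a: 2}, {a: 3}]));

ResumableIndexBuildTest.run(rst,
                            dbName,
                            collName,
                            {a: 1},
                            "hangIndexBuildDuringCollectionScanPhaseBeforeInsertion",  // assumed
                            {fieldsToMatch: {a: 2}},                                   // assumed
                            [{a: 4}, {a: 5}],   // inserted into the side writes table
                            [{a: 6}, {a: 7}]);  // postIndexBuildInserts, new in this patch

rst.stopSet();
})();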