diff options
author | Gregory Wlodarek <gregory.wlodarek@mongodb.com> | 2022-09-28 17:43:41 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-11-10 06:22:26 +0000 |
commit | 33ee05c9dd6458cae61414c50453ae3f7c4ee835 (patch) | |
tree | eaf4030d59dcd39057a41f6e222ed60287a6824c | |
parent | e570e79c4761bc09b6814eb3c4cc175c2dfd8462 (diff) | |
download | mongo-33ee05c9dd6458cae61414c50453ae3f7c4ee835.tar.gz |
SERVER-69877 Remove untimestamped writes to the catalog when restarting unfinished index builds during startup recovery
(cherry picked from commit a61ed7a6f6c702224ba822dfd8a4caeba335efcd)
23 files changed, 341 insertions, 217 deletions
diff --git a/jstests/noPassthrough/delete_incomplete_index_data_on_startup_recovery.js b/jstests/noPassthrough/delete_incomplete_index_data_on_startup_recovery.js deleted file mode 100644 index baeef175b78..00000000000 --- a/jstests/noPassthrough/delete_incomplete_index_data_on_startup_recovery.js +++ /dev/null @@ -1,92 +0,0 @@ -/** - * Test that the data tables for an incomplete index on unclean shutdown is dropped immediately as - * opposed to deferred per the usual two-phase index drop logic. - * - * @tags: [ - * requires_replication, - * # The primary is restarted and must retain its data. - * requires_persistence, - * ] - */ - -(function() { -"use strict"; - -load("jstests/libs/fail_point_util.js"); - -const rst = new ReplSetTest({name: jsTest.name(), nodes: 1}); -rst.startSet(); -rst.initiate(); - -const dbName = "testDB"; -const collName = "fantasticalCollName"; -const primary = rst.getPrimary(); -const primaryDB = primary.getDB(dbName); -const primaryColl = primaryDB[collName]; - -assert.commandWorked(primaryDB.createCollection(collName)); -assert.commandWorked(primaryColl.insert({x: 1})); -assert.commandWorked(primaryColl.insert({x: 2})); -assert.commandWorked(primaryColl.insert({x: 3})); - -const failPoint = configureFailPoint(primaryDB, "hangIndexBuildBeforeCommit"); -let ps = undefined; -try { - TestData.dbName = dbName; - TestData.collName = collName; - TestData.indexSpec = {x: 1}; - TestData.indexName = {name: "myFantasticalIndex"}; - - ps = startParallelShell(() => { - jsTestLog("Starting an index build that will hang, establishing an unfinished index " + - "to be found on startup."); - // Crashing the server while this command is running may cause the parallel shell code to - // error and stop executing. We will therefore ignore the result of this command and - // parallel shell. Test correctness is guaranteed by waiting for the failpoint this command - // hits. - db.getSiblingDB(TestData.dbName)[TestData.collName].createIndex(TestData.indexSpec, - TestData.indexName); - }, primary.port); - - jsTest.log("Waiting for the async index build to hit the failpoint."); - failPoint.wait(); - - jsTest.log( - "Force a checkpoint, now that the index is present in the catalog. This ensures that on " + - "startup the index will not be an 'unknown ident' discarded immediately because it does " + - "not have an associcated catalog entry. Or some other unexpected checkpoint situation."); - assert.commandWorked(primary.adminCommand({fsync: 1})); - - // Crash and restart the primary, which should cause the old index data to be deleted before - // rebuilding the index from scratch. - rst.stop(primary, 9, {allowedExitCode: MongoRunner.EXIT_SIGKILL}); -} catch (error) { - // Turn off the failpoint before allowing the test to end, so nothing hangs while the server - // shuts down or in post-test hooks. - failPoint.off(); - throw error; -} finally { - if (ps) { - ps({checkExitSuccess: false}); - } -} - -// Restart the node and set 'noCleanData' so that the node's data doesn't get wiped out. -rst.start(primary, {noCleanData: true}); - -// Wait for the node to become available. -rst.getPrimary(); - -// For debugging purposes, fetch and log the test collection's indexes after restart. -const indexes = - assert.commandWorked(rst.getPrimary().getDB(dbName).runCommand({listIndexes: collName})) - .cursor.firstBatch; -jsTestLog("The node restarted with the following indexes on coll '" + collName + - "': " + tojson(indexes)); - -// Now that the node is up and running, check that the expected code path was hit, to immediately -// drop the index data table on startup. -checkLog.containsJson(rst.getPrimary(), 6361201, {index: "myFantasticalIndex"}); - -rst.stopSet(); -})(); diff --git a/jstests/noPassthrough/durable_history_index_usage.js b/jstests/noPassthrough/durable_history_index_usage.js index 0b32d7a717d..b2f7c01f6c5 100644 --- a/jstests/noPassthrough/durable_history_index_usage.js +++ b/jstests/noPassthrough/durable_history_index_usage.js @@ -109,13 +109,15 @@ replTest.start( true /* restart */); const checkLogs = function() { - // The index build was not yet completed at the recovery timestamp, it will be dropped and - // rebuilt. - checkLog.containsJson(primary(), 6361201, { + // Found index from unfinished build. + checkLog.containsJson(primary(), 22253, { index: "a_1", namespace: coll().getFullName(), }); + // Resetting unfinished index. + checkLog.containsJson(primary(), 6987700, {namespace: coll().getFullName(), index: "a_1"}); + // Index build restarting. checkLog.containsJson(primary(), 20660); }; diff --git a/jstests/noPassthrough/libs/missing_index_ident.js b/jstests/noPassthrough/libs/missing_index_ident.js index 74cb057ed38..3be61a404c2 100644 --- a/jstests/noPassthrough/libs/missing_index_ident.js +++ b/jstests/noPassthrough/libs/missing_index_ident.js @@ -76,8 +76,8 @@ const MissingIndexIdent = class { }); // Since the index build was not yet completed at the recovery timestamp, its ident will - // be dropped. - checkLog.containsJson(conn, 6361201, { + // be reset. + checkLog.containsJson(conn, 6987700, { index: 'a_1', namespace: coll.getFullName(), ident: ident, diff --git a/jstests/noPassthrough/missing_index_ident_standalone_drop.js b/jstests/noPassthrough/missing_index_ident_standalone_drop.js index abb59d5bf64..79a0a65230c 100644 --- a/jstests/noPassthrough/missing_index_ident_standalone_drop.js +++ b/jstests/noPassthrough/missing_index_ident_standalone_drop.js @@ -24,6 +24,12 @@ IndexBuildTest.assertIndexes(coll, 2, ['_id_', 'a_1']); assert.commandWorked(standalone.getDB('test')[jsTestName()].dropIndex('a_1')); IndexBuildTest.assertIndexes(coll, 1, ['_id_']); +// Completing drop for index table immediately. +checkLog.containsJson(standalone, 6361201, { + index: 'a_1', + namespace: coll.getFullName(), +}); + MongoRunner.stopMongod(standalone); replTest.start(0, undefined, true /* restart */); IndexBuildTest.assertIndexes(replTest.getPrimary().getDB('test')[jsTestName()], 1, ['_id_']); diff --git a/jstests/noPassthrough/unfinished_index_builds_restart_using_same_ident.js b/jstests/noPassthrough/unfinished_index_builds_restart_using_same_ident.js new file mode 100644 index 00000000000..919504b0600 --- /dev/null +++ b/jstests/noPassthrough/unfinished_index_builds_restart_using_same_ident.js @@ -0,0 +1,80 @@ +/** + * Tests that during startup recovery unfinished index builds that are not resumable will drop and + * recreate the index table using the same ident to avoid doing untimestamped writes to the catalog. + * + * @tags: [ + * requires_persistence, + * requires_replication, + * requires_wiredtiger, + * ] + */ +(function() { +"use strict"; + +load("jstests/libs/fail_point_util.js"); +load('jstests/noPassthrough/libs/index_build.js'); + +const replSet = new ReplSetTest({nodes: 1}); +replSet.startSet(); +replSet.initiate(); + +const dbName = "test"; +const collName = jsTestName(); + +let primary = replSet.getPrimary(); +let primaryDB = primary.getDB(dbName); +let primaryColl = primaryDB[collName]; + +assert.commandWorked(primaryDB.createCollection(collName)); +assert.commandWorked(primaryColl.insert({x: 1})); +assert.commandWorked(primaryColl.insert({x: 2})); +assert.commandWorked(primaryColl.insert({x: 3})); + +const failPoint = configureFailPoint(primaryDB, "hangIndexBuildBeforeCommit"); +const indexBuild = IndexBuildTest.startIndexBuild(primaryDB.getMongo(), + primaryColl.getFullName(), + {x: 1}, + {}, + [ErrorCodes.InterruptedAtShutdown]); +failPoint.wait(); + +// Get the index ident. +const ident = assert.commandWorked(primaryDB.runCommand({collStats: collName})) + .indexDetails.x_1.uri.substring('statistics:table:'.length); +jsTestLog("Ident: " + ident); + +// Take a checkpoint so that the unfinished index is present in the catalog during the next startup. +assert.commandWorked(primary.adminCommand({fsync: 1})); + +// Crash and restart the node. +replSet.stop(primary, 9, {allowedExitCode: MongoRunner.EXIT_SIGKILL}); +indexBuild({checkExitSuccess: false}); +replSet.start(primary, {noCleanData: true, setParameter: {logLevel: 1}}); + +primary = replSet.getPrimary(); +primaryDB = primary.getDB(dbName); +primaryColl = primaryDB[collName]; + +// Resetting unfinished index. +checkLog.containsJson( + primary, 6987700, {namespace: primaryColl.getFullName(), index: "x_1", ident: ident}); + +// WT drop. +checkLog.containsJson(primary, 22338, {uri: "table:" + ident}); + +// Create uri. +checkLog.containsJson(primary, 51780, {uri: "table:" + ident}); + +// Index build starting. +checkLog.containsJson(primary, 20384, {ident: ident}); + +IndexBuildTest.waitForIndexBuildToStop(primaryDB); +IndexBuildTest.assertIndexes(primaryColl, 2, ["_id_", "x_1"]); + +assert.commandWorked(primaryColl.insert({x: 4})); +assert.commandWorked(primaryColl.insert({x: 5})); + +assert.eq(5, primaryColl.find().hint("x_1").count()); + +replSet.stopSet(); +}()); diff --git a/src/mongo/db/catalog/database_test.cpp b/src/mongo/db/catalog/database_test.cpp index 0b0200a7bf9..720c3c50dd9 100644 --- a/src/mongo/db/catalog/database_test.cpp +++ b/src/mongo/db/catalog/database_test.cpp @@ -253,7 +253,7 @@ void _testDropCollectionThrowsExceptionIfThereAreIndexesInProgress(OperationCont collection->ns(), indexInfoObj, IndexBuildMethod::kHybrid, UUID::gen()); { WriteUnitOfWork wuow(opCtx); - ASSERT_OK(indexBuildBlock->init(opCtx, collection)); + ASSERT_OK(indexBuildBlock->init(opCtx, collection, /*forRecovery=*/false)); wuow.commit(); } ON_BLOCK_EXIT([&indexBuildBlock, opCtx, collection] { diff --git a/src/mongo/db/catalog/index_build_block.cpp b/src/mongo/db/catalog/index_build_block.cpp index e1b628d8842..375d7e59e31 100644 --- a/src/mongo/db/catalog/index_build_block.cpp +++ b/src/mongo/db/catalog/index_build_block.cpp @@ -85,6 +85,11 @@ void IndexBuildBlock::_completeInit(OperationContext* opCtx, Collection* collect .registerIndex(desc->indexName(), desc->keyPattern(), IndexFeatures::make(desc, collection->ns().isOnInternalDb())); + opCtx->recoveryUnit()->onRollback( + [collectionDecorations = collection->getSharedDecorations(), indexName = _indexName] { + CollectionIndexUsageTrackerDecoration::get(collectionDecorations) + .unregisterIndex(indexName); + }); } Status IndexBuildBlock::initForResume(OperationContext* opCtx, @@ -132,7 +137,7 @@ Status IndexBuildBlock::initForResume(OperationContext* opCtx, return Status::OK(); } -Status IndexBuildBlock::init(OperationContext* opCtx, Collection* collection) { +Status IndexBuildBlock::init(OperationContext* opCtx, Collection* collection, bool forRecovery) { // Being in a WUOW means all timestamping responsibility can be pushed up to the caller. invariant(opCtx->lockState()->inAWriteUnitOfWork()); @@ -160,14 +165,25 @@ Status IndexBuildBlock::init(OperationContext* opCtx, Collection* collection) { !replCoord->getMemberState().primary() && isBackgroundIndex; } - // Setup on-disk structures. - Status status = collection->prepareForIndexBuild( - opCtx, descriptor.get(), _buildUUID, isBackgroundSecondaryBuild); - if (!status.isOK()) - return status; + if (!forRecovery) { + // Setup on-disk structures. We skip this during startup recovery for unfinished indexes as + // everything is already in-place. + Status status = collection->prepareForIndexBuild( + opCtx, descriptor.get(), _buildUUID, isBackgroundSecondaryBuild); + if (!status.isOK()) + return status; + } - auto indexCatalogEntry = collection->getIndexCatalog()->createIndexEntry( - opCtx, collection, std::move(descriptor), CreateIndexEntryFlags::kNone); + auto indexCatalog = collection->getIndexCatalog(); + IndexCatalogEntry* indexCatalogEntry = nullptr; + if (forRecovery) { + auto desc = indexCatalog->findIndexByName( + opCtx, _indexName, IndexCatalog::InclusionPolicy::kUnfinished); + indexCatalogEntry = indexCatalog->getEntryShared(desc).get(); + } else { + indexCatalogEntry = indexCatalog->createIndexEntry( + opCtx, collection, std::move(descriptor), CreateIndexEntryFlags::kNone); + } if (_method == IndexBuildMethod::kHybrid) { _indexBuildInterceptor = std::make_unique<IndexBuildInterceptor>(opCtx, indexCatalogEntry); diff --git a/src/mongo/db/catalog/index_build_block.h b/src/mongo/db/catalog/index_build_block.h index 48d0f6e49af..b4668717ef2 100644 --- a/src/mongo/db/catalog/index_build_block.h +++ b/src/mongo/db/catalog/index_build_block.h @@ -62,7 +62,7 @@ public: * * Must be called from within a `WriteUnitOfWork` */ - Status init(OperationContext* opCtx, Collection* collection); + Status init(OperationContext* opCtx, Collection* collection, bool forRecovery); /** * Makes sure that an entry for the index was created at startup in the IndexCatalog. Returns diff --git a/src/mongo/db/catalog/index_builds_manager.cpp b/src/mongo/db/catalog/index_builds_manager.cpp index d872fa28daa..f5f0c949428 100644 --- a/src/mongo/db/catalog/index_builds_manager.cpp +++ b/src/mongo/db/catalog/index_builds_manager.cpp @@ -115,7 +115,8 @@ Status IndexBuildsManager::setUpIndexBuild(OperationContext* opCtx, std::vector<BSONObj> indexes; try { indexes = writeConflictRetry(opCtx, "IndexBuildsManager::setUpIndexBuild", nss.ns(), [&]() { - return uassertStatusOK(builder->init(opCtx, collection, specs, onInit, resumeInfo)); + return uassertStatusOK( + builder->init(opCtx, collection, specs, onInit, options.forRecovery, resumeInfo)); }); } catch (const DBException& ex) { return ex.toStatus(); diff --git a/src/mongo/db/catalog/index_builds_manager.h b/src/mongo/db/catalog/index_builds_manager.h index 8504aab98bd..da7de14dca3 100644 --- a/src/mongo/db/catalog/index_builds_manager.h +++ b/src/mongo/db/catalog/index_builds_manager.h @@ -73,6 +73,7 @@ public: IndexConstraints indexConstraints = IndexConstraints::kEnforce; IndexBuildProtocol protocol = IndexBuildProtocol::kSinglePhase; IndexBuildMethod method = IndexBuildMethod::kHybrid; + bool forRecovery = false; }; IndexBuildsManager() = default; diff --git a/src/mongo/db/catalog/index_catalog.h b/src/mongo/db/catalog/index_catalog.h index 625a7bc56cc..a5ebf55718c 100644 --- a/src/mongo/db/catalog/index_catalog.h +++ b/src/mongo/db/catalog/index_catalog.h @@ -420,6 +420,16 @@ public: const IndexDescriptor* desc) = 0; /** + * Resets the index given its descriptor. + * + * This can only be called during startup recovery as it involves recreating the index table to + * allow bulk cursors to be used again. + */ + virtual Status resetUnfinishedIndexForRecovery(OperationContext* opCtx, + Collection* collection, + const IndexDescriptor* desc) = 0; + + /** * Drops an unfinished index given its descriptor. * * The caller must hold the collection X lock. diff --git a/src/mongo/db/catalog/index_catalog_entry.h b/src/mongo/db/catalog/index_catalog_entry.h index 9761f590c9d..2cf80bb8d3f 100644 --- a/src/mongo/db/catalog/index_catalog_entry.h +++ b/src/mongo/db/catalog/index_catalog_entry.h @@ -95,6 +95,7 @@ public: /// --------------------- virtual void setIsReady(bool newIsReady) = 0; + virtual void setIsFrozen(bool newIsFrozen) = 0; virtual void setDropped() = 0; virtual bool isDropped() const = 0; diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.cpp b/src/mongo/db/catalog/index_catalog_entry_impl.cpp index 318e1007bc4..a5b00ac39f8 100644 --- a/src/mongo/db/catalog/index_catalog_entry_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_entry_impl.cpp @@ -181,6 +181,10 @@ void IndexCatalogEntryImpl::setIsReady(bool newIsReady) { _isReady = newIsReady; } +void IndexCatalogEntryImpl::setIsFrozen(bool newIsFrozen) { + _isFrozen = newIsFrozen; +} + void IndexCatalogEntryImpl::setMultikey(OperationContext* opCtx, const CollectionPtr& collection, const KeyStringSet& multikeyMetadataKeys, diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.h b/src/mongo/db/catalog/index_catalog_entry_impl.h index 30ef5a80921..0760989b99a 100644 --- a/src/mongo/db/catalog/index_catalog_entry_impl.h +++ b/src/mongo/db/catalog/index_catalog_entry_impl.h @@ -110,6 +110,8 @@ public: void setIsReady(bool newIsReady) final; + void setIsFrozen(bool newIsFrozen) final; + void setDropped() final { _isDropped.store(true); } diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp index d0af43246f6..537d47305c1 100644 --- a/src/mongo/db/catalog/index_catalog_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_impl.cpp @@ -615,7 +615,7 @@ StatusWith<BSONObj> IndexCatalogImpl::createIndexOnEmptyCollection(OperationCont boost::optional<UUID> buildUUID = boost::none; IndexBuildBlock indexBuildBlock( collection->ns(), spec, IndexBuildMethod::kForeground, buildUUID); - status = indexBuildBlock.init(opCtx, collection); + status = indexBuildBlock.init(opCtx, collection, /*forRecovery=*/false); if (!status.isOK()) return status; @@ -1273,6 +1273,70 @@ Status IndexCatalogImpl::dropIndex(OperationContext* opCtx, return dropIndexEntry(opCtx, collection, entry); } +Status IndexCatalogImpl::resetUnfinishedIndexForRecovery(OperationContext* opCtx, + Collection* collection, + const IndexDescriptor* desc) { + invariant(opCtx->lockState()->isCollectionLockedForMode(collection->ns(), MODE_X)); + invariant(opCtx->lockState()->inAWriteUnitOfWork()); + + IndexCatalogEntry* entry = desc->getEntry(); + const std::string indexName = entry->descriptor()->indexName(); + + // Only indexes that aren't ready can be reset. + invariant(!collection->isIndexReady(indexName)); + + auto released = [&] { + if (auto released = _readyIndexes.release(entry->descriptor())) { + invariant(!released, "Cannot reset a ready index"); + } + if (auto released = _buildingIndexes.release(entry->descriptor())) { + return released; + } + if (auto released = _frozenIndexes.release(entry->descriptor())) { + return released; + } + MONGO_UNREACHABLE; + }(); + + LOGV2(6987700, + "Resetting unfinished index", + logAttrs(collection->ns()), + "index"_attr = indexName, + "ident"_attr = released->getIdent()); + + invariant(released.get() == entry); + + // Drop the ident if it exists. The storage engine will return OK if the ident is not found. + auto engine = opCtx->getServiceContext()->getStorageEngine(); + const std::string ident = released->getIdent(); + Status status = engine->getEngine()->dropIdent(opCtx->recoveryUnit(), ident); + if (!status.isOK()) { + return status; + } + + // Recreate the ident on-disk. DurableCatalog::createIndex() will lookup the ident internally + // using the catalogId and index name. + status = DurableCatalog::get(opCtx)->createIndex(opCtx, + collection->getCatalogId(), + collection->ns(), + collection->getCollectionOptions(), + released->descriptor()); + if (!status.isOK()) { + return status; + } + + // Update the index entry state in preparation to rebuild the index. + if (!released->accessMethod()) { + released->setAccessMethod(IndexAccessMethodFactory::get(opCtx)->make( + opCtx, collection->ns(), collection->getCollectionOptions(), released.get(), ident)); + } + + released->setIsFrozen(false); + _buildingIndexes.add(std::move(released)); + + return Status::OK(); +} + Status IndexCatalogImpl::dropUnfinishedIndex(OperationContext* opCtx, Collection* collection, const IndexDescriptor* desc) { diff --git a/src/mongo/db/catalog/index_catalog_impl.h b/src/mongo/db/catalog/index_catalog_impl.h index 655c43e6858..446897b015e 100644 --- a/src/mongo/db/catalog/index_catalog_impl.h +++ b/src/mongo/db/catalog/index_catalog_impl.h @@ -202,6 +202,9 @@ public: Status dropIndex(OperationContext* opCtx, Collection* collection, const IndexDescriptor* desc) override; + Status resetUnfinishedIndexForRecovery(OperationContext* opCtx, + Collection* collection, + const IndexDescriptor* desc) override; Status dropUnfinishedIndex(OperationContext* opCtx, Collection* collection, const IndexDescriptor* desc) override; diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp index 03c41a60991..79c7d013395 100644 --- a/src/mongo/db/catalog/multi_index_block.cpp +++ b/src/mongo/db/catalog/multi_index_block.cpp @@ -196,7 +196,7 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(OperationContext* opCtx, const BSONObj& spec, OnInitFn onInit) { const auto indexes = std::vector<BSONObj>(1, spec); - return init(opCtx, collection, indexes, onInit, boost::none); + return init(opCtx, collection, indexes, onInit, /*forRecovery=*/false, boost::none); } StatusWith<std::vector<BSONObj>> MultiIndexBlock::init( @@ -204,6 +204,7 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init( CollectionWriter& collection, const std::vector<BSONObj>& indexSpecs, OnInitFn onInit, + bool forRecovery, const boost::optional<ResumeIndexInfo>& resumeInfo) { invariant(opCtx->lockState()->isCollectionLockedForMode(collection->ns(), MODE_X), str::stream() << "Collection " << collection->ns() << " with UUID " @@ -257,27 +258,31 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init( for (size_t i = 0; i < indexSpecs.size(); i++) { BSONObj info = indexSpecs[i]; - StatusWith<BSONObj> statusWithInfo = - collection->getIndexCatalog()->prepareSpecForCreate( - opCtx, collection.get(), info, resumeInfo); - Status status = statusWithInfo.getStatus(); - if (!status.isOK()) { - // If we were given two identical indexes to build, we will run into an error trying - // to set up the same index a second time in this for-loop. This is the only way to - // encounter this error because callers filter out ready/in-progress indexes and - // start the build while holding a lock throughout. - if (status == ErrorCodes::IndexBuildAlreadyInProgress) { - invariant(indexSpecs.size() > 1, - str::stream() - << "Collection: " << collection->ns() << " (" << _collectionUUID - << "), Index spec: " << indexSpecs.front()); - return { - ErrorCodes::OperationFailed, - "Cannot build two identical indexes. Try again without duplicate indexes."}; + if (!forRecovery) { + // We skip this step when initializing unfinished index builds during startup + // recovery as they are already in the index catalog. + StatusWith<BSONObj> statusWithInfo = + collection->getIndexCatalog()->prepareSpecForCreate( + opCtx, collection.get(), info, resumeInfo); + Status status = statusWithInfo.getStatus(); + if (!status.isOK()) { + // If we were given two identical indexes to build, we will run into an error + // trying to set up the same index a second time in this for-loop. This is the + // only way to encounter this error because callers filter out ready/in-progress + // indexes and start the build while holding a lock throughout. + if (status == ErrorCodes::IndexBuildAlreadyInProgress) { + invariant(indexSpecs.size() > 1, + str::stream() << "Collection: " << collection->ns() << " (" + << _collectionUUID + << "), Index spec: " << indexSpecs.front()); + return {ErrorCodes::OperationFailed, + "Cannot build two identical indexes. Try again without duplicate " + "indexes."}; + } + return status; } - return status; + info = statusWithInfo.getValue(); } - info = statusWithInfo.getValue(); indexInfoObjs.push_back(info); boost::optional<TimeseriesOptions> options = collection->getTimeseriesOptions(); @@ -315,7 +320,8 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init( *stateInfo, resumeInfo->getPhase()); } else { - status = index.block->init(opCtx, collection.getWritableCollection(opCtx)); + status = + index.block->init(opCtx, collection.getWritableCollection(opCtx), forRecovery); } if (!status.isOK()) return status; diff --git a/src/mongo/db/catalog/multi_index_block.h b/src/mongo/db/catalog/multi_index_block.h index 711f5351562..5b5648a6326 100644 --- a/src/mongo/db/catalog/multi_index_block.h +++ b/src/mongo/db/catalog/multi_index_block.h @@ -114,6 +114,7 @@ public: CollectionWriter& collection, const std::vector<BSONObj>& specs, OnInitFn onInit, + bool forRecovery, const boost::optional<ResumeIndexInfo>& resumeInfo = boost::none); StatusWith<std::vector<BSONObj>> init(OperationContext* opCtx, CollectionWriter& collection, diff --git a/src/mongo/db/catalog/multi_index_block_test.cpp b/src/mongo/db/catalog/multi_index_block_test.cpp index 2990d9ec956..4d63b0689f1 100644 --- a/src/mongo/db/catalog/multi_index_block_test.cpp +++ b/src/mongo/db/catalog/multi_index_block_test.cpp @@ -90,8 +90,11 @@ TEST_F(MultiIndexBlockTest, CommitWithoutInsertingDocuments) { AutoGetCollection autoColl(operationContext(), getNSS(), MODE_X); CollectionWriter coll(operationContext(), autoColl); - auto specs = unittest::assertGet(indexer->init( - operationContext(), coll, std::vector<BSONObj>(), MultiIndexBlock::kNoopOnInitFn)); + auto specs = unittest::assertGet(indexer->init(operationContext(), + coll, + std::vector<BSONObj>(), + MultiIndexBlock::kNoopOnInitFn, + /*forRecovery=*/false)); ASSERT_EQUALS(0U, specs.size()); ASSERT_OK(indexer->dumpInsertsFromBulk(operationContext(), coll.get())); @@ -113,8 +116,11 @@ TEST_F(MultiIndexBlockTest, CommitAfterInsertingSingleDocument) { AutoGetCollection autoColl(operationContext(), getNSS(), MODE_X); CollectionWriter coll(operationContext(), autoColl); - auto specs = unittest::assertGet(indexer->init( - operationContext(), coll, std::vector<BSONObj>(), MultiIndexBlock::kNoopOnInitFn)); + auto specs = unittest::assertGet(indexer->init(operationContext(), + coll, + std::vector<BSONObj>(), + MultiIndexBlock::kNoopOnInitFn, + /*forRecovery=*/false)); ASSERT_EQUALS(0U, specs.size()); ASSERT_OK( @@ -146,8 +152,11 @@ TEST_F(MultiIndexBlockTest, AbortWithoutCleanupAfterInsertingSingleDocument) { AutoGetCollection autoColl(operationContext(), getNSS(), MODE_X); CollectionWriter coll(operationContext(), autoColl); - auto specs = unittest::assertGet(indexer->init( - operationContext(), coll, std::vector<BSONObj>(), MultiIndexBlock::kNoopOnInitFn)); + auto specs = unittest::assertGet(indexer->init(operationContext(), + coll, + std::vector<BSONObj>(), + MultiIndexBlock::kNoopOnInitFn, + /*forRecovery=*/false)); ASSERT_EQUALS(0U, specs.size()); ASSERT_OK( indexer->insertSingleDocumentForInitialSyncOrRecovery(operationContext(), diff --git a/src/mongo/db/commands/drop_indexes.cpp b/src/mongo/db/commands/drop_indexes.cpp index ae15287286e..bae18e07f3b 100644 --- a/src/mongo/db/commands/drop_indexes.cpp +++ b/src/mongo/db/commands/drop_indexes.cpp @@ -238,8 +238,8 @@ public: collection.getWritableCollection(opCtx)->getIndexCatalog()->dropAllIndexes( opCtx, collection.getWritableCollection(opCtx), true, {}); - swIndexesToRebuild = - indexer->init(opCtx, collection, all, MultiIndexBlock::kNoopOnInitFn); + swIndexesToRebuild = indexer->init( + opCtx, collection, all, MultiIndexBlock::kNoopOnInitFn, /*forRecovery=*/false); uassertStatusOK(swIndexesToRebuild.getStatus()); wunit.commit(); }); diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 0f54b324420..39106fbf5e3 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -552,88 +552,50 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC CollectionWriter collection(opCtx, nss); { - // TODO SERVER-64760: Remove this usage of `allowUntimestampedWrite`. We often have a valid - // timestamp for this write, but the callers of this function don't pass it through. - opCtx->recoveryUnit()->allowUntimestampedWrite(); - - // These steps are combined into a single WUOW to ensure there are no commits without - // the indexes. - // 1) Drop all unfinished indexes. - // 2) Start, but do not complete the index build process. + // These steps are combined into a single WUOW to ensure there are no commits without the + // indexes for repair. WriteUnitOfWork wuow(opCtx); - auto indexCatalog = collection.getWritableCollection(opCtx)->getIndexCatalog(); + // We need to initialize the collection to rebuild the indexes. The collection may already + // be initialized when rebuilding multiple unfinished indexes on the same collection. + if (!collection->isInitialized()) { + collection.getWritableCollection(opCtx)->init(opCtx); + } - for (size_t i = 0; i < indexNames.size(); i++) { - auto descriptor = indexCatalog->findIndexByName( - opCtx, indexNames[i], IndexCatalog::InclusionPolicy::kReady); - if (descriptor) { - Status s = indexCatalog->dropIndex( - opCtx, collection.getWritableCollection(opCtx), descriptor); - if (!s.isOK()) { - return s; - } - continue; - } - - // If the index is not present in the catalog, then we are trying to drop an already - // aborted index. This may happen when rollback-via-refetch restarts an index build - // after an abort has been rolled back. - if (!collection->isIndexPresent(indexNames[i])) { - LOGV2(20652, - "An index was not found in the catalog while trying to drop the index during " - "recovery", - "buildUUID"_attr = buildUUID, - "index"_attr = indexNames[i]); - continue; + if (storageGlobalParams.repair) { + Status status = _dropIndexesForRepair(opCtx, collection, indexNames); + if (!status.isOK()) { + return status; } + } else { + // Unfinished index builds that are not resumable will drop and recreate the index table + // using the same ident to avoid doing untimestamped writes to the catalog. + for (const auto& indexName : indexNames) { + auto indexCatalog = collection.getWritableCollection(opCtx)->getIndexCatalog(); + auto desc = + indexCatalog->findIndexByName(opCtx, + indexName, + IndexCatalog::InclusionPolicy::kUnfinished | + IndexCatalog::InclusionPolicy::kFrozen); + Status status = indexCatalog->resetUnfinishedIndexForRecovery( + opCtx, collection.getWritableCollection(opCtx), desc); + if (!status.isOK()) { + return status; + } - const auto durableBuildUUID = collection->getIndexBuildUUID(indexNames[i]); - - // A build UUID is present if and only if we are rebuilding a two-phase build. - invariant((protocol == IndexBuildProtocol::kTwoPhase) == durableBuildUUID.has_value()); - // When a buildUUID is present, it must match the build UUID parameter to this - // function. - invariant(!durableBuildUUID || *durableBuildUUID == buildUUID, - str::stream() << "durable build UUID: " << durableBuildUUID - << "buildUUID: " << buildUUID); + const auto durableBuildUUID = collection->getIndexBuildUUID(indexName); - // If the unfinished index is in the IndexCatalog, drop it through there, otherwise drop - // it from the DurableCatalog. Rollback-via-refetch does not clear any in-memory state, - // so we should do it manually here. - descriptor = indexCatalog->findIndexByName( - opCtx, - indexNames[i], - IndexCatalog::InclusionPolicy::kReady | IndexCatalog::InclusionPolicy::kUnfinished | - IndexCatalog::InclusionPolicy::kFrozen); - if (descriptor) { - Status s = indexCatalog->dropUnfinishedIndex( - opCtx, collection.getWritableCollection(opCtx), descriptor); - if (!s.isOK()) { - return s; - } - } else { - // There are no concurrent users of the index during startup recovery, so it is OK - // to pass in a nullptr for the index 'ident', promising that the index is not in - // use. - catalog::removeIndex( - opCtx, - indexNames[i], - collection.getWritableCollection(opCtx), - nullptr /* ident */, - // Unfinished or partially dropped indexes do not need two-phase drop b/c the - // incomplete index will never be recovered. This is an optimization that will - // return disk space to the user more quickly. - catalog::DataRemoval::kImmediate); + // A build UUID is present if and only if we are rebuilding a two-phase build. + invariant((protocol == IndexBuildProtocol::kTwoPhase) == + durableBuildUUID.has_value()); + // When a buildUUID is present, it must match the build UUID parameter to this + // function. + invariant(!durableBuildUUID || *durableBuildUUID == buildUUID, + str::stream() << "durable build UUID: " << durableBuildUUID + << "buildUUID: " << buildUUID); } } - // We need to initialize the collection to rebuild the indexes. The collection may already - // be initialized when rebuilding indexes with rollback-via-refetch. - if (!collection->isInitialized()) { - collection.getWritableCollection(opCtx)->init(opCtx); - } - auto dbName = nss.db().toString(); auto replIndexBuildState = std::make_shared<ReplIndexBuildState>( buildUUID, collection->uuid(), dbName, specs, protocol); @@ -646,6 +608,8 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC IndexBuildsManager::SetupOptions options; options.protocol = protocol; + // All indexes are dropped during repair and should be rebuilt normally. + options.forRecovery = !storageGlobalParams.repair; status = _indexBuildsManager.setUpIndexBuild( opCtx, collection, specs, buildUUID, MultiIndexBlock::kNoopOnInitFn, options); if (!status.isOK()) { @@ -660,6 +624,39 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC return Status::OK(); } +Status IndexBuildsCoordinator::_dropIndexesForRepair(OperationContext* opCtx, + CollectionWriter& collection, + const std::vector<std::string>& indexNames) { + invariant(collection->isInitialized()); + for (const auto& indexName : indexNames) { + auto indexCatalog = collection.getWritableCollection(opCtx)->getIndexCatalog(); + auto descriptor = + indexCatalog->findIndexByName(opCtx, indexName, IndexCatalog::InclusionPolicy::kReady); + if (descriptor) { + Status s = + indexCatalog->dropIndex(opCtx, collection.getWritableCollection(opCtx), descriptor); + if (!s.isOK()) { + return s; + } + continue; + } + + // The index must be unfinished or frozen if it isn't ready. + descriptor = indexCatalog->findIndexByName(opCtx, + indexName, + IndexCatalog::InclusionPolicy::kUnfinished | + IndexCatalog::InclusionPolicy::kFrozen); + invariant(descriptor); + Status s = indexCatalog->dropUnfinishedIndex( + opCtx, collection.getWritableCollection(opCtx), descriptor); + if (!s.isOK()) { + return s; + } + } + + return Status::OK(); +} + Status IndexBuildsCoordinator::_setUpResumeIndexBuild(OperationContext* opCtx, std::string dbName, const UUID& collectionUUID, diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h index 5a6803a0e78..4d3cf6558db 100644 --- a/src/mongo/db/index_builds_coordinator.h +++ b/src/mongo/db/index_builds_coordinator.h @@ -537,8 +537,8 @@ private: /** * Sets up the in-memory and durable state of the index build. * - * This function should only be called when in recovery mode, because we drop and replace - * existing indexes in a single WriteUnitOfWork. + * This function should only be called when in recovery mode, because the index tables are + * recreated. */ Status _startIndexBuildForRecovery(OperationContext* opCtx, const NamespaceString& nss, @@ -546,6 +546,16 @@ private: const UUID& buildUUID, IndexBuildProtocol protocol); + /** + * Removes the in-memory and durable state of the passed in indexes in preparation of rebuilding + * them for repair. + * + * This function should only be called when in recovery mode. + */ + Status _dropIndexesForRepair(OperationContext* opCtx, + CollectionWriter& collection, + const std::vector<std::string>& indexNames); + protected: /** * Sets up the in-memory state of the index build. Validates index specs and filters out diff --git a/src/mongo/db/repl/collection_bulk_loader_impl.cpp b/src/mongo/db/repl/collection_bulk_loader_impl.cpp index 7cabcb659d0..d405146eb37 100644 --- a/src/mongo/db/repl/collection_bulk_loader_impl.cpp +++ b/src/mongo/db/repl/collection_bulk_loader_impl.cpp @@ -97,10 +97,13 @@ Status CollectionBulkLoaderImpl::init(const std::vector<BSONObj>& secondaryIndex _opCtx.get(), collWriter.get(), secondaryIndexSpecs); if (specs.size()) { _secondaryIndexesBlock->ignoreUniqueConstraint(); - auto status = - _secondaryIndexesBlock - ->init(_opCtx.get(), collWriter, specs, MultiIndexBlock::kNoopOnInitFn) - .getStatus(); + auto status = _secondaryIndexesBlock + ->init(_opCtx.get(), + collWriter, + specs, + MultiIndexBlock::kNoopOnInitFn, + /*forRecovery=*/false) + .getStatus(); if (!status.isOK()) { return status; } |