summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSamy Lanka <samy.lanka@mongodb.com>2020-07-31 07:20:47 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-08-04 08:33:16 +0000
commit2fc36d0b12d1c44893a5f3c1fe0ac8fb0125a071 (patch)
tree735ccccb879cd841d19f953282f1e74a27a21bd1
parent4b967fd6c5e679b33922a4d287755ea987dc882c (diff)
downloadmongo-2fc36d0b12d1c44893a5f3c1fe0ac8fb0125a071.tar.gz
SERVER-48417 Reconstruct in-memory state when resuming index build
-rw-r--r--jstests/noPassthrough/indexbg_restart_secondary.js22
-rw-r--r--jstests/noPassthrough/libs/index_build.js16
-rw-r--r--jstests/noPassthrough/resumable_index_build_drain_writes_phase.js3
-rw-r--r--src/mongo/db/catalog/index_build_block.cpp37
-rw-r--r--src/mongo/db/catalog/index_build_block.h9
-rw-r--r--src/mongo/db/catalog/index_builds_manager.cpp5
-rw-r--r--src/mongo/db/catalog/index_builds_manager.h3
-rw-r--r--src/mongo/db/catalog/index_catalog.h6
-rw-r--r--src/mongo/db/catalog/index_catalog_impl.cpp12
-rw-r--r--src/mongo/db/catalog/index_catalog_impl.h6
-rw-r--r--src/mongo/db/catalog/index_catalog_noop.h6
-rw-r--r--src/mongo/db/catalog/multi_index_block.cpp43
-rw-r--r--src/mongo/db/catalog/multi_index_block.h14
-rw-r--r--src/mongo/db/index/duplicate_key_tracker.cpp14
-rw-r--r--src/mongo/db/index/duplicate_key_tracker.h9
-rw-r--r--src/mongo/db/index/index_build_interceptor.cpp55
-rw-r--r--src/mongo/db/index/index_build_interceptor.h16
-rw-r--r--src/mongo/db/index/skipped_record_tracker.cpp19
-rw-r--r--src/mongo/db/index/skipped_record_tracker.h6
-rw-r--r--src/mongo/db/index_builds_coordinator.cpp173
-rw-r--r--src/mongo/db/index_builds_coordinator.h54
-rw-r--r--src/mongo/db/index_builds_coordinator_mongod.cpp51
-rw-r--r--src/mongo/db/index_builds_coordinator_mongod.h23
-rw-r--r--src/mongo/db/resumable_index_builds.idl2
-rw-r--r--src/mongo/db/storage/durable_catalog.h8
-rw-r--r--src/mongo/db/storage/durable_catalog_impl.cpp23
-rw-r--r--src/mongo/db/storage/durable_catalog_impl.h5
-rw-r--r--src/mongo/db/storage/storage_engine.h14
-rw-r--r--src/mongo/db/storage/storage_engine_impl.cpp11
-rw-r--r--src/mongo/db/storage/storage_engine_impl.h3
-rw-r--r--src/mongo/db/storage/storage_engine_mock.h4
-rw-r--r--src/mongo/embedded/index_builds_coordinator_embedded.cpp12
-rw-r--r--src/mongo/embedded/index_builds_coordinator_embedded.h8
33 files changed, 613 insertions, 79 deletions
diff --git a/jstests/noPassthrough/indexbg_restart_secondary.js b/jstests/noPassthrough/indexbg_restart_secondary.js
index efd6d90d765..66c27d0e0c3 100644
--- a/jstests/noPassthrough/indexbg_restart_secondary.js
+++ b/jstests/noPassthrough/indexbg_restart_secondary.js
@@ -69,11 +69,15 @@ IndexBuildTest.waitForIndexBuildToStart(secondDB, coll.getName(), "y_1");
MongoRunner.stopMongod(second);
-replTest.start(
- second,
- {setParameter: {"failpoint.hangAfterSettingUpIndexBuildUnlocked": tojson({mode: "alwaysOn"})}},
- /*restart=*/true,
- /*wait=*/true);
+replTest.start(second,
+ {
+ setParameter: {
+ "failpoint.hangAfterSettingUpIndexBuildUnlocked": tojson({mode: "alwaysOn"}),
+ "failpoint.hangAfterSettingUpResumableIndexBuild": tojson({mode: "alwaysOn"})
+ }
+ },
+ /*restart=*/true,
+ /*wait=*/true);
// Make sure secondary comes back.
try {
@@ -90,7 +94,11 @@ try {
assert.eq(size, secondDB.getCollection(collectionName).find({}).itcount());
// Verify that only the _id index is ready.
- checkLog.containsJson(second, 4585201);
+ if (ResumableIndexBuildTest.resumableIndexBuildsEnabled(second)) {
+ checkLog.containsJson(second, 4841704);
+ } else {
+ checkLog.containsJson(second, 4585201);
+ }
IndexBuildTest.assertIndexes(secondDB.getCollection(collectionName),
4,
["_id_"],
@@ -99,6 +107,8 @@ try {
} finally {
assert.commandWorked(second.adminCommand(
{configureFailPoint: 'hangAfterSettingUpIndexBuildUnlocked', mode: 'off'}));
+ assert.commandWorked(second.adminCommand(
+ {configureFailPoint: 'hangAfterSettingUpResumableIndexBuild', mode: 'off'}));
// Let index build complete on primary, which replicates a commitIndexBuild to the secondary.
IndexBuildTest.resumeIndexBuilds(primaryDB);
diff --git a/jstests/noPassthrough/libs/index_build.js b/jstests/noPassthrough/libs/index_build.js
index 98f14d50253..3f6a19dfa37 100644
--- a/jstests/noPassthrough/libs/index_build.js
+++ b/jstests/noPassthrough/libs/index_build.js
@@ -328,8 +328,14 @@ const ResumableIndexBuildTest = class {
* insertIntoSideWritesTable will be inserted after the bulk load phase so that they are
* inserted into the side writes table and processed during the drain writes phase.
*/
- static run(
- rst, dbName, collName, indexSpec, failPointName, failPointData, insertIntoSideWritesTable) {
+ static run(rst,
+ dbName,
+ collName,
+ indexSpec,
+ failPointName,
+ failPointData,
+ insertIntoSideWritesTable,
+ postIndexBuildInserts = {}) {
const primary = rst.getPrimary();
const coll = primary.getDB(dbName).getCollection(collName);
const indexName = "resumable_index_build";
@@ -349,6 +355,12 @@ const ResumableIndexBuildTest = class {
awaitInsertIntoSideWritesTable();
awaitCreateIndex();
+ if (postIndexBuildInserts) {
+ assert.commandWorked(coll.insert(postIndexBuildInserts));
+ }
+
+ assert(coll.validate(), "Index validation failed");
+
assert.commandWorked(coll.dropIndex(indexName));
}
};
diff --git a/jstests/noPassthrough/resumable_index_build_drain_writes_phase.js b/jstests/noPassthrough/resumable_index_build_drain_writes_phase.js
index 6a97d680927..c6f78592a0a 100644
--- a/jstests/noPassthrough/resumable_index_build_drain_writes_phase.js
+++ b/jstests/noPassthrough/resumable_index_build_drain_writes_phase.js
@@ -41,7 +41,8 @@ ResumableIndexBuildTest.run(rst,
{a: 1},
"hangIndexBuildDuringDrainWritesPhase",
{iteration: 1},
- [{a: 4}, {a: 5}]);
+ [{a: 4}, {a: 5}],
+ [{a: 6}, {a: 7}]);
rst.stopSet();
})(); \ No newline at end of file
diff --git a/src/mongo/db/catalog/index_build_block.cpp b/src/mongo/db/catalog/index_build_block.cpp
index f17f541e57e..67d951a5aa1 100644
--- a/src/mongo/db/catalog/index_build_block.cpp
+++ b/src/mongo/db/catalog/index_build_block.cpp
@@ -71,6 +71,37 @@ void IndexBuildBlock::finalizeTemporaryTables(OperationContext* opCtx,
}
}
+void IndexBuildBlock::_completeInit(OperationContext* opCtx, Collection* collection) {
+ // Register this index with the CollectionQueryInfo to regenerate the cache. This way, updates
+ // occurring while an index is being build in the background will be aware of whether or not
+ // they need to modify any indexes.
+ CollectionQueryInfo::get(collection)
+ .addedIndex(opCtx, collection, _indexCatalogEntry->descriptor());
+}
+
+void IndexBuildBlock::initForResume(OperationContext* opCtx,
+ Collection* collection,
+ const IndexSorterInfo& resumeInfo) {
+
+ _indexName = _spec.getStringField("name");
+ auto descriptor =
+ _indexCatalog->findIndexByName(opCtx, _indexName, true /* includeUnfinishedIndexes */);
+
+ _indexCatalogEntry = descriptor->getEntry();
+ invariant(_indexCatalogEntry);
+ invariant(_method == IndexBuildMethod::kHybrid);
+
+ _indexBuildInterceptor =
+ std::make_unique<IndexBuildInterceptor>(opCtx,
+ _indexCatalogEntry,
+ resumeInfo.getSideWritesTable(),
+ resumeInfo.getDuplicateKeyTrackerTable(),
+ resumeInfo.getSkippedRecordTrackerTable());
+ _indexCatalogEntry->setIndexBuildInterceptor(_indexBuildInterceptor.get());
+
+ _completeInit(opCtx, collection);
+}
+
Status IndexBuildBlock::init(OperationContext* opCtx, Collection* collection) {
// Being in a WUOW means all timestamping responsibility can be pushed up to the caller.
invariant(opCtx->lockState()->inAWriteUnitOfWork());
@@ -118,11 +149,7 @@ Status IndexBuildBlock::init(OperationContext* opCtx, Collection* collection) {
});
}
- // Register this index with the CollectionQueryInfo to regenerate the cache. This way, updates
- // occurring while an index is being build in the background will be aware of whether or not
- // they need to modify any indexes.
- CollectionQueryInfo::get(collection)
- .addedIndex(opCtx, collection, _indexCatalogEntry->descriptor());
+ _completeInit(opCtx, collection);
return Status::OK();
}
diff --git a/src/mongo/db/catalog/index_build_block.h b/src/mongo/db/catalog/index_build_block.h
index 07d404c52c3..95f384cf170 100644
--- a/src/mongo/db/catalog/index_build_block.h
+++ b/src/mongo/db/catalog/index_build_block.h
@@ -70,6 +70,13 @@ public:
Status init(OperationContext* opCtx, Collection* collection);
/**
+ * Makes sure that an entry for the index was created at startup in the IndexCatalog.
+ */
+ void initForResume(OperationContext* opCtx,
+ Collection* collection,
+ const IndexSorterInfo& resumeInfo);
+
+ /**
* Marks the state of the index as 'ready' and commits the index to disk.
*
* Must be called from within a `WriteUnitOfWork`
@@ -107,6 +114,8 @@ public:
}
private:
+ void _completeInit(OperationContext* opCtx, Collection* collection);
+
IndexCatalog* const _indexCatalog;
const NamespaceString _nss;
diff --git a/src/mongo/db/catalog/index_builds_manager.cpp b/src/mongo/db/catalog/index_builds_manager.cpp
index af81468dbda..c94ba7caf38 100644
--- a/src/mongo/db/catalog/index_builds_manager.cpp
+++ b/src/mongo/db/catalog/index_builds_manager.cpp
@@ -84,7 +84,8 @@ Status IndexBuildsManager::setUpIndexBuild(OperationContext* opCtx,
const std::vector<BSONObj>& specs,
const UUID& buildUUID,
OnInitFn onInit,
- SetupOptions options) {
+ SetupOptions options,
+ const boost::optional<ResumeIndexInfo>& resumeInfo) {
_registerIndexBuild(buildUUID);
const auto& nss = collection->ns();
@@ -110,7 +111,7 @@ Status IndexBuildsManager::setUpIndexBuild(OperationContext* opCtx,
std::vector<BSONObj> indexes;
try {
indexes = writeConflictRetry(opCtx, "IndexBuildsManager::setUpIndexBuild", nss.ns(), [&]() {
- return uassertStatusOK(builder->init(opCtx, collection, specs, onInit));
+ return uassertStatusOK(builder->init(opCtx, collection, specs, onInit, resumeInfo));
});
} catch (const DBException& ex) {
return ex.toStatus();
diff --git a/src/mongo/db/catalog/index_builds_manager.h b/src/mongo/db/catalog/index_builds_manager.h
index e7fc7644d95..e4b407b0333 100644
--- a/src/mongo/db/catalog/index_builds_manager.h
+++ b/src/mongo/db/catalog/index_builds_manager.h
@@ -84,7 +84,8 @@ public:
const std::vector<BSONObj>& specs,
const UUID& buildUUID,
OnInitFn onInit,
- SetupOptions options = {});
+ SetupOptions options = {},
+ const boost::optional<ResumeIndexInfo>& resumeInfo = boost::none);
/**
* Unregisters the builder associated with the given buildUUID from the _builders map.
diff --git a/src/mongo/db/catalog/index_catalog.h b/src/mongo/db/catalog/index_catalog.h
index 00362fb1f1a..c33e8f9969b 100644
--- a/src/mongo/db/catalog/index_catalog.h
+++ b/src/mongo/db/catalog/index_catalog.h
@@ -327,8 +327,10 @@ public:
* IndexAlreadyExists if the index already exists; IndexBuildAlreadyInProgress if the index is
* already being built.
*/
- virtual StatusWith<BSONObj> prepareSpecForCreate(OperationContext* const opCtx,
- const BSONObj& original) const = 0;
+ virtual StatusWith<BSONObj> prepareSpecForCreate(
+ OperationContext* const opCtx,
+ const BSONObj& original,
+ const boost::optional<ResumeIndexInfo>& resumeInfo) const = 0;
/**
* Returns a copy of 'indexSpecsToBuild' that does not contain index specifications that already
diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp
index 6e715af9349..b4cb9408674 100644
--- a/src/mongo/db/catalog/index_catalog_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_impl.cpp
@@ -308,8 +308,10 @@ void IndexCatalogImpl::_logInternalState(OperationContext* opCtx,
namespace {
std::string lastHaystackIndexLogged = "";
}
-StatusWith<BSONObj> IndexCatalogImpl::prepareSpecForCreate(OperationContext* opCtx,
- const BSONObj& original) const {
+StatusWith<BSONObj> IndexCatalogImpl::prepareSpecForCreate(
+ OperationContext* opCtx,
+ const BSONObj& original,
+ const boost::optional<ResumeIndexInfo>& resumeInfo) const {
auto swValidatedAndFixed = _validateAndFixIndexSpec(opCtx, original);
if (!swValidatedAndFixed.isOK()) {
return swValidatedAndFixed.getStatus().withContext(
@@ -343,6 +345,12 @@ StatusWith<BSONObj> IndexCatalogImpl::prepareSpecForCreate(OperationContext* opC
return status;
}
+ if (resumeInfo) {
+ // Don't check against unfinished indexes if this index is being resumed, since it will
+ // conflict with itself.
+ return validatedSpec;
+ }
+
// Now we will check against all indexes, in-progress included.
//
// The index catalog cannot currently iterate over only in-progress indexes. So by previously
diff --git a/src/mongo/db/catalog/index_catalog_impl.h b/src/mongo/db/catalog/index_catalog_impl.h
index 2b5ff4f5f4a..85dc5ebe7d8 100644
--- a/src/mongo/db/catalog/index_catalog_impl.h
+++ b/src/mongo/db/catalog/index_catalog_impl.h
@@ -179,8 +179,10 @@ public:
StatusWith<BSONObj> createIndexOnEmptyCollection(OperationContext* opCtx,
BSONObj spec) override;
- StatusWith<BSONObj> prepareSpecForCreate(OperationContext* opCtx,
- const BSONObj& original) const override;
+ StatusWith<BSONObj> prepareSpecForCreate(
+ OperationContext* opCtx,
+ const BSONObj& original,
+ const boost::optional<ResumeIndexInfo>& resumeInfo = boost::none) const override;
std::vector<BSONObj> removeExistingIndexes(OperationContext* const opCtx,
const std::vector<BSONObj>& indexSpecsToBuild,
diff --git a/src/mongo/db/catalog/index_catalog_noop.h b/src/mongo/db/catalog/index_catalog_noop.h
index 12b97c208f8..92b74cfc84e 100644
--- a/src/mongo/db/catalog/index_catalog_noop.h
+++ b/src/mongo/db/catalog/index_catalog_noop.h
@@ -140,8 +140,10 @@ public:
return spec;
}
- StatusWith<BSONObj> prepareSpecForCreate(OperationContext* const opCtx,
- const BSONObj& original) const override {
+ StatusWith<BSONObj> prepareSpecForCreate(
+ OperationContext* const opCtx,
+ const BSONObj& original,
+ const boost::optional<ResumeIndexInfo>& resumeInfo) const override {
return original;
}
diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp
index 4fb95caa7af..a13c7acab89 100644
--- a/src/mongo/db/catalog/multi_index_block.cpp
+++ b/src/mongo/db/catalog/multi_index_block.cpp
@@ -152,13 +152,15 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(OperationContext* opCtx,
const BSONObj& spec,
OnInitFn onInit) {
const auto indexes = std::vector<BSONObj>(1, spec);
- return init(opCtx, collection, indexes, onInit);
+ return init(opCtx, collection, indexes, onInit, boost::none);
}
-StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(OperationContext* opCtx,
- Collection* collection,
- const std::vector<BSONObj>& indexSpecs,
- OnInitFn onInit) {
+StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(
+ OperationContext* opCtx,
+ Collection* collection,
+ const std::vector<BSONObj>& indexSpecs,
+ OnInitFn onInit,
+ const boost::optional<ResumeIndexInfo>& resumeInfo) {
invariant(opCtx->lockState()->isCollectionLockedForMode(collection->ns(), MODE_X),
str::stream() << "Collection " << collection->ns() << " with UUID "
<< collection->uuid() << " is holding the incorrect lock");
@@ -170,6 +172,10 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(OperationContext* opCtx,
invariant(_indexes.empty());
+ if (resumeInfo) {
+ _phase = resumeInfo->getPhase();
+ }
+
// Guarantees that exceptions cannot be returned from index builder initialization except for
// WriteConflictExceptions, which should be dealt with by the caller.
try {
@@ -209,7 +215,7 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(OperationContext* opCtx,
for (size_t i = 0; i < indexSpecs.size(); i++) {
BSONObj info = indexSpecs[i];
StatusWith<BSONObj> statusWithInfo =
- collection->getIndexCatalog()->prepareSpecForCreate(opCtx, info);
+ collection->getIndexCatalog()->prepareSpecForCreate(opCtx, info, resumeInfo);
Status status = statusWithInfo.getStatus();
if (!status.isOK()) {
// If we were given two identical indexes to build, we will run into an error trying
@@ -232,9 +238,22 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(OperationContext* opCtx,
IndexToBuild index;
index.block = std::make_unique<IndexBuildBlock>(
collection->getIndexCatalog(), collection->ns(), info, _method, _buildUUID);
- status = index.block->init(opCtx, collection);
- if (!status.isOK())
- return status;
+ if (resumeInfo) {
+ auto resumeInfoIndexes = resumeInfo->getIndexes();
+ // Find the resume information that corresponds to this spec.
+ auto indexResumeInfo =
+ std::find_if(resumeInfoIndexes.begin(),
+ resumeInfoIndexes.end(),
+ [&info](const IndexSorterInfo& indexInfo) {
+ return info.woCompare(indexInfo.getSpec()) == 0;
+ });
+
+ index.block->initForResume(opCtx, collection, *indexResumeInfo);
+ } else {
+ status = index.block->init(opCtx, collection);
+ if (!status.isOK())
+ return status;
+ }
auto indexCleanupGuard = makeGuard([opCtx, &index] {
index.block->finalizeTemporaryTables(
@@ -271,8 +290,10 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(OperationContext* opCtx,
index.filterExpression = index.block->getEntry()->getFilterExpression();
- // TODO SERVER-14888 Suppress this in cases we don't want to audit.
- audit::logCreateIndex(opCtx->getClient(), &info, descriptor->indexName(), ns);
+ if (!resumeInfo) {
+ // TODO SERVER-14888 Suppress this in cases we don't want to audit.
+ audit::logCreateIndex(opCtx->getClient(), &info, descriptor->indexName(), ns);
+ }
indexCleanupGuard.dismiss();
_indexes.push_back(std::move(index));
diff --git a/src/mongo/db/catalog/multi_index_block.h b/src/mongo/db/catalog/multi_index_block.h
index 56fb44bffa3..11e67eecefe 100644
--- a/src/mongo/db/catalog/multi_index_block.h
+++ b/src/mongo/db/catalog/multi_index_block.h
@@ -109,14 +109,20 @@ public:
* Requires holding an exclusive lock on the collection.
*/
using OnInitFn = std::function<Status(std::vector<BSONObj>& specs)>;
- StatusWith<std::vector<BSONObj>> init(OperationContext* opCtx,
- Collection* collection,
- const std::vector<BSONObj>& specs,
- OnInitFn onInit);
+ StatusWith<std::vector<BSONObj>> init(
+ OperationContext* opCtx,
+ Collection* collection,
+ const std::vector<BSONObj>& specs,
+ OnInitFn onInit,
+ const boost::optional<ResumeIndexInfo>& resumeInfo = boost::none);
StatusWith<std::vector<BSONObj>> init(OperationContext* opCtx,
Collection* collection,
const BSONObj& spec,
OnInitFn onInit);
+ StatusWith<std::vector<BSONObj>> initForResume(OperationContext* opCtx,
+ Collection* collection,
+ const std::vector<BSONObj>& specs,
+ const ResumeIndexInfo& resumeInfo);
/**
* Not all index initializations need an OnInitFn, in particular index builds that do not need
diff --git a/src/mongo/db/index/duplicate_key_tracker.cpp b/src/mongo/db/index/duplicate_key_tracker.cpp
index 0f997415068..4d3d88bd075 100644
--- a/src/mongo/db/index/duplicate_key_tracker.cpp
+++ b/src/mongo/db/index/duplicate_key_tracker.cpp
@@ -55,6 +55,20 @@ DuplicateKeyTracker::DuplicateKeyTracker(OperationContext* opCtx, const IndexCat
invariant(_indexCatalogEntry->descriptor()->unique());
}
+DuplicateKeyTracker::DuplicateKeyTracker(OperationContext* opCtx,
+ const IndexCatalogEntry* entry,
+ StringData ident)
+ : _indexCatalogEntry(entry) {
+ _keyConstraintsTable =
+ opCtx->getServiceContext()->getStorageEngine()->makeTemporaryRecordStoreFromExistingIdent(
+ opCtx, ident);
+
+ invariant(_indexCatalogEntry->descriptor()->unique(),
+ str::stream() << "Duplicate key tracker table exists on disk with ident: " << ident
+ << " but the index is not unique: "
+ << _indexCatalogEntry->descriptor());
+}
+
void DuplicateKeyTracker::finalizeTemporaryTable(OperationContext* opCtx,
TemporaryRecordStore::FinalizationAction action) {
_keyConstraintsTable->finalizeTemporaryTable(opCtx, action);
diff --git a/src/mongo/db/index/duplicate_key_tracker.h b/src/mongo/db/index/duplicate_key_tracker.h
index a30c5bc2487..1a384cc3d5c 100644
--- a/src/mongo/db/index/duplicate_key_tracker.h
+++ b/src/mongo/db/index/duplicate_key_tracker.h
@@ -58,6 +58,15 @@ public:
DuplicateKeyTracker(OperationContext* opCtx, const IndexCatalogEntry* indexCatalogEntry);
/**
+ * Finds the temporary table associated with storing any duplicate key constraint violations for
+ * this index build. Only used when resuming an index build and the temporary table already
+ * exists on disk. finalizeTemporaryTable() must be called before destruction.
+ */
+ DuplicateKeyTracker(OperationContext* opCtx,
+ const IndexCatalogEntry* indexCatalogEntry,
+ StringData ident);
+
+ /**
* Deletes or keeps the temporary table for the duplicate key constraint violations. Must be
* called before object destruction.
*/
diff --git a/src/mongo/db/index/index_build_interceptor.cpp b/src/mongo/db/index/index_build_interceptor.cpp
index 1ab952d3640..f569fc00e46 100644
--- a/src/mongo/db/index/index_build_interceptor.cpp
+++ b/src/mongo/db/index/index_build_interceptor.cpp
@@ -83,25 +83,62 @@ bool IndexBuildInterceptor::typeCanFastpathMultikeyUpdates(IndexType indexType)
return (indexType == INDEX_BTREE);
}
+void IndexBuildInterceptor::_initializeMultiKeyPaths(IndexCatalogEntry* entry) {
+ // `mergeMultikeyPaths` is sensitive to the two inputs having the same multikey
+ // "shape". Initialize `_multikeyPaths` with the right shape from the IndexCatalogEntry.
+ auto indexType = entry->descriptor()->getIndexType();
+ if (typeCanFastpathMultikeyUpdates(indexType)) {
+ auto numFields = entry->descriptor()->getNumFields();
+ _multikeyPaths = MultikeyPaths{};
+ auto it = _multikeyPaths->begin();
+ _multikeyPaths->insert(it, numFields, {});
+ }
+}
+
IndexBuildInterceptor::IndexBuildInterceptor(OperationContext* opCtx, IndexCatalogEntry* entry)
: _indexCatalogEntry(entry),
_sideWritesTable(
opCtx->getServiceContext()->getStorageEngine()->makeTemporaryRecordStore(opCtx)),
- _skippedRecordTracker(entry),
+ _skippedRecordTracker(opCtx, entry, boost::none),
_sideWritesCounter(std::make_shared<AtomicWord<long long>>()) {
if (entry->descriptor()->unique()) {
_duplicateKeyTracker = std::make_unique<DuplicateKeyTracker>(opCtx, entry);
}
- // `mergeMultikeyPaths` is sensitive to the two inputs having the same multikey
- // "shape". Initialize `_multikeyPaths` with the right shape from the IndexCatalogEntry.
- auto indexType = entry->descriptor()->getIndexType();
- if (typeCanFastpathMultikeyUpdates(indexType)) {
- auto numFields = entry->descriptor()->getNumFields();
- _multikeyPaths = MultikeyPaths{};
- auto it = _multikeyPaths->begin();
- _multikeyPaths->insert(it, numFields, {});
+
+ _initializeMultiKeyPaths(entry);
+}
+
+IndexBuildInterceptor::IndexBuildInterceptor(OperationContext* opCtx,
+ IndexCatalogEntry* entry,
+ StringData sideWritesIdent,
+ boost::optional<StringData> duplicateKeyTrackerIdent,
+ boost::optional<StringData> skippedRecordTrackerIdent)
+ : _indexCatalogEntry(entry),
+ _skippedRecordTracker(opCtx, entry, skippedRecordTrackerIdent),
+ _sideWritesCounter(std::make_shared<AtomicWord<long long>>()) {
+
+ _sideWritesTable =
+ opCtx->getServiceContext()->getStorageEngine()->makeTemporaryRecordStoreFromExistingIdent(
+ opCtx, sideWritesIdent);
+ auto finalizeTableOnFailure = makeGuard([&] {
+ _sideWritesTable->finalizeTemporaryTable(opCtx,
+ TemporaryRecordStore::FinalizationAction::kDelete);
+ });
+
+ auto dupKeyTrackerIdentExists = duplicateKeyTrackerIdent ? true : false;
+ uassert(ErrorCodes::BadValue,
+ str::stream() << "Resume info must contain the duplicate key tracker ident ["
+ << duplicateKeyTrackerIdent
+ << "] if and only if the index is unique: " << entry->descriptor(),
+ entry->descriptor()->unique() == dupKeyTrackerIdentExists);
+ if (duplicateKeyTrackerIdent) {
+ _duplicateKeyTracker =
+ std::make_unique<DuplicateKeyTracker>(opCtx, entry, duplicateKeyTrackerIdent.get());
}
+
+ _initializeMultiKeyPaths(entry);
+ finalizeTableOnFailure.dismiss();
}
void IndexBuildInterceptor::finalizeTemporaryTables(
diff --git a/src/mongo/db/index/index_build_interceptor.h b/src/mongo/db/index/index_build_interceptor.h
index f82669c3392..a9215367feb 100644
--- a/src/mongo/db/index/index_build_interceptor.h
+++ b/src/mongo/db/index/index_build_interceptor.h
@@ -74,6 +74,20 @@ public:
IndexBuildInterceptor(OperationContext* opCtx, IndexCatalogEntry* entry);
/**
+ * Finds the temporary table associated with storing writes during this index build. Only used
+ * Only used when resuming an index build and the temporary table already exists on disk.
+ * Additionally will find the tmeporary table associated with storing duplicate key constraint
+ * violations found during the build, if the index being built has uniqueness constraints.
+ *
+ * finalizeTemporaryTable() must be called before destruction.
+ */
+ IndexBuildInterceptor(OperationContext* opCtx,
+ IndexCatalogEntry* entry,
+ StringData sideWritesIdent,
+ boost::optional<StringData> duplicateKeyTrackerIdent,
+ boost::optional<StringData> skippedRecordTrackerIdent);
+
+ /**
* Deletes or keeps the temporary side writes and duplicate key constraint violations tables.
* Must be called before object destruction.
*/
@@ -162,6 +176,8 @@ public:
private:
using SideWriteRecord = std::pair<RecordId, BSONObj>;
+
+ void _initializeMultiKeyPaths(IndexCatalogEntry* entry);
Status _applyWrite(OperationContext* opCtx,
const Collection* coll,
const BSONObj& doc,
diff --git a/src/mongo/db/index/skipped_record_tracker.cpp b/src/mongo/db/index/skipped_record_tracker.cpp
index 9fd39f158aa..0c32261e1d2 100644
--- a/src/mongo/db/index/skipped_record_tracker.cpp
+++ b/src/mongo/db/index/skipped_record_tracker.cpp
@@ -41,6 +41,25 @@ namespace {
static constexpr StringData kRecordIdField = "recordId"_sd;
}
+SkippedRecordTracker::SkippedRecordTracker(IndexCatalogEntry* indexCatalogEntry) {
+ SkippedRecordTracker(nullptr, indexCatalogEntry, boost::none);
+}
+
+SkippedRecordTracker::SkippedRecordTracker(OperationContext* opCtx,
+ IndexCatalogEntry* indexCatalogEntry,
+ boost::optional<StringData> ident)
+ : _indexCatalogEntry(indexCatalogEntry) {
+ if (!ident) {
+ return;
+ }
+
+ // Only initialize the table when resuming an index build if an ident already exists. Otherwise,
+ // lazily initialize table when we record the first document.
+ _skippedRecordsTable =
+ opCtx->getServiceContext()->getStorageEngine()->makeTemporaryRecordStoreFromExistingIdent(
+ opCtx, ident.get());
+}
+
void SkippedRecordTracker::finalizeTemporaryTable(OperationContext* opCtx,
TemporaryRecordStore::FinalizationAction action) {
if (_skippedRecordsTable) {
diff --git a/src/mongo/db/index/skipped_record_tracker.h b/src/mongo/db/index/skipped_record_tracker.h
index 16ea1a6106e..856ebed9197 100644
--- a/src/mongo/db/index/skipped_record_tracker.h
+++ b/src/mongo/db/index/skipped_record_tracker.h
@@ -46,8 +46,10 @@ class SkippedRecordTracker {
SkippedRecordTracker(const SkippedRecordTracker&) = delete;
public:
- SkippedRecordTracker(IndexCatalogEntry* indexCatalogEntry)
- : _indexCatalogEntry(indexCatalogEntry) {}
+ explicit SkippedRecordTracker(IndexCatalogEntry* indexCatalogEntry);
+ SkippedRecordTracker(OperationContext* opCtx,
+ IndexCatalogEntry* indexCatalogEntry,
+ boost::optional<StringData> ident);
/**
* Records a RecordId that was unable to be indexed due to a key generation error. At the
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 8699084e704..b0e1daf3208 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -75,6 +75,7 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeCompletingAbort);
MONGO_FAIL_POINT_DEFINE(failIndexBuildOnCommit);
MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeAbortCleanUp);
MONGO_FAIL_POINT_DEFINE(hangIndexBuildOnStepUp);
+MONGO_FAIL_POINT_DEFINE(hangAfterSettingUpResumableIndexBuild);
namespace {
@@ -542,6 +543,89 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC
return Status::OK();
}
+Status IndexBuildsCoordinator::_setUpResumeIndexBuild(OperationContext* opCtx,
+ std::string dbName,
+ CollectionUUID collectionUUID,
+ const std::vector<BSONObj>& specs,
+ const UUID& buildUUID,
+ const ResumeIndexInfo& resumeInfo) {
+ NamespaceStringOrUUID nssOrUuid{dbName, collectionUUID};
+
+ Lock::DBLock dbLock(opCtx, dbName, MODE_IX);
+ Lock::CollectionLock collLock(opCtx, nssOrUuid, MODE_X);
+
+ auto& collectionCatalog = CollectionCatalog::get(opCtx->getServiceContext());
+ auto collection =
+ collectionCatalog.lookupCollectionByUUID(opCtx, resumeInfo.getCollectionUUID());
+ invariant(collection);
+ auto durableCatalog = DurableCatalog::get(opCtx);
+
+ for (auto spec : specs) {
+ std::string indexName = spec.getStringField(IndexDescriptor::kIndexNameFieldName);
+ if (indexName.empty()) {
+ return Status(ErrorCodes::CannotCreateIndex,
+ str::stream()
+ << "Cannot create an index for a spec '" << spec
+ << "' without a non-empty string value for the 'name' field");
+ }
+
+ // Check that the information in the durable catalog matches the resume info.
+ uassert(4841702,
+ "Index not found in durable catalog while attempting to resume index build",
+ durableCatalog->isIndexPresent(opCtx, collection->getCatalogId(), indexName));
+
+ const auto durableBuildUUID =
+ durableCatalog->getIndexBuildUUID(opCtx, collection->getCatalogId(), indexName);
+ uassert(ErrorCodes::IndexNotFound,
+ str::stream() << "Cannot resume index build with a buildUUID: " << buildUUID
+ << " that did not match the buildUUID in the durable catalog: "
+ << durableBuildUUID,
+ durableBuildUUID == buildUUID);
+
+ auto indexIdent =
+ durableCatalog->getIndexIdent(opCtx, collection->getCatalogId(), indexName);
+ uassert(
+ 4841703,
+ str::stream() << "No index ident found on disk that matches the index build to resume: "
+ << indexName,
+ indexIdent.size() > 0);
+
+ uassertStatusOK(durableCatalog->checkMetaDataForIndex(
+ opCtx, collection->getCatalogId(), indexName, spec));
+ }
+
+ if (!collection->isInitialized()) {
+ collection->init(opCtx);
+ }
+
+ auto protocol = IndexBuildProtocol::kTwoPhase;
+ auto replIndexBuildState = std::make_shared<ReplIndexBuildState>(
+ buildUUID, collection->uuid(), dbName, specs, protocol);
+
+ Status status = [&]() {
+ stdx::unique_lock<Latch> lk(_mutex);
+ return _registerIndexBuild(lk, replIndexBuildState);
+ }();
+ if (!status.isOK()) {
+ return status;
+ }
+
+ IndexBuildsManager::SetupOptions options;
+ options.protocol = protocol;
+ status = _indexBuildsManager.setUpIndexBuild(
+ opCtx, collection, specs, buildUUID, MultiIndexBlock::kNoopOnInitFn, options, resumeInfo);
+ if (!status.isOK()) {
+ LOGV2(4841705,
+ "Failed to resume index build",
+ "buildUUID"_attr = buildUUID,
+ logAttrs(collection->ns()),
+ "collectionUUID"_attr = collectionUUID,
+ "error"_attr = status);
+ }
+
+ return status;
+}
+
std::string IndexBuildsCoordinator::_indexBuildActionToString(IndexBuildAction action) {
if (action == IndexBuildAction::kNoAction) {
return "No action";
@@ -1325,14 +1409,58 @@ void IndexBuildsCoordinator::restartIndexBuildsForRecovery(
OperationContext* opCtx,
const IndexBuilds& buildsToRestart,
const std::vector<ResumeIndexInfo>& buildsToResume) {
+
+ stdx::unordered_set<UUID, UUID::Hash> successfullyResumed;
+
+ for (const auto& resumeInfo : buildsToResume) {
+ auto buildUUID = resumeInfo.getBuildUUID();
+ auto collUUID = resumeInfo.getCollectionUUID();
+
+ boost::optional<NamespaceString> nss =
+ CollectionCatalog::get(opCtx).lookupNSSByUUID(opCtx, resumeInfo.getCollectionUUID());
+ invariant(nss);
+
+ std::vector<BSONObj> indexSpecs;
+ indexSpecs.reserve(resumeInfo.getIndexes().size());
+
+ for (const auto& index : resumeInfo.getIndexes()) {
+ indexSpecs.push_back(index.getSpec());
+ }
+
+ LOGV2(4841700,
+ "Index build: resuming",
+ logAttrs(nss.get()),
+ "collectionUUID"_attr = collUUID,
+ "buildUUID"_attr = buildUUID,
+ "specs"_attr = indexSpecs);
+
+ try {
+ // This spawns a new thread and returns immediately. These index builds will resume and
+ // wait for a commit or abort to be replicated.
+ MONGO_COMPILER_VARIABLE_UNUSED auto fut = uassertStatusOK(resumeIndexBuild(
+ opCtx, nss->db().toString(), collUUID, indexSpecs, buildUUID, resumeInfo));
+ successfullyResumed.insert(buildUUID);
+ } catch (const DBException& e) {
+ LOGV2(4841701,
+ "Failed to resume index build, restarting instead",
+ "buildUUID"_attr = buildUUID,
+ "error"_attr = e);
+ }
+ }
+
for (auto& [buildUUID, build] : buildsToRestart) {
+ // Don't restart an index build that was already resumed.
+ if (successfullyResumed.contains(buildUUID)) {
+ continue;
+ }
+
boost::optional<NamespaceString> nss =
CollectionCatalog::get(opCtx).lookupNSSByUUID(opCtx, build.collUUID);
invariant(nss);
LOGV2(20660,
- "Restarting index build",
- "collection"_attr = nss,
+ "Index build: restarting",
+ logAttrs(nss.get()),
"collectionUUID"_attr = build.collUUID,
"buildUUID"_attr = buildUUID);
IndexBuildsCoordinator::IndexBuildOptions indexBuildOptions;
@@ -1944,9 +2072,11 @@ Status IndexBuildsCoordinator::_setUpIndexBuild(OperationContext* opCtx,
return Status::OK();
}
-void IndexBuildsCoordinator::_runIndexBuild(OperationContext* opCtx,
- const UUID& buildUUID,
- const IndexBuildOptions& indexBuildOptions) noexcept {
+void IndexBuildsCoordinator::_runIndexBuild(
+ OperationContext* opCtx,
+ const UUID& buildUUID,
+ const IndexBuildOptions& indexBuildOptions,
+ const boost::optional<ResumeIndexInfo>& resumeInfo) noexcept {
{
stdx::unique_lock<Latch> lk(_mutex);
while (_sleepForTest) {
@@ -1987,7 +2117,7 @@ void IndexBuildsCoordinator::_runIndexBuild(OperationContext* opCtx,
auto status = [&]() {
try {
- _runIndexBuildInner(opCtx, replState, indexBuildOptions);
+ _runIndexBuildInner(opCtx, replState, indexBuildOptions, resumeInfo);
} catch (const DBException& ex) {
return ex.toStatus();
}
@@ -2089,9 +2219,11 @@ void IndexBuildsCoordinator::_cleanUpTwoPhaseAfterFailure(
});
}
-void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- const IndexBuildOptions& indexBuildOptions) {
+void IndexBuildsCoordinator::_runIndexBuildInner(
+ OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ const IndexBuildOptions& indexBuildOptions,
+ const boost::optional<ResumeIndexInfo>& resumeInfo) {
// This Status stays unchanged unless we catch an exception in the following try-catch block.
auto status = Status::OK();
try {
@@ -2099,7 +2231,12 @@ void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx,
hangAfterInitializingIndexBuild.pauseWhileSet(opCtx);
}
- _buildIndex(opCtx, replState, indexBuildOptions);
+ if (resumeInfo) {
+ _resumeIndexBuildFromPhase(opCtx, replState, indexBuildOptions, resumeInfo.get());
+ } else {
+ _buildIndex(opCtx, replState, indexBuildOptions);
+ }
+
} catch (const DBException& ex) {
status = ex.toStatus();
}
@@ -2157,6 +2294,22 @@ void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx,
uassertStatusOK(status);
}
+void IndexBuildsCoordinator::_resumeIndexBuildFromPhase(
+ OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ const IndexBuildOptions& indexBuildOptions,
+ const ResumeIndexInfo& resumeInfo) {
+ if (MONGO_unlikely(hangAfterSettingUpResumableIndexBuild.shouldFail())) {
+ LOGV2(4841704,
+ "Hanging index build due to failpoint 'hangAfterSettingUpResumableIndexBuild'");
+ hangAfterSettingUpResumableIndexBuild.pauseWhileSet();
+ }
+ _insertKeysFromSideTablesWithoutBlockingWrites(opCtx, replState);
+ _signalPrimaryForCommitReadiness(opCtx, replState);
+ _insertKeysFromSideTablesBlockingWrites(opCtx, replState, indexBuildOptions);
+ _waitForNextIndexBuildActionAndCommit(opCtx, replState, indexBuildOptions);
+}
+
void IndexBuildsCoordinator::_awaitLastOpTimeBeforeInterceptorsMajorityCommitted(
OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) {
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h
index c78134998db..e5568b48ba2 100644
--- a/src/mongo/db/index_builds_coordinator.h
+++ b/src/mongo/db/index_builds_coordinator.h
@@ -132,7 +132,7 @@ public:
*
* A Future is returned that will complete when the index build commits or aborts.
*
- * On a successful index build, calling Future::get(), or Future::getNoThrows(), returns index
+ * On a successful index build, calling Future::get(), or Future::getNoThrow(), returns index
* catalog statistics.
*
* Returns an error status if there are any errors setting up the index build.
@@ -147,9 +147,28 @@ public:
IndexBuildOptions indexBuildOptions) = 0;
/**
- * Resumes and restarts index builds for recovery. Anything that fails to resume will be
- * started in a background thread. Each index build will wait for a replicated commit or abort,
- * as in steady-state.
+ * Reconstructs the in-memory state of the index build. When successful, returns after the index
+ * build has been resumed from the phase it left off in.
+ *
+ * A Future is returned that will complete when the index build commits or aborts.
+ *
+ * On a successful index build, calling Future::get(), or Future::getNoThrow(), returns index
+ * catalog statistics.
+ *
+ * Returns an error status if there are any errors setting up the index build.
+ */
+ virtual StatusWith<SharedSemiFuture<ReplIndexBuildState::IndexCatalogStats>> resumeIndexBuild(
+ OperationContext* opCtx,
+ std::string dbName,
+ CollectionUUID collectionUUID,
+ const std::vector<BSONObj>& specs,
+ const UUID& buildUUID,
+ const ResumeIndexInfo& resumeInfo) = 0;
+
+ /**
+ * Resumes and restarts index builds for recovery. Anything that fails to resume will be started
+ * in a background thread. Each index build will wait for a replicated commit or abort, as in
+ * steady-state.
*/
void restartIndexBuildsForRecovery(OperationContext* opCtx,
const IndexBuilds& buildsToRestart,
@@ -535,13 +554,25 @@ protected:
const std::vector<BSONObj>& specs,
const UUID& buildUUID);
/**
+ * Reconstructs the in-memory state of the index build so that it can be resumed from the phase
+ * it was in when the node cleanly shut down.
+ */
+ Status _setUpResumeIndexBuild(OperationContext* opCtx,
+ std::string dbName,
+ CollectionUUID collectionUUID,
+ const std::vector<BSONObj>& specs,
+ const UUID& buildUUID,
+ const ResumeIndexInfo& resumeInfo);
+
+ /**
* Runs the index build on the caller thread. Handles unregistering the index build and setting
* the index build's Promise with the outcome of the index build.
* 'IndexBuildOptios::replSetAndNotPrimary' is determined at the start of the index build.
*/
void _runIndexBuild(OperationContext* opCtx,
const UUID& buildUUID,
- const IndexBuildOptions& indexBuildOptions) noexcept;
+ const IndexBuildOptions& indexBuildOptions,
+ const boost::optional<ResumeIndexInfo>& resumeInfo) noexcept;
/**
* Acquires locks and runs index build. Throws on error.
@@ -549,7 +580,18 @@ protected:
*/
void _runIndexBuildInner(OperationContext* opCtx,
std::shared_ptr<ReplIndexBuildState> replState,
- const IndexBuildOptions& indexBuildOptions);
+ const IndexBuildOptions& indexBuildOptions,
+ const boost::optional<ResumeIndexInfo>& resumeInfo);
+
+ /**
+ * Resumes the index build from the phase that it was in when the node cleanly shut down. By the
+ * time this function is called, the in-memory state of the index build should already have been
+ * reconstructed.
+ */
+ void _resumeIndexBuildFromPhase(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ const IndexBuildOptions& indexBuildOptions,
+ const ResumeIndexInfo& resumeInfo);
/**
* Cleans up a single-phase index build after a failure.
diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp
index 5a3f9061e90..c66627251b5 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.cpp
+++ b/src/mongo/db/index_builds_coordinator_mongod.cpp
@@ -122,6 +122,38 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx,
const UUID& buildUUID,
IndexBuildProtocol protocol,
IndexBuildOptions indexBuildOptions) {
+ return _startIndexBuild(
+ opCtx, dbName, collectionUUID, specs, buildUUID, protocol, indexBuildOptions, boost::none);
+}
+
+StatusWith<SharedSemiFuture<ReplIndexBuildState::IndexCatalogStats>>
+IndexBuildsCoordinatorMongod::resumeIndexBuild(OperationContext* opCtx,
+ std::string dbName,
+ CollectionUUID collectionUUID,
+ const std::vector<BSONObj>& specs,
+ const UUID& buildUUID,
+ const ResumeIndexInfo& resumeInfo) {
+ IndexBuildsCoordinator::IndexBuildOptions indexBuildOptions;
+ indexBuildOptions.applicationMode = ApplicationMode::kStartupRepair;
+ return _startIndexBuild(opCtx,
+ dbName,
+ collectionUUID,
+ specs,
+ buildUUID,
+ IndexBuildProtocol::kTwoPhase,
+ indexBuildOptions,
+ resumeInfo);
+}
+
+StatusWith<SharedSemiFuture<ReplIndexBuildState::IndexCatalogStats>>
+IndexBuildsCoordinatorMongod::_startIndexBuild(OperationContext* opCtx,
+ std::string dbName,
+ CollectionUUID collectionUUID,
+ const std::vector<BSONObj>& specs,
+ const UUID& buildUUID,
+ IndexBuildProtocol protocol,
+ IndexBuildOptions indexBuildOptions,
+ const boost::optional<ResumeIndexInfo>& resumeInfo) {
const NamespaceStringOrUUID nssOrUuid{dbName, collectionUUID};
{
@@ -178,11 +210,17 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx,
});
if (indexBuildOptions.applicationMode == ApplicationMode::kStartupRepair) {
- // Two phase index build recovery goes though a different set-up procedure because the
- // original index will be dropped first.
+ // Two phase index build recovery goes though a different set-up procedure because we will
+ // either resume the index build or the original index will be dropped first.
invariant(protocol == IndexBuildProtocol::kTwoPhase);
- auto status =
- _setUpIndexBuildForTwoPhaseRecovery(opCtx, dbName, collectionUUID, specs, buildUUID);
+ auto status = Status::OK();
+ if (resumeInfo) {
+ status = _setUpResumeIndexBuild(
+ opCtx, dbName, collectionUUID, specs, buildUUID, resumeInfo.get());
+ } else {
+ status = _setUpIndexBuildForTwoPhaseRecovery(
+ opCtx, dbName, collectionUUID, specs, buildUUID);
+ }
if (!status.isOK()) {
return status;
}
@@ -247,7 +285,8 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx,
startPromise = std::move(startPromise),
startTimestamp,
shardVersion,
- dbVersion
+ dbVersion,
+ resumeInfo
](auto status) mutable noexcept {
auto onScopeExitGuard = makeGuard([&] {
stdx::unique_lock<Latch> lk(_mutex);
@@ -297,7 +336,7 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx,
// Runs the remainder of the index build. Sets the promise result and cleans up the index
// build.
- _runIndexBuild(opCtx.get(), buildUUID, indexBuildOptions);
+ _runIndexBuild(opCtx.get(), buildUUID, indexBuildOptions, resumeInfo);
// Do not exit with an incomplete future.
invariant(replState->sharedPromise.getFuture().isReady());
diff --git a/src/mongo/db/index_builds_coordinator_mongod.h b/src/mongo/db/index_builds_coordinator_mongod.h
index c9bf15b6b3a..a16cbb842e1 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.h
+++ b/src/mongo/db/index_builds_coordinator_mongod.h
@@ -78,6 +78,19 @@ public:
IndexBuildProtocol protocol,
IndexBuildOptions indexBuildOptions) override;
+ /**
+ * Reconstructs the in-memory state of the index build, then passes the build off to an
+ * asynchronous thread to run. A Future is returned so that the user can await the asynchronous
+ * build result.
+ */
+ StatusWith<SharedSemiFuture<ReplIndexBuildState::IndexCatalogStats>> resumeIndexBuild(
+ OperationContext* opCtx,
+ std::string dbName,
+ CollectionUUID collectionUUID,
+ const std::vector<BSONObj>& specs,
+ const UUID& buildUUID,
+ const ResumeIndexInfo& resumeInfo) override;
+
Status voteCommitIndexBuild(OperationContext* opCtx,
const UUID& buildUUID,
const HostAndPort& hostAndPort) override;
@@ -159,6 +172,16 @@ private:
std::shared_ptr<ReplIndexBuildState> replState,
const IndexBuildOptions& indexBuildOptions) override;
+ StatusWith<SharedSemiFuture<ReplIndexBuildState::IndexCatalogStats>> _startIndexBuild(
+ OperationContext* opCtx,
+ std::string dbName,
+ CollectionUUID collectionUUID,
+ const std::vector<BSONObj>& specs,
+ const UUID& buildUUID,
+ IndexBuildProtocol protocol,
+ IndexBuildOptions indexBuildOptions,
+ const boost::optional<ResumeIndexInfo>& resumeInfo);
+
// Thread pool on which index builds are run.
ThreadPool _threadPool;
diff --git a/src/mongo/db/resumable_index_builds.idl b/src/mongo/db/resumable_index_builds.idl
index 9f8fc4a6818..3e4fdeff29d 100644
--- a/src/mongo/db/resumable_index_builds.idl
+++ b/src/mongo/db/resumable_index_builds.idl
@@ -96,7 +96,7 @@ structs:
optional: true
spec:
description: "The index specification"
- type: object
+ type: object_owned
ResumeIndexInfo:
description: "Information needed to resume index builds"
diff --git a/src/mongo/db/storage/durable_catalog.h b/src/mongo/db/storage/durable_catalog.h
index b8b1df02095..2ee532cd484 100644
--- a/src/mongo/db/storage/durable_catalog.h
+++ b/src/mongo/db/storage/durable_catalog.h
@@ -93,6 +93,14 @@ public:
RecordId id,
BSONCollectionCatalogEntry::MetaData& md) = 0;
+ /**
+ * Checks that the metadata for the index exists and matches the given spec.
+ */
+ virtual Status checkMetaDataForIndex(OperationContext* opCtx,
+ RecordId catalogId,
+ const std::string& indexName,
+ const BSONObj& spec) = 0;
+
virtual std::vector<std::string> getAllIdents(OperationContext* opCtx) const = 0;
virtual bool isUserDataIdent(StringData ident) const = 0;
diff --git a/src/mongo/db/storage/durable_catalog_impl.cpp b/src/mongo/db/storage/durable_catalog_impl.cpp
index e239aa61f0c..eda5c179748 100644
--- a/src/mongo/db/storage/durable_catalog_impl.cpp
+++ b/src/mongo/db/storage/durable_catalog_impl.cpp
@@ -684,6 +684,29 @@ void DurableCatalogImpl::putMetaData(OperationContext* opCtx,
fassert(28521, status);
}
+Status DurableCatalogImpl::checkMetaDataForIndex(OperationContext* opCtx,
+ RecordId catalogId,
+ const std::string& indexName,
+ const BSONObj& spec) {
+ auto md = getMetaData(opCtx, catalogId);
+ int offset = md.findIndexOffset(indexName);
+ if (offset < 0) {
+ return {ErrorCodes::IndexNotFound,
+ str::stream() << "Index [" << indexName
+ << "] not found in metadata for recordId: " << catalogId};
+ }
+
+ if (spec.woCompare(md.indexes[offset].spec)) {
+ return {ErrorCodes::BadValue,
+ str::stream() << "Spec for index [" << indexName
+ << "] does not match spec in the metadata for recordId: " << catalogId
+ << ". Spec: " << spec
+ << " metadata's spec: " << md.indexes[offset].spec};
+ }
+
+ return Status::OK();
+}
+
Status DurableCatalogImpl::_replaceEntry(OperationContext* opCtx,
RecordId catalogId,
const NamespaceString& toNss,
diff --git a/src/mongo/db/storage/durable_catalog_impl.h b/src/mongo/db/storage/durable_catalog_impl.h
index 5c674af4824..2b054b424eb 100644
--- a/src/mongo/db/storage/durable_catalog_impl.h
+++ b/src/mongo/db/storage/durable_catalog_impl.h
@@ -81,6 +81,11 @@ public:
RecordId catalogId,
BSONCollectionCatalogEntry::MetaData& md);
+ Status checkMetaDataForIndex(OperationContext* opCtx,
+ RecordId catalogId,
+ const std::string& indexName,
+ const BSONObj& spec);
+
std::vector<std::string> getAllIdents(OperationContext* opCtx) const;
bool isUserDataIdent(StringData ident) const;
diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h
index 6ce5eeb3add..085336a23c6 100644
--- a/src/mongo/db/storage/storage_engine.h
+++ b/src/mongo/db/storage/storage_engine.h
@@ -355,15 +355,21 @@ public:
const NamespaceString& nss) = 0;
/**
- * Creates a temporary RecordStore on the storage engine. This record store will drop itself
- * automatically when it goes out of scope. This means the TemporaryRecordStore should not exist
- * any longer than the OperationContext used to create it. On startup, the storage engine will
- * drop any un-dropped temporary record stores.
+ * Creates a temporary RecordStore on the storage engine. On startup after an unclean shutdown,
+ * the storage engine will drop any un-dropped temporary record stores.
*/
virtual std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStore(
OperationContext* opCtx) = 0;
/**
+ * Creates a temporary RecordStore on the storage engine from an existing ident on disk. On
+ * startup after an unclean shutdown, the storage engine will drop any un-dropped temporary
+ * record stores.
+ */
+ virtual std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStoreFromExistingIdent(
+ OperationContext* opCtx, StringData ident) = 0;
+
+ /**
* This method will be called before there is a clean shutdown. Storage engines should
* override this method if they have clean-up to do that is different from unclean shutdown.
* MongoDB will not call into the storage subsystem after calling this function.
diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp
index a4590dd850f..93742937d27 100644
--- a/src/mongo/db/storage/storage_engine_impl.cpp
+++ b/src/mongo/db/storage/storage_engine_impl.cpp
@@ -368,6 +368,11 @@ bool StorageEngineImpl::_handleInternalIdents(
reconcileResult->indexBuildsToResume.push_back(resumeInfo);
+ // Once we have parsed the resume info, we can safely drop the internal ident.
+ // TODO SERVER-49846: revisit this logic since this could cause the side tables
+ // associated with the index build to be orphaned if resuming fails.
+ internalIdentsToDrop->insert(ident);
+
LOGV2(4916301,
"Found unfinished index build to resume",
"buildUUID"_attr = resumeInfo.getBuildUUID(),
@@ -842,6 +847,12 @@ std::unique_ptr<TemporaryRecordStore> StorageEngineImpl::makeTemporaryRecordStor
return std::make_unique<TemporaryKVRecordStore>(getEngine(), std::move(rs));
}
+std::unique_ptr<TemporaryRecordStore> StorageEngineImpl::makeTemporaryRecordStoreFromExistingIdent(
+ OperationContext* opCtx, StringData ident) {
+ auto rs = _engine->getRecordStore(opCtx, "", ident, CollectionOptions());
+ return std::make_unique<TemporaryKVRecordStore>(getEngine(), std::move(rs));
+}
+
void StorageEngineImpl::setJournalListener(JournalListener* jl) {
_engine->setJournalListener(jl);
}
diff --git a/src/mongo/db/storage/storage_engine_impl.h b/src/mongo/db/storage/storage_engine_impl.h
index 926d3900649..2e09d2ff82a 100644
--- a/src/mongo/db/storage/storage_engine_impl.h
+++ b/src/mongo/db/storage/storage_engine_impl.h
@@ -111,6 +111,9 @@ public:
virtual std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStore(
OperationContext* opCtx) override;
+ virtual std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStoreFromExistingIdent(
+ OperationContext* opCtx, StringData ident) override;
+
virtual void cleanShutdown() override;
virtual void setStableTimestamp(Timestamp stableTimestamp, bool force = false) override;
diff --git a/src/mongo/db/storage/storage_engine_mock.h b/src/mongo/db/storage/storage_engine_mock.h
index a363c11f800..1b303b35985 100644
--- a/src/mongo/db/storage/storage_engine_mock.h
+++ b/src/mongo/db/storage/storage_engine_mock.h
@@ -93,6 +93,10 @@ public:
std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStore(OperationContext* opCtx) final {
return {};
}
+ std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStoreFromExistingIdent(
+ OperationContext* opCtx, StringData ident) final {
+ return {};
+ }
void cleanShutdown() final {}
SnapshotManager* getSnapshotManager() const final {
return nullptr;
diff --git a/src/mongo/embedded/index_builds_coordinator_embedded.cpp b/src/mongo/embedded/index_builds_coordinator_embedded.cpp
index b4d3fe88450..ffbcaccd0e6 100644
--- a/src/mongo/embedded/index_builds_coordinator_embedded.cpp
+++ b/src/mongo/embedded/index_builds_coordinator_embedded.cpp
@@ -69,12 +69,22 @@ IndexBuildsCoordinatorEmbedded::startIndexBuild(OperationContext* opCtx,
if (!status.isOK()) {
return status;
}
- _runIndexBuild(opCtx, buildUUID, indexBuildOptions);
+ _runIndexBuild(opCtx, buildUUID, indexBuildOptions, boost::none /* resumeInfo */);
auto replState = invariant(_getIndexBuild(buildUUID));
return replState->sharedPromise.getFuture();
}
+StatusWith<SharedSemiFuture<ReplIndexBuildState::IndexCatalogStats>>
+IndexBuildsCoordinatorEmbedded::resumeIndexBuild(OperationContext* opCtx,
+ std::string dbName,
+ CollectionUUID collectionUUID,
+ const std::vector<BSONObj>& specs,
+ const UUID& buildUUID,
+ const ResumeIndexInfo& resumeInfo) {
+ MONGO_UNREACHABLE;
+}
+
void IndexBuildsCoordinatorEmbedded::_signalPrimaryForCommitReadiness(
OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) {}
diff --git a/src/mongo/embedded/index_builds_coordinator_embedded.h b/src/mongo/embedded/index_builds_coordinator_embedded.h
index 84c9d30ded7..81b02ddada9 100644
--- a/src/mongo/embedded/index_builds_coordinator_embedded.h
+++ b/src/mongo/embedded/index_builds_coordinator_embedded.h
@@ -65,6 +65,14 @@ public:
IndexBuildProtocol protocol,
IndexBuildOptions indexBuildOptions) override;
+ StatusWith<SharedSemiFuture<ReplIndexBuildState::IndexCatalogStats>> resumeIndexBuild(
+ OperationContext* opCtx,
+ std::string dbName,
+ CollectionUUID collectionUUID,
+ const std::vector<BSONObj>& specs,
+ const UUID& buildUUID,
+ const ResumeIndexInfo& resumeInfo) override;
+
void setSignalAndCancelVoteRequestCbkIfActive(WithLock ReplIndexBuildStateLk,
OperationContext* opCtx,
std::shared_ptr<ReplIndexBuildState> replState,