summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--etc/evergreen.yml1
-rw-r--r--jstests/noPassthrough/backup_restore_fsync_lock.js3
-rw-r--r--jstests/noPassthrough/backup_restore_rolling.js3
-rw-r--r--jstests/noPassthrough/backup_restore_stop_start.js3
-rw-r--r--jstests/noPassthrough/characterize_index_builds_on_restart.js3
-rw-r--r--jstests/noPassthrough/indexbg_shutdown.js8
-rw-r--r--jstests/noPassthroughWithMongod/indexbg_restart_secondary.js11
-rw-r--r--jstests/replsets/rollback_waits_for_bgindex_completion.js2
-rw-r--r--src/mongo/db/catalog/catalog_control.cpp13
-rw-r--r--src/mongo/db/catalog/catalog_control_test.cpp4
-rw-r--r--src/mongo/db/catalog/index_builds_manager.cpp9
-rw-r--r--src/mongo/db/catalog/index_builds_manager.h1
-rw-r--r--src/mongo/db/index_builds_coordinator.cpp183
-rw-r--r--src/mongo/db/index_builds_coordinator.h46
-rw-r--r--src/mongo/db/index_builds_coordinator_mongod.cpp40
-rw-r--r--src/mongo/db/repair_database.cpp4
-rw-r--r--src/mongo/db/repair_database_and_check_version.cpp9
-rw-r--r--src/mongo/db/repl/rollback_impl.cpp9
-rw-r--r--src/mongo/db/storage/kv/storage_engine_test.cpp149
-rw-r--r--src/mongo/db/storage/storage_engine.h34
-rw-r--r--src/mongo/db/storage/storage_engine_impl.cpp36
-rw-r--r--src/mongo/db/storage/storage_engine_impl.h6
-rw-r--r--src/mongo/db/storage/storage_engine_test_fixture.h25
23 files changed, 396 insertions, 206 deletions
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index 8ecf15dd47f..b61822bf1d0 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -11226,6 +11226,7 @@ buildvariants:
- name: replica_sets
distros:
- rhel62-large
+ - name: rollback_fuzzer_gen
- name: sharding_gen
- name: enterprise-rhel-62-64-bit-coverage
diff --git a/jstests/noPassthrough/backup_restore_fsync_lock.js b/jstests/noPassthrough/backup_restore_fsync_lock.js
index f6f720377aa..57b12a6ab24 100644
--- a/jstests/noPassthrough/backup_restore_fsync_lock.js
+++ b/jstests/noPassthrough/backup_restore_fsync_lock.js
@@ -11,9 +11,12 @@
*
* Some methods for backup used in this test checkpoint the files in the dbpath. This technique will
* not work for ephemeral storage engines, as they do not store any data in the dbpath.
+ * TODO: (SERVER-44467) Remove two_phase_index_builds_unsupported tag when startup recovery works
+ * for two-phase index builds.
* @tags: [
* requires_persistence,
* requires_replication,
+ * two_phase_index_builds_unsupported,
* ]
*/
diff --git a/jstests/noPassthrough/backup_restore_rolling.js b/jstests/noPassthrough/backup_restore_rolling.js
index 63f6b1a7cd2..358f33b0194 100644
--- a/jstests/noPassthrough/backup_restore_rolling.js
+++ b/jstests/noPassthrough/backup_restore_rolling.js
@@ -11,9 +11,12 @@
*
* Some methods for backup used in this test checkpoint the files in the dbpath. This technique will
* not work for ephemeral storage engines, as they do not store any data in the dbpath.
+ * TODO: (SERVER-44467) Remove two_phase_index_builds_unsupported tag when startup recovery works
+ * for two-phase index builds.
* @tags: [
* requires_persistence,
* requires_wiredtiger,
+ * two_phase_index_builds_unsupported,
* ]
*/
diff --git a/jstests/noPassthrough/backup_restore_stop_start.js b/jstests/noPassthrough/backup_restore_stop_start.js
index 2598b8670ab..71ad88c7dd5 100644
--- a/jstests/noPassthrough/backup_restore_stop_start.js
+++ b/jstests/noPassthrough/backup_restore_stop_start.js
@@ -11,9 +11,12 @@
*
* Some methods for backup used in this test checkpoint the files in the dbpath. This technique will
* not work for ephemeral storage engines, as they do not store any data in the dbpath.
+ * TODO: (SERVER-44467) Remove two_phase_index_builds_unsupported tag when startup recovery works
+ * for two-phase index builds.
* @tags: [
* requires_persistence,
* requires_replication,
+ * two_phase_index_builds_unsupported,
* ]
*/
diff --git a/jstests/noPassthrough/characterize_index_builds_on_restart.js b/jstests/noPassthrough/characterize_index_builds_on_restart.js
index b5520883a4f..64c2ba5fe91 100644
--- a/jstests/noPassthrough/characterize_index_builds_on_restart.js
+++ b/jstests/noPassthrough/characterize_index_builds_on_restart.js
@@ -2,10 +2,13 @@
* Characterizes the actions (rebuilds or drops the index) taken upon unfinished indexes when
* restarting mongod from (standalone -> standalone) and (replica set member -> standalone).
*
+ * TODO(SERVER-44468): Remove two_phase_index_builds_unsupported tag when standalone startup works
+ * with two-phase builds
* @tags: [
* requires_majority_read_concern,
* requires_persistence,
* requires_replication,
+ * two_phase_index_builds_unsupported,
* ]
*/
(function() {
diff --git a/jstests/noPassthrough/indexbg_shutdown.js b/jstests/noPassthrough/indexbg_shutdown.js
index ae7d1f3c3e6..4e9bc9cf016 100644
--- a/jstests/noPassthrough/indexbg_shutdown.js
+++ b/jstests/noPassthrough/indexbg_shutdown.js
@@ -3,7 +3,13 @@
* shuts down a secondary while it's building that index, and confirms that the secondary
* shuts down cleanly, without an fassert.
* Also confirms that killOp has no effect on the background index build on the secondary.
- * @tags: [requires_replication]
+ *
+ * TODO(SERVER-44467): Remove two_phase_index_builds_unsupported tag when startup recovery works
+ * for two-phase index builds.
+ * @tags: [
+ * requires_replication,
+ * two_phase_index_builds_unsupported,
+ * ]
*/
(function() {
diff --git a/jstests/noPassthroughWithMongod/indexbg_restart_secondary.js b/jstests/noPassthroughWithMongod/indexbg_restart_secondary.js
index 2e5d2e85bf6..e12973b7ff8 100644
--- a/jstests/noPassthroughWithMongod/indexbg_restart_secondary.js
+++ b/jstests/noPassthroughWithMongod/indexbg_restart_secondary.js
@@ -2,9 +2,18 @@
* Starts a replica set with arbiter, builds an index in background.
* Kills the secondary once the index build starts with a failpoint.
* The index build should resume when the secondary is restarted.
+ *
+ * TODO: (SERVER-44467) Remove two_phase_index_builds_unsupported tag when startup recovery works
+ * for two-phase index builds.
+ *
+ * @tags: [
+ * requires_persistence,
+ * requires_journaling,
+ * requires_replication,
+ * two_phase_index_builds_unsupported,
+ * ]
*/
-// @tags: [requires_persistence, requires_journaling, requires_replication]
(function() {
'use strict';
diff --git a/jstests/replsets/rollback_waits_for_bgindex_completion.js b/jstests/replsets/rollback_waits_for_bgindex_completion.js
index 892ddcd34e2..7bfb36ba9ad 100644
--- a/jstests/replsets/rollback_waits_for_bgindex_completion.js
+++ b/jstests/replsets/rollback_waits_for_bgindex_completion.js
@@ -2,12 +2,10 @@
* Test to ensure that rollback waits for in-progress background index builds to finish before
* starting the rollback process. Only applies to Recoverable Rollback via WiredTiger checkpoints.
*
- * TODO(SERVER-39451): Remove two_phase_index_builds_unsupported tag.
* @tags: [
* requires_wiredtiger,
* requires_journaling,
* requires_majority_read_concern,
- * two_phase_index_builds_unsupported,
* ]
*/
(function() {
diff --git a/src/mongo/db/catalog/catalog_control.cpp b/src/mongo/db/catalog/catalog_control.cpp
index 916deb9f2c7..bbcea9f741c 100644
--- a/src/mongo/db/catalog/catalog_control.cpp
+++ b/src/mongo/db/catalog/catalog_control.cpp
@@ -110,13 +110,12 @@ void openCatalog(OperationContext* opCtx, const MinVisibleTimestampMap& minVisib
storageEngine->loadCatalog(opCtx);
log() << "openCatalog: reconciling catalog and idents";
- auto indexesToRebuild = storageEngine->reconcileCatalogAndIdents(opCtx);
- fassert(40688, indexesToRebuild.getStatus());
+ auto reconcileResult = fassert(40688, storageEngine->reconcileCatalogAndIdents(opCtx));
// Determine which indexes need to be rebuilt. rebuildIndexesOnCollection() requires that all
// indexes on that collection are done at once, so we use a map to group them together.
StringMap<IndexNameObjs> nsToIndexNameObjMap;
- for (StorageEngine::IndexIdentifier indexIdentifier : indexesToRebuild.getValue()) {
+ for (StorageEngine::IndexIdentifier indexIdentifier : reconcileResult.indexesToRebuild) {
auto indexName = indexIdentifier.indexName;
auto indexSpecs =
getIndexNameObjs(opCtx,
@@ -158,6 +157,13 @@ void openCatalog(OperationContext* opCtx, const MinVisibleTimestampMap& minVisib
fassert(40690, rebuildIndexesOnCollection(opCtx, collection, indexSpecs));
}
+ // Once all unfinished index builds have been dropped and the catalog has been reloaded, restart
+ // any unfinished index builds. This will not restart any index builds to completion, but rather
+ // start the background thread, build the index, and wait for a replicated commit or abort oplog
+ // entry.
+ IndexBuildsCoordinator::get(opCtx)->restartIndexBuildsForRecovery(
+ opCtx, reconcileResult.indexBuildsToRestart);
+
// Open all databases and repopulate the CollectionCatalog.
log() << "openCatalog: reopening all databases";
auto databaseHolder = DatabaseHolder::get(opCtx);
@@ -187,6 +193,7 @@ void openCatalog(OperationContext* opCtx, const MinVisibleTimestampMap& minVisib
}
}
}
+
// Opening CollectionCatalog: The collection catalog is now in sync with the storage engine
// catalog. Clear the pre-closing state.
CollectionCatalog::get(opCtx).onOpenCatalog(opCtx);
diff --git a/src/mongo/db/catalog/catalog_control_test.cpp b/src/mongo/db/catalog/catalog_control_test.cpp
index 21740aa1b12..d2527e7efb9 100644
--- a/src/mongo/db/catalog/catalog_control_test.cpp
+++ b/src/mongo/db/catalog/catalog_control_test.cpp
@@ -148,9 +148,9 @@ public:
}
void setCachePressureForTest(int pressure) final {}
void triggerJournalFlush() const final {}
- StatusWith<std::vector<IndexIdentifier>> reconcileCatalogAndIdents(
+ StatusWith<StorageEngine::ReconcileResult> reconcileCatalogAndIdents(
OperationContext* opCtx) final {
- return std::vector<IndexIdentifier>();
+ return ReconcileResult{};
}
Timestamp getAllDurableTimestamp() const final {
return {};
diff --git a/src/mongo/db/catalog/index_builds_manager.cpp b/src/mongo/db/catalog/index_builds_manager.cpp
index 952fb1c05b9..0c85b725664 100644
--- a/src/mongo/db/catalog/index_builds_manager.cpp
+++ b/src/mongo/db/catalog/index_builds_manager.cpp
@@ -109,13 +109,8 @@ Status IndexBuildsManager::setUpIndexBuild(OperationContext* opCtx,
return ex.toStatus();
}
- if (options.forRecovery) {
- log() << "Index build initialized: " << buildUUID << ": " << nss
- << ": indexes: " << indexes.size();
- } else {
- log() << "Index build initialized: " << buildUUID << ": " << nss << " ("
- << collection->uuid() << " ): indexes: " << indexes.size();
- }
+ log() << "Index build initialized: " << buildUUID << ": " << nss << " (" << collection->uuid()
+ << " ): indexes: " << indexes.size();
return Status::OK();
}
diff --git a/src/mongo/db/catalog/index_builds_manager.h b/src/mongo/db/catalog/index_builds_manager.h
index 1843177d5ae..c16b5b57af4 100644
--- a/src/mongo/db/catalog/index_builds_manager.h
+++ b/src/mongo/db/catalog/index_builds_manager.h
@@ -69,7 +69,6 @@ public:
SetupOptions();
IndexConstraints indexConstraints = IndexConstraints::kEnforce;
IndexBuildProtocol protocol = IndexBuildProtocol::kSinglePhase;
- bool forRecovery = false;
};
IndexBuildsManager() = default;
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index ed3e8f7e176..00e4628cb46 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -314,12 +314,30 @@ bool IndexBuildsCoordinator::supportsTwoPhaseIndexBuild() const {
return true;
}
-StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::startIndexRebuildForRecovery(
+StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::rebuildIndexesForRecovery(
OperationContext* opCtx,
const NamespaceString& nss,
const std::vector<BSONObj>& specs,
const UUID& buildUUID) {
- // Index builds in recovery mode have the global write lock.
+
+ const auto protocol = IndexBuildProtocol::kSinglePhase;
+ auto status = _startIndexBuildForRecovery(opCtx, nss, specs, buildUUID, protocol);
+ if (!status.isOK()) {
+ return status;
+ }
+
+ auto& collectionCatalog = CollectionCatalog::get(getGlobalServiceContext());
+ Collection* collection = collectionCatalog.lookupCollectionByNamespace(nss);
+
+ // Complete the index build.
+ return _runIndexRebuildForRecovery(opCtx, collection, buildUUID);
+}
+
+Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const std::vector<BSONObj>& specs,
+ const UUID& buildUUID,
+ IndexBuildProtocol protocol) {
invariant(opCtx->lockState()->isW());
std::vector<std::string> indexNames;
@@ -334,53 +352,57 @@ StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::startIndexRe
indexNames.push_back(name);
}
- ReplIndexBuildState::IndexCatalogStats indexCatalogStats;
-
auto& collectionCatalog = CollectionCatalog::get(getGlobalServiceContext());
Collection* collection = collectionCatalog.lookupCollectionByNamespace(nss);
auto indexCatalog = collection->getIndexCatalog();
{
// These steps are combined into a single WUOW to ensure there are no commits without
// the indexes.
- // 1) Drop all indexes.
- // 2) Re-create the Collection.
- // 3) Start the index build process.
+ // 1) Drop all unfinished indexes.
+ // 2) Start, but do not complete the index build process.
WriteUnitOfWork wuow(opCtx);
- // 1
- {
- for (size_t i = 0; i < indexNames.size(); i++) {
- auto descriptor = indexCatalog->findIndexByName(opCtx, indexNames[i], false);
- if (!descriptor) {
- // If it's unfinished index, drop it directly via removeIndex.
- Status status = DurableCatalog::get(opCtx)->removeIndex(
- opCtx, collection->getCatalogId(), indexNames[i]);
- continue;
- }
- Status s = indexCatalog->dropIndex(opCtx, descriptor);
- if (!s.isOK()) {
- return s;
+ for (size_t i = 0; i < indexNames.size(); i++) {
+ const bool includeUnfinished = false;
+ auto descriptor =
+ indexCatalog->findIndexByName(opCtx, indexNames[i], includeUnfinished);
+ if (!descriptor) {
+ const auto durableBuildUUID = DurableCatalog::get(opCtx)->getIndexBuildUUID(
+ opCtx, collection->getCatalogId(), indexNames[i]);
+
+ // A build UUID is present if and only if we are rebuilding a two-phase build.
+ invariant((protocol == IndexBuildProtocol::kTwoPhase) ==
+ durableBuildUUID.is_initialized());
+ // When a buildUUID is present, it must match the build UUID parameter to this
+ // function.
+ invariant(!durableBuildUUID || *durableBuildUUID == buildUUID,
+ str::stream() << "durable build UUID: " << durableBuildUUID
+ << "buildUUID: " << buildUUID);
+
+ // If it's unfinished index, drop it directly via removeIndex.
+ Status status = DurableCatalog::get(opCtx)->removeIndex(
+ opCtx, collection->getCatalogId(), indexNames[i]);
+ if (!status.isOK()) {
+ return status;
}
+ continue;
+ }
+ Status s = indexCatalog->dropIndex(opCtx, descriptor);
+ if (!s.isOK()) {
+ return s;
}
}
- // We need to initialize the collection to drop and rebuild the indexes.
+ // We need to initialize the collection to rebuild the indexes.
collection->init(opCtx);
- // Register the index build. During recovery, collections may not have UUIDs present yet to
- // due upgrading. We don't require collection UUIDs during recovery except to create a
- // ReplIndexBuildState object.
- auto collectionUUID = UUID::gen();
auto dbName = nss.db().toString();
-
- // We run the index build using the single phase protocol as we already hold the global
- // write lock.
auto replIndexBuildState =
std::make_shared<ReplIndexBuildState>(buildUUID,
- collectionUUID,
+ collection->uuid(),
dbName,
specs,
- IndexBuildProtocol::kSinglePhase,
+ protocol,
/*commitQuorum=*/boost::none);
Status status = [&]() {
@@ -391,12 +413,7 @@ StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::startIndexRe
return status;
}
- // Setup the index build.
- indexCatalogStats.numIndexesBefore =
- _getNumIndexesTotal(opCtx, collection) + indexNames.size();
-
IndexBuildsManager::SetupOptions options;
- options.forRecovery = true;
status = _indexBuildsManager.setUpIndexBuild(
opCtx, collection, specs, buildUUID, MultiIndexBlock::kNoopOnInitFn, options);
if (!status.isOK()) {
@@ -408,7 +425,7 @@ StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::startIndexRe
wuow.commit();
}
- return _runIndexRebuildForRecovery(opCtx, collection, indexCatalogStats, buildUUID);
+ return Status::OK();
}
void IndexBuildsCoordinator::joinIndexBuild(OperationContext* opCtx, const UUID& buildUUID) {
@@ -526,7 +543,10 @@ void IndexBuildsCoordinator::onStepUp(OperationContext* opCtx) {
void IndexBuildsCoordinator::onRollback(OperationContext* opCtx) {
log() << "IndexBuildsCoordinator::onRollback - this node is entering the rollback state";
auto indexBuilds = _getIndexBuilds();
- auto onIndexBuild = [](std::shared_ptr<ReplIndexBuildState> replState) {
+ auto onIndexBuild = [this](std::shared_ptr<ReplIndexBuildState> replState) {
+ const std::string reason = "rollback";
+ _indexBuildsManager.abortIndexBuild(replState->buildUUID, reason);
+
stdx::unique_lock<Latch> lk(replState->mutex);
if (!replState->aborted) {
// Leave abort timestamp as null. This will unblock the index build and allow it to
@@ -535,15 +555,42 @@ void IndexBuildsCoordinator::onRollback(OperationContext* opCtx) {
invariant(replState->abortTimestamp.isNull(), replState->buildUUID.toString());
invariant(!replState->aborted, replState->buildUUID.toString());
replState->aborted = true;
- replState->abortReason = "rollback";
+ replState->abortReason = reason;
replState->condVar.notify_all();
}
};
forEachIndexBuild(indexBuilds, "IndexBuildsCoordinator::onRollback - "_sd, onIndexBuild);
}
-void IndexBuildsCoordinator::recoverIndexBuilds() {
- // TODO: not yet implemented.
+void IndexBuildsCoordinator::restartIndexBuildsForRecovery(
+ OperationContext* opCtx,
+ const std::map<UUID, StorageEngine::IndexBuildToRestart>& buildsToRestart) {
+ for (auto& [buildUUID, build] : buildsToRestart) {
+ boost::optional<NamespaceString> nss =
+ CollectionCatalog::get(opCtx).lookupNSSByUUID(build.collUUID);
+ invariant(nss);
+
+ log() << "Restarting index build for collection: " << *nss
+ << ", collection UUID: " << build.collUUID << ", index build UUID: " << buildUUID;
+
+ IndexBuildsCoordinator::IndexBuildOptions indexBuildOptions;
+ // Start the index build as if in secondary oplog application.
+ indexBuildOptions.replSetAndNotPrimaryAtStart = true;
+        // Indicate that the initialization should not generate oplog entries or timestamps for the
+ // first catalog write, and that the original durable catalog entries should be dropped and
+ // replaced.
+ indexBuildOptions.twoPhaseRecovery = true;
+ // This spawns a new thread and returns immediately. These index builds will start and wait
+ // for a commit or abort to be replicated.
+ MONGO_COMPILER_VARIABLE_UNUSED auto fut =
+ uassertStatusOK(startIndexBuild(opCtx,
+ nss->db(),
+ build.collUUID,
+ build.indexSpecs,
+ buildUUID,
+ IndexBuildProtocol::kTwoPhase,
+ indexBuildOptions));
+ }
}
int IndexBuildsCoordinator::numInProgForDb(StringData db) const {
@@ -873,6 +920,25 @@ void IndexBuildsCoordinator::_unregisterIndexBuild(
invariant(_allIndexBuilds.erase(replIndexBuildState->buildUUID));
}
+Status IndexBuildsCoordinator::_registerAndSetUpIndexBuildForTwoPhaseRecovery(
+ OperationContext* opCtx,
+ StringData dbName,
+ CollectionUUID collectionUUID,
+ const std::vector<BSONObj>& specs,
+ const UUID& buildUUID) {
+ NamespaceStringOrUUID nssOrUuid{dbName.toString(), collectionUUID};
+
+ // Don't use the AutoGet helpers because they require an open database, which may not be the
+    // case when an index build is restarted during recovery.
+ Lock::DBLock dbLock(opCtx, dbName, MODE_IX);
+ Lock::CollectionLock collLock(opCtx, nssOrUuid, MODE_X);
+ auto collection = CollectionCatalog::get(opCtx).lookupCollectionByUUID(collectionUUID);
+ invariant(collection);
+ const auto& nss = collection->ns();
+ const auto protocol = IndexBuildProtocol::kTwoPhase;
+ return _startIndexBuildForRecovery(opCtx, nss, specs, buildUUID, protocol);
+}
+
StatusWith<boost::optional<SharedSemiFuture<ReplIndexBuildState::IndexCatalogStats>>>
IndexBuildsCoordinator::_registerAndSetUpIndexBuild(
OperationContext* opCtx,
@@ -931,35 +997,13 @@ IndexBuildsCoordinator::_registerAndSetUpIndexBuild(
}
MultiIndexBlock::OnInitFn onInitFn;
- // Two-phase index builds write a different oplog entry than the default behavior which
- // writes a no-op just to generate an optime.
if (serverGlobalParams.featureCompatibility.isVersionInitialized() &&
serverGlobalParams.featureCompatibility.getVersion() ==
ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo44) {
- onInitFn = [&](std::vector<BSONObj>& specs) {
- // TODO (SERVER-40807): disabling the following code for the v4.2 release so it does not
- // have downstream impact.
- /*
- // Only the primary node writes an index build entry to the collection as the
- // secondaries will replicate it.
- if (replCoord->canAcceptWritesFor(opCtx, nss)) {
- invariant(replIndexBuildState->commitQuorum);
- std::vector<std::string> indexNames;
- for (const auto& spec : specs) {
- indexNames.push_back(spec.getStringField(IndexDescriptor::kIndexNameFieldName));
- }
-
- IndexBuildEntry entry(replIndexBuildState->buildUUID,
- *collection->uuid(),
- *replIndexBuildState->commitQuorum,
- indexNames);
- Status status = addIndexBuildEntry(opCtx, entry);
- if (!status.isOK()) {
- return status;
- }
- }
- */
+ // Two-phase index builds write a different oplog entry than the default behavior which
+ // writes a no-op just to generate an optime.
+ onInitFn = [&](std::vector<BSONObj>& specs) {
opCtx->getServiceContext()->getOpObserver()->onStartIndexBuild(
opCtx,
nss,
@@ -980,6 +1024,7 @@ IndexBuildsCoordinator::_registerAndSetUpIndexBuild(
? IndexBuildsManager::IndexConstraints::kRelax
: IndexBuildsManager::IndexConstraints::kEnforce;
options.protocol = protocol;
+
status = _indexBuildsManager.setUpIndexBuild(
opCtx, collection, filteredSpecs, replIndexBuildState->buildUUID, onInitFn, options);
@@ -1536,10 +1581,7 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesAndCommit(
}
StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::_runIndexRebuildForRecovery(
- OperationContext* opCtx,
- Collection* collection,
- ReplIndexBuildState::IndexCatalogStats& indexCatalogStats,
- const UUID& buildUUID) noexcept {
+ OperationContext* opCtx, Collection* collection, const UUID& buildUUID) noexcept {
// Index builds in recovery mode have the global write lock.
invariant(opCtx->lockState()->isW());
@@ -1555,6 +1597,9 @@ StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::_runIndexReb
long long numRecords = 0;
long long dataSize = 0;
+ ReplIndexBuildState::IndexCatalogStats indexCatalogStats;
+ indexCatalogStats.numIndexesBefore = _getNumIndexesTotal(opCtx, collection);
+
try {
log() << "Index builds manager starting: " << buildUUID << ": " << nss;
diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h
index 76ed8f74945..9183a27795f 100644
--- a/src/mongo/db/index_builds_coordinator.h
+++ b/src/mongo/db/index_builds_coordinator.h
@@ -75,6 +75,7 @@ public:
struct IndexBuildOptions {
boost::optional<CommitQuorumOptions> commitQuorum;
bool replSetAndNotPrimaryAtStart = false;
+ bool twoPhaseRecovery = false;
};
/**
@@ -125,14 +126,22 @@ public:
IndexBuildOptions indexBuildOptions) = 0;
/**
- * Sets up the in-memory and persisted state of the index build.
- *
- * This function should only be called when in recovery mode, because we create new Collection
- * objects and replace old ones after dropping existing indexes.
+     * Given a map of two-phase index builds, start, but do not complete each one in a background
+     * thread. Each index build will wait for a replicated commit or abort, as in steady-state
+ * replication.
+ */
+ void restartIndexBuildsForRecovery(
+ OperationContext* opCtx,
+ const std::map<UUID, StorageEngine::IndexBuildToRestart>& buildsToRestart);
+
+ /**
+ * Runs the full index rebuild for recovery. This will only rebuild single-phase index builds.
+ * Rebuilding an index in recovery mode verifies each document to ensure that it is a valid
+ * BSON object. It will remove any documents with invalid BSON.
*
* Returns the number of records and the size of the data iterated over, if successful.
*/
- StatusWith<std::pair<long long, long long>> startIndexRebuildForRecovery(
+ StatusWith<std::pair<long long, long long>> rebuildIndexesForRecovery(
OperationContext* opCtx,
const NamespaceString& nss,
const std::vector<BSONObj>& specs,
@@ -370,6 +379,18 @@ private:
*/
Status _registerIndexBuild(WithLock, std::shared_ptr<ReplIndexBuildState> replIndexBuildState);
+ /**
+ * Sets up the in-memory and persisted state of the index build.
+ *
+ * This function should only be called when in recovery mode, because we drop and replace
+ * existing indexes in a single WriteUnitOfWork.
+ */
+ Status _startIndexBuildForRecovery(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const std::vector<BSONObj>& specs,
+ const UUID& buildUUID,
+ IndexBuildProtocol protocol);
+
protected:
/**
* Unregisters the index build.
@@ -395,6 +416,16 @@ protected:
boost::optional<CommitQuorumOptions> commitQuorum);
/**
+ * Sets up the in-memory and persisted state of the index build for two-phase recovery.
+ *
+ * Helper function for startIndexBuild during the two-phase index build recovery process.
+ */
+ Status _registerAndSetUpIndexBuildForTwoPhaseRecovery(OperationContext* opCtx,
+ StringData dbName,
+ CollectionUUID collectionUUID,
+ const std::vector<BSONObj>& specs,
+ const UUID& buildUUID);
+ /**
* Runs the index build on the caller thread. Handles unregistering the index build and setting
* the index build's Promise with the outcome of the index build.
* 'IndexBuildOptios::replSetAndNotPrimary' is determined at the start of the index build.
@@ -526,10 +557,7 @@ protected:
* Returns the number of records and the size of the data iterated over, if successful.
*/
StatusWith<std::pair<long long, long long>> _runIndexRebuildForRecovery(
- OperationContext* opCtx,
- Collection* collection,
- ReplIndexBuildState::IndexCatalogStats& indexCatalogStats,
- const UUID& buildUUID) noexcept;
+ OperationContext* opCtx, Collection* collection, const UUID& buildUUID) noexcept;
/**
* Looks up active index build by UUID.
diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp
index 2d205a452c4..bdcb58dcb41 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.cpp
+++ b/src/mongo/db/index_builds_coordinator_mongod.cpp
@@ -106,19 +106,35 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx,
// thread.
stdx::unique_lock<Latch> lk(_startBuildMutex);
- auto statusWithOptionalResult = _registerAndSetUpIndexBuild(
- opCtx, dbName, collectionUUID, specs, buildUUID, protocol, indexBuildOptions.commitQuorum);
- if (!statusWithOptionalResult.isOK()) {
- return statusWithOptionalResult.getStatus();
- }
+ if (indexBuildOptions.twoPhaseRecovery) {
+        // Two phase index build recovery goes through a different set-up procedure because the
+ // original index will be dropped first.
+ invariant(protocol == IndexBuildProtocol::kTwoPhase);
+ auto status = _registerAndSetUpIndexBuildForTwoPhaseRecovery(
+ opCtx, dbName, collectionUUID, specs, buildUUID);
+ if (!status.isOK()) {
+ return status;
+ }
+ } else {
+ auto statusWithOptionalResult = _registerAndSetUpIndexBuild(opCtx,
+ dbName,
+ collectionUUID,
+ specs,
+ buildUUID,
+ protocol,
+ indexBuildOptions.commitQuorum);
+ if (!statusWithOptionalResult.isOK()) {
+ return statusWithOptionalResult.getStatus();
+ }
- if (statusWithOptionalResult.getValue()) {
- // TODO (SERVER-37644): when joining is implemented, the returned Future will no longer
- // always be set.
- invariant(statusWithOptionalResult.getValue()->isReady());
- // The requested index (specs) are already built or are being built. Return success early
- // (this is v4.0 behavior compatible).
- return statusWithOptionalResult.getValue().get();
+ if (statusWithOptionalResult.getValue()) {
+ // TODO (SERVER-37644): when joining is implemented, the returned Future will no longer
+ // always be set.
+ invariant(statusWithOptionalResult.getValue()->isReady());
+ // The requested index (specs) are already built or are being built. Return success
+ // early (this is v4.0 behavior compatible).
+ return statusWithOptionalResult.getValue().get();
+ }
}
auto replState = invariant(_getIndexBuild(buildUUID));
diff --git a/src/mongo/db/repair_database.cpp b/src/mongo/db/repair_database.cpp
index 19e5a8fe114..ced21031a10 100644
--- a/src/mongo/db/repair_database.cpp
+++ b/src/mongo/db/repair_database.cpp
@@ -117,8 +117,8 @@ Status rebuildIndexesOnCollection(OperationContext* opCtx,
// Rebuild the indexes provided by 'indexSpecs'.
IndexBuildsCoordinator* indexBuildsCoord = IndexBuildsCoordinator::get(opCtx);
UUID buildUUID = UUID::gen();
- auto swRebuild = indexBuildsCoord->startIndexRebuildForRecovery(
- opCtx, collection->ns(), indexSpecs, buildUUID);
+ auto swRebuild =
+ indexBuildsCoord->rebuildIndexesForRecovery(opCtx, collection->ns(), indexSpecs, buildUUID);
if (!swRebuild.isOK()) {
return swRebuild.getStatus();
}
diff --git a/src/mongo/db/repair_database_and_check_version.cpp b/src/mongo/db/repair_database_and_check_version.cpp
index 1995e67e60a..e5a0f51b588 100644
--- a/src/mongo/db/repair_database_and_check_version.cpp
+++ b/src/mongo/db/repair_database_and_check_version.cpp
@@ -264,10 +264,11 @@ void checkForCappedOplog(OperationContext* opCtx, Database* db) {
}
void rebuildIndexes(OperationContext* opCtx, StorageEngine* storageEngine) {
- std::vector<StorageEngine::IndexIdentifier> indexesToRebuild =
- fassert(40593, storageEngine->reconcileCatalogAndIdents(opCtx));
+ auto reconcileResult = fassert(40593, storageEngine->reconcileCatalogAndIdents(opCtx));
+ // TODO (SERVER-44467): Restart two-phase index builds during startup
+ invariant(reconcileResult.indexBuildsToRestart.empty());
- if (!indexesToRebuild.empty() && serverGlobalParams.indexBuildRetry) {
+ if (!reconcileResult.indexesToRebuild.empty() && serverGlobalParams.indexBuildRetry) {
log() << "note: restart the server with --noIndexBuildRetry "
<< "to skip index rebuilds";
}
@@ -280,7 +281,7 @@ void rebuildIndexes(OperationContext* opCtx, StorageEngine* storageEngine) {
// Determine which indexes need to be rebuilt. rebuildIndexesOnCollection() requires that all
// indexes on that collection are done at once, so we use a map to group them together.
StringMap<IndexNameObjs> nsToIndexNameObjMap;
- for (auto&& idxIdentifier : indexesToRebuild) {
+ for (auto&& idxIdentifier : reconcileResult.indexesToRebuild) {
NamespaceString collNss = idxIdentifier.nss;
const std::string& indexName = idxIdentifier.indexName;
auto swIndexSpecs =
diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp
index ca25a04134c..24c6e1e8b63 100644
--- a/src/mongo/db/repl/rollback_impl.cpp
+++ b/src/mongo/db/repl/rollback_impl.cpp
@@ -424,6 +424,9 @@ StatusWith<std::set<NamespaceString>> RollbackImpl::_namespacesForOp(const Oplog
case OplogEntry::CommandType::kDrop:
case OplogEntry::CommandType::kCreateIndexes:
case OplogEntry::CommandType::kDropIndexes:
+ case OplogEntry::CommandType::kStartIndexBuild:
+ case OplogEntry::CommandType::kAbortIndexBuild:
+ case OplogEntry::CommandType::kCommitIndexBuild:
case OplogEntry::CommandType::kCollMod: {
// For all other command types, we should be able to parse the collection name from
// the first command argument.
@@ -435,12 +438,6 @@ StatusWith<std::set<NamespaceString>> RollbackImpl::_namespacesForOp(const Oplog
}
break;
}
- // TODO(SERVER-39451): Ignore no-op startIndexBuild, abortIndexBuild, and
- // commitIndexBuild commands.
- // Revisit when we are ready to implement rollback logic.
- case OplogEntry::CommandType::kStartIndexBuild:
- case OplogEntry::CommandType::kAbortIndexBuild:
- case OplogEntry::CommandType::kCommitIndexBuild:
case OplogEntry::CommandType::kCommitTransaction:
case OplogEntry::CommandType::kAbortTransaction: {
break;
diff --git a/src/mongo/db/storage/kv/storage_engine_test.cpp b/src/mongo/db/storage/kv/storage_engine_test.cpp
index 93423116d78..5fb349a9a6b 100644
--- a/src/mongo/db/storage/kv/storage_engine_test.cpp
+++ b/src/mongo/db/storage/kv/storage_engine_test.cpp
@@ -67,7 +67,11 @@ TEST_F(StorageEngineTest, ReconcileIdentsTest) {
// Create a table in the KVEngine not reflected in the DurableCatalog. This should be dropped
// when reconciling.
ASSERT_OK(createCollTable(opCtx.get(), NamespaceString("db.coll2")));
- ASSERT_OK(reconcile(opCtx.get()).getStatus());
+
+ auto reconcileResult = unittest::assertGet(reconcile(opCtx.get()));
+ ASSERT_EQUALS(0UL, reconcileResult.indexesToRebuild.size());
+ ASSERT_EQUALS(0UL, reconcileResult.indexBuildsToRestart.size());
+
auto identsVec = getAllKVEngineIdents(opCtx.get());
auto idents = std::set<std::string>(identsVec.begin(), identsVec.end());
// There are two idents. `_mdb_catalog` and the ident for `db.coll1`.
@@ -80,10 +84,11 @@ TEST_F(StorageEngineTest, ReconcileIdentsTest) {
opCtx.get(), NamespaceString("db.coll1"), "_id", false /* isBackgroundSecondaryBuild */));
ASSERT_OK(dropIndexTable(opCtx.get(), NamespaceString("db.coll1"), "_id"));
// The reconcile response should include this index as needing to be rebuilt.
- auto reconcileStatus = reconcile(opCtx.get());
- ASSERT_OK(reconcileStatus.getStatus());
- ASSERT_EQUALS(static_cast<const unsigned long>(1), reconcileStatus.getValue().size());
- StorageEngine::IndexIdentifier& toRebuild = reconcileStatus.getValue()[0];
+ reconcileResult = unittest::assertGet(reconcile(opCtx.get()));
+ ASSERT_EQUALS(1UL, reconcileResult.indexesToRebuild.size());
+ ASSERT_EQUALS(0UL, reconcileResult.indexBuildsToRestart.size());
+
+ StorageEngine::IndexIdentifier& toRebuild = reconcileResult.indexesToRebuild[0];
ASSERT_EQUALS("db.coll1", toRebuild.nss.ns());
ASSERT_EQUALS("_id", toRebuild.indexName);
@@ -92,7 +97,7 @@ TEST_F(StorageEngineTest, ReconcileIdentsTest) {
ASSERT_EQUALS(static_cast<const unsigned long>(1), getAllKVEngineIdents(opCtx.get()).size());
// Reconciling this should result in an error.
- reconcileStatus = reconcile(opCtx.get());
+ auto reconcileStatus = reconcile(opCtx.get());
ASSERT_NOT_OK(reconcileStatus.getStatus());
ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, reconcileStatus.getStatus());
}
@@ -131,7 +136,9 @@ TEST_F(StorageEngineTest, ReconcileDropsTemporary) {
ASSERT(identExists(opCtx.get(), ident));
- ASSERT_OK(reconcile(opCtx.get()).getStatus());
+ auto reconcileResult = unittest::assertGet(reconcile(opCtx.get()));
+ ASSERT_EQUALS(0UL, reconcileResult.indexesToRebuild.size());
+ ASSERT_EQUALS(0UL, reconcileResult.indexBuildsToRestart.size());
// The storage engine is responsible for dropping its temporary idents.
ASSERT(!identExists(opCtx.get(), ident));
@@ -159,7 +166,7 @@ TEST_F(StorageEngineTest, TemporaryDropsItself) {
ASSERT(!identExists(opCtx.get(), ident));
}
-TEST_F(StorageEngineTest, ReconcileDoesNotDropIndexBuildTempTables) {
+TEST_F(StorageEngineTest, ReconcileUnfinishedIndex) {
auto opCtx = cc().makeOperationContext();
Lock::GlobalLock lk(&*opCtx, MODE_IS);
@@ -170,38 +177,29 @@ TEST_F(StorageEngineTest, ReconcileDoesNotDropIndexBuildTempTables) {
auto swCollInfo = createCollection(opCtx.get(), ns);
ASSERT_OK(swCollInfo.getStatus());
- const bool isBackgroundSecondaryBuild = false;
- ASSERT_OK(startIndexBuild(opCtx.get(), ns, indexName, isBackgroundSecondaryBuild));
- auto sideWrites = makeTemporary(opCtx.get());
- auto constraintViolations = makeTemporary(opCtx.get());
+ // Start a non-backgroundSecondary single-phase (i.e. no build UUID) index.
+ const bool isBackgroundSecondaryBuild = false;
+ const boost::optional<UUID> buildUUID = boost::none;
+ ASSERT_OK(startIndexBuild(opCtx.get(), ns, indexName, isBackgroundSecondaryBuild, buildUUID));
const auto indexIdent = _storageEngine->getCatalog()->getIndexIdent(
opCtx.get(), swCollInfo.getValue().catalogId, indexName);
- indexBuildScan(opCtx.get(),
- ns,
- indexName,
- sideWrites->rs()->getIdent(),
- constraintViolations->rs()->getIdent());
- indexBuildDrain(opCtx.get(), ns, indexName);
+ auto reconcileResult = unittest::assertGet(reconcile(opCtx.get()));
- auto reconcileStatus = reconcile(opCtx.get());
- ASSERT_OK(reconcileStatus.getStatus());
+ // Reconcile should have dropped the ident to allow the index to be rebuilt.
ASSERT(!identExists(opCtx.get(), indexIdent));
- // Because this non-backgroundSecondary index is unfinished, reconcile will drop the index.
- ASSERT_EQUALS(0UL, reconcileStatus.getValue().size());
-
- // The owning index was dropped, and so should its temporary tables.
- ASSERT(!identExists(opCtx.get(), sideWrites->rs()->getIdent()));
- ASSERT(!identExists(opCtx.get(), constraintViolations->rs()->getIdent()));
+ // Because this non-backgroundSecondary index is unfinished, reconcile will drop the index and
+ // not require it to be rebuilt.
+ ASSERT_EQUALS(0UL, reconcileResult.indexesToRebuild.size());
- sideWrites->deleteTemporaryTable(opCtx.get());
- constraintViolations->deleteTemporaryTable(opCtx.get());
+ // There are no two-phase builds to restart.
+ ASSERT_EQUALS(0UL, reconcileResult.indexBuildsToRestart.size());
}
-TEST_F(StorageEngineTest, ReconcileDoesNotDropIndexBuildTempTablesBackgroundSecondary) {
+TEST_F(StorageEngineTest, ReconcileUnfinishedBackgroundSecondaryIndex) {
auto opCtx = cc().makeOperationContext();
Lock::GlobalLock lk(&*opCtx, MODE_IS);
@@ -212,40 +210,79 @@ TEST_F(StorageEngineTest, ReconcileDoesNotDropIndexBuildTempTablesBackgroundSeco
auto swCollInfo = createCollection(opCtx.get(), ns);
ASSERT_OK(swCollInfo.getStatus());
+ // Start a backgroundSecondary single-phase (i.e. no build UUID) index.
const bool isBackgroundSecondaryBuild = true;
- ASSERT_OK(startIndexBuild(opCtx.get(), ns, indexName, isBackgroundSecondaryBuild));
-
- auto sideWrites = makeTemporary(opCtx.get());
- auto constraintViolations = makeTemporary(opCtx.get());
+ const boost::optional<UUID> buildUUID = boost::none;
+ ASSERT_OK(startIndexBuild(opCtx.get(), ns, indexName, isBackgroundSecondaryBuild, buildUUID));
const auto indexIdent = _storageEngine->getCatalog()->getIndexIdent(
opCtx.get(), swCollInfo.getValue().catalogId, indexName);
- indexBuildScan(opCtx.get(),
- ns,
- indexName,
- sideWrites->rs()->getIdent(),
- constraintViolations->rs()->getIdent());
- indexBuildDrain(opCtx.get(), ns, indexName);
+ auto reconcileResult = unittest::assertGet(reconcile(opCtx.get()));
- auto reconcileStatus = reconcile(opCtx.get());
- ASSERT_OK(reconcileStatus.getStatus());
+ // Reconcile should not have dropped the ident because it expects the caller will drop and
+ // rebuild the index.
ASSERT(identExists(opCtx.get(), indexIdent));
- // Because this backgroundSecondary index is unfinished, reconcile will identify that it should
- // be rebuilt.
- ASSERT_EQUALS(1UL, reconcileStatus.getValue().size());
- StorageEngine::IndexIdentifier& toRebuild = reconcileStatus.getValue()[0];
- ASSERT_EQUALS(ns, toRebuild.nss);
+ // Because this backgroundSecondary index is unfinished, reconcile will leave the ident in
+ // place and require the index to be rebuilt.
+ ASSERT_EQUALS(1UL, reconcileResult.indexesToRebuild.size());
+ StorageEngine::IndexIdentifier& toRebuild = reconcileResult.indexesToRebuild[0];
+ ASSERT_EQUALS(ns.ns(), toRebuild.nss.ns());
ASSERT_EQUALS(indexName, toRebuild.indexName);
- // Because these temporary idents were associated with an in-progress index build, they are not
- // dropped.
- ASSERT(identExists(opCtx.get(), sideWrites->rs()->getIdent()));
- ASSERT(identExists(opCtx.get(), constraintViolations->rs()->getIdent()));
+ // There are no two-phase builds to restart.
+ ASSERT_EQUALS(0UL, reconcileResult.indexBuildsToRestart.size());
+}
+
+TEST_F(StorageEngineTest, ReconcileTwoPhaseIndexBuilds) {
+ auto opCtx = cc().makeOperationContext();
+
+ Lock::GlobalLock lk(&*opCtx, MODE_IS);
+
+ const NamespaceString ns("db.coll1");
+ const std::string indexA("a_1");
+ const std::string indexB("b_1");
+
+ auto swCollInfo = createCollection(opCtx.get(), ns);
+ ASSERT_OK(swCollInfo.getStatus());
+
+ // Using a build UUID implies that this index build is two-phase, so the isBackgroundSecondary
+ // field will be ignored. There is no special behavior on primaries or secondaries.
+ auto buildUUID = UUID::gen();
+ const bool isBackgroundSecondaryBuild = false;
- sideWrites->deleteTemporaryTable(opCtx.get());
- constraintViolations->deleteTemporaryTable(opCtx.get());
+ // Start two indexes with the same buildUUID to simulate building multiple indexes within the
+ // same build.
+ ASSERT_OK(startIndexBuild(opCtx.get(), ns, indexA, isBackgroundSecondaryBuild, buildUUID));
+ ASSERT_OK(startIndexBuild(opCtx.get(), ns, indexB, isBackgroundSecondaryBuild, buildUUID));
+
+ const auto indexIdentA = _storageEngine->getCatalog()->getIndexIdent(
+ opCtx.get(), swCollInfo.getValue().catalogId, indexA);
+ const auto indexIdentB = _storageEngine->getCatalog()->getIndexIdent(
+ opCtx.get(), swCollInfo.getValue().catalogId, indexB);
+
+ auto reconcileResult = unittest::assertGet(reconcile(opCtx.get()));
+
+ // Reconcile should not have dropped the ident to allow the restarted index build to do so
+ // transactionally with the start.
+ ASSERT(identExists(opCtx.get(), indexIdentA));
+ ASSERT(identExists(opCtx.get(), indexIdentB));
+
+ // Because this is an unfinished two-phase index build, reconcile will not require this index to
+ // be rebuilt to completion, rather restarted.
+ ASSERT_EQUALS(0UL, reconcileResult.indexesToRebuild.size());
+
+ // Only one index build should be indicated as needing to be restarted.
+ ASSERT_EQUALS(1UL, reconcileResult.indexBuildsToRestart.size());
+ auto& [toRestartBuildUUID, toRestart] = *reconcileResult.indexBuildsToRestart.begin();
+ ASSERT_EQ(buildUUID, toRestartBuildUUID);
+
+ // Both specs should be listed within the same build.
+ auto& specs = toRestart.indexSpecs;
+ ASSERT_EQ(2UL, specs.size());
+ ASSERT_EQ(indexA, specs[0]["name"].str());
+ ASSERT_EQ(indexB, specs[1]["name"].str());
}
TEST_F(StorageEngineRepairTest, LoadCatalogRecoversOrphans) {
@@ -284,7 +321,9 @@ TEST_F(StorageEngineRepairTest, ReconcileSucceeds) {
// Reconcile would normally return an error if a collection existed with a missing ident in the
// storage engine. When in a repair context, that should not be the case.
- ASSERT_OK(reconcile(opCtx.get()).getStatus());
+ auto reconcileResult = unittest::assertGet(reconcile(opCtx.get()));
+ ASSERT_EQUALS(0UL, reconcileResult.indexesToRebuild.size());
+ ASSERT_EQUALS(0UL, reconcileResult.indexBuildsToRestart.size());
ASSERT(!identExists(opCtx.get(), swCollInfo.getValue().ident));
ASSERT(collectionExists(opCtx.get(), collNs));
@@ -339,7 +378,9 @@ TEST_F(StorageEngineTest, LoadCatalogDropsOrphans) {
// orphaned idents.
_storageEngine->loadCatalog(opCtx.get());
// reconcileCatalogAndIdents() drops orphaned idents.
- ASSERT_OK(reconcile(opCtx.get()).getStatus());
+ auto reconcileResult = unittest::assertGet(reconcile(opCtx.get()));
+ ASSERT_EQUALS(0UL, reconcileResult.indexesToRebuild.size());
+ ASSERT_EQUALS(0UL, reconcileResult.indexBuildsToRestart.size());
ASSERT(!identExists(opCtx.get(), swCollInfo.getValue().ident));
auto identNs = swCollInfo.getValue().ident;
diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h
index 6b156ce3de6..cdf21fd3837 100644
--- a/src/mongo/db/storage/storage_engine.h
+++ b/src/mongo/db/storage/storage_engine.h
@@ -497,11 +497,37 @@ public:
};
/**
- * Drop abandoned idents. In the successful case, returns a list of collection, index name
- * pairs to rebuild.
+ * Describes an index build on a collection that should be restarted.
*/
- virtual StatusWith<std::vector<IndexIdentifier>> reconcileCatalogAndIdents(
- OperationContext* opCtx) = 0;
+ struct IndexBuildToRestart {
+ IndexBuildToRestart(UUID collUUID) : collUUID(collUUID) {}
+
+ // Collection UUID.
+ const UUID collUUID;
+
+ // Index specs for the build.
+ std::vector<BSONObj> indexSpecs;
+ };
+
+ /*
+ * ReconcileResult is the result of reconciling abandoned storage engine idents and unfinished
+ * index builds.
+ */
+ struct ReconcileResult {
+ // A list of IndexIdentifiers that must be rebuilt to completion.
+ std::vector<IndexIdentifier> indexesToRebuild;
+
+ // A map of unfinished two-phase indexes that must be restarted in the background, but
+ // not to completion; they will wait for replicated commit or abort operations. This is a
+ // mapping from index build UUID to index build.
+ std::map<UUID, IndexBuildToRestart> indexBuildsToRestart;
+ };
+
+ /**
+ * Drop abandoned idents. If successful, returns a ReconcileResult with indexes that need to be
+ * rebuilt or builds that need to be restarted.
+ */
+ virtual StatusWith<ReconcileResult> reconcileCatalogAndIdents(OperationContext* opCtx) = 0;
/**
* Returns the all_durable timestamp. All transactions with timestamps earlier than the
diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp
index f7b203741a6..213c3c50937 100644
--- a/src/mongo/db/storage/storage_engine_impl.cpp
+++ b/src/mongo/db/storage/storage_engine_impl.cpp
@@ -40,6 +40,7 @@
#include "mongo/db/catalog/collection_catalog_helper.h"
#include "mongo/db/client.h"
#include "mongo/db/concurrency/d_concurrency.h"
+#include "mongo/db/index_builds_coordinator.h"
#include "mongo/db/logical_clock.h"
#include "mongo/db/operation_context_noop.h"
#include "mongo/db/server_options.h"
@@ -317,8 +318,8 @@ Status StorageEngineImpl::_recoverOrphanedCollection(OperationContext* opCtx,
* Third, a DurableCatalog may have an index ident that the KVEngine does not. This method will
* rebuild the index.
*/
-StatusWith<std::vector<StorageEngine::IndexIdentifier>>
-StorageEngineImpl::reconcileCatalogAndIdents(OperationContext* opCtx) {
+StatusWith<StorageEngine::ReconcileResult> StorageEngineImpl::reconcileCatalogAndIdents(
+ OperationContext* opCtx) {
// Gather all tables known to the storage engine and drop those that aren't cross-referenced
// in the _mdb_catalog. This can happen for two reasons.
//
@@ -405,7 +406,7 @@ StorageEngineImpl::reconcileCatalogAndIdents(OperationContext* opCtx) {
//
// Also, remove unfinished builds except those that were background index builds started on a
// secondary.
- std::vector<StorageEngine::IndexIdentifier> ret;
+ StorageEngine::ReconcileResult ret;
for (DurableCatalog::Entry entry : catalogEntries) {
BSONCollectionCatalogEntry::MetaData metaData =
_catalog->getMetaData(opCtx, entry.catalogId);
@@ -440,7 +441,32 @@ StorageEngineImpl::reconcileCatalogAndIdents(OperationContext* opCtx) {
if (indexMetaData.ready && !foundIdent) {
log() << "Expected index data is missing, rebuilding. Collection: " << coll
<< " Index: " << indexName;
- ret.push_back({entry.catalogId, coll, indexName});
+ ret.indexesToRebuild.push_back({entry.catalogId, coll, indexName});
+ continue;
+ }
+
+ // Any index build with a UUID is an unfinished two-phase build and must be restarted.
+ // There are no special cases to handle on primaries or secondaries. An index build may
+ // be associated with multiple indexes.
+ if (indexMetaData.buildUUID) {
+ invariant(!indexMetaData.ready);
+ invariant(indexMetaData.runTwoPhaseBuild);
+
+ auto collUUID = metaData.options.uuid;
+ invariant(collUUID);
+ auto buildUUID = *indexMetaData.buildUUID;
+
+ log() << "Found index from unfinished build. Collection: " << coll << "("
+ << *collUUID << "), index: " << indexName << ", build UUID: " << buildUUID;
+
+ // Insert in the map if a build has not already been registered.
+ auto existingIt = ret.indexBuildsToRestart.find(buildUUID);
+ if (existingIt == ret.indexBuildsToRestart.end()) {
+ ret.indexBuildsToRestart.insert({buildUUID, IndexBuildToRestart(*collUUID)});
+ existingIt = ret.indexBuildsToRestart.find(buildUUID);
+ }
+
+ existingIt->second.indexSpecs.emplace_back(indexMetaData.spec);
continue;
}
@@ -486,7 +512,7 @@ StorageEngineImpl::reconcileCatalogAndIdents(OperationContext* opCtx) {
log() << "Expected background index build did not complete, rebuilding. "
"Collection: "
<< coll << " Index: " << indexName;
- ret.push_back({entry.catalogId, coll, indexName});
+ ret.indexesToRebuild.push_back({entry.catalogId, coll, indexName});
continue;
}
diff --git a/src/mongo/db/storage/storage_engine_impl.h b/src/mongo/db/storage/storage_engine_impl.h
index 3dda7d220f6..96bbde222be 100644
--- a/src/mongo/db/storage/storage_engine_impl.h
+++ b/src/mongo/db/storage/storage_engine_impl.h
@@ -344,11 +344,7 @@ public:
return _engine->isInIndividuallyCheckpointedIndexesList(ident);
}
- /**
- * Drop abandoned idents. Returns a list of indexes to rebuild.
- */
- StatusWith<std::vector<StorageEngine::IndexIdentifier>> reconcileCatalogAndIdents(
- OperationContext* opCtx) override;
+ StatusWith<ReconcileResult> reconcileCatalogAndIdents(OperationContext* opCtx) override;
std::string getFilesystemPathForDb(const std::string& dbName) const override;
diff --git a/src/mongo/db/storage/storage_engine_test_fixture.h b/src/mongo/db/storage/storage_engine_test_fixture.h
index 22945fc184e..ffda8356902 100644
--- a/src/mongo/db/storage/storage_engine_test_fixture.h
+++ b/src/mongo/db/storage/storage_engine_test_fixture.h
@@ -88,7 +88,7 @@ public:
return _storageEngine->getEngine()->dropIdent(opCtx, ident);
}
- StatusWith<std::vector<StorageEngine::IndexIdentifier>> reconcile(OperationContext* opCtx) {
+ StatusWith<StorageEngine::ReconcileResult> reconcile(OperationContext* opCtx) {
return _storageEngine->reconcileCatalogAndIdents(opCtx);
}
@@ -116,7 +116,8 @@ public:
NamespaceString collNs,
std::string key,
bool isBackgroundSecondaryBuild) {
- auto ret = startIndexBuild(opCtx, collNs, key, isBackgroundSecondaryBuild);
+ auto buildUUID = UUID::gen();
+ auto ret = startIndexBuild(opCtx, collNs, key, isBackgroundSecondaryBuild, buildUUID);
if (!ret.isOK()) {
return ret;
}
@@ -128,7 +129,8 @@ public:
Status startIndexBuild(OperationContext* opCtx,
NamespaceString collNs,
std::string key,
- bool isBackgroundSecondaryBuild) {
+ bool isBackgroundSecondaryBuild,
+ boost::optional<UUID> buildUUID) {
BSONObjBuilder builder;
{
BSONObjBuilder keyObj;
@@ -143,26 +145,11 @@ public:
auto ret = DurableCatalog::get(opCtx)->prepareForIndexBuild(opCtx,
collection->getCatalogId(),
descriptor.get(),
- UUID::gen(),
+ buildUUID,
isBackgroundSecondaryBuild);
return ret;
}
- void indexBuildScan(OperationContext* opCtx,
- NamespaceString collNs,
- std::string key,
- std::string sideWritesIdent,
- std::string constraintViolationsIdent) {
- Collection* collection = CollectionCatalog::get(opCtx).lookupCollectionByNamespace(collNs);
- DurableCatalog::get(opCtx)->setIndexBuildScanning(
- opCtx, collection->getCatalogId(), key, sideWritesIdent, constraintViolationsIdent);
- }
-
- void indexBuildDrain(OperationContext* opCtx, NamespaceString collNs, std::string key) {
- Collection* collection = CollectionCatalog::get(opCtx).lookupCollectionByNamespace(collNs);
- DurableCatalog::get(opCtx)->setIndexBuildDraining(opCtx, collection->getCatalogId(), key);
- }
-
void indexBuildSuccess(OperationContext* opCtx, NamespaceString collNs, std::string key) {
Collection* collection = CollectionCatalog::get(opCtx).lookupCollectionByNamespace(collNs);
DurableCatalog::get(opCtx)->indexBuildSuccess(opCtx, collection->getCatalogId(), key);