summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLouis Williams <louis.williams@mongodb.com>2020-04-10 10:02:34 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-04-10 14:16:22 +0000
commit3d929ed533a72446353b18b5d60770aed33b58f1 (patch)
tree09bd3562511aef88eb6aa1ef3b6c45b8d73c16e9 /src
parent76d4548a751a56c8faf1887114685b540203a650 (diff)
downloadmongo-3d929ed533a72446353b18b5d60770aed33b58f1.tar.gz
SERVER-46560 Make abort index build deterministic
This redesigns user index build abort to have the following behavior: - Take a collection X lock to stop the index build from making progress - If we are no longer primary, return an error - Check whether we can abort the index build (i.e. it is not already committing or aborting) - Delete the index catalog entry and write the abortIndexBuild oplog entry in a WUOW - Interrupt the index builder thread - Wait for the thread to exit - Release locks
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/catalog/drop_collection.cpp42
-rw-r--r--src/mongo/db/catalog/drop_database.cpp14
-rw-r--r--src/mongo/db/catalog/drop_indexes.cpp67
-rw-r--r--src/mongo/db/catalog/index_builds_manager.cpp53
-rw-r--r--src/mongo/db/catalog/index_builds_manager.h36
-rw-r--r--src/mongo/db/catalog/index_builds_manager_test.cpp10
-rw-r--r--src/mongo/db/catalog/multi_index_block.cpp167
-rw-r--r--src/mongo/db/catalog/multi_index_block.h119
-rw-r--r--src/mongo/db/catalog/multi_index_block_test.cpp101
-rw-r--r--src/mongo/db/cloner.cpp10
-rw-r--r--src/mongo/db/commands/create_indexes.cpp88
-rw-r--r--src/mongo/db/commands/drop_indexes.cpp45
-rw-r--r--src/mongo/db/index_builds_coordinator.cpp1023
-rw-r--r--src/mongo/db/index_builds_coordinator.h163
-rw-r--r--src/mongo/db/index_builds_coordinator_mongod.cpp87
-rw-r--r--src/mongo/db/index_builds_coordinator_mongod_test.cpp1
-rw-r--r--src/mongo/db/repair_database_and_check_version.cpp5
-rw-r--r--src/mongo/db/repl/collection_bulk_loader_impl.cpp10
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp19
-rw-r--r--src/mongo/db/repl/rs_rollback_test.cpp18
-rw-r--r--src/mongo/db/repl_index_build_state.h68
-rw-r--r--src/mongo/dbtests/dbtests.cpp5
-rw-r--r--src/mongo/dbtests/indexupdatetests.cpp22
-rw-r--r--src/mongo/dbtests/querytests.cpp5
-rw-r--r--src/mongo/dbtests/storage_timestamp_tests.cpp20
-rw-r--r--src/mongo/dbtests/wildcard_multikey_persistence_test.cpp5
26 files changed, 882 insertions, 1321 deletions
diff --git a/src/mongo/db/catalog/drop_collection.cpp b/src/mongo/db/catalog/drop_collection.cpp
index a0deed258c7..6df38942003 100644
--- a/src/mongo/db/catalog/drop_collection.cpp
+++ b/src/mongo/db/catalog/drop_collection.cpp
@@ -159,21 +159,21 @@ Status _abortIndexBuildsAndDropCollection(OperationContext* opCtx,
const int numIndexes = coll->getIndexCatalog()->numIndexesTotal(opCtx);
while (true) {
- // Send the abort signal to any active index builds on the collection.
- indexBuildsCoord->abortCollectionIndexBuildsNoWait(
- opCtx,
- collectionUUID,
- str::stream() << "Collection " << coll->ns() << "(" << collectionUUID
- << ") is being dropped");
-
- // Now that the abort signals were sent out to the active index builders for this
- // collection, we need to release the lock temporarily to allow those index builders to
- // process the abort signal. Holding a lock here will cause the index builders to block
- // indefinitely.
+ // Save a copy of the namespace before yielding our locks.
+ const NamespaceString collectionNs = coll->ns();
+
+ // Release locks before aborting index builds. The helper will acquire locks on our behalf.
collLock = boost::none;
autoDb = boost::none;
- indexBuildsCoord->awaitNoIndexBuildInProgressForCollection(opCtx, collectionUUID);
+ // Send the abort signal to any active index builds on the collection. This waits until all
+ // aborted index builds complete.
+ indexBuildsCoord->abortCollectionIndexBuilds(opCtx,
+ collectionNs,
+ collectionUUID,
+ str::stream()
+ << "Collection " << collectionNs << "("
+ << collectionUUID << ") is being dropped");
// Take an exclusive lock to finish the collection drop.
autoDb.emplace(opCtx, startingNss.db(), MODE_IX);
@@ -195,24 +195,6 @@ Status _abortIndexBuildsAndDropCollection(OperationContext* opCtx,
if (!abortAgain) {
break;
}
-
- // We only need to hold an intent lock to send an abort signal to the active index
- // builders on this collection.
- collLock = boost::none;
- autoDb = boost::none;
-
- autoDb.emplace(opCtx, startingNss.db(), MODE_IX);
- collLock.emplace(opCtx, dbAndUUID, MODE_IX);
-
- // Abandon the snapshot as the index catalog will compare the in-memory state to the
- // disk state, which may have changed when we released the collection lock temporarily.
- opCtx->recoveryUnit()->abandonSnapshot();
-
- coll = CollectionCatalog::get(opCtx).lookupCollectionByUUID(opCtx, collectionUUID);
- status = _checkNssAndReplState(opCtx, coll);
- if (!status.isOK()) {
- return status;
- }
}
// It's possible for the given collection to be drop pending after obtaining the locks again, if
diff --git a/src/mongo/db/catalog/drop_database.cpp b/src/mongo/db/catalog/drop_database.cpp
index 5e0f097bc3f..8a50570b765 100644
--- a/src/mongo/db/catalog/drop_database.cpp
+++ b/src/mongo/db/catalog/drop_database.cpp
@@ -176,10 +176,6 @@ Status _dropDatabase(OperationContext* opCtx, const std::string& dbName, bool ab
// We need to keep aborting all the active index builders for this database until there
// are none left when we retrieve the exclusive database lock again.
while (indexBuildsCoord->inProgForDb(dbName)) {
- // Sends the abort signal to all the active index builders for this database.
- indexBuildsCoord->abortDatabaseIndexBuildsNoWait(
- opCtx, dbName, "dropDatabase command");
-
// Create a scope guard to reset the drop-pending state on the database to false if
// there is a replica state change that kills this operation while the locks were
// yielded.
@@ -192,12 +188,12 @@ Status _dropDatabase(OperationContext* opCtx, const std::string& dbName, bool ab
dropPendingGuard.dismiss();
});
- // Now that the abort signals were sent out to the active index builders for this
- // database, we need to release the lock temporarily to allow those index builders
- // to process the abort signal. Holding a lock here will cause the index builders to
- // block indefinitely.
+ // Drop locks. The drop helper will acquire locks on our behalf.
autoDB = boost::none;
- indexBuildsCoord->awaitNoBgOpInProgForDb(opCtx, dbName);
+
+ // Sends the abort signal to all the active index builders for this database. Waits
+ // for aborted index builds to complete.
+ indexBuildsCoord->abortDatabaseIndexBuilds(opCtx, dbName, "dropDatabase command");
if (MONGO_unlikely(dropDatabaseHangAfterWaitingForIndexBuilds.shouldFail())) {
LOGV2(4612300,
diff --git a/src/mongo/db/catalog/drop_indexes.cpp b/src/mongo/db/catalog/drop_indexes.cpp
index be8dcad5a21..a3bf4802de9 100644
--- a/src/mongo/db/catalog/drop_indexes.cpp
+++ b/src/mongo/db/catalog/drop_indexes.cpp
@@ -153,13 +153,13 @@ StatusWith<std::vector<std::string>> getIndexNames(OperationContext* opCtx,
/**
* Attempts to abort a single index builder that is responsible for all the index names passed in.
*/
-std::vector<UUID> abortIndexBuildByIndexNamesNoWait(OperationContext* opCtx,
- Collection* collection,
- std::vector<std::string> indexNames) {
+std::vector<UUID> abortIndexBuildByIndexNames(OperationContext* opCtx,
+ UUID collectionUUID,
+ std::vector<std::string> indexNames) {
boost::optional<UUID> buildUUID =
- IndexBuildsCoordinator::get(opCtx)->abortIndexBuildByIndexNamesNoWait(
- opCtx, collection->uuid(), indexNames, std::string("dropIndexes command"));
+ IndexBuildsCoordinator::get(opCtx)->abortIndexBuildByIndexNames(
+ opCtx, collectionUUID, indexNames, std::string("dropIndexes command"));
if (buildUUID) {
return {*buildUUID};
}
@@ -210,20 +210,19 @@ Status dropIndexByDescriptor(OperationContext* opCtx,
* otherwise this attempts to abort a single index builder building the given index names.
*/
std::vector<UUID> abortActiveIndexBuilders(OperationContext* opCtx,
- Collection* collection,
+ const NamespaceString& collectionNs,
+ CollectionUUID collectionUUID,
const std::vector<std::string>& indexNames) {
- invariant(opCtx->lockState()->isCollectionLockedForMode(collection->ns(), MODE_IX));
-
if (indexNames.empty()) {
return {};
}
if (indexNames.front() == "*") {
- return IndexBuildsCoordinator::get(opCtx)->abortCollectionIndexBuildsNoWait(
- opCtx, collection->uuid(), "dropIndexes command");
+ return IndexBuildsCoordinator::get(opCtx)->abortCollectionIndexBuilds(
+ opCtx, collectionNs, collectionUUID, "dropIndexes command");
}
- return abortIndexBuildByIndexNamesNoWait(opCtx, collection, indexNames);
+ return abortIndexBuildByIndexNames(opCtx, collectionUUID, indexNames);
}
Status dropReadyIndexes(OperationContext* opCtx,
@@ -333,20 +332,18 @@ Status dropIndexes(OperationContext* opCtx,
indexNames = swIndexNames.getValue();
- // Send the abort signal to any index builders that match the users request.
- abortedIndexBuilders = abortActiveIndexBuilders(opCtx, collection, indexNames);
+ // Copy the namespace and UUID before dropping locks.
+ auto collUUID = collection->uuid();
+ auto collNs = collection->ns();
- // Now that the abort signals were sent to the intended index builders, release our lock
- // temporarily to allow the index builders to process the abort signal. Holding a lock here
- // will cause the index builders to block indefinitely.
+ // Release locks before aborting index builds. The helper will acquire locks on our behalf.
autoColl = boost::none;
- if (abortedIndexBuilders.size() == 1) {
- indexBuildsCoord->awaitIndexBuildFinished(opCtx, abortedIndexBuilders.front());
- } else if (abortedIndexBuilders.size() > 1) {
- // Only the "*" wildcard can abort multiple index builders.
- invariant(isWildcard);
- indexBuildsCoord->awaitNoIndexBuildInProgressForCollection(opCtx, collectionUUID);
- }
+
+ // Send the abort signal to any index builders that match the users request. Waits until all
+ // aborted builders complete.
+ auto justAborted = abortActiveIndexBuilders(opCtx, collNs, collUUID, indexNames);
+ abortedIndexBuilders.insert(
+ abortedIndexBuilders.end(), justAborted.begin(), justAborted.end());
// Take an exclusive lock on the collection now to be able to perform index catalog writes
// when removing ready indexes from disk.
@@ -380,35 +377,11 @@ Status dropIndexes(OperationContext* opCtx,
if (!abortAgain) {
break;
}
-
- // We only need to hold an intent lock to send abort signals to the active index
- // builder(s) we intend to abort.
- autoColl = boost::none;
- autoColl.emplace(opCtx, dbAndUUID, MODE_IX);
-
- // Abandon the snapshot as the index catalog will compare the in-memory state to the
- // disk state, which may have changed when we released the lock temporarily.
- opCtx->recoveryUnit()->abandonSnapshot();
-
- db = autoColl->getDb();
- collection = autoColl->getCollection();
- if (!collection) {
- return Status(ErrorCodes::NamespaceNotFound,
- str::stream() << "ns not found on database " << dbAndUUID.db()
- << " with collection " << dbAndUUID.uuid());
- }
-
- status = checkReplState(opCtx, dbAndUUID);
- if (!status.isOK()) {
- return status;
- }
}
// If the "*" wildcard was not specified, verify that all the index names belonging to the
// index builder were aborted.
if (!isWildcard && !abortedIndexBuilders.empty()) {
- invariant(abortedIndexBuilders.size() == 1);
-
// This is necessary to check shard version.
OldClientContext ctx(opCtx, collection->ns().ns());
diff --git a/src/mongo/db/catalog/index_builds_manager.cpp b/src/mongo/db/catalog/index_builds_manager.cpp
index 1388bf5b4f8..dbb307c98af 100644
--- a/src/mongo/db/catalog/index_builds_manager.cpp
+++ b/src/mongo/db/catalog/index_builds_manager.cpp
@@ -37,6 +37,7 @@
#include "mongo/db/catalog/collection_catalog.h"
#include "mongo/db/catalog/index_catalog.h"
#include "mongo/db/catalog/index_timestamp_helper.h"
+#include "mongo/db/catalog/uncommitted_collections.h"
#include "mongo/db/concurrency/write_conflict_exception.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/storage/write_unit_of_work.h"
@@ -51,7 +52,7 @@ namespace {
/**
* Returns basic info on index builders.
*/
-std::string toSummary(const std::map<UUID, std::shared_ptr<MultiIndexBlock>>& builders) {
+std::string toSummary(const std::map<UUID, std::unique_ptr<MultiIndexBlock>>& builders) {
str::stream ss;
ss << "Number of builders: " << builders.size() << ": [";
bool first = true;
@@ -253,26 +254,25 @@ Status IndexBuildsManager::commitIndexBuild(OperationContext* opCtx,
return status;
}
wunit.commit();
-
- // Required call to clean up even though commit cleaned everything up.
- builder->cleanUpAfterBuild(opCtx, collection, MultiIndexBlock::kNoopOnCleanUpFn);
- _unregisterIndexBuild(buildUUID);
return Status::OK();
});
}
-bool IndexBuildsManager::abortIndexBuild(const UUID& buildUUID, const std::string& reason) {
- stdx::unique_lock<Latch> lk(_mutex);
-
- auto builderIt = _builders.find(buildUUID);
- if (builderIt == _builders.end()) {
+bool IndexBuildsManager::abortIndexBuild(OperationContext* opCtx,
+ Collection* collection,
+ const UUID& buildUUID,
+ OnCleanUpFn onCleanUpFn) {
+ auto builder = _getBuilder(buildUUID);
+ if (!builder.isOK()) {
return false;
}
- std::shared_ptr<MultiIndexBlock> builder = builderIt->second;
+ // Since abortIndexBuild is special in that it can be called by threads other than the index
+ // builder, ensure the caller has an exclusive lock.
+ auto nss = collection->ns();
+ UncommittedCollections::get(opCtx).invariantHasExclusiveAccessToCollection(opCtx, nss);
- lk.unlock();
- builder->abort(reason);
+ builder.getValue()->abortIndexBuild(opCtx, collection, onCleanUpFn);
return true;
}
@@ -291,25 +291,9 @@ bool IndexBuildsManager::abortIndexBuildWithoutCleanup(OperationContext* opCtx,
"reason"_attr = reason);
builder.getValue()->abortWithoutCleanup(opCtx);
- builder.getValue()->cleanUpAfterBuild(opCtx, collection, MultiIndexBlock::kNoopOnCleanUpFn);
- _unregisterIndexBuild(buildUUID);
-
return true;
}
-void IndexBuildsManager::tearDownIndexBuild(OperationContext* opCtx,
- Collection* collection,
- const UUID& buildUUID,
- OnCleanUpFn onCleanUpFn) {
- auto builder = _getBuilder(buildUUID);
- if (!builder.isOK()) {
- return;
- }
-
- builder.getValue()->cleanUpAfterBuild(opCtx, collection, onCleanUpFn);
- _unregisterIndexBuild(buildUUID);
-}
-
bool IndexBuildsManager::isBackgroundBuilding(const UUID& buildUUID) {
auto builder = invariant(_getBuilder(buildUUID));
return builder->isBackgroundBuilding();
@@ -322,11 +306,11 @@ void IndexBuildsManager::verifyNoIndexBuilds_forTestOnly() {
void IndexBuildsManager::_registerIndexBuild(UUID buildUUID) {
stdx::unique_lock<Latch> lk(_mutex);
- std::shared_ptr<MultiIndexBlock> mib = std::make_shared<MultiIndexBlock>();
- invariant(_builders.insert(std::make_pair(buildUUID, mib)).second);
+ auto mib = std::make_unique<MultiIndexBlock>();
+ invariant(_builders.insert(std::make_pair(buildUUID, std::move(mib))).second);
}
-void IndexBuildsManager::_unregisterIndexBuild(const UUID& buildUUID) {
+void IndexBuildsManager::unregisterIndexBuild(const UUID& buildUUID) {
stdx::unique_lock<Latch> lk(_mutex);
auto builderIt = _builders.find(buildUUID);
@@ -336,14 +320,13 @@ void IndexBuildsManager::_unregisterIndexBuild(const UUID& buildUUID) {
_builders.erase(builderIt);
}
-StatusWith<std::shared_ptr<MultiIndexBlock>> IndexBuildsManager::_getBuilder(
- const UUID& buildUUID) {
+StatusWith<MultiIndexBlock*> IndexBuildsManager::_getBuilder(const UUID& buildUUID) {
stdx::unique_lock<Latch> lk(_mutex);
auto builderIt = _builders.find(buildUUID);
if (builderIt == _builders.end()) {
return {ErrorCodes::NoSuchKey, str::stream() << "No index build with UUID: " << buildUUID};
}
- return builderIt->second;
+ return builderIt->second.get();
}
} // namespace mongo
diff --git a/src/mongo/db/catalog/index_builds_manager.h b/src/mongo/db/catalog/index_builds_manager.h
index 0b6becb6689..0686bd6af81 100644
--- a/src/mongo/db/catalog/index_builds_manager.h
+++ b/src/mongo/db/catalog/index_builds_manager.h
@@ -87,6 +87,11 @@ public:
SetupOptions options = {});
/**
+ * Unregisters the builder associated with the given buildUUID from the _builders map.
+ */
+ void unregisterIndexBuild(const UUID& buildUUID);
+
+ /**
* Runs the scanning/insertion phase of the index build..
*/
Status startBuildingIndex(OperationContext* opCtx,
@@ -138,12 +143,13 @@ public:
OnCommitFn onCommitFn);
/**
- * Signals the index build to be aborted and returns without waiting for completion.
- *
- * Returns true if a build existed to be signaled, as opposed to having already finished and
- * been cleared away, or not having yet started..
+ * Deletes the index entry from the durable catalog.
*/
- bool abortIndexBuild(const UUID& buildUUID, const std::string& reason);
+ using OnCleanUpFn = MultiIndexBlock::OnCleanUpFn;
+ bool abortIndexBuild(OperationContext* opCtx,
+ Collection* collection,
+ const UUID& buildUUID,
+ OnCleanUpFn onCleanUpFn);
/**
* Signals the index build to be aborted without being cleaned up and returns without waiting
@@ -158,15 +164,6 @@ public:
const std::string& reason);
/**
- * Cleans up the index build state and unregisters it from the manager.
- */
- using OnCleanUpFn = MultiIndexBlock::OnCleanUpFn;
- void tearDownIndexBuild(OperationContext* opCtx,
- Collection* collection,
- const UUID& buildUUID,
- OnCleanUpFn onCleanUpFn);
-
- /**
* Returns true if the index build supports background writes while building an index. This is
* true for the kHybrid method.
*/
@@ -184,21 +181,16 @@ private:
void _registerIndexBuild(UUID buildUUID);
/**
- * Unregisters the builder associcated with the given buildUUID from the _builders map.
- */
- void _unregisterIndexBuild(const UUID& buildUUID);
-
- /**
- * Returns a shared pointer to the builder. Returns a bad status if the builder does not exist.
+ * Returns a pointer to the builder. Returns a bad status if the builder does not exist.
*/
- StatusWith<std::shared_ptr<MultiIndexBlock>> _getBuilder(const UUID& buildUUID);
+ StatusWith<MultiIndexBlock*> _getBuilder(const UUID& buildUUID);
// Protects the map data structures below.
mutable Mutex _mutex = MONGO_MAKE_LATCH("IndexBuildsManager::_mutex");
// Map of index builders by build UUID. Allows access to the builders so that actions can be
// taken on and information passed to and from index builds.
- std::map<UUID, std::shared_ptr<MultiIndexBlock>> _builders;
+ std::map<UUID, std::unique_ptr<MultiIndexBlock>> _builders;
};
} // namespace mongo
diff --git a/src/mongo/db/catalog/index_builds_manager_test.cpp b/src/mongo/db/catalog/index_builds_manager_test.cpp
index 34f1e26d212..aa43f898082 100644
--- a/src/mongo/db/catalog/index_builds_manager_test.cpp
+++ b/src/mongo/db/catalog/index_builds_manager_test.cpp
@@ -91,12 +91,12 @@ TEST_F(IndexBuildsManagerTest, IndexBuildsManagerSetUpAndTearDown) {
_buildUUID,
MultiIndexBlock::kNoopOnInitFn));
- _indexBuildsManager.tearDownIndexBuild(operationContext(),
- autoColl.getCollection(),
- _buildUUID,
- MultiIndexBlock::kNoopOnCleanUpFn);
+ _indexBuildsManager.abortIndexBuild(operationContext(),
+ autoColl.getCollection(),
+ _buildUUID,
+ MultiIndexBlock::kNoopOnCleanUpFn);
+ _indexBuildsManager.unregisterIndexBuild(_buildUUID);
}
-
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp
index 8d78c98481f..06e3158dfb7 100644
--- a/src/mongo/db/catalog/multi_index_block.cpp
+++ b/src/mongo/db/catalog/multi_index_block.cpp
@@ -78,41 +78,16 @@ MultiIndexBlock::~MultiIndexBlock() {
MultiIndexBlock::OnCleanUpFn MultiIndexBlock::kNoopOnCleanUpFn = []() {};
-void MultiIndexBlock::cleanUpAfterBuild(OperationContext* opCtx,
- Collection* collection,
- OnCleanUpFn onCleanUp) {
+void MultiIndexBlock::abortIndexBuild(OperationContext* opCtx,
+ Collection* collection,
+ OnCleanUpFn onCleanUp) noexcept {
if (_collectionUUID) {
// init() was previously called with a collection pointer, so ensure that the same
// collection is being provided for clean up and the interface in not being abused.
invariant(_collectionUUID.get() == collection->uuid());
}
- if (_indexes.empty()) {
- _buildIsCleanedUp = true;
- return;
- }
-
- if (!_needToCleanup) {
- CollectionQueryInfo::get(collection).clearQueryCache();
-
- // The temp tables cannot be dropped in commit() because commit() can be called multiple
- // times on write conflict errors and the drop does not rollback in WUOWs.
-
- // Make lock acquisition uninterruptible.
- UninterruptibleLockGuard noInterrupt(opCtx->lockState());
-
- // Lock if it's not already locked, to ensure storage engine cannot be destructed out from
- // underneath us.
- boost::optional<Lock::GlobalLock> lk;
- if (!opCtx->lockState()->isWriteLocked()) {
- lk.emplace(opCtx, MODE_IS);
- }
-
- for (auto& index : _indexes) {
- index.block->deleteTemporaryTables(opCtx);
- }
-
- _buildIsCleanedUp = true;
+ if (_buildIsCleanedUp) {
return;
}
@@ -200,15 +175,6 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(OperationContext* opCtx,
invariant(opCtx->lockState()->isCollectionLockedForMode(collection->ns(), MODE_X),
str::stream() << "Collection " << collection->ns() << " with UUID "
<< collection->uuid() << " is holding the incorrect lock");
- if (State::kAborted == _getState()) {
- return {ErrorCodes::IndexBuildAborted,
- str::stream() << "Index build aborted: " << _abortReason
- << ". Cannot initialize index builder: " << collection->ns() << " ("
- << collection->uuid() << "): " << indexSpecs.size()
- << " index spec(s) provided. First index spec: "
- << (indexSpecs.empty() ? BSONObj() : indexSpecs[0])};
- }
-
_collectionUUID = collection->uuid();
_buildIsCleanedUp = false;
@@ -227,7 +193,7 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(OperationContext* opCtx,
for (auto& index : _indexes) {
index.block->deleteTemporaryTables(opCtx);
}
- _indexes.clear();
+ _buildIsCleanedUp = true;
});
const auto& ns = collection->ns().ns();
@@ -338,9 +304,6 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(OperationContext* opCtx,
}
wunit.commit();
-
- _setState(State::kRunning);
-
return indexInfoObjs;
// Avoid converting WCE to Status
} catch (const WriteConflictException&) {
@@ -372,6 +335,7 @@ void failPointHangDuringBuild(FailPoint* fp, StringData where, const BSONObj& do
Status MultiIndexBlock::insertAllDocumentsInCollection(OperationContext* opCtx,
Collection* collection) {
+ invariant(!_buildIsCleanedUp);
invariant(opCtx->lockState()->isNoop() || !opCtx->lockState()->inAWriteUnitOfWork());
// UUIDs are not guaranteed during startup because the check happens after indexes are rebuilt.
@@ -524,11 +488,7 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(OperationContext* opCtx,
}
Status MultiIndexBlock::insert(OperationContext* opCtx, const BSONObj& doc, const RecordId& loc) {
- if (State::kAborted == _getState()) {
- return {ErrorCodes::IndexBuildAborted,
- str::stream() << "Index build aborted: " << _abortReason};
- }
-
+ invariant(!_buildIsCleanedUp);
for (size_t i = 0; i < _indexes.size(); i++) {
if (_indexes[i].filterExpression && !_indexes[i].filterExpression->matchesBSON(doc)) {
continue;
@@ -557,11 +517,7 @@ Status MultiIndexBlock::dumpInsertsFromBulk(OperationContext* opCtx) {
Status MultiIndexBlock::dumpInsertsFromBulk(OperationContext* opCtx,
std::set<RecordId>* dupRecords) {
- if (State::kAborted == _getState()) {
- return {ErrorCodes::IndexBuildAborted,
- str::stream() << "Index build aborted: " << _abortReason};
- }
-
+ invariant(!_buildIsCleanedUp);
invariant(opCtx->lockState()->isNoop() || !opCtx->lockState()->inAWriteUnitOfWork());
for (size_t i = 0; i < _indexes.size(); i++) {
// If 'dupRecords' is provided, it will be used to store all records that would result in
@@ -620,15 +576,7 @@ Status MultiIndexBlock::drainBackgroundWrites(
OperationContext* opCtx,
RecoveryUnit::ReadSource readSource,
IndexBuildInterceptor::DrainYieldPolicy drainYieldPolicy) {
- if (State::kAborted == _getState()) {
- return {ErrorCodes::IndexBuildAborted,
- str::stream() << "Index build aborted: " << _abortReason
- << ". Cannot complete drain phase for index build"
- << (_collectionUUID ? (" on collection '" +
- _collectionUUID.get().toString() + "'")
- : ".")};
- }
-
+ invariant(!_buildIsCleanedUp);
invariant(!opCtx->lockState()->inAWriteUnitOfWork());
// Drain side-writes table for each index. This only drains what is visible. Assuming intent
@@ -659,6 +607,7 @@ Status MultiIndexBlock::drainBackgroundWrites(
}
Status MultiIndexBlock::retrySkippedRecords(OperationContext* opCtx, Collection* collection) {
+ invariant(!_buildIsCleanedUp);
for (auto&& index : _indexes) {
auto interceptor = index.block->getEntry()->indexBuildInterceptor();
if (!interceptor)
@@ -673,14 +622,7 @@ Status MultiIndexBlock::retrySkippedRecords(OperationContext* opCtx, Collection*
}
Status MultiIndexBlock::checkConstraints(OperationContext* opCtx) {
- if (State::kAborted == _getState()) {
- return {ErrorCodes::IndexBuildAborted,
- str::stream() << "Index build aborted: " << _abortReason
- << ". Cannot complete constraint checking for index build"
- << (_collectionUUID ? (" on collection '" +
- _collectionUUID.get().toString() + "'")
- : ".")};
- }
+ invariant(!_buildIsCleanedUp);
// For each index that may be unique, check that no recorded duplicates still exist. This can
// only check what is visible on the index. Callers are responsible for ensuring all writes to
@@ -699,8 +641,7 @@ Status MultiIndexBlock::checkConstraints(OperationContext* opCtx) {
}
void MultiIndexBlock::abortWithoutCleanup(OperationContext* opCtx) {
- _setStateToAbortedIfNotCommitted("aborted without cleanup"_sd);
-
+ invariant(!_buildIsCleanedUp);
UninterruptibleLockGuard noInterrupt(opCtx->lockState());
// Lock if it's not already locked, to ensure storage engine cannot be destructed out from
// underneath us.
@@ -712,8 +653,7 @@ void MultiIndexBlock::abortWithoutCleanup(OperationContext* opCtx) {
for (auto& index : _indexes) {
index.block->deleteTemporaryTables(opCtx);
}
- _indexes.clear();
- _needToCleanup = false;
+ _buildIsCleanedUp = true;
}
MultiIndexBlock::OnCreateEachFn MultiIndexBlock::kNoopOnCreateEachFn = [](const BSONObj& spec) {};
@@ -723,6 +663,7 @@ Status MultiIndexBlock::commit(OperationContext* opCtx,
Collection* collection,
OnCreateEachFn onCreateEach,
OnCommitFn onCommit) {
+ invariant(!_buildIsCleanedUp);
invariant(opCtx->lockState()->isCollectionLockedForMode(collection->ns(), MODE_X),
str::stream() << "Collection " << collection->ns() << " with UUID "
<< collection->uuid() << " is holding the incorrect lock");
@@ -732,14 +673,6 @@ Status MultiIndexBlock::commit(OperationContext* opCtx,
invariant(_collectionUUID.get() == collection->uuid());
}
- if (State::kAborted == _getState()) {
- return {
- ErrorCodes::IndexBuildAborted,
- str::stream() << "Index build aborted: " << _abortReason
- << ". Cannot commit index builder: " << collection->ns()
- << (_collectionUUID ? (" (" + _collectionUUID->toString() + ")") : "")};
- }
-
// Do not interfere with writing multikey information when committing index builds.
auto restartTracker = makeGuard(
[this, opCtx] { MultikeyPathTracker::get(opCtx).startTrackingMultikeyPathInfo(); });
@@ -769,6 +702,11 @@ Status MultiIndexBlock::commit(OperationContext* opCtx,
_indexes[i].block->getEntry()->setMultikey(opCtx, bulkBuilder->getMultikeyPaths());
}
+ // The commit() function can be called multiple times on write conflict errors. Dropping the
+ // temp tables cannot be rolled back, so do it only after the WUOW commits.
+ opCtx->recoveryUnit()->onCommit(
+ [opCtx, i, this](auto commitTs) { _indexes[i].block->deleteTemporaryTables(opCtx); });
+
if (opCtx->getServiceContext()->getStorageEngine()->supportsCheckpoints()) {
// Add the new index ident to a list so that the validate cmd with {background:true}
// can ignore the new index until it is regularly checkpoint'ed with the rest of the
@@ -795,79 +733,20 @@ Status MultiIndexBlock::commit(OperationContext* opCtx,
onCommit();
- // The state of this index build is set to Committed only when the WUOW commits.
- // It is possible for abort() to be called after the check at the beginning of this function and
- // before the WUOW is committed. If the WUOW commits, the final state of this index builder will
- // be Committed. Otherwise, the index builder state will remain as Aborted and further attempts
- // to commit this index build will fail.
- opCtx->recoveryUnit()->onCommit(
- [this](boost::optional<Timestamp> commitTime) { _setState(State::kCommitted); });
-
- // On rollback sets MultiIndexBlock::_needToCleanup to true.
- opCtx->recoveryUnit()->onRollback([this]() { _needToCleanup = true; });
- _needToCleanup = false;
+ opCtx->recoveryUnit()->onCommit([collection, this](boost::optional<Timestamp> commitTime) {
+ CollectionQueryInfo::get(collection).clearQueryCache();
+ _buildIsCleanedUp = true;
+ });
return Status::OK();
}
-bool MultiIndexBlock::isCommitted() const {
- return State::kCommitted == _getState();
-}
-
-void MultiIndexBlock::abort(StringData reason) {
- _setStateToAbortedIfNotCommitted(reason);
-}
-
-
bool MultiIndexBlock::isBackgroundBuilding() const {
return _method == IndexBuildMethod::kHybrid;
}
void MultiIndexBlock::setIndexBuildMethod(IndexBuildMethod indexBuildMethod) {
- invariant(_getState() == State::kUninitialized);
_method = indexBuildMethod;
}
-MultiIndexBlock::State MultiIndexBlock::getState_forTest() const {
- return _getState();
-}
-
-MultiIndexBlock::State MultiIndexBlock::_getState() const {
- stdx::lock_guard<Latch> lock(_mutex);
- return _state;
-}
-
-void MultiIndexBlock::_setState(State newState) {
- invariant(State::kAborted != newState);
- stdx::lock_guard<Latch> lock(_mutex);
- _state = newState;
-}
-
-void MultiIndexBlock::_setStateToAbortedIfNotCommitted(StringData reason) {
- stdx::lock_guard<Latch> lock(_mutex);
- if (State::kCommitted == _state) {
- return;
- }
- _state = State::kAborted;
- _abortReason = reason.toString();
-}
-
-StringData toString(MultiIndexBlock::State state) {
- switch (state) {
- case MultiIndexBlock::State::kUninitialized:
- return "Uninitialized"_sd;
- case MultiIndexBlock::State::kRunning:
- return "Running"_sd;
- case MultiIndexBlock::State::kCommitted:
- return "Committed"_sd;
- case MultiIndexBlock::State::kAborted:
- return "Aborted"_sd;
- }
- MONGO_UNREACHABLE;
-}
-
-std::ostream& operator<<(std::ostream& os, const MultiIndexBlock::State& state) {
- return os << toString(state);
-}
-
} // namespace mongo
diff --git a/src/mongo/db/catalog/multi_index_block.h b/src/mongo/db/catalog/multi_index_block.h
index e600882ed00..9a17a6c1c58 100644
--- a/src/mongo/db/catalog/multi_index_block.h
+++ b/src/mongo/db/catalog/multi_index_block.h
@@ -79,26 +79,6 @@ public:
~MultiIndexBlock();
/**
- * Ensures the index build state is cleared correctly after index build success or failure.
- *
- * Must be called before object destruction if init() has been called; and safe to call if
- * init() has not been called.
- *
- * By only requiring this call after init(), we allow owners of the object to exit without
- * further handling if they never use the object.
- *
- * `onCleanUp` will be called after all indexes have been removed from the catalog.
- */
- using OnCleanUpFn = std::function<void()>;
- void cleanUpAfterBuild(OperationContext* opCtx, Collection* collection, OnCleanUpFn onCleanUp);
-
- /**
- * Not all index aborts need this function, in particular index builds that do not need
- * to timestamp catalog writes. This is a no-op.
- */
- static OnCleanUpFn kNoopOnCleanUpFn;
-
- /**
* By default we enforce the 'unique' flag in specs when building an index by failing.
* If this is called before init(), we will ignore unique violations. This has no effect if
* no specs are unique.
@@ -255,45 +235,34 @@ public:
static OnCommitFn kNoopOnCommitFn;
/**
- * Returns true if this index builder was added to the index catalog successfully.
- * In addition to having commit() return without errors, the enclosing WUOW has to be committed
- * for the indexes to show up in the index catalog.
- */
- bool isCommitted() const;
-
- /**
- * Signals the index build to abort.
- *
- * In-progress inserts and commits will still run to completion. However, subsequent index build
- * operations will fail an IndexBuildAborted error.
+ * Ensures the index build state is cleared correctly after index build failure.
*
- * Aborts the uncommitted index build and prevents further inserts or commit attempts from
- * proceeding. On destruction, all traces of uncommitted index builds will be removed.
- *
- * If the index build has already been aborted (using abort() or abortWithoutCleanup()),
- * this function does nothing.
+ * Must be called before object destruction if init() has been called; and safe to call if
+ * init() has not been called.
*
- * If this index build has been committed successfully, this function has no effect.
+ * By only requiring this call after init(), we allow owners of the object to exit without
+ * further handling if they never use the object.
*
- * May be called from any thread.
+ * `onCleanUp` will be called after all indexes have been removed from the catalog.
+ */
+ using OnCleanUpFn = std::function<void()>;
+ void abortIndexBuild(OperationContext* opCtx,
+ Collection* collection,
+ OnCleanUpFn onCleanUp) noexcept;
+
+ /**
+ * Not all index aborts need this function, in particular index builds that do not need
+ * to timestamp catalog writes. This is a no-op.
*/
- void abort(StringData reason);
+ static OnCleanUpFn kNoopOnCleanUpFn;
/**
* May be called at any time after construction but before a successful commit(). Suppresses
- * the default behavior on destruction of removing all traces of uncommitted index builds.
- *
- * The most common use of this is if the indexes were already dropped via some other
- * mechanism such as the whole collection being dropped. In that case, it would be invalid
- * to try to remove the indexes again. Also, replication uses this to ensure that indexes
- * that are being built on shutdown are resumed on startup.
- *
- * Do not use this unless you are really sure you need to.
+ * the default behavior on destruction of removing all traces of uncommitted index builds. Does
+ * not perform any storage engine writes. May delete internal tables, but this is not
+ * transactional.
*
- * Does not matter whether it is called inside of a WriteUnitOfWork. Will not be rolled
- * back.
- *
- * Must be called from owning thread.
+ * This should only be used during shutdown or rollback.
*/
void abortWithoutCleanup(OperationContext* opCtx);
@@ -305,27 +274,6 @@ public:
void setIndexBuildMethod(IndexBuildMethod indexBuildMethod);
- /**
- * State transitions:
- *
- * Uninitialized --> Running --> Committed
- * | | ^
- * | | |
- * \--------------+------> Aborted
- *
- * It is possible for abort() to skip intermediate states. For example, calling abort() when the
- * index build has not been initialized will transition from Uninitialized directly to Aborted.
- *
- * In the case where we are in the midst of committing the WUOW for a successful commit() call,
- * we may transition temporarily to Aborted before finally ending at Committed. See comments for
- * MultiIndexBlock::abort().
- *
- * For testing only. Callers should not have to query the state of the MultiIndexBlock directly.
- */
- enum class State { kUninitialized, kRunning, kCommitted, kAborted };
- StringData toString(State state);
- State getState_forTest() const;
-
private:
struct IndexToBuild {
std::unique_ptr<IndexBuildBlock> block;
@@ -337,21 +285,6 @@ private:
InsertDeleteOptions options;
};
- /**
- * Returns the current state.
- */
- State _getState() const;
-
- /**
- * Updates the current state to a non-Aborted state.
- */
- void _setState(State newState);
-
- /**
- * Updates the current state to Aborted with the given reason.
- */
- void _setStateToAbortedIfNotCommitted(StringData reason);
-
// Is set during init() and ensures subsequent function calls act on the same Collection.
boost::optional<UUID> _collectionUUID;
@@ -363,8 +296,6 @@ private:
bool _ignoreUnique = false;
- bool _needToCleanup = true;
-
// Set to true when no work remains to be done, the object can safely destruct without leaving
// incorrect state set anywhere.
bool _buildIsCleanedUp = true;
@@ -372,15 +303,5 @@ private:
// A unique identifier associating this index build with a two-phase index build within a
// replica set.
boost::optional<UUID> _buildUUID;
-
- // Protects member variables of this class declared below.
- mutable Mutex _mutex = MONGO_MAKE_LATCH("MultiIndexBlock::_mutex");
-
- State _state = State::kUninitialized;
- std::string _abortReason;
};
-
-// For unit tests that need to check MultiIndexBlock states.
-// The ASSERT_*() macros use this function to print the value of 'state' when the predicate fails.
-std::ostream& operator<<(std::ostream& os, const MultiIndexBlock::State& state);
} // namespace mongo
diff --git a/src/mongo/db/catalog/multi_index_block_test.cpp b/src/mongo/db/catalog/multi_index_block_test.cpp
index 0b73737a991..3f0615b14aa 100644
--- a/src/mongo/db/catalog/multi_index_block_test.cpp
+++ b/src/mongo/db/catalog/multi_index_block_test.cpp
@@ -85,7 +85,6 @@ void MultiIndexBlockTest::tearDown() {
auto service = getServiceContext();
repl::ReplicationCoordinator::set(service, {});
- _indexer->cleanUpAfterBuild(getOpCtx(), getCollection(), MultiIndexBlock::kNoopOnCleanUpFn);
_indexer = {};
_collection = {};
@@ -109,17 +108,14 @@ MultiIndexBlock* MultiIndexBlockTest::getIndexer() const {
TEST_F(MultiIndexBlockTest, CommitWithoutInsertingDocuments) {
auto indexer = getIndexer();
- ASSERT_EQUALS(MultiIndexBlock::State::kUninitialized, indexer->getState_forTest());
auto specs = unittest::assertGet(indexer->init(
getOpCtx(), getCollection(), std::vector<BSONObj>(), MultiIndexBlock::kNoopOnInitFn));
ASSERT_EQUALS(0U, specs.size());
- ASSERT_EQUALS(MultiIndexBlock::State::kRunning, indexer->getState_forTest());
ASSERT_OK(indexer->dumpInsertsFromBulk(getOpCtx()));
ASSERT_OK(indexer->checkConstraints(getOpCtx()));
- ASSERT_FALSE(indexer->isCommitted());
{
WriteUnitOfWork wunit(getOpCtx());
ASSERT_OK(indexer->commit(getOpCtx(),
@@ -128,24 +124,19 @@ TEST_F(MultiIndexBlockTest, CommitWithoutInsertingDocuments) {
MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
}
- ASSERT(indexer->isCommitted());
- ASSERT_EQUALS(MultiIndexBlock::State::kCommitted, indexer->getState_forTest());
}
TEST_F(MultiIndexBlockTest, CommitAfterInsertingSingleDocument) {
auto indexer = getIndexer();
- ASSERT_EQUALS(MultiIndexBlock::State::kUninitialized, indexer->getState_forTest());
auto specs = unittest::assertGet(indexer->init(
getOpCtx(), getCollection(), std::vector<BSONObj>(), MultiIndexBlock::kNoopOnInitFn));
ASSERT_EQUALS(0U, specs.size());
- ASSERT_EQUALS(MultiIndexBlock::State::kRunning, indexer->getState_forTest());
ASSERT_OK(indexer->insert(getOpCtx(), {}, {}));
ASSERT_OK(indexer->dumpInsertsFromBulk(getOpCtx()));
ASSERT_OK(indexer->checkConstraints(getOpCtx()));
- ASSERT_FALSE(indexer->isCommitted());
{
WriteUnitOfWork wunit(getOpCtx());
ASSERT_OK(indexer->commit(getOpCtx(),
@@ -154,11 +145,9 @@ TEST_F(MultiIndexBlockTest, CommitAfterInsertingSingleDocument) {
MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
}
- ASSERT(indexer->isCommitted());
// abort() should have no effect after the index build is committed.
- indexer->abort("test"_sd);
- ASSERT(indexer->isCommitted());
+ indexer->abortIndexBuild(getOpCtx(), getCollection(), MultiIndexBlock::kNoopOnCleanUpFn);
}
TEST_F(MultiIndexBlockTest, AbortWithoutCleanupAfterInsertingSingleDocument) {
@@ -168,94 +157,6 @@ TEST_F(MultiIndexBlockTest, AbortWithoutCleanupAfterInsertingSingleDocument) {
ASSERT_EQUALS(0U, specs.size());
ASSERT_OK(indexer->insert(getOpCtx(), {}, {}));
indexer->abortWithoutCleanup(getOpCtx());
- ASSERT_EQUALS(MultiIndexBlock::State::kAborted, indexer->getState_forTest());
-
- ASSERT_FALSE(indexer->isCommitted());
-}
-
-TEST_F(MultiIndexBlockTest, InitFailsAfterAbort) {
- auto indexer = getIndexer();
- ASSERT_EQUALS(MultiIndexBlock::State::kUninitialized, indexer->getState_forTest());
-
- indexer->abort("test"_sd);
- ASSERT_EQUALS(MultiIndexBlock::State::kAborted, indexer->getState_forTest());
-
- ASSERT_EQUALS(
- ErrorCodes::IndexBuildAborted,
- indexer
- ->init(
- getOpCtx(), getCollection(), std::vector<BSONObj>(), MultiIndexBlock::kNoopOnInitFn)
- .getStatus());
- ASSERT_EQUALS(MultiIndexBlock::State::kAborted, indexer->getState_forTest());
-
- ASSERT_FALSE(indexer->isCommitted());
-}
-
-TEST_F(MultiIndexBlockTest, InsertingSingleDocumentFailsAfterAbort) {
- auto indexer = getIndexer();
- ASSERT_EQUALS(MultiIndexBlock::State::kUninitialized, indexer->getState_forTest());
-
- auto specs = unittest::assertGet(indexer->init(
- getOpCtx(), getCollection(), std::vector<BSONObj>(), MultiIndexBlock::kNoopOnInitFn));
- ASSERT_EQUALS(0U, specs.size());
- ASSERT_EQUALS(MultiIndexBlock::State::kRunning, indexer->getState_forTest());
-
- indexer->abort("test"_sd);
- ASSERT_EQUALS(MultiIndexBlock::State::kAborted, indexer->getState_forTest());
-
- ASSERT_EQUALS(ErrorCodes::IndexBuildAborted,
- indexer->insert(getOpCtx(), BSON("_id" << 123 << "a" << 456), {}));
- ASSERT_EQUALS(MultiIndexBlock::State::kAborted, indexer->getState_forTest());
-
- ASSERT_FALSE(indexer->isCommitted());
-}
-
-TEST_F(MultiIndexBlockTest, DumpInsertsFromBulkFailsAfterAbort) {
- auto indexer = getIndexer();
- ASSERT_EQUALS(MultiIndexBlock::State::kUninitialized, indexer->getState_forTest());
-
- auto specs = unittest::assertGet(indexer->init(
- getOpCtx(), getCollection(), std::vector<BSONObj>(), MultiIndexBlock::kNoopOnInitFn));
- ASSERT_EQUALS(0U, specs.size());
- ASSERT_EQUALS(MultiIndexBlock::State::kRunning, indexer->getState_forTest());
-
- ASSERT_OK(indexer->insert(getOpCtx(), BSON("_id" << 123 << "a" << 456), {}));
- ASSERT_EQUALS(MultiIndexBlock::State::kRunning, indexer->getState_forTest());
-
- indexer->abort("test"_sd);
- ASSERT_EQUALS(MultiIndexBlock::State::kAborted, indexer->getState_forTest());
-
- ASSERT_EQUALS(ErrorCodes::IndexBuildAborted, indexer->dumpInsertsFromBulk(getOpCtx()));
- ASSERT_EQUALS(MultiIndexBlock::State::kAborted, indexer->getState_forTest());
-
- ASSERT_FALSE(indexer->isCommitted());
-}
-
-TEST_F(MultiIndexBlockTest, CommitFailsAfterAbort) {
- auto indexer = getIndexer();
- ASSERT_EQUALS(MultiIndexBlock::State::kUninitialized, indexer->getState_forTest());
-
- auto specs = unittest::assertGet(indexer->init(
- getOpCtx(), getCollection(), std::vector<BSONObj>(), MultiIndexBlock::kNoopOnInitFn));
- ASSERT_EQUALS(0U, specs.size());
- ASSERT_EQUALS(MultiIndexBlock::State::kRunning, indexer->getState_forTest());
-
- ASSERT_OK(indexer->insert(getOpCtx(), BSON("_id" << 123 << "a" << 456), {}));
- ASSERT_EQUALS(MultiIndexBlock::State::kRunning, indexer->getState_forTest());
-
- ASSERT_OK(indexer->dumpInsertsFromBulk(getOpCtx()));
-
- indexer->abort("test"_sd);
- ASSERT_EQUALS(MultiIndexBlock::State::kAborted, indexer->getState_forTest());
-
- ASSERT_EQUALS(ErrorCodes::IndexBuildAborted,
- indexer->commit(getOpCtx(),
- getCollection(),
- MultiIndexBlock::kNoopOnCreateEachFn,
- MultiIndexBlock::kNoopOnCommitFn));
- ASSERT_EQUALS(MultiIndexBlock::State::kAborted, indexer->getState_forTest());
-
- ASSERT_FALSE(indexer->isCommitted());
}
} // namespace
diff --git a/src/mongo/db/cloner.cpp b/src/mongo/db/cloner.cpp
index 1a45051af97..d7ec0c03de7 100644
--- a/src/mongo/db/cloner.cpp
+++ b/src/mongo/db/cloner.cpp
@@ -393,10 +393,6 @@ void Cloner::copyIndexes(OperationContext* opCtx,
// implementations anyway as long as that is supported.
MultiIndexBlock indexer;
- // The code below throws, so ensure build cleanup occurs.
- ON_BLOCK_EXIT(
- [&] { indexer.cleanUpAfterBuild(opCtx, collection, MultiIndexBlock::kNoopOnCleanUpFn); });
-
// Emit startIndexBuild and commitIndexBuild oplog entries if supported by the current FCV.
auto opObserver = opCtx->getServiceContext()->getOpObserver();
auto fromMigrate = false;
@@ -436,6 +432,11 @@ void Cloner::copyIndexes(OperationContext* opCtx,
}
auto indexInfoObjs = uassertStatusOK(indexer.init(opCtx, collection, indexesToBuild, onInitFn));
+
+ // The code below throws, so ensure build cleanup occurs.
+ auto abortOnExit = makeGuard(
+ [&] { indexer.abortIndexBuild(opCtx, collection, MultiIndexBlock::kNoopOnCleanUpFn); });
+
uassertStatusOK(indexer.insertAllDocumentsInCollection(opCtx, collection));
uassertStatusOK(indexer.checkConstraints(opCtx));
@@ -463,6 +464,7 @@ void Cloner::copyIndexes(OperationContext* opCtx,
}
}));
wunit.commit();
+ abortOnExit.dismiss();
}
bool Cloner::copyCollection(OperationContext* opCtx,
diff --git a/src/mongo/db/commands/create_indexes.cpp b/src/mongo/db/commands/create_indexes.cpp
index 6bfe04cafab..0ef519bb542 100644
--- a/src/mongo/db/commands/create_indexes.cpp
+++ b/src/mongo/db/commands/create_indexes.cpp
@@ -79,6 +79,7 @@ MONGO_FAIL_POINT_DEFINE(createIndexesWriteConflict);
// collection is created.
MONGO_FAIL_POINT_DEFINE(hangBeforeCreateIndexesCollectionCreate);
MONGO_FAIL_POINT_DEFINE(hangBeforeIndexBuildAbortOnInterrupt);
+MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildAbort);
// This failpoint hangs between logging the index build UUID and starting the index build
// through the IndexBuildsCoordinator.
@@ -600,93 +601,80 @@ bool runCreateIndexesWithCoordinator(OperationContext* opCtx,
opCtx, dbname, *collectionUUID, specs, buildUUID, protocol, indexBuildOptions));
auto deadline = opCtx->getDeadline();
- // Date_t::max() means no deadline.
- if (deadline == Date_t::max()) {
- LOGV2(20439,
- "Waiting for index build to complete: {buildUUID}",
- "buildUUID"_attr = buildUUID);
- } else {
- LOGV2(20440,
- "Waiting for index build to complete: {buildUUID} (deadline: {deadline})",
- "buildUUID"_attr = buildUUID,
- "deadline"_attr = deadline);
- }
+ LOGV2(20440,
+ "Waiting for index build to complete",
+ "buildUUID"_attr = buildUUID,
+ "deadline"_attr = deadline);
// Throws on error.
try {
stats = buildIndexFuture.get(opCtx);
} catch (const ExceptionForCat<ErrorCategory::Interruption>& interruptionEx) {
LOGV2(20441,
- "Index build interrupted: {buildUUID}: {interruptionEx}",
+ "Index build received interrupt signal",
"buildUUID"_attr = buildUUID,
- "interruptionEx"_attr = interruptionEx);
+ "signal"_attr = interruptionEx);
hangBeforeIndexBuildAbortOnInterrupt.pauseWhileSet();
- boost::optional<Lock::GlobalLock> globalLock;
if (IndexBuildProtocol::kTwoPhase == protocol) {
// If this node is no longer a primary, the index build will continue to run in the
// background and will complete when this node receives a commitIndexBuild oplog
// entry from the new primary.
if (ErrorCodes::InterruptedDueToReplStateChange == interruptionEx.code()) {
LOGV2(20442,
- "Index build continuing in background: {buildUUID}",
+ "Index build ignoring interrupt and continuing in background",
"buildUUID"_attr = buildUUID);
throw;
}
-
- // If we are using two-phase index builds and are no longer primary after receiving
- // an interrupt, we cannot replicate an abortIndexBuild oplog entry. Rely on the new
- // primary to finish the index build. Acquire the global lock to check the
- // replication state and to prevent any state transitions from happening while
- // aborting the index build.
- UninterruptibleLockGuard noInterrupt(opCtx->lockState());
- globalLock.emplace(opCtx, MODE_IS);
- if (!repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesFor(opCtx, ns)) {
- uassertStatusOK(
- {ErrorCodes::NotMaster,
- str::stream()
- << "Unable to abort index build because we are no longer primary: "
- << buildUUID});
- }
}
// It is unclear whether the interruption originated from the current opCtx instance
// for the createIndexes command or that the IndexBuildsCoordinator task was interrupted
// independently of this command invocation. We'll defensively abort the index build
// with the assumption that if the index build was already in the midst of tearing down,
- // this be a no-op.
- // Use a null abort timestamp because the index build will generate its own timestamp
- // on cleanup.
- indexBuildsCoord->abortIndexBuildOnError(opCtx, buildUUID, interruptionEx.toStatus());
- LOGV2(20443, "Index build aborted: {buildUUID}", "buildUUID"_attr = buildUUID);
-
+ // this will be a no-op.
+ {
+ // The current OperationContext may be interrupted, which would prevent us from
+ // taking locks. Use a new OperationContext to abort the index build.
+ auto newClient = opCtx->getServiceContext()->makeClient("abort-index-build");
+ AlternativeClientRegion acr(newClient);
+ const auto abortCtx = cc().makeOperationContext();
+
+ std::string abortReason(str::stream() << "Index build aborted: " << buildUUID
+ << ": " << interruptionEx.toString());
+ indexBuildsCoord->abortIndexBuildByBuildUUID(
+ abortCtx.get(), buildUUID, IndexBuildAction::kPrimaryAbort, abortReason);
+ LOGV2(
+ 20443, "Index build aborted due to interruption", "buildUUID"_attr = buildUUID);
+ }
throw;
} catch (const ExceptionForCat<ErrorCategory::NotMasterError>& ex) {
LOGV2(20444,
- "Index build interrupted due to change in replication state: {buildUUID}: {ex}",
+ "Index build received interrupt signal due to change in replication state",
"buildUUID"_attr = buildUUID,
"ex"_attr = ex);
- // The index build will continue to run in the background and will complete when this
- // node receives a commitIndexBuild oplog entry from the new primary.
-
+ // If this node is no longer a primary, the index build will continue to run in the
+ // background and will complete when this node receives a commitIndexBuild oplog
+ // entry from the new primary.
if (IndexBuildProtocol::kTwoPhase == protocol) {
LOGV2(20445,
- "Index build continuing in background: {buildUUID}",
+ "Index build ignoring interrupt and continuing in background",
"buildUUID"_attr = buildUUID);
throw;
}
- indexBuildsCoord->abortIndexBuildOnError(opCtx, buildUUID, ex.toStatus());
- LOGV2(20446,
- "Index build aborted due to NotMaster error: {buildUUID}",
- "buildUUID"_attr = buildUUID);
-
+ std::string abortReason(str::stream() << "Index build aborted: " << buildUUID << ": "
+ << ex.toString());
+ indexBuildsCoord->abortIndexBuildByBuildUUID(
+ opCtx, buildUUID, IndexBuildAction::kPrimaryAbort, abortReason);
+ LOGV2(
+ 20446, "Index build aborted due to NotMaster error", "buildUUID"_attr = buildUUID);
throw;
}
- LOGV2(20447, "Index build completed: {buildUUID}", "buildUUID"_attr = buildUUID);
+ LOGV2(20447, "Index build completed", "buildUUID"_attr = buildUUID);
} catch (DBException& ex) {
// If the collection is dropped after the initial checks in this function (before the
// AutoStatsTracker is created), the IndexBuildsCoordinator (either startIndexBuild() or
@@ -703,10 +691,7 @@ bool runCreateIndexesWithCoordinator(OperationContext* opCtx,
}
// All other errors should be forwarded to the caller with index build information included.
- LOGV2(20449,
- "Index build failed: {buildUUID}: {ex_toStatus}",
- "buildUUID"_attr = buildUUID,
- "ex_toStatus"_attr = ex.toStatus());
+ LOGV2(20449, "Index build failed", "buildUUID"_attr = buildUUID, "ex"_attr = ex.toStatus());
ex.addContext(str::stream() << "Index build failed: " << buildUUID << ": Collection " << ns
<< " ( " << *collectionUUID << " )");
@@ -770,6 +755,7 @@ public:
try {
return runCreateIndexesWithCoordinator(opCtx, dbname, cmdObj, errmsg, result);
} catch (const DBException& ex) {
+ hangAfterIndexBuildAbort.pauseWhileSet();
// We can only wait for an existing index build to finish if we are able to release
// our locks, in order to allow the existing index build to proceed. We cannot
// release locks in transactions, so we bypass the below logic in transactions.
diff --git a/src/mongo/db/commands/drop_indexes.cpp b/src/mongo/db/commands/drop_indexes.cpp
index e924becf9f4..38266a3cf48 100644
--- a/src/mongo/db/commands/drop_indexes.cpp
+++ b/src/mongo/db/commands/drop_indexes.cpp
@@ -213,22 +213,20 @@ public:
StatusWith<std::vector<BSONObj>> swIndexesToRebuild(ErrorCodes::UnknownError,
"Uninitialized");
- // The 'indexer' can throw, so ensure build cleanup occurs.
- ON_BLOCK_EXIT([&] {
- indexer->cleanUpAfterBuild(opCtx, collection, MultiIndexBlock::kNoopOnCleanUpFn);
+ writeConflictRetry(opCtx, "dropAllIndexes", toReIndexNss.ns(), [&] {
+ WriteUnitOfWork wunit(opCtx);
+ collection->getIndexCatalog()->dropAllIndexes(opCtx, true);
+
+ swIndexesToRebuild =
+ indexer->init(opCtx, collection, all, MultiIndexBlock::kNoopOnInitFn);
+ uassertStatusOK(swIndexesToRebuild.getStatus());
+ wunit.commit();
});
- {
- writeConflictRetry(opCtx, "dropAllIndexes", toReIndexNss.ns(), [&] {
- WriteUnitOfWork wunit(opCtx);
- collection->getIndexCatalog()->dropAllIndexes(opCtx, true);
-
- swIndexesToRebuild =
- indexer->init(opCtx, collection, all, MultiIndexBlock::kNoopOnInitFn);
- uassertStatusOK(swIndexesToRebuild.getStatus());
- wunit.commit();
- });
- }
+ // The 'indexer' can throw, so ensure build cleanup occurs.
+ auto abortOnExit = makeGuard([&] {
+ indexer->abortIndexBuild(opCtx, collection, MultiIndexBlock::kNoopOnCleanUpFn);
+ });
if (MONGO_unlikely(reIndexCrashAfterDrop.shouldFail())) {
LOGV2(20458, "exiting because 'reIndexCrashAfterDrop' fail point was set");
@@ -241,16 +239,15 @@ public:
uassertStatusOK(indexer->checkConstraints(opCtx));
- {
- writeConflictRetry(opCtx, "commitReIndex", toReIndexNss.ns(), [&] {
- WriteUnitOfWork wunit(opCtx);
- uassertStatusOK(indexer->commit(opCtx,
- collection,
- MultiIndexBlock::kNoopOnCreateEachFn,
- MultiIndexBlock::kNoopOnCommitFn));
- wunit.commit();
- });
- }
+ writeConflictRetry(opCtx, "commitReIndex", toReIndexNss.ns(), [&] {
+ WriteUnitOfWork wunit(opCtx);
+ uassertStatusOK(indexer->commit(opCtx,
+ collection,
+ MultiIndexBlock::kNoopOnCreateEachFn,
+ MultiIndexBlock::kNoopOnCommitFn));
+ wunit.commit();
+ });
+ abortOnExit.dismiss();
// Do not allow majority reads from this collection until all original indexes are visible.
// This was also done when dropAllIndexes() committed, but we need to ensure that no one
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 31394c48e8c..ed2819c70de 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -68,6 +68,7 @@ using namespace indexbuildentryhelpers;
MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildFirstDrain);
MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildSecondDrain);
MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildDumpsInsertsFromBulk);
+MONGO_FAIL_POINT_DEFINE(hangAfterInitializingIndexBuild);
namespace {
@@ -147,9 +148,8 @@ bool shouldBuildIndexesOnEmptyCollectionSinglePhased(OperationContext* opCtx,
/*
* Determines whether to skip the index build state transition check.
* Index builder not using ReplIndexBuildState::waitForNextAction to signal primary and secondaries
- * to commit or abort signal will violate index build state transition i.e, they can move from
- * prepareAbort to Committed and from Committed to prepareAbort. So, we should skip state transition
- * verification. Otherwise, we would invariant.
+ * to commit or abort signal will violate index build state transition. So, we should skip state
+ * transition verification. Otherwise, we would invariant.
*/
bool shouldSkipIndexBuildStateTransitionCheck(OperationContext* opCtx,
IndexBuildProtocol protocol) {
@@ -161,14 +161,23 @@ bool shouldSkipIndexBuildStateTransitionCheck(OperationContext* opCtx,
}
/**
- * Signal downstream secondary nodes to commit index build.
+ * Replicates a commitIndexBuild oplog entry for two-phase builds, which signals downstream
+ * secondary nodes to commit the index build.
*/
void onCommitIndexBuild(OperationContext* opCtx,
const NamespaceString& nss,
- ReplIndexBuildState& replState,
- bool replSetAndNotPrimaryAtStart) {
+ ReplIndexBuildState& replState) {
const auto& buildUUID = replState.buildUUID;
+ auto skipCheck = shouldSkipIndexBuildStateTransitionCheck(opCtx, replState.protocol);
+ {
+ stdx::unique_lock<Latch> lk(replState.mutex);
+ replState.indexBuildState.setState(IndexBuildState::kCommitted, skipCheck);
+ }
+ if (IndexBuildProtocol::kSinglePhase == replState.protocol) {
+ return;
+ }
+
invariant(IndexBuildProtocol::kTwoPhase == replState.protocol,
str::stream() << "onCommitIndexBuild: " << buildUUID);
invariant(opCtx->lockState()->isWriteLocked(),
@@ -178,19 +187,10 @@ void onCommitIndexBuild(OperationContext* opCtx,
const auto& collUUID = replState.collectionUUID;
const auto& indexSpecs = replState.indexSpecs;
auto fromMigrate = false;
- auto skipCheck = shouldSkipIndexBuildStateTransitionCheck(opCtx, replState.protocol);
- {
- stdx::unique_lock<Latch> lk(replState.mutex);
- replState.indexBuildState.setState(IndexBuildState::kCommitted, skipCheck);
- }
// Since two phase index builds are allowed to survive replication state transitions, we should
// check if the node is currently a primary before attempting to write to the oplog.
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
- if (!replCoord->getSettings().usingReplSets()) {
- return;
- }
-
if (!replCoord->canAcceptWritesFor(opCtx, nss)) {
invariant(!opCtx->recoveryUnit()->getCommitTimestamp().isNull(),
str::stream() << "commitIndexBuild: " << buildUUID);
@@ -201,7 +201,8 @@ void onCommitIndexBuild(OperationContext* opCtx,
}
/**
- * Signal downstream secondary nodes to abort index build.
+ * Replicates an abortIndexBuild oplog entry for two-phase builds, which signals downstream
+ * secondary nodes to abort the index build.
*/
void onAbortIndexBuild(OperationContext* opCtx,
const NamespaceString& nss,
@@ -216,58 +217,18 @@ void onAbortIndexBuild(OperationContext* opCtx,
auto opObserver = opCtx->getServiceContext()->getOpObserver();
auto collUUID = replState.collectionUUID;
auto fromMigrate = false;
- auto skipCheck = shouldSkipIndexBuildStateTransitionCheck(opCtx, replState.protocol);
- {
- stdx::unique_lock<Latch> lk(replState.mutex);
- replState.indexBuildState.setState(IndexBuildState::kAborted, skipCheck);
- }
opObserver->onAbortIndexBuild(
opCtx, nss, collUUID, replState.buildUUID, replState.indexSpecs, cause, fromMigrate);
}
/**
- * Aborts the index build identified by the provided 'replIndexBuildState'.
- * It gets called by drop database/collection/index command.
- */
-void abortIndexBuild(WithLock lk,
- OperationContext* opCtx,
- IndexBuildsManager* indexBuildsManager,
- std::shared_ptr<ReplIndexBuildState> replIndexBuildState,
- const std::string& reason) {
- stdx::unique_lock<Latch> replStateLock(replIndexBuildState->mutex);
- if (replIndexBuildState->waitForNextAction->getFuture().isReady()) {
- const auto nextAction = replIndexBuildState->waitForNextAction->getFuture().get();
- invariant(nextAction == IndexBuildAction::kSinglePhaseCommit ||
- nextAction == IndexBuildAction::kCommitQuorumSatisfied ||
- nextAction == IndexBuildAction::kPrimaryAbort);
- // Index build coordinator already received a signal to commit or abort. So, it's ok
- // to return and wait for the index build to complete. The index build coordinator
- // will not perform the signaled action (i.e, will not commit or abort the index build)
- // only when the node steps down. When the node steps down, the caller of this function,
- // drop commands (user operation) will also get interrupted. So, we no longer need to
- // abort the index build on step down.
- return;
- }
-
- auto skipCheck = shouldSkipIndexBuildStateTransitionCheck(opCtx, replIndexBuildState->protocol);
- // Set the state on replIndexBuildState and indexBuildsManager. And, then signal the value. It's
- // important we do all these 3 things in a critical section by holding mutex lock.
- replIndexBuildState->indexBuildState.setState(
- IndexBuildState::kPrepareAbort, skipCheck, boost::none, reason);
- indexBuildsManager->abortIndexBuild(replIndexBuildState->buildUUID, reason);
-
- IndexBuildsCoordinator::get(opCtx)->setSignalAndCancelVoteRequestCbkIfActive(
- replStateLock, opCtx, replIndexBuildState, IndexBuildAction::kPrimaryAbort);
-}
-
-/**
* We do not need synchronization with step up and step down. Dropping the RSTL is important because
* otherwise if we held the RSTL it would create deadlocks with prepared transactions on step up and
* step down. A deadlock could result if the index build was attempting to acquire a Collection S
* or X lock while a prepared transaction held a Collection IX lock, and a step down was waiting to
* acquire the RSTL in mode X.
*/
-void unlockRSTLForIndexCleanup(OperationContext* opCtx) {
+void unlockRSTL(OperationContext* opCtx) {
opCtx->lockState()->unlockRSTLforPrepare();
invariant(!opCtx->lockState()->isRSTLLocked());
}
@@ -562,92 +523,35 @@ void IndexBuildsCoordinator::waitForAllIndexBuildsToStopForShutdown(OperationCon
_indexBuildsCondVar.wait(lk, pred);
}
-std::vector<UUID> IndexBuildsCoordinator::_abortCollectionIndexBuilds(stdx::unique_lock<Latch>& lk,
- OperationContext* opCtx,
- const UUID& collectionUUID,
- const std::string& reason,
- bool shouldWait) {
- auto indexBuildFilter = [=](const auto& replState) {
- return collectionUUID == replState.collectionUUID;
- };
- auto collIndexBuilds = _filterIndexBuilds_inlock(lk, indexBuildFilter);
- if (collIndexBuilds.empty()) {
- return {};
- }
-
+std::vector<UUID> IndexBuildsCoordinator::abortCollectionIndexBuilds(
+ OperationContext* opCtx,
+ const NamespaceString collectionNss,
+ const UUID collectionUUID,
+ const std::string& reason) {
LOGV2(23879,
- "About to abort all index builders on collection with UUID: {collectionUUID}",
- "collectionUUID"_attr = collectionUUID);
+ "About to abort all index builders on collection",
+ "collection"_attr = collectionNss,
+ "collectionUUID"_attr = collectionUUID,
+ "reason"_attr = reason);
+
+ auto collIndexBuilds = [&]() -> std::vector<std::shared_ptr<ReplIndexBuildState>> {
+ stdx::unique_lock<Latch> lk(_mutex);
+ auto indexBuildFilter = [=](const auto& replState) {
+ return collectionUUID == replState.collectionUUID;
+ };
+ return _filterIndexBuilds_inlock(lk, indexBuildFilter);
+ }();
std::vector<UUID> buildUUIDs;
for (auto replState : collIndexBuilds) {
- abortIndexBuild(lk, opCtx, &_indexBuildsManager, replState, reason);
- buildUUIDs.push_back(replState->buildUUID);
- }
-
- if (!shouldWait) {
- return buildUUIDs;
+ if (abortIndexBuildByBuildUUID(
+ opCtx, replState->buildUUID, IndexBuildAction::kPrimaryAbort, reason)) {
+ buildUUIDs.push_back(replState->buildUUID);
+ }
}
-
- _awaitNoIndexBuildInProgressForCollection(lk, opCtx, collectionUUID);
-
return buildUUIDs;
}
-void IndexBuildsCoordinator::_awaitNoIndexBuildInProgressForCollection(stdx::unique_lock<Latch>& lk,
- OperationContext* opCtx,
- const UUID& collectionUUID) {
- auto indexBuildFilter = [=](const auto& replState) {
- return collectionUUID == replState.collectionUUID;
- };
- auto pred = [&, this]() {
- auto collIndexBuilds = _filterIndexBuilds_inlock(lk, indexBuildFilter);
- return collIndexBuilds.empty();
- };
- _indexBuildsCondVar.wait(lk, pred);
-}
-
-void IndexBuildsCoordinator::abortCollectionIndexBuilds(OperationContext* opCtx,
- const UUID& collectionUUID,
- const std::string& reason) {
- stdx::unique_lock<Latch> lk(_mutex);
- const bool shouldWait = true;
- _abortCollectionIndexBuilds(lk, opCtx, collectionUUID, reason, shouldWait);
-}
-
-std::vector<UUID> IndexBuildsCoordinator::abortCollectionIndexBuildsNoWait(
- OperationContext* opCtx, const UUID& collectionUUID, const std::string& reason) {
- stdx::unique_lock<Latch> lk(_mutex);
- const bool shouldWait = false;
- return _abortCollectionIndexBuilds(lk, opCtx, collectionUUID, reason, shouldWait);
-}
-
-void IndexBuildsCoordinator::_abortDatabaseIndexBuilds(stdx::unique_lock<Latch>& lk,
- OperationContext* opCtx,
- const StringData& db,
- const std::string& reason,
- bool shouldWait) {
- auto indexBuildFilter = [db](const auto& replState) { return db == replState.dbName; };
- auto dbIndexBuilds = _filterIndexBuilds_inlock(lk, indexBuildFilter);
- if (dbIndexBuilds.empty()) {
- return;
- }
-
- LOGV2(4612302,
- "About to abort all index builders running for collections in the given database",
- "database"_attr = db);
-
- for (auto replState : dbIndexBuilds) {
- abortIndexBuild(lk, opCtx, &_indexBuildsManager, replState, reason);
- }
-
- if (!shouldWait) {
- return;
- }
-
- _awaitNoBgOpInProgForDb(lk, opCtx, db);
-}
-
void IndexBuildsCoordinator::_awaitNoBgOpInProgForDb(stdx::unique_lock<Latch>& lk,
OperationContext* opCtx,
StringData db) {
@@ -662,17 +566,20 @@ void IndexBuildsCoordinator::_awaitNoBgOpInProgForDb(stdx::unique_lock<Latch>& l
void IndexBuildsCoordinator::abortDatabaseIndexBuilds(OperationContext* opCtx,
StringData db,
const std::string& reason) {
- stdx::unique_lock<Latch> lk(_mutex);
- const bool shouldWait = true;
- _abortDatabaseIndexBuilds(lk, opCtx, db, reason, shouldWait);
-}
+ LOGV2(4612302,
+ "About to abort all index builders running for collections in the given database",
+ "database"_attr = db,
+ "reason"_attr = reason);
-void IndexBuildsCoordinator::abortDatabaseIndexBuildsNoWait(OperationContext* opCtx,
- StringData db,
- const std::string& reason) {
- stdx::unique_lock<Latch> lk(_mutex);
- const bool shouldWait = false;
- _abortDatabaseIndexBuilds(lk, opCtx, db, reason, shouldWait);
+ auto builds = [&]() -> std::vector<std::shared_ptr<ReplIndexBuildState>> {
+ stdx::unique_lock<Latch> lk(_mutex);
+ auto indexBuildFilter = [=](const auto& replState) { return db == replState.dbName; };
+ return _filterIndexBuilds_inlock(lk, indexBuildFilter);
+ }();
+ for (auto replState : builds) {
+ abortIndexBuildByBuildUUID(
+ opCtx, replState->buildUUID, IndexBuildAction::kPrimaryAbort, reason);
+ }
}
namespace {
@@ -765,43 +672,51 @@ void IndexBuildsCoordinator::applyCommitIndexBuild(OperationContext* opCtx,
auto replState = uassertStatusOK(indexBuildsCoord->_getIndexBuild(buildUUID));
- while (true) {
- stdx::unique_lock<Latch> lk(replState->mutex);
- if (replState->waitForNextAction->getFuture().isReady()) {
- // If future wait is made uninterruptible, then the shutdown can stuck behind
- // oplog applier if the indexBuildCoordinator thread died after interruption on
- // shutdown. And, commitIndexBuild oplog entry will stuck waiting for reset of the
- // promise.
- const auto nextAction = replState->waitForNextAction->getFuture().get(opCtx);
- invariant(nextAction == IndexBuildAction::kCommitQuorumSatisfied);
- // Retry until the current promise result is consumed by the index builder thread and
- // a new empty promise got created by the indexBuildscoordinator thread.
- // Don't hammer it.
- sleepmillis(1);
- continue;
- }
- auto skipCheck = shouldSkipIndexBuildStateTransitionCheck(opCtx, replState->protocol);
- replState->indexBuildState.setState(IndexBuildState::kPrepareCommit,
- skipCheck,
- opCtx->recoveryUnit()->getCommitTimestamp());
- // Promise can be set only once.
- // We can't skip signaling here if a signal is already set because the previous commit or
- // abort signal might have been sent to handle for primary case.
- setSignalAndCancelVoteRequestCbkIfActive(
- lk, opCtx, replState, IndexBuildAction::kOplogCommit);
- break;
+ // Retry until we are able to put the index build in the kPrepareCommit state. None of the
+ // conditions for retrying are common or expected to be long-lived, so we believe this to be
+ // safe to poll at this frequency.
+ while (!_tryCommit(opCtx, replState)) {
+ opCtx->sleepFor(Milliseconds(100));
}
auto fut = replState->sharedPromise.getFuture();
LOGV2(20654,
- "Index build joined after commit: {buildUUID}: {fut_waitNoThrow_opCtx}",
+ "Index build joined after commit",
"buildUUID"_attr = buildUUID,
- "fut_waitNoThrow_opCtx"_attr = fut.waitNoThrow(opCtx));
+ "result"_attr = fut.waitNoThrow(opCtx));
// Throws if there was an error building the index.
fut.get();
}
+bool IndexBuildsCoordinator::_tryCommit(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState) {
+ stdx::unique_lock<Latch> lk(replState->mutex);
+ if (replState->indexBuildState.isSettingUp()) {
+ // It's possible that the index build thread has not reached the point where it can be
+ // committed yet.
+ return false;
+ }
+ if (replState->waitForNextAction->getFuture().isReady()) {
+ // If the future wait were uninterruptible, then shutdown could hang. If the
+ // IndexBuildsCoordinator thread gets interrupted on shutdown, the oplog applier will hang
+ // waiting for the promise applying the commitIndexBuild oplog entry.
+ const auto nextAction = replState->waitForNextAction->getFuture().get(opCtx);
+ invariant(nextAction == IndexBuildAction::kCommitQuorumSatisfied);
+ // Retry until the current promise result is consumed by the index builder thread and
+ // a new empty promise got created by the indexBuildscoordinator thread.
+ return false;
+ }
+ auto skipCheck = shouldSkipIndexBuildStateTransitionCheck(opCtx, replState->protocol);
+ replState->indexBuildState.setState(
+ IndexBuildState::kPrepareCommit, skipCheck, opCtx->recoveryUnit()->getCommitTimestamp());
+ // Promise can be set only once.
+ // We can't skip signaling here if a signal is already set because the previous commit or
+ // abort signal might have been sent to handle for primary case.
+ setSignalAndCancelVoteRequestCbkIfActive(lk, opCtx, replState, IndexBuildAction::kOplogCommit);
+ return true;
+}
+
void IndexBuildsCoordinator::applyAbortIndexBuild(OperationContext* opCtx,
const IndexBuildOplogEntry& oplogEntry) {
const auto collUUID = oplogEntry.collUUID;
@@ -820,50 +735,15 @@ void IndexBuildsCoordinator::applyAbortIndexBuild(OperationContext* opCtx,
std::string abortReason(str::stream()
<< "abortIndexBuild oplog entry encountered: " << *oplogEntry.cause);
auto indexBuildsCoord = IndexBuildsCoordinator::get(opCtx);
- indexBuildsCoord->abortIndexBuildByBuildUUID(opCtx,
- buildUUID,
- IndexBuildAction::kOplogAbort,
- opCtx->recoveryUnit()->getCommitTimestamp(),
- abortReason);
-}
-
-void IndexBuildsCoordinator::abortIndexBuildOnError(OperationContext* opCtx,
- const UUID& buildUUID,
- Status abortStatus) {
- // Use a null abort timestamp because the index build will generate a ghost timestamp
- // for the single-phase build on cleanup.
- std::string abortReason(str::stream() << "Index build interrupted: " << buildUUID << ": "
- << abortStatus.toString());
- abortIndexBuildByBuildUUIDNoWait(
- opCtx, buildUUID, IndexBuildAction::kPrimaryAbort, boost::none, abortReason);
-}
-
-void IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx,
- const UUID& buildUUID,
- IndexBuildAction signalAction,
- boost::optional<Timestamp> abortTimestamp,
- boost::optional<std::string> reason) {
- if (!abortIndexBuildByBuildUUIDNoWait(opCtx, buildUUID, signalAction, abortTimestamp, reason)) {
- return;
- }
-
- auto replState =
- invariant(_getIndexBuild(buildUUID),
- str::stream() << "Abort timestamp: "
- << abortTimestamp.get_value_or(Timestamp()).toString());
-
- auto fut = replState->sharedPromise.getFuture();
- LOGV2(20655,
- "Index build joined after abort: {buildUUID}: {fut_waitNoThrow}",
- "buildUUID"_attr = buildUUID,
- "fut_waitNoThrow"_attr = fut.waitNoThrow());
+ indexBuildsCoord->abortIndexBuildByBuildUUID(
+ opCtx, buildUUID, IndexBuildAction::kOplogAbort, abortReason);
}
-boost::optional<UUID> IndexBuildsCoordinator::abortIndexBuildByIndexNamesNoWait(
+boost::optional<UUID> IndexBuildsCoordinator::abortIndexBuildByIndexNames(
OperationContext* opCtx,
const UUID& collectionUUID,
const std::vector<std::string>& indexNames,
- boost::optional<std::string> reason) {
+ std::string reason) {
boost::optional<UUID> buildUUID;
auto indexBuilds = _getIndexBuilds();
auto onIndexBuild = [&](std::shared_ptr<ReplIndexBuildState> replState) {
@@ -886,17 +766,13 @@ boost::optional<UUID> IndexBuildsCoordinator::abortIndexBuildByIndexNamesNoWait(
"collectionUUID"_attr = collectionUUID,
"replState_indexNames_front"_attr = replState->indexNames.front());
- if (this->abortIndexBuildByBuildUUIDNoWait(opCtx,
- replState->buildUUID,
- IndexBuildAction::kPrimaryAbort,
- boost::none,
- reason)) {
+ if (abortIndexBuildByBuildUUID(
+ opCtx, replState->buildUUID, IndexBuildAction::kPrimaryAbort, reason)) {
buildUUID = replState->buildUUID;
}
};
- forEachIndexBuild(indexBuilds,
- "IndexBuildsCoordinator::abortIndexBuildByIndexNamesNoWait - "_sd,
- onIndexBuild);
+ forEachIndexBuild(
+ indexBuilds, "IndexBuildsCoordinator::abortIndexBuildByIndexNames - "_sd, onIndexBuild);
return buildUUID;
}
@@ -925,14 +801,80 @@ bool IndexBuildsCoordinator::hasIndexBuilder(OperationContext* opCtx,
return foundIndexBuilder;
}
-bool IndexBuildsCoordinator::abortIndexBuildByBuildUUIDNoWait(
+IndexBuildsCoordinator::TryAbortResult IndexBuildsCoordinator::_tryAbort(
OperationContext* opCtx,
- const UUID& buildUUID,
+ std::shared_ptr<ReplIndexBuildState> replState,
IndexBuildAction signalAction,
- boost::optional<Timestamp> abortTimestamp,
- boost::optional<std::string> reason) {
- // We need to avoid race between commit and abort index build.
+ std::string reason) {
+
+ {
+ stdx::unique_lock<Latch> lk(replState->mutex);
+ // Wait until the build is done setting up. This indicates that all required state is
+ // initialized to attempt an abort.
+ if (replState->indexBuildState.isSettingUp()) {
+ LOGV2_DEBUG(465605,
+ 2,
+ "waiting until index build is done setting up before attempting to abort",
+ "buildUUID"_attr = replState->buildUUID);
+ return TryAbortResult::kRetry;
+ }
+ if (replState->waitForNextAction->getFuture().isReady()) {
+ const auto nextAction = replState->waitForNextAction->getFuture().get(opCtx);
+ invariant(nextAction == IndexBuildAction::kSinglePhaseCommit ||
+ nextAction == IndexBuildAction::kCommitQuorumSatisfied ||
+ nextAction == IndexBuildAction::kPrimaryAbort);
+
+ // Index build coordinator already received a signal to commit or abort. So, it's ok
+ // to return and wait for the index build to complete if we are trying to signal
+ // 'kPrimaryAbort'. The index build coordinator will not perform the signaled action
+ // (i.e, will not commit or abort the index build) only when the node steps down.
+ // When the node steps down, the caller of this function, dropIndexes/createIndexes
+ // command (user operation) will also get interrupted. So, we no longer need to
+ // abort the index build on step down.
+ if (signalAction == IndexBuildAction::kPrimaryAbort) {
+ // Indicate if the index build is already being committed or aborted.
+ if (nextAction == IndexBuildAction::kPrimaryAbort) {
+ return TryAbortResult::kAlreadyAborted;
+ } else {
+ return TryAbortResult::kNotAborted;
+ }
+ }
+
+ // Retry until the current promise result is consumed by the index builder thread
+ // and a new empty promise got created by the indexBuildscoordinator thread. Or,
+ // until the index build got torn down after index build commit.
+ return TryAbortResult::kRetry;
+ }
+
+ LOGV2(4656003, "aborting index build", "buildUUID"_attr = replState->buildUUID);
+
+ // Set the state on replState. Once set, the calling thread must complete the abort process.
+ auto abortTimestamp =
+ boost::make_optional<Timestamp>(!opCtx->recoveryUnit()->getCommitTimestamp().isNull(),
+ opCtx->recoveryUnit()->getCommitTimestamp());
+ auto skipCheck = shouldSkipIndexBuildStateTransitionCheck(opCtx, replState->protocol);
+ replState->indexBuildState.setState(
+ IndexBuildState::kAborted, skipCheck, abortTimestamp, reason);
+ setSignalAndCancelVoteRequestCbkIfActive(lk, opCtx, replState, signalAction);
+ }
+ return TryAbortResult::kContinueAbort;
+}
+
+bool IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx,
+ const UUID& buildUUID,
+ IndexBuildAction signalAction,
+ std::string reason) {
+ std::shared_ptr<ReplIndexBuildState> replState;
+ bool retry = false;
while (true) {
+ // Retry until we are able to put the index build into the kAborted state. None of the
+ // conditions for retrying are common or expected to be long-lived, so we believe this to be
+ // safe to poll at this frequency.
+ if (retry) {
+ opCtx->sleepFor(Milliseconds(1000));
+ retry = false;
+ }
+
// It is possible to receive an abort for a non-existent index build. Abort should always
// succeed, so suppress the error.
auto replStateResult = _getIndexBuild(buildUUID);
@@ -945,55 +887,181 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUIDNoWait(
return false;
}
- auto replState = replStateResult.getValue();
+ replState = replStateResult.getValue();
+ LOGV2(4656010, "attempting to abort index build", "buildUUID"_attr = replState->buildUUID);
- stdx::unique_lock<Latch> lk(replState->mutex);
- if (replState->waitForNextAction->getFuture().isReady()) {
- const auto nextAction = replState->waitForNextAction->getFuture().get(opCtx);
- invariant(nextAction == IndexBuildAction::kSinglePhaseCommit ||
- nextAction == IndexBuildAction::kCommitQuorumSatisfied ||
- nextAction == IndexBuildAction::kPrimaryAbort);
+ const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
+ Lock::DBLock dbLock(opCtx, replState->dbName, MODE_IX);
- // Index build coordinator already received a signal to commit or abort. So, it's ok
- // to return and wait for the index build to complete if we are trying to signal
- // 'kPrimaryAbort'. The index build coordinator will not perform the signaled action
- // (i.e, will not commit or abort the index build) only when the node steps down. When
- // the node steps down, the caller of this function, dropIndexes/createIndexes command
- // (user operation) will also get interrupted. So, we no longer need to abort the index
- // build on step down.
- //
- // Currently dropIndexes command calls this function with the
- // collection lock held in IX mode, So, there are possibilities, we might block the
- // index build from completing, leading to 3 way deadlocks involving step down,
- // dropIndexes command, IndexBuildCoordinator thread.
- if (signalAction == IndexBuildAction::kPrimaryAbort) {
- // Only return true if the index build is being aborted already, not if it is going
- // to commit.
- return nextAction == IndexBuildAction::kPrimaryAbort;
+ if (IndexBuildProtocol::kSinglePhase == replState->protocol) {
+ // Unlock RSTL to avoid deadlocks with prepare conflicts and state transitions caused by
+ // taking a strong collection lock. See SERVER-42621.
+ unlockRSTL(opCtx);
+ }
+ Lock::CollectionLock collLock(opCtx, dbAndUUID, MODE_X);
+
+ // If we are using two-phase index builds and are no longer primary after receiving an
+ // abort, we cannot replicate an abortIndexBuild oplog entry. Continue holding the RSTL to
+ // check the replication state and to prevent any state transitions from happening while
+ // aborting the index build. Once an index build is put into kAborted, the index builder
+ // thread will be torn down, and an oplog entry must be replicated. Single-phase builds do
+ // not have this restriction and may be aborted after a stepDown.
+ if (IndexBuildProtocol::kTwoPhase == replState->protocol) {
+ // The DBLock helper takes the RSTL implictly.
+ invariant(opCtx->lockState()->isRSTLLocked());
+ auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ if (IndexBuildAction::kPrimaryAbort == signalAction &&
+ !replCoord->canAcceptWritesFor(opCtx, dbAndUUID)) {
+ uassertStatusOK({ErrorCodes::NotMaster,
+ str::stream()
+ << "Unable to abort index build because we are not primary: "
+ << buildUUID});
}
+ }
- // Retry until the current promise result is consumed by the index builder thread and
- // a new empty promise got created by the indexBuildscoordinator thread. Or, until the
- // index build got torn down after index build commit.
- // Don't hammer it.
- sleepmillis(1);
+ auto tryAbortResult = _tryAbort(opCtx, replState, signalAction, reason);
+ switch (tryAbortResult) {
+ case TryAbortResult::kNotAborted:
+ return false;
+ case TryAbortResult::kAlreadyAborted:
+ return true;
+ case TryAbortResult::kRetry:
+ case TryAbortResult::kContinueAbort:
+ break;
+ }
+
+ if (TryAbortResult::kRetry == tryAbortResult) {
+ retry = true;
continue;
}
- auto skipCheck = shouldSkipIndexBuildStateTransitionCheck(opCtx, replState->protocol);
- // Set the state on replState and _indexBuildsManager. And, then signal the value. It's
- // important we do all these 3 things in a critical section by holding mutex lock.
- replState->indexBuildState.setState(
- IndexBuildState::kPrepareAbort, skipCheck, abortTimestamp, reason);
- _indexBuildsManager.abortIndexBuild(buildUUID, reason.get_value_or(""));
+ invariant(TryAbortResult::kContinueAbort == tryAbortResult);
- setSignalAndCancelVoteRequestCbkIfActive(lk, opCtx, replState, signalAction);
+ // At this point we must continue aborting the index build.
+ try {
+ _completeAbort(opCtx, replState, signalAction, {ErrorCodes::IndexBuildAborted, reason});
+ } catch (const DBException& e) {
+ LOGV2_FATAL(
+ 4656011,
+ "Failed to abort index build after partially tearing-down index build state",
+ "buildUUID"_attr = replState->buildUUID,
+ "reason"_attr = e.toString());
+ }
+
+ {
+ stdx::unique_lock<Latch> lk(replState->mutex);
+
+ // Interrupt the builder thread so that it can no longer acquire locks or make progress.
+ auto serviceContext = opCtx->getServiceContext();
+ auto target = serviceContext->getLockedClient(replState->opId);
+ if (!target) {
+ LOGV2_FATAL(4656001,
+ "Index builder thread did not appear to be running while aborting",
+ "buildUUID"_attr = replState->buildUUID,
+ "opId"_attr = replState->opId);
+ }
+ serviceContext->killOperation(
+ target, target->getOperationContext(), ErrorCodes::IndexBuildAborted);
+ }
+
+ // Wait for the builder thread to receive the signal before unregistering. Don't release the
+ // Collection lock until this happens, guaranteeing the thread has stopped making progress
+ // and has exited.
+ auto fut = replState->sharedPromise.getFuture();
+ LOGV2(20655,
+ "Index build thread exited",
+ "buildUUID"_attr = buildUUID,
+ "status"_attr = fut.waitNoThrow());
+
+ {
+ // Unregister last once we guarantee all other state has been cleaned up.
+ stdx::unique_lock<Latch> lk(_mutex);
+ _unregisterIndexBuild(lk, replState);
+ }
break;
}
return true;
}
+void IndexBuildsCoordinator::_completeAbort(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ IndexBuildAction signalAction,
+ Status reason) {
+ auto coll =
+ CollectionCatalog::get(opCtx).lookupCollectionByUUID(opCtx, replState->collectionUUID);
+ auto nss = coll->ns();
+
+ switch (signalAction) {
+ // Replicates an abortIndexBuild oplog entry and deletes the index from the durable catalog.
+ case IndexBuildAction::kPrimaryAbort: {
+ auto onCleanUpFn = [&] { onAbortIndexBuild(opCtx, coll->ns(), *replState, reason); };
+ _indexBuildsManager.abortIndexBuild(opCtx, coll, replState->buildUUID, onCleanUpFn);
+ break;
+ }
+ // Deletes the index from the durable catalog.
+ case IndexBuildAction::kOplogAbort: {
+ invariant(IndexBuildProtocol::kTwoPhase == replState->protocol);
+ _indexBuildsManager.abortIndexBuild(
+ opCtx, coll, replState->buildUUID, MultiIndexBlock::kNoopOnCleanUpFn);
+ break;
+ }
+ // No locks are required when aborting due to rollback. This performs no storage engine
+ // writes, only cleans up the remaining in-memory state.
+ case IndexBuildAction::kRollbackAbort: {
+ _indexBuildsManager.abortIndexBuildWithoutCleanup(
+ opCtx, coll, replState->buildUUID, reason.reason());
+ break;
+ }
+ case IndexBuildAction::kNoAction:
+ case IndexBuildAction::kCommitQuorumSatisfied:
+ case IndexBuildAction::kOplogCommit:
+ case IndexBuildAction::kSinglePhaseCommit:
+ MONGO_UNREACHABLE;
+ }
+
+ LOGV2(465611, "Cleaned up index build after abort. ", "buildUUID"_attr = replState->buildUUID);
+}
+
+void IndexBuildsCoordinator::_completeSelfAbort(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ Status reason) {
+ _completeAbort(opCtx, replState, IndexBuildAction::kPrimaryAbort, reason);
+ {
+ auto skipCheck = shouldSkipIndexBuildStateTransitionCheck(opCtx, replState->protocol);
+ stdx::unique_lock<Latch> lk(replState->mutex);
+ replState->indexBuildState.setState(IndexBuildState::kAborted, skipCheck);
+ }
+ {
+ stdx::unique_lock<Latch> lk(_mutex);
+ _unregisterIndexBuild(lk, replState);
+ }
+}
+
+void IndexBuildsCoordinator::_completeAbortForShutdown(
+ OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ Collection* collection) {
+ // Leave it as-if kill -9 happened. Startup recovery will restart the index build.
+ _indexBuildsManager.abortIndexBuildWithoutCleanup(
+ opCtx, collection, replState->buildUUID, "shutting down");
+
+ {
+ // Promise should be set at least once before it's getting destroyed.
+ stdx::unique_lock<Latch> lk(replState->mutex);
+ if (!replState->waitForNextAction->getFuture().isReady()) {
+ replState->waitForNextAction->emplaceValue(IndexBuildAction::kNoAction);
+ }
+ auto skipCheck = shouldSkipIndexBuildStateTransitionCheck(opCtx, replState->protocol);
+ replState->indexBuildState.setState(IndexBuildState::kAborted, skipCheck);
+ }
+ {
+ // This allows the builder thread to exit.
+ stdx::unique_lock<Latch> lk(_mutex);
+ _unregisterIndexBuild(lk, replState);
+ }
+}
+
std::size_t IndexBuildsCoordinator::getActiveIndexBuildCount(OperationContext* opCtx) {
auto indexBuilds = _getIndexBuilds();
// We use forEachIndexBuild() to log basic details on the current index builds and don't intend
@@ -1017,17 +1085,6 @@ void IndexBuildsCoordinator::onStepUp(OperationContext* opCtx) {
return;
}
- {
- stdx::unique_lock<Latch> lk(replState->mutex);
- // After Sending the abort this might have stepped down and stepped back up.
- if (replState->indexBuildState.isAbortPrepared() &&
- !replState->waitForNextAction->getFuture().isReady()) {
- setSignalAndCancelVoteRequestCbkIfActive(
- lk, opCtx, replState, IndexBuildAction::kPrimaryAbort);
- return;
- }
- }
-
if (!_signalIfCommitQuorumNotEnabled(opCtx, replState)) {
// This reads from system.indexBuilds collection to see if commit quorum got satisfied.
_signalIfCommitQuorumIsSatisfied(opCtx, replState);
@@ -1060,12 +1117,11 @@ IndexBuilds IndexBuildsCoordinator::stopIndexBuildsForRollback(OperationContext*
}
buildsStopped.insert({replState->buildUUID, aborted});
- // Leave abort timestamp as null. This will unblock the index build and allow it to
- // complete without cleaning up. Subsequently, the rollback algorithm can decide how to
- // undo the index build depending on the state of the oplog.
- // Signals the kRollbackAbort and then waits for the thread to join.
+ // This will unblock the index build and allow it to complete without cleaning up.
+ // Subsequently, the rollback algorithm can decide how to undo the index build depending on
+ // the state of the oplog. Signals the kRollbackAbort and then waits for the thread to join.
abortIndexBuildByBuildUUID(
- opCtx, replState->buildUUID, IndexBuildAction::kRollbackAbort, boost::none, reason);
+ opCtx, replState->buildUUID, IndexBuildAction::kRollbackAbort, reason);
};
forEachIndexBuild(
indexBuilds, "IndexBuildsCoordinator::stopIndexBuildsForRollback - "_sd, onIndexBuild);
@@ -1213,11 +1269,7 @@ void IndexBuildsCoordinator::createIndexes(OperationContext* opCtx,
auto buildUUID = UUID::gen();
// Rest of this function can throw, so ensure the build cleanup occurs.
- ON_BLOCK_EXIT([&] {
- opCtx->recoveryUnit()->abandonSnapshot();
- _indexBuildsManager.tearDownIndexBuild(
- opCtx, collection, buildUUID, MultiIndexBlock::kNoopOnCleanUpFn);
- });
+ ON_BLOCK_EXIT([&] { _indexBuildsManager.unregisterIndexBuild(buildUUID); });
auto onInitFn = MultiIndexBlock::makeTimestampedIndexOnInitFn(opCtx, collection);
IndexBuildsManager::SetupOptions options;
@@ -1225,6 +1277,10 @@ void IndexBuildsCoordinator::createIndexes(OperationContext* opCtx,
uassertStatusOK(_indexBuildsManager.setUpIndexBuild(
opCtx, collection, specs, buildUUID, onInitFn, options));
+ auto abortOnExit = makeGuard([&] {
+ _indexBuildsManager.abortIndexBuild(
+ opCtx, collection, buildUUID, MultiIndexBlock::kNoopOnCleanUpFn);
+ });
uassertStatusOK(_indexBuildsManager.startBuildingIndex(opCtx, collection, buildUUID));
uassertStatusOK(_indexBuildsManager.checkIndexConstraintViolations(opCtx, buildUUID));
@@ -1278,6 +1334,7 @@ void IndexBuildsCoordinator::createIndexes(OperationContext* opCtx,
};
uassertStatusOK(_indexBuildsManager.commitIndexBuild(
opCtx, collection, nss, buildUUID, onCreateEachFn, onCommitFn));
+ abortOnExit.dismiss();
}
void IndexBuildsCoordinator::createIndexesOnEmptyCollection(OperationContext* opCtx,
@@ -1376,8 +1433,7 @@ Status IndexBuildsCoordinator::_registerIndexBuild(
if (auto ts = existingIndexBuild->indexBuildState.getTimestamp()) {
ss << ", timestamp: " << ts->toString();
}
- if (existingIndexBuild->indexBuildState.isSet(IndexBuildState::kPrepareAbort |
- IndexBuildState::kAborted)) {
+ if (existingIndexBuild->indexBuildState.isAborted()) {
if (auto abortReason =
existingIndexBuild->indexBuildState.getAbortReason()) {
ss << ", abort reason: " << abortReason.get();
@@ -1395,8 +1451,6 @@ Status IndexBuildsCoordinator::_registerIndexBuild(
}
}
- // Register the index build.
-
invariant(_allIndexBuilds.emplace(replIndexBuildState->buildUUID, replIndexBuildState).second);
_indexBuildsCondVar.notify_all();
@@ -1409,6 +1463,8 @@ void IndexBuildsCoordinator::_unregisterIndexBuild(
invariant(_allIndexBuilds.erase(replIndexBuildState->buildUUID));
+ LOGV2(4656004, "unregistering index build", "buildUUID"_attr = replIndexBuildState->buildUUID);
+ _indexBuildsManager.unregisterIndexBuild(replIndexBuildState->buildUUID);
_indexBuildsCondVar.notify_all();
}
@@ -1617,7 +1673,7 @@ IndexBuildsCoordinator::PostSetupAction IndexBuildsCoordinator::_setUpIndexBuild
opCtx, collection, replState->indexSpecs, replState->buildUUID, onInitFn, options));
}
} catch (DBException& ex) {
- _indexBuildsManager.tearDownIndexBuild(
+ _indexBuildsManager.abortIndexBuild(
opCtx, collection, replState->buildUUID, MultiIndexBlock::kNoopOnCleanUpFn);
const auto& status = ex.toStatus();
@@ -1632,7 +1688,6 @@ IndexBuildsCoordinator::PostSetupAction IndexBuildsCoordinator::_setUpIndexBuild
throw;
}
-
return PostSetupAction::kContinueIndexBuild;
}
@@ -1697,6 +1752,14 @@ void IndexBuildsCoordinator::_runIndexBuild(OperationContext* opCtx,
return;
}
auto replState = invariant(swReplState);
+ {
+ // The index build is now past the setup stage and in progress. This makes it eligible to be
+ // aborted. Use the current OperationContext's opId as the means for interrupting the index
+ // build.
+ stdx::unique_lock<Latch> lk(replState->mutex);
+ replState->opId = opCtx->getOpID();
+ replState->indexBuildState.setState(IndexBuildState::kInProgress, false /* skipCheck */);
+ }
// Add build UUID to lock manager diagnostic output.
auto locker = opCtx->lockState();
@@ -1710,6 +1773,11 @@ void IndexBuildsCoordinator::_runIndexBuild(OperationContext* opCtx,
locker->setDebugInfo(ss);
}
+ while (MONGO_unlikely(hangAfterInitializingIndexBuild.shouldFail())) {
+ opCtx->runWithoutInterruptionExceptAtGlobalShutdown(
+ [&] { opCtx->sleepFor(Milliseconds(100)); });
+ }
+
auto status = [&]() {
try {
_runIndexBuildInner(opCtx, replState, indexBuildOptions);
@@ -1723,18 +1791,30 @@ void IndexBuildsCoordinator::_runIndexBuild(OperationContext* opCtx,
// Ensure the index build is unregistered from the Coordinator and the Promise is set with
// the build's result so that callers are notified of the outcome.
-
- stdx::unique_lock<Latch> lk(_mutex);
-
- _unregisterIndexBuild(lk, replState);
-
if (status.isOK()) {
+ stdx::unique_lock<Latch> lk(_mutex);
+ // Unregister first so that when we fulfill the future, the build is not observed as active.
+ _unregisterIndexBuild(lk, replState);
replState->sharedPromise.emplaceValue(replState->stats);
- } else {
- replState->sharedPromise.setError(status);
+ return;
}
+
+ // During a failure, unregistering is handled by either the caller or the current thread,
+ // depending on where the error originated. Signal to any waiters that an error occurred.
+ replState->sharedPromise.setError(status);
}
+namespace {
+
+template <typename Func>
+void runOnAlternateContext(OperationContext* opCtx, std::string name, Func func) {
+ auto newClient = opCtx->getServiceContext()->makeClient(name);
+ AlternativeClientRegion acr(newClient);
+ const auto newCtx = cc().makeOperationContext();
+ func(newCtx.get());
+}
+} // namespace
+
void IndexBuildsCoordinator::_cleanUpSinglePhaseAfterFailure(
OperationContext* opCtx,
Collection* collection,
@@ -1742,20 +1822,14 @@ void IndexBuildsCoordinator::_cleanUpSinglePhaseAfterFailure(
const IndexBuildOptions& indexBuildOptions,
const Status& status) {
if (status.isA<ErrorCategory::ShutdownError>()) {
- // Leave it as-if kill -9 happened. Startup recovery will rebuild the index.
- _indexBuildsManager.abortIndexBuildWithoutCleanup(
- opCtx, collection, replState->buildUUID, "shutting down");
- _indexBuildsManager.tearDownIndexBuild(
- opCtx, collection, replState->buildUUID, MultiIndexBlock::kNoopOnCleanUpFn);
+ _completeAbortForShutdown(opCtx, replState, collection);
return;
}
- // If the index build was not completed successfully, we'll need to acquire some locks to
- // clean it up.
- UninterruptibleLockGuard noInterrupt(opCtx->lockState());
-
- NamespaceString nss = collection->ns();
- Lock::DBLock dbLock(opCtx, nss.db(), MODE_IX);
+ // An external caller already cleaned up our state.
+ if (status == ErrorCodes::IndexBuildAborted) {
+ return;
+ }
if (indexBuildOptions.replSetAndNotPrimaryAtStart) {
// This build started and failed as a secondary. Single-phase index builds started on
@@ -1766,14 +1840,23 @@ void IndexBuildsCoordinator::_cleanUpSinglePhaseAfterFailure(
<< "; Database: " << replState->dbName));
}
- // Unlock the RSTL to avoid deadlocks with state transitions.
- unlockRSTLForIndexCleanup(opCtx);
- Lock::CollectionLock collLock(opCtx, nss, MODE_X);
+ // The index builder thread can abort on its own when it fails due to an indexing error or its
+ // deadline expires.
+ // The current operation's deadline may have expired, which would prevent us from taking
+ // locks. Use a new OperationContext to abort the index build.
+ runOnAlternateContext(
+ opCtx, "self-abort", [this, replState, status](OperationContext* abortCtx) {
+ ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(abortCtx->lockState());
+ Lock::DBLock dbLock(abortCtx, replState->dbName, MODE_IX);
+
+ // Unlock RSTL to avoid deadlocks with prepare conflicts and state transitions caused by
+ // taking a strong collection lock. See SERVER-42621.
+ unlockRSTL(abortCtx);
- // If we started the build as a primary and are now unable to accept writes, this build was
- // aborted due to a stepdown.
- _indexBuildsManager.tearDownIndexBuild(
- opCtx, collection, replState->buildUUID, MultiIndexBlock::kNoopOnCleanUpFn);
+ const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
+ Lock::CollectionLock collLock(abortCtx, dbAndUUID, MODE_X);
+ _completeSelfAbort(abortCtx, replState, status);
+ });
}
void IndexBuildsCoordinator::_cleanUpTwoPhaseAfterFailure(
@@ -1784,140 +1867,63 @@ void IndexBuildsCoordinator::_cleanUpTwoPhaseAfterFailure(
const Status& status) {
if (status.isA<ErrorCategory::ShutdownError>()) {
- // Promise should be set at least once before it's getting destroyed. Else it would
- // invariant.
- {
- stdx::unique_lock<Latch> lk(replState->mutex);
- if (!replState->waitForNextAction->getFuture().isReady()) {
- setSignalAndCancelVoteRequestCbkIfActive(
- lk, opCtx, replState, IndexBuildAction::kNoAction);
- }
- }
- // Leave it as-if kill -9 happened. Startup recovery will restart the index build.
- _indexBuildsManager.abortIndexBuildWithoutCleanup(
- opCtx, collection, replState->buildUUID, "shutting down");
- _indexBuildsManager.tearDownIndexBuild(
- opCtx, collection, replState->buildUUID, MultiIndexBlock::kNoopOnCleanUpFn);
+ _completeAbortForShutdown(opCtx, replState, collection);
return;
}
- // If the index build was not completed successfully, we'll need to acquire some locks to
- // clean it up.
- UninterruptibleLockGuard noInterrupt(opCtx->lockState());
-
- NamespaceString nss = collection->ns();
- Lock::DBLock dbLock(opCtx, nss.db(), MODE_IX);
+ // An external caller interrupted us and will be responsible for cleaning up our state.
+ if (status == ErrorCodes::IndexBuildAborted) {
+ return;
+ }
- auto replCoord = repl::ReplicationCoordinator::get(opCtx);
- if (replCoord->getSettings().usingReplSets() && !replCoord->canAcceptWritesFor(opCtx, nss)) {
- // We failed this index build as a secondary node.
-
- // Failed index builds should fatally assert on the secondary, except when the index build
- // was stopped due to an explicit abort oplog entry or rollback.
- if (status == ErrorCodes::IndexBuildAborted) {
- // On a secondary, we should be able to obtain the timestamp for cleaning up the index
- // build from the oplog entry unless the index build did not fail due to processing an
- // abortIndexBuild oplog entry. This is the case if we were aborted due to rollback.
- stdx::unique_lock<Latch> lk(replState->mutex);
- invariant(replState->indexBuildState.isAbortPrepared(),
- replState->buildUUID.toString());
- auto abortIndexBuildTimestamp = replState->indexBuildState.getTimestamp();
-
- // If we were aborted and no abort timestamp is set, then we should leave the index
- // build unfinished. This can happen during rollback because we are not primary and
- // cannot generate an optime to timestamp the index build abort. We rely on the
- // rollback process to correct this state.
- if (!abortIndexBuildTimestamp) {
- _indexBuildsManager.abortIndexBuildWithoutCleanup(
- opCtx, collection, replState->buildUUID, "no longer primary");
- _indexBuildsManager.tearDownIndexBuild(
- opCtx, collection, replState->buildUUID, MultiIndexBlock::kNoopOnCleanUpFn);
- return;
+ // The index builder thread can abort on its own when it fails due to an indexing error or its
+ // deadline expires.
+ // The current operation's deadline may have expired, which would prevent us from taking locks.
+ // Use a new OperationContext to abort the index build.
+ runOnAlternateContext(
+ opCtx, "self-abort", [this, replState, status](OperationContext* abortCtx) {
+ ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(abortCtx->lockState());
+
+ // Take RSTL (implicitly by DBLock) to observe and prevent replication state from
+ // changing.
+ Lock::DBLock dbLock(abortCtx, replState->dbName, MODE_IX);
+
+ // Index builds may not fail on secondaries. If a primary replicated an abortIndexBuild
+ // oplog entry, then this index build would have received an IndexBuildAborted error
+ // code.
+ const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
+ auto replCoord = repl::ReplicationCoordinator::get(abortCtx);
+ if (replCoord->getSettings().usingReplSets() &&
+ !replCoord->canAcceptWritesFor(abortCtx, dbAndUUID)) {
+ fassert(51101,
+ status.withContext(str::stream() << "Index build: " << replState->buildUUID
+ << "; Database: " << replState->dbName));
}
- // Unlock the RSTL to avoid deadlocks with state transitions. See SERVER-42824.
- unlockRSTLForIndexCleanup(opCtx);
- Lock::CollectionLock collLock(opCtx, nss, MODE_X);
-
- TimestampBlock tsBlock(opCtx, abortIndexBuildTimestamp.get());
- _indexBuildsManager.tearDownIndexBuild(
- opCtx, collection, replState->buildUUID, MultiIndexBlock::kNoopOnCleanUpFn);
- return;
- }
-
- fassert(51101,
- status.withContext(str::stream() << "Index build: " << replState->buildUUID
- << "; Database: " << replState->dbName));
- }
-
- // We are currently a primary node. Notify downstream nodes to abort their index builds with the
- // same build UUID.
- Lock::CollectionLock collLock(opCtx, nss, MODE_X);
- auto onCleanUpFn = [&] { onAbortIndexBuild(opCtx, nss, *replState, status); };
- _indexBuildsManager.tearDownIndexBuild(opCtx, collection, replState->buildUUID, onCleanUpFn);
- return;
+ Lock::CollectionLock collLock(abortCtx, dbAndUUID, MODE_X);
+ _completeSelfAbort(abortCtx, replState, status);
+ });
}
void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx,
std::shared_ptr<ReplIndexBuildState> replState,
const IndexBuildOptions& indexBuildOptions) {
- const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
-
// This Status stays unchanged unless we catch an exception in the following try-catch block.
auto status = Status::OK();
try {
- // Lock acquisition might throw, and we would still need to clean up the index build state,
- // so do it in the try-catch block
- AutoGetDb autoDb(opCtx, replState->dbName, MODE_IX);
-
- // Do not use AutoGetCollection since the lock will be reacquired in various modes
- // throughout the index build. Lock by UUID to protect against concurrent collection rename.
- boost::optional<Lock::CollectionLock> collLock;
- collLock.emplace(opCtx, dbAndUUID, MODE_X);
-
- // Two phase index builds and single-phase builds on secondaries can only be interrupted at
- // shutdown. For the duration of the runWithoutInterruptionExceptAtGlobalShutdown()
- // invocation, any kill status set by the killOp command will be ignored. After
- // runWithoutInterruptionExceptAtGlobalShutdown() returns, any call to checkForInterrupt()
- // will see the kill status and respond accordingly (checkForInterrupt() will throw an
- // exception while checkForInterruptNoAssert() returns an error Status).
- auto replCoord = repl::ReplicationCoordinator::get(opCtx);
- if (!replCoord->getSettings().usingReplSets()) {
- _buildIndex(opCtx, replState, indexBuildOptions, &collLock);
- } else if (IndexBuildProtocol::kTwoPhase == replState->protocol) {
- opCtx->runWithoutInterruptionExceptAtGlobalShutdown(
- [&, this] { _buildIndex(opCtx, replState, indexBuildOptions, &collLock); });
- } else {
- if (indexBuildOptions.replSetAndNotPrimaryAtStart) {
- // We need to drop the RSTL here, as we do not need synchronization with step up and
- // step down. Dropping the RSTL is important because otherwise if we held the RSTL
- // it would create deadlocks with prepared transactions on step up and step down. A
- // deadlock could result if the index build was attempting to acquire a Collection S
- // or X lock while a prepared transaction held a Collection IX lock, and a step down
- // was waiting to acquire the RSTL in mode X.
- // TODO(SERVER-44045): Revisit this logic for the non-two phase index build case.
- const bool unlocked = opCtx->lockState()->unlockRSTLforPrepare();
- invariant(unlocked);
- opCtx->runWithoutInterruptionExceptAtGlobalShutdown(
- [&, this] { _buildIndex(opCtx, replState, indexBuildOptions, &collLock); });
- } else {
- _buildIndex(opCtx, replState, indexBuildOptions, &collLock);
- }
- }
- // If _buildIndex returned normally, then we should have the collection X lock. It is not
- // required to safely access the collection, though, because an index build is registerd.
- auto collection =
- CollectionCatalog::get(opCtx).lookupCollectionByUUID(opCtx, replState->collectionUUID);
- invariant(collection);
- replState->stats.numIndexesAfter = getNumIndexesTotal(opCtx, collection);
+ _buildIndex(opCtx, replState, indexBuildOptions);
} catch (const DBException& ex) {
status = ex.toStatus();
}
+ if (status.isOK()) {
+ return;
+ }
+
// We do not hold a collection lock here, but we are protected against the collection being
- // dropped while the index build is still registered for the collection -- until
- // tearDownIndexBuild is called. The collection can be renamed, but it is OK for the name to
- // be stale just for logging purposes.
+ // dropped while the index build is still registered for the collection -- until abortIndexBuild
+ // is called. The collection can be renamed, but it is OK for the name to be stale just for
+ // logging purposes.
auto collection =
CollectionCatalog::get(opCtx).lookupCollectionByUUID(opCtx, replState->collectionUUID);
invariant(collection,
@@ -1925,25 +1931,6 @@ void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx,
<< " should exist because an index build is in progress: "
<< replState->buildUUID);
NamespaceString nss = collection->ns();
-
- if (status.isOK()) {
- _indexBuildsManager.tearDownIndexBuild(
- opCtx, collection, replState->buildUUID, MultiIndexBlock::kNoopOnCleanUpFn);
-
- LOGV2(20663,
- "Index build completed successfully: {replState_buildUUID}: {nss} ( "
- "{replState_collectionUUID} ). Index specs built: {replState_indexSpecs_size}. "
- "Indexes in catalog before build: {replState_stats_numIndexesBefore}. Indexes in "
- "catalog after build: {replState_stats_numIndexesAfter}",
- "replState_buildUUID"_attr = replState->buildUUID,
- "nss"_attr = nss,
- "replState_collectionUUID"_attr = replState->collectionUUID,
- "replState_indexSpecs_size"_attr = replState->indexSpecs.size(),
- "replState_stats_numIndexesBefore"_attr = replState->stats.numIndexesBefore,
- "replState_stats_numIndexesAfter"_attr = replState->stats.numIndexesAfter);
- return;
- }
-
logFailure(status, nss, replState);
if (IndexBuildProtocol::kSinglePhase == replState->protocol) {
@@ -1958,84 +1945,44 @@ void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx,
uassertStatusOK(status);
}
-void IndexBuildsCoordinator::_buildIndex(
- OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- const IndexBuildOptions& indexBuildOptions,
- boost::optional<Lock::CollectionLock>* exclusiveCollectionLock) {
-
- if (IndexBuildProtocol::kSinglePhase == replState->protocol) {
- _buildIndexSinglePhase(opCtx, replState, indexBuildOptions, exclusiveCollectionLock);
- return;
- }
-
- invariant(IndexBuildProtocol::kTwoPhase == replState->protocol,
- str::stream() << replState->buildUUID);
- _buildIndexTwoPhase(opCtx, replState, indexBuildOptions, exclusiveCollectionLock);
-}
-
-void IndexBuildsCoordinator::_buildIndexSinglePhase(
- OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- const IndexBuildOptions& indexBuildOptions,
- boost::optional<Lock::CollectionLock>* exclusiveCollectionLock) {
- _scanCollectionAndInsertKeysIntoSorter(opCtx, replState, exclusiveCollectionLock);
- _insertKeysFromSideTablesWithoutBlockingWrites(opCtx, replState);
- _insertKeysFromSideTablesBlockingWrites(opCtx, replState);
- _signalPrimaryForCommitReadiness(opCtx, replState);
- _waitForNextIndexBuildAction(opCtx, replState);
- _insertKeysFromSideTablesAndCommit(
- opCtx, replState, indexBuildOptions, exclusiveCollectionLock, {});
-}
-
-void IndexBuildsCoordinator::_buildIndexTwoPhase(
- OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- const IndexBuildOptions& indexBuildOptions,
- boost::optional<Lock::CollectionLock>* exclusiveCollectionLock) {
-
- _scanCollectionAndInsertKeysIntoSorter(opCtx, replState, exclusiveCollectionLock);
+void IndexBuildsCoordinator::_buildIndex(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ const IndexBuildOptions& indexBuildOptions) {
+ _scanCollectionAndInsertKeysIntoSorter(opCtx, replState);
_insertKeysFromSideTablesWithoutBlockingWrites(opCtx, replState);
- _insertKeysFromSideTablesBlockingWrites(opCtx, replState);
-
+ _insertKeysFromSideTablesBlockingWrites(opCtx, replState, indexBuildOptions);
_signalPrimaryForCommitReadiness(opCtx, replState);
auto commitIndexBuildTimestamp = _waitForNextIndexBuildAction(opCtx, replState);
-
+ invariant(commitIndexBuildTimestamp.isNull() ||
+ replState->protocol != IndexBuildProtocol::kSinglePhase,
+ str::stream() << "buildUUID: " << replState->buildUUID
+ << "commitTs: " << commitIndexBuildTimestamp.toString());
_insertKeysFromSideTablesAndCommit(
- opCtx, replState, indexBuildOptions, exclusiveCollectionLock, commitIndexBuildTimestamp);
+ opCtx, replState, indexBuildOptions, commitIndexBuildTimestamp);
}
void IndexBuildsCoordinator::_scanCollectionAndInsertKeysIntoSorter(
- OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- boost::optional<Lock::CollectionLock>* exclusiveCollectionLock) {
-
+ OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) {
+ // Collection scan and insert into index.
{
+ AutoGetDb autoDb(opCtx, replState->dbName, MODE_IX);
+ const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
+ Lock::CollectionLock collLock(opCtx, dbAndUUID, MODE_IX);
+
+ // Rebuilding system indexes during startup using the IndexBuildsCoordinator is done by all
+ // storage engines if they're missing.
+ invariant(_indexBuildsManager.isBackgroundBuilding(replState->buildUUID));
+
auto nss = CollectionCatalog::get(opCtx).lookupNSSByUUID(opCtx, replState->collectionUUID);
invariant(nss);
- invariant(opCtx->lockState()->isDbLockedForMode(replState->dbName, MODE_IX));
- invariant(opCtx->lockState()->isCollectionLockedForMode(*nss, MODE_X));
// Set up the thread's currentOp information to display createIndexes cmd information.
updateCurOpOpDescription(opCtx, *nss, replState->indexSpecs);
- }
-
- // Rebuilding system indexes during startup using the IndexBuildsCoordinator is done by all
- // storage engines if they're missing.
- invariant(_indexBuildsManager.isBackgroundBuilding(replState->buildUUID));
- // Index builds can safely ignore prepare conflicts and perform writes. On secondaries, prepare
- // operations wait for index builds to complete.
- opCtx->recoveryUnit()->abandonSnapshot();
- opCtx->recoveryUnit()->setPrepareConflictBehavior(
- PrepareConflictBehavior::kIgnoreConflictsAllowWrites);
-
- // Collection scan and insert into index, followed by a drain of writes received in the
- // background.
- exclusiveCollectionLock->reset();
- {
- const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
- Lock::CollectionLock collLock(opCtx, dbAndUUID, MODE_IS);
+ // Index builds can safely ignore prepare conflicts and perform writes. On secondaries,
+ // prepare operations wait for index builds to complete.
+ opCtx->recoveryUnit()->setPrepareConflictBehavior(
+ PrepareConflictBehavior::kIgnoreConflictsAllowWrites);
// The collection object should always exist while an index build is registered.
auto collection =
@@ -2060,8 +2007,8 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesWithoutBlockingWrites(
// Perform the first drain while holding an intent lock.
const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
{
- opCtx->recoveryUnit()->abandonSnapshot();
- Lock::CollectionLock collLock(opCtx, dbAndUUID, MODE_IS);
+ AutoGetDb autoDb(opCtx, replState->dbName, MODE_IX);
+ Lock::CollectionLock collLock(opCtx, dbAndUUID, MODE_IX);
uassertStatusOK(_indexBuildsManager.drainBackgroundWrites(
opCtx,
@@ -2076,11 +2023,20 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesWithoutBlockingWrites(
}
}
void IndexBuildsCoordinator::_insertKeysFromSideTablesBlockingWrites(
- OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) {
+ OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ const IndexBuildOptions& indexBuildOptions) {
const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
// Perform the second drain while stopping writes on the collection.
{
- opCtx->recoveryUnit()->abandonSnapshot();
+ AutoGetDb autoDb(opCtx, replState->dbName, MODE_IX);
+
+ // Unlock RSTL to avoid deadlocks with prepare conflicts and state transitions. See
+ // SERVER-42621.
+ if (IndexBuildProtocol::kSinglePhase == replState->protocol &&
+ indexBuildOptions.replSetAndNotPrimaryAtStart) {
+ unlockRSTL(opCtx);
+ }
Lock::CollectionLock collLock(opCtx, dbAndUUID, MODE_S);
uassertStatusOK(_indexBuildsManager.drainBackgroundWrites(
@@ -2104,12 +2060,19 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesAndCommit(
OperationContext* opCtx,
std::shared_ptr<ReplIndexBuildState> replState,
const IndexBuildOptions& indexBuildOptions,
- boost::optional<Lock::CollectionLock>* exclusiveCollectionLock,
const Timestamp& commitIndexBuildTimestamp) {
- // Need to return the collection lock back to exclusive mode, to complete the index build.
- opCtx->recoveryUnit()->abandonSnapshot();
+ AutoGetDb autoDb(opCtx, replState->dbName, MODE_IX);
+
+ // Unlock RSTL to avoid deadlocks with prepare conflicts and state transitions caused by taking
+ // a strong collection lock. See SERVER-42621.
+ if (IndexBuildProtocol::kSinglePhase == replState->protocol &&
+ indexBuildOptions.replSetAndNotPrimaryAtStart) {
+ unlockRSTL(opCtx);
+ }
+
+ // Need to return the collection lock back to exclusive mode to complete the index build.
const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
- exclusiveCollectionLock->emplace(opCtx, dbAndUUID, MODE_X);
+ Lock::CollectionLock collLock(opCtx, dbAndUUID, MODE_X);
// The collection object should always exist while an index build is registered.
auto collection =
@@ -2159,14 +2122,7 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesAndCommit(
// If two phase index builds is enabled, index build will be coordinated using
// startIndexBuild and commitIndexBuild oplog entries.
- auto onCommitFn = [&] {
- if (IndexBuildProtocol::kTwoPhase != replState->protocol) {
- return;
- }
-
- onCommitIndexBuild(
- opCtx, collection->ns(), *replState, indexBuildOptions.replSetAndNotPrimaryAtStart);
- };
+ auto onCommitFn = [&] { onCommitIndexBuild(opCtx, collection->ns(), *replState); };
auto onCreateEachFn = [&](const BSONObj& spec) {
if (IndexBuildProtocol::kTwoPhase == replState->protocol) {
@@ -2198,7 +2154,15 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesAndCommit(
TimestampBlock tsBlock(opCtx, commitIndexBuildTimestamp);
uassertStatusOK(_indexBuildsManager.commitIndexBuild(
opCtx, collection, collection->ns(), replState->buildUUID, onCreateEachFn, onCommitFn));
-
+ replState->stats.numIndexesAfter = getNumIndexesTotal(opCtx, collection);
+ LOGV2(20663,
+ "Index build completed successfully",
+ "buildUUID"_attr = replState->buildUUID,
+ "collection"_attr = collection->ns(),
+ "collectionUUID"_attr = replState->collectionUUID,
+ "indexesBuilt"_attr = replState->indexSpecs.size(),
+ "numIndexesBefore"_attr = replState->stats.numIndexesBefore,
+ "numIndexesAfter"_attr = replState->stats.numIndexesAfter);
return;
}
@@ -2269,12 +2233,7 @@ StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::_runIndexReb
// Index build is registered in manager regardless of IndexBuildsManager::setUpIndexBuild()
// result.
- if (status.isOK()) {
- // A successful index build means that all the requested indexes are now part of the
- // catalog.
- _indexBuildsManager.tearDownIndexBuild(
- opCtx, collection, buildUUID, MultiIndexBlock::kNoopOnCleanUpFn);
- } else {
+ if (!status.isOK()) {
// An index build failure during recovery is fatal.
logFailure(status, nss, replState);
fassertNoTrace(51076, status);
diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h
index c7524cec2c5..b12a1f0041d 100644
--- a/src/mongo/db/index_builds_coordinator.h
+++ b/src/mongo/db/index_builds_coordinator.h
@@ -203,74 +203,54 @@ public:
* must continue to operate on the collection by UUID to protect against rename collection. The
* provided 'reason' will be used in the error message that the index builders return to their
* callers.
+ *
+ * Does not stop new index builds from starting. Caller must make that guarantee.
+ *
+ * Does not require holding locks.
+ *
+ * Returns the UUIDs of the index builds that were aborted or are already in the process of
+ * being aborted by another caller.
*/
- void abortCollectionIndexBuilds(OperationContext* opCtx,
- const UUID& collectionUUID,
- const std::string& reason);
-
- /**
- * Signals all of the index builds on the specified collection to abort and returns the build
- * UUIDs of the index builds that will be aborted. Must identify the collection with a UUID. The
- * provided 'reason' will be used in the error message that the index builders return to their
- * callers.
- */
- std::vector<UUID> abortCollectionIndexBuildsNoWait(OperationContext* opCtx,
- const UUID& collectionUUID,
- const std::string& reason);
+ std::vector<UUID> abortCollectionIndexBuilds(OperationContext* opCx,
+ const NamespaceString collectionNss,
+ const UUID collectionUUID,
+ const std::string& reason);
/**
* Signals all of the index builds on the specified 'db' to abort and then waits until the index
* builds are no longer running. The provided 'reason' will be used in the error message that
* the index builders return to their callers.
+ *
+ * Does not require holding locks.
+ *
+ * Does not stop new index builds from starting. Caller must make that guarantee.
*/
void abortDatabaseIndexBuilds(OperationContext* opCtx,
StringData db,
const std::string& reason);
/**
- * Signals all of the index builds on the specified database to abort. The provided 'reason'
- * will be used in the error message that the index builders return to their callers.
- */
- void abortDatabaseIndexBuildsNoWait(OperationContext* opCtx,
- StringData db,
- const std::string& reason);
-
- /**
- * Aborts an index build by index build UUID. This gets called when the index build on primary
- * failed due to interruption or replica set state change.
- * It's a wrapper function to abortIndexBuildByBuildUUIDNoWait().
- */
- void abortIndexBuildOnError(OperationContext* opCtx, const UUID& buildUUID, Status abortStatus);
-
- /**
* Aborts an index build by index build UUID. Returns when the index build thread exits.
+ *
+ * Returns true if the index build was aborted or the index build is already in the process of
+ * being aborted.
+ * Returns false if the index build does not exist or the index build is already in the process
+ * of committing and cannot be aborted.
*/
- void abortIndexBuildByBuildUUID(OperationContext* opCtx,
+ bool abortIndexBuildByBuildUUID(OperationContext* opCtx,
const UUID& buildUUID,
IndexBuildAction signalAction,
- boost::optional<Timestamp> abortTimestamp = boost::none,
- boost::optional<std::string> reason = boost::none);
-
- /**
- * Aborts an index build by index build UUID. Does not wait for the index build thread to
- * exit. Returns true if an index build was aborted.
- */
- bool abortIndexBuildByBuildUUIDNoWait(OperationContext* opCtx,
- const UUID& buildUUID,
- IndexBuildAction signalAction,
- boost::optional<Timestamp> abortTimestamp = boost::none,
- boost::optional<std::string> reason = boost::none);
+ std::string reason);
/**
* Aborts an index build by its index name(s). This will only abort in-progress index builds if
* all of the indexes are specified that a single builder is building together. When an
* appropriate builder exists, this returns the build UUID of the index builder that will be
* aborted.
*/
- boost::optional<UUID> abortIndexBuildByIndexNamesNoWait(
- OperationContext* opCtx,
- const UUID& collectionUUID,
- const std::vector<std::string>& indexNames,
- boost::optional<std::string> reason = boost::none);
+ boost::optional<UUID> abortIndexBuildByIndexNames(OperationContext* opCtx,
+ const UUID& collectionUUID,
+ const std::vector<std::string>& indexNames,
+ std::string reason);
/**
* Returns true if there is an index builder building the given index names on a collection.
@@ -568,40 +548,39 @@ protected:
const Status& status);
/**
- * Modularizes the _indexBuildsManager calls part of _runIndexBuildInner. Throws on error.
+ * Attempt to abort an index build. Returns a flag indicating how the caller should proceed.
*/
- void _buildIndex(OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- const IndexBuildOptions& indexBuildOptions,
- boost::optional<Lock::CollectionLock>* collLock);
-
+ enum class TryAbortResult { kRetry, kAlreadyAborted, kNotAborted, kContinueAbort };
+ TryAbortResult _tryAbort(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ IndexBuildAction signalAction,
+ std::string reason);
/**
- * Builds the indexes single-phased.
- * This method matches pre-4.4 behavior for a background index build driven by a single
- * createIndexes oplog entry.
+ * Performs last steps of aborting an index build.
*/
- void _buildIndexSinglePhase(OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- const IndexBuildOptions& indexBuildOptions,
- boost::optional<Lock::CollectionLock>* collLock);
+ void _completeAbort(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ IndexBuildAction signalAction,
+ Status reason);
+ void _completeSelfAbort(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ Status reason);
+ void _completeAbortForShutdown(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ Collection* collection);
/**
- * Builds the indexes two-phased.
- * The beginning and completion of a index build is driven by the startIndexBuild and
- * commitIndexBuild oplog entries, respectively.
+ * Modularizes the _indexBuildsManager calls part of _runIndexBuildInner. Throws on error.
*/
- void _buildIndexTwoPhase(OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- const IndexBuildOptions& indexBuildOptions,
- boost::optional<Lock::CollectionLock>* collLock);
+ void _buildIndex(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ const IndexBuildOptions& indexBuildOptions);
/**
* First phase is the collection scan and insertion of the keys into the sorter.
*/
- void _scanCollectionAndInsertKeysIntoSorter(
- OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- boost::optional<Lock::CollectionLock>* exclusiveCollectionLock);
+ void _scanCollectionAndInsertKeysIntoSorter(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState);
/**
* Second phase is extracting the sorted keys and writing them into the new index table.
@@ -609,7 +588,8 @@ protected:
void _insertKeysFromSideTablesWithoutBlockingWrites(
OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState);
void _insertKeysFromSideTablesBlockingWrites(OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState);
+ std::shared_ptr<ReplIndexBuildState> replState,
+ const IndexBuildOptions& indexBuildOptions);
/**
* Reads the commit ready members list for index build UUID in 'replState' from
@@ -622,6 +602,13 @@ protected:
OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) = 0;
/**
+ * Attempt to signal the index build to commit and advance the index build to the kPrepareCommit
+ * state.
+ * Returns true if successful and false if the attempt was unnecessful and the caller should
+ * retry.
+ */
+ bool _tryCommit(OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState);
+ /**
* Skips the voting process and directly signal primary to commit index build if
* commit quorum is not enabled.
*/
@@ -673,12 +660,10 @@ protected:
* index, which sets the ready flag to true, to the catalog; it is not used for the catch-up
* writes during the final drain phase.
*/
- void _insertKeysFromSideTablesAndCommit(
- OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- const IndexBuildOptions& indexBuildOptions,
- boost::optional<Lock::CollectionLock>* exclusiveCollectionLock,
- const Timestamp& commitIndexBuildTimestamp);
+ void _insertKeysFromSideTablesAndCommit(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ const IndexBuildOptions& indexBuildOptions,
+ const Timestamp& commitIndexBuildTimestamp);
/**
* Runs the index build.
@@ -712,33 +697,9 @@ protected:
std::vector<std::shared_ptr<ReplIndexBuildState>> _filterIndexBuilds_inlock(
WithLock lk, IndexBuildFilterFn indexBuildFilter) const;
- /**
- * Helper for 'abortCollectionIndexBuilds' and 'abortCollectionIndexBuildsNoWait'. Returns the
- * UUIDs of the aborted index builders
- */
- std::vector<UUID> _abortCollectionIndexBuilds(stdx::unique_lock<Latch>& lk,
- OperationContext* opCtx,
- const UUID& collectionUUID,
- const std::string& reason,
- bool shouldWait);
-
- void _awaitNoIndexBuildInProgressForCollection(stdx::unique_lock<Latch>& lk,
- OperationContext* opCtx,
- const UUID& collectionUUID);
-
- /**
- * Helper for 'abortDatabaseIndexBuilds' and 'abortDatabaseIndexBuildsNoWait'.
- */
- void _abortDatabaseIndexBuilds(stdx::unique_lock<Latch>& lk,
- OperationContext* opCtx,
- const StringData& db,
- const std::string& reason,
- bool shouldWait);
-
void _awaitNoBgOpInProgForDb(stdx::unique_lock<Latch>& lk,
OperationContext* opCtx,
StringData db);
-
// Protects the below state.
mutable Mutex _mutex = MONGO_MAKE_LATCH("IndexBuildsCoordinator::_mutex");
diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp
index 97f8064c4b6..189e0397f29 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.cpp
+++ b/src/mongo/db/index_builds_coordinator_mongod.cpp
@@ -61,7 +61,6 @@ using namespace indexbuildentryhelpers;
namespace {
MONGO_FAIL_POINT_DEFINE(hangBeforeInitializingIndexBuild);
-MONGO_FAIL_POINT_DEFINE(hangAfterInitializingIndexBuild);
const StringData kMaxNumActiveUserIndexBuildsServerParameterName = "maxNumActiveUserIndexBuilds"_sd;
@@ -305,10 +304,6 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx,
// Signal that the index build started successfully.
startPromise.setWith([] {});
- while (MONGO_unlikely(hangAfterInitializingIndexBuild.shouldFail())) {
- sleepmillis(100);
- }
-
// Runs the remainder of the index build. Sets the promise result and cleans up the index
// build.
_runIndexBuild(opCtx.get(), buildUUID, indexBuildOptions);
@@ -316,10 +311,14 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx,
// Do not exit with an incomplete future.
invariant(replState->sharedPromise.getFuture().isReady());
- // Logs the index build statistics if it took longer than the server parameter `slowMs` to
- // complete.
- CurOp::get(opCtx.get())
- ->completeAndLogOperation(opCtx.get(), MONGO_LOGV2_DEFAULT_COMPONENT);
+ try {
+ // Logs the index build statistics if it took longer than the server parameter `slowMs`
+ // to complete.
+ CurOp::get(opCtx.get())
+ ->completeAndLogOperation(opCtx.get(), MONGO_LOGV2_DEFAULT_COMPONENT);
+ } catch (const DBException& e) {
+ LOGV2(4656002, "unable to log operation", "reason"_attr = e);
+ }
});
// Waits until the index build has either been started or failed to start.
@@ -499,14 +498,13 @@ void IndexBuildsCoordinatorMongod::_signalPrimaryForCommitReadiness(
return;
}
- // Yield locks and storage engine resources before blocking.
- opCtx->recoveryUnit()->abandonSnapshot();
- Lock::TempRelease release(opCtx->lockState());
- invariant(!opCtx->lockState()->isRSTLLocked());
+ // No locks should be held.
+ invariant(!opCtx->lockState()->isLocked());
Backoff exponentialBackoff(Seconds(1), Seconds(2));
- auto onRemoteCmdScheduled = [&](executor::TaskExecutor::CallbackHandle handle) {
+ auto onRemoteCmdScheduled = [replState,
+ replCoord](executor::TaskExecutor::CallbackHandle handle) {
stdx::unique_lock<Latch> lk(replState->mutex);
// We have already received commit or abort signal, So skip voting.
if (replState->waitForNextAction->getFuture().isReady()) {
@@ -517,12 +515,12 @@ void IndexBuildsCoordinatorMongod::_signalPrimaryForCommitReadiness(
}
};
- auto onRemoteCmdComplete = [&](executor::TaskExecutor::CallbackHandle) {
+ auto onRemoteCmdComplete = [replState](executor::TaskExecutor::CallbackHandle) {
stdx::unique_lock<Latch> lk(replState->mutex);
replState->voteCmdCbkHandle = executor::TaskExecutor::CallbackHandle();
};
- auto needToVote = [&]() -> bool {
+ auto needToVote = [replState]() -> bool {
stdx::unique_lock<Latch> lk(replState->mutex);
return !replState->waitForNextAction->getFuture().isReady() ? true : false;
};
@@ -597,10 +595,6 @@ IndexBuildAction IndexBuildsCoordinatorMongod::_drainSideWritesUntilNextActionIs
// Waits until the promise is fulfilled or the deadline expires.
IndexBuildAction nextAction;
auto waitUntilNextActionIsReady = [&]() {
- // Don't perform a blocking wait while holding locks or storage engine resources.
- opCtx->recoveryUnit()->abandonSnapshot();
- Lock::TempRelease release(opCtx->lockState());
-
auto deadline = Date_t::now() + Milliseconds(1000);
auto timeoutError = opCtx->getTimeoutError();
@@ -634,9 +628,13 @@ Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction(
"buildUUID"_attr = replState->buildUUID);
while (true) {
- // Future wait can be interrupted. This function will yield locks while waiting for the
- // future to be fulfilled.
+ // Future wait should hold no locks.
+ invariant(!opCtx->lockState()->isLocked(),
+ str::stream() << "holding locks while waiting for commit or abort: "
+ << replState->buildUUID);
+ // Future wait can be interrupted.
const auto nextAction = _drainSideWritesUntilNextActionIsAvailable(opCtx, replState);
+
LOGV2(3856204,
"Index build received signal for build uuid: {buildUUID} , action: {action}",
"buildUUID"_attr = replState->buildUUID,
@@ -644,9 +642,7 @@ Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction(
bool needsToRetryWait = false;
- // Ensure RSTL is acquired before checking replication state. This is only necessary for
- // single-phase builds on secondaries. Everywhere else, the RSTL is already held and this is
- // should never block.
+ // Reacquire RSTL lock to check replication state.
repl::ReplicationStateTransitionLockGuard rstl(opCtx, MODE_IX);
const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
@@ -682,13 +678,15 @@ Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction(
// Sanity check
// This signal can be received during primary (drain phase), secondary,
// startup( startup recovery) and startup2 (initial sync).
- invariant(!isMaster && replState->indexBuildState.isAbortPrepared(),
+ invariant(!isMaster, str::stream() << "Index build: " << replState->buildUUID);
+ invariant(replState->indexBuildState.isAborted(),
str::stream()
<< "Index build: " << replState->buildUUID
<< ", index build state: " << replState->indexBuildState.toString());
invariant(replState->indexBuildState.getTimestamp() &&
replState->indexBuildState.getAbortReason(),
replState->buildUUID.toString());
+ // The calling thread will interrupt our OperationContext and we will exit.
LOGV2(3856206,
"Aborting index build",
"buildUUID"_attr = replState->buildUUID,
@@ -699,33 +697,20 @@ Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction(
case IndexBuildAction::kRollbackAbort:
invariant(replState->protocol == IndexBuildProtocol::kTwoPhase);
invariant(replCoord->getMemberState().rollback());
-
- uassertStatusOK(Status(
- ErrorCodes::IndexBuildAborted,
- str::stream() << "Aborting index build, index build uuid:"
- << replState->buildUUID << " , abort reason:"
- << replState->indexBuildState.getAbortReason().get_value_or("")));
+ // The calling thread will interrupt our OperationContext and we will exit.
break;
case IndexBuildAction::kPrimaryAbort:
- // There are chances when the index build got aborted, it only existed in the
- // coordinator, So, we missed marking the index build aborted on manager. So, it's
- // important, we exit from here if we are still primary. Otherwise, the index build
- // gets committed, though our index build was marked aborted.
-
- // Single-phase builds do not replicate abort oplog entries. We do not need to be
- // primary to abort the index build, and we must continue aborting even in the event
- // of a state transition because this build will not receive another signal.
- if (isMaster || IndexBuildProtocol::kSinglePhase == replState->protocol) {
- uassertStatusOK(Status(
- ErrorCodes::IndexBuildAborted,
- str::stream()
- << "Index build aborted for index build: " << replState->buildUUID
- << " , abort reason:"
- << replState->indexBuildState.getAbortReason().get_value_or("")));
- }
- // Intentionally continue to next case. If we are no longer primary while processing
- // kPrimaryAbort, fall back to the kCommitQuorumSatisfied case and reset our
- // 'waitForNextAction'.
+ // The thread aborting a two-phase index build must hold the RSTL so that the
+ // replication state does not change. They will interrupt our OperationContext and
+ // we will exit. Single-phase builds do not replicate abort oplog entries. We do
+ // not need to be primary to abort the index build, and we must continue aborting
+ // even in the event of a state transition because this build will not receive
+ // another signal.
+ invariant(isMaster || IndexBuildProtocol::kSinglePhase == replState->protocol,
+ str::stream()
+ << "isMaster: " << isMaster << ", singlePhase: "
+ << (IndexBuildProtocol::kSinglePhase == replState->protocol));
+ break;
case IndexBuildAction::kCommitQuorumSatisfied:
if (!isMaster) {
// Reset the promise as the node has stepped down,
diff --git a/src/mongo/db/index_builds_coordinator_mongod_test.cpp b/src/mongo/db/index_builds_coordinator_mongod_test.cpp
index 9535b0887ea..c2da0c3f263 100644
--- a/src/mongo/db/index_builds_coordinator_mongod_test.cpp
+++ b/src/mongo/db/index_builds_coordinator_mongod_test.cpp
@@ -89,7 +89,6 @@ void IndexBuildsCoordinatorMongodTest::setUp() {
}
void IndexBuildsCoordinatorMongodTest::tearDown() {
- _indexBuildsCoord->verifyNoIndexBuilds_forTestOnly();
_indexBuildsCoord->shutdown(operationContext());
_indexBuildsCoord.reset();
// All databases are dropped during tear down.
diff --git a/src/mongo/db/repair_database_and_check_version.cpp b/src/mongo/db/repair_database_and_check_version.cpp
index 20e9ec50172..d66a8699481 100644
--- a/src/mongo/db/repair_database_and_check_version.cpp
+++ b/src/mongo/db/repair_database_and_check_version.cpp
@@ -162,8 +162,8 @@ bool checkIdIndexExists(OperationContext* opCtx, RecordId catalogId) {
Status buildMissingIdIndex(OperationContext* opCtx, Collection* collection) {
MultiIndexBlock indexer;
- ON_BLOCK_EXIT(
- [&] { indexer.cleanUpAfterBuild(opCtx, collection, MultiIndexBlock::kNoopOnCleanUpFn); });
+ auto abortOnExit = makeGuard(
+ [&] { indexer.abortIndexBuild(opCtx, collection, MultiIndexBlock::kNoopOnCleanUpFn); });
const auto indexCatalog = collection->getIndexCatalog();
const auto idIndexSpec = indexCatalog->getDefaultIdIndexSpec();
@@ -187,6 +187,7 @@ Status buildMissingIdIndex(OperationContext* opCtx, Collection* collection) {
status = indexer.commit(
opCtx, collection, MultiIndexBlock::kNoopOnCreateEachFn, MultiIndexBlock::kNoopOnCommitFn);
wuow.commit();
+ abortOnExit.dismiss();
return status;
}
diff --git a/src/mongo/db/repl/collection_bulk_loader_impl.cpp b/src/mongo/db/repl/collection_bulk_loader_impl.cpp
index 3a537c96b0e..fdc32b1b32d 100644
--- a/src/mongo/db/repl/collection_bulk_loader_impl.cpp
+++ b/src/mongo/db/repl/collection_bulk_loader_impl.cpp
@@ -320,7 +320,11 @@ Status CollectionBulkLoaderImpl::commit() {
"namespace"_attr = _nss.ns(),
"stats"_attr = _stats.toString());
- _releaseResources();
+ // Clean up here so we do not try to abort the index builds when cleaning up in
+ // _releaseResources.
+ _idIndexBlock.reset();
+ _secondaryIndexesBlock.reset();
+ _autoColl.reset();
return Status::OK();
});
}
@@ -328,13 +332,13 @@ Status CollectionBulkLoaderImpl::commit() {
void CollectionBulkLoaderImpl::_releaseResources() {
invariant(&cc() == _opCtx->getClient());
if (_secondaryIndexesBlock) {
- _secondaryIndexesBlock->cleanUpAfterBuild(
+ _secondaryIndexesBlock->abortIndexBuild(
_opCtx.get(), _collection, MultiIndexBlock::kNoopOnCleanUpFn);
_secondaryIndexesBlock.reset();
}
if (_idIndexBlock) {
- _idIndexBlock->cleanUpAfterBuild(
+ _idIndexBlock->abortIndexBuild(
_opCtx.get(), _collection, MultiIndexBlock::kNoopOnCleanUpFn);
_idIndexBlock.reset();
}
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index b2d74114922..5b4cb82da23 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -2342,10 +2342,21 @@ BSONObj ReplicationCoordinatorImpl::runCmdOnPrimaryAndAwaitResponse(
uassertStatusOK(scheduleResult.getStatus());
CallbackHandle cbkHandle = scheduleResult.getValue();
- onRemoteCmdScheduled(cbkHandle);
-
- // Wait for the response in an interruptible mode.
- _replExecutor->wait(cbkHandle, opCtx);
+ try {
+ onRemoteCmdScheduled(cbkHandle);
+
+ // Wait for the response in an interruptible mode.
+ _replExecutor->wait(cbkHandle, opCtx);
+ } catch (const DBException&) {
+ // If waiting for the response is interrupted, then we still have a callback out and
+ // registered with the TaskExecutor to run when the response finally does come back. Since
+ // the callback references local state, cbkResponse, it would be invalid for the callback to
+ // run after leaving the this function. Therefore, we cancel the callback and wait
+ // uninterruptably for the callback to be run.
+ _replExecutor->cancel(cbkHandle);
+ _replExecutor->wait(cbkHandle);
+ throw;
+ }
onRemoteCmdComplete(cbkHandle);
uassertStatusOK(cbkResponse.status);
diff --git a/src/mongo/db/repl/rs_rollback_test.cpp b/src/mongo/db/repl/rs_rollback_test.cpp
index 2b3fdc3e828..8c9cc3d89e4 100644
--- a/src/mongo/db/repl/rs_rollback_test.cpp
+++ b/src/mongo/db/repl/rs_rollback_test.cpp
@@ -1052,8 +1052,10 @@ TEST_F(RSRollbackTest, RollbackCommitIndexBuild) {
ASSERT_EQUALS(1, numIndexesInProgress(_opCtx.get(), nss, coll));
// Kill the index build we just restarted so the fixture can shut down.
- IndexBuildsCoordinator::get(_opCtx.get())
- ->abortIndexBuildByBuildUUID(_opCtx.get(), buildUUID, IndexBuildAction::kPrimaryAbort);
+ ASSERT_OK(_coordinator->setFollowerMode(MemberState::RS_ROLLBACK));
+ ASSERT(IndexBuildsCoordinator::get(_opCtx.get())
+ ->abortIndexBuildByBuildUUID(
+ _opCtx.get(), buildUUID, IndexBuildAction::kRollbackAbort, ""));
}
TEST_F(RSRollbackTest, RollbackAbortIndexBuild) {
@@ -1102,8 +1104,10 @@ TEST_F(RSRollbackTest, RollbackAbortIndexBuild) {
ASSERT_EQUALS(1, numIndexesInProgress(_opCtx.get(), nss, coll));
// Kill the index build we just restarted so the fixture can shut down.
- IndexBuildsCoordinator::get(_opCtx.get())
- ->abortIndexBuildByBuildUUID(_opCtx.get(), buildUUID, IndexBuildAction::kPrimaryAbort);
+ ASSERT_OK(_coordinator->setFollowerMode(MemberState::RS_ROLLBACK));
+ ASSERT(IndexBuildsCoordinator::get(_opCtx.get())
+ ->abortIndexBuildByBuildUUID(
+ _opCtx.get(), buildUUID, IndexBuildAction::kRollbackAbort, ""));
}
TEST_F(RSRollbackTest, AbortedIndexBuildsAreRestarted) {
@@ -1157,8 +1161,10 @@ TEST_F(RSRollbackTest, AbortedIndexBuildsAreRestarted) {
ASSERT_EQUALS(1, numIndexesInProgress(_opCtx.get(), nss, coll));
// Kill the index build we just restarted so the fixture can shut down.
- IndexBuildsCoordinator::get(_opCtx.get())
- ->abortIndexBuildByBuildUUID(_opCtx.get(), buildUUID, IndexBuildAction::kPrimaryAbort);
+ ASSERT_OK(_coordinator->setFollowerMode(MemberState::RS_ROLLBACK));
+ ASSERT(IndexBuildsCoordinator::get(_opCtx.get())
+ ->abortIndexBuildByBuildUUID(
+ _opCtx.get(), buildUUID, IndexBuildAction::kRollbackAbort, ""));
}
TEST_F(RSRollbackTest, AbortedIndexBuildsAreNotRestartedWhenStartIsRolledBack) {
diff --git a/src/mongo/db/repl_index_build_state.h b/src/mongo/db/repl_index_build_state.h
index ef83160f64d..7b1fd07bf7f 100644
--- a/src/mongo/db/repl_index_build_state.h
+++ b/src/mongo/db/repl_index_build_state.h
@@ -98,32 +98,41 @@ enum class IndexBuildAction {
* Represents the index build state.
* Valid State transition for primary:
* ===================================
- * kNone ---> kAborted.
- * kNone ---> kPrepareAbort ---> kAborted.
+ * kSetup -> kInProgress
+ * kInProgress -> kAborted // An index build failed due to an indexing error or was aborted
+ * externally.
+ * kInProgress -> kCommitted // An index build was committed
+
*
* Valid State transition for secondaries:
* =======================================
- * kNone ---> kPrepareCommit ---> kCommitted.
- * kNone ---> kPrepareAbort ---> kAborted.
+ * kSetup -> kInProgress
+ * kInProgress -> kAborted // An index build received an abort oplog entry
+ * kInProgress -> kPrepareCommit -> kCommitted // An index build received a commit oplog entry
*/
class IndexBuildState {
public:
enum StateFlag {
- kNone = 1 << 0,
+ kSetup = 1 << 0,
+ /**
+ * Once an index build is in-progress it is eligible for being aborted by an external
+ * thread. The kSetup state prevents other threads from observing an inconsistent state of
+ * a build until it transitions to kInProgress.
+ */
+ kInProgress = 1 << 1,
/**
- * Below state indicates that indexBuildscoordinator thread was externally asked either to
- * commit or abort. Oplog applier, rollback, createIndexes command and drop
- * databases/collections/indexes cmds can change this state to kPrepareCommit or
- * kPrepareAbort.
+ * Below state indicates that IndexBuildsCoordinator thread was externally asked to commit.
+ * For kPrepareCommit, this can come from an oplog entry.
*/
- kPrepareCommit = 1 << 1,
- kPrepareAbort = 1 << 2,
+ kPrepareCommit = 1 << 2,
/**
- * Below state indicates that index build was successfully able to commit or abort. And,
- * it's yet to generate the commitIndexBuild or abortIndexBuild oplog entry respectively.
+ * Below state indicates that index build was successfully able to commit or abort. For
+ * kCommitted, the state is set immediately before it commits the index build. For
+ * kAborted, this state is set after the build is cleaned up and the abort oplog entry is
+ * replicated.
*/
kCommitted = 1 << 3,
- kAborted = 1 << 4
+ kAborted = 1 << 4,
};
using StateSet = int;
@@ -132,8 +141,9 @@ public:
}
bool checkIfValidTransition(StateFlag newState) {
- if (_state == kNone || (_state == kPrepareCommit && newState == kCommitted) ||
- (_state == kPrepareAbort && newState == kAborted)) {
+ if ((_state == kSetup && newState == kInProgress) ||
+ (_state == kInProgress && newState != kSetup) ||
+ (_state == kPrepareCommit && newState == kCommitted)) {
return true;
}
return false;
@@ -143,8 +153,6 @@ public:
bool skipCheck,
boost::optional<Timestamp> timestamp = boost::none,
boost::optional<std::string> abortReason = boost::none) {
- // TODO SERVER-46560: Should remove the hard-coded value skipCheck 'true'.
- skipCheck = true;
if (!skipCheck) {
invariant(checkIfValidTransition(state),
str::stream() << "current state :" << toString(_state)
@@ -154,7 +162,7 @@ public:
if (timestamp)
_timestamp = timestamp;
if (abortReason) {
- invariant(_state == kPrepareAbort);
+ invariant(_state == kAborted);
_abortReason = abortReason;
}
}
@@ -167,14 +175,14 @@ public:
return _state == kCommitted;
}
- bool isAbortPrepared() const {
- return _state == kPrepareAbort;
- }
-
bool isAborted() const {
return _state == kAborted;
}
+ bool isSettingUp() const {
+ return _state == kSetup;
+ }
+
boost::optional<Timestamp> getTimestamp() const {
return _timestamp;
}
@@ -189,14 +197,14 @@ public:
static std::string toString(StateFlag state) {
switch (state) {
- case kNone:
- return "in-progress";
+ case kSetup:
+ return "Setting up";
+ case kInProgress:
+ return "In progress";
case kPrepareCommit:
return "Prepare commit";
case kCommitted:
return "Committed";
- case kPrepareAbort:
- return "Prepare abort";
case kAborted:
return "Aborted";
}
@@ -205,7 +213,7 @@ public:
private:
// Represents the index build state.
- StateFlag _state = kNone;
+ StateFlag _state = kSetup;
// Timestamp will be populated only if the node is secondary.
// It represents the commit or abort timestamp communicated via
// commitIndexBuild and abortIndexBuild oplog entry.
@@ -263,6 +271,10 @@ struct ReplIndexBuildState {
// Protects the state below.
mutable Mutex mutex = MONGO_MAKE_LATCH("ReplIndexBuildState::mutex");
+ // The OperationId of the index build. This allows external callers to interrupt the index build
+ // thread.
+ OperationId opId;
+
/*
* Readers who read the commit quorum value from "config.system.indexBuilds" collection
* to decide if the commit quorum got satisfied for an index build, should take this lock in
diff --git a/src/mongo/dbtests/dbtests.cpp b/src/mongo/dbtests/dbtests.cpp
index 56612d5c9d5..09d93729855 100644
--- a/src/mongo/dbtests/dbtests.cpp
+++ b/src/mongo/dbtests/dbtests.cpp
@@ -108,8 +108,8 @@ Status createIndexFromSpec(OperationContext* opCtx, StringData ns, const BSONObj
wunit.commit();
}
MultiIndexBlock indexer;
- ON_BLOCK_EXIT(
- [&] { indexer.cleanUpAfterBuild(opCtx, coll, MultiIndexBlock::kNoopOnCleanUpFn); });
+ auto abortOnExit =
+ makeGuard([&] { indexer.abortIndexBuild(opCtx, coll, MultiIndexBlock::kNoopOnCleanUpFn); });
Status status = indexer
.init(opCtx,
coll,
@@ -140,6 +140,7 @@ Status createIndexFromSpec(OperationContext* opCtx, StringData ns, const BSONObj
opCtx, coll, MultiIndexBlock::kNoopOnCreateEachFn, MultiIndexBlock::kNoopOnCommitFn));
ASSERT_OK(opCtx->recoveryUnit()->setTimestamp(Timestamp(1, 1)));
wunit.commit();
+ abortOnExit.dismiss();
return Status::OK();
}
diff --git a/src/mongo/dbtests/indexupdatetests.cpp b/src/mongo/dbtests/indexupdatetests.cpp
index f474310f530..dc6f23cc99f 100644
--- a/src/mongo/dbtests/indexupdatetests.cpp
+++ b/src/mongo/dbtests/indexupdatetests.cpp
@@ -91,8 +91,8 @@ protected:
try {
MultiIndexBlock indexer;
- ON_BLOCK_EXIT([&] {
- indexer.cleanUpAfterBuild(_opCtx, collection(), MultiIndexBlock::kNoopOnCleanUpFn);
+ auto abortOnExit = makeGuard([&] {
+ indexer.abortIndexBuild(_opCtx, collection(), MultiIndexBlock::kNoopOnCleanUpFn);
});
uassertStatusOK(
@@ -104,6 +104,7 @@ protected:
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
+ abortOnExit.dismiss();
} catch (const DBException& e) {
if (ErrorCodes::isInterruption(e.code()))
return true;
@@ -156,8 +157,9 @@ public:
<< static_cast<int>(kIndexVersion) << "unique" << true
<< "background" << background);
- ON_BLOCK_EXIT(
- [&] { indexer.cleanUpAfterBuild(_opCtx, coll, MultiIndexBlock::kNoopOnCleanUpFn); });
+ auto abortOnExit = makeGuard([&] {
+ indexer.abortIndexBuild(_opCtx, collection(), MultiIndexBlock::kNoopOnCleanUpFn);
+ });
ASSERT_OK(indexer.init(_opCtx, coll, spec, MultiIndexBlock::kNoopOnInitFn).getStatus());
ASSERT_OK(indexer.insertAllDocumentsInCollection(_opCtx, coll));
@@ -167,6 +169,7 @@ public:
ASSERT_OK(indexer.commit(
_opCtx, coll, MultiIndexBlock::kNoopOnCreateEachFn, MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
+ abortOnExit.dismiss();
}
};
@@ -205,8 +208,9 @@ public:
<< "background" << background);
collLk.emplace(_opCtx, _nss, LockMode::MODE_X);
- ON_BLOCK_EXIT(
- [&] { indexer.cleanUpAfterBuild(_opCtx, coll, MultiIndexBlock::kNoopOnCleanUpFn); });
+ auto abortOnExit = makeGuard([&] {
+ indexer.abortIndexBuild(_opCtx, collection(), MultiIndexBlock::kNoopOnCleanUpFn);
+ });
ASSERT_OK(indexer.init(_opCtx, coll, spec, MultiIndexBlock::kNoopOnInitFn).getStatus());
@@ -320,9 +324,8 @@ Status IndexBuildBase::createIndex(const BSONObj& indexSpec) {
Lock::CollectionLock collLk(_opCtx, _nss, MODE_X);
MultiIndexBlock indexer;
- ON_BLOCK_EXIT([&] {
- indexer.cleanUpAfterBuild(_opCtx, collection(), MultiIndexBlock::kNoopOnCleanUpFn);
- });
+ auto abortOnExit = makeGuard(
+ [&] { indexer.abortIndexBuild(_opCtx, collection(), MultiIndexBlock::kNoopOnCleanUpFn); });
Status status =
indexer.init(_opCtx, collection(), indexSpec, MultiIndexBlock::kNoopOnInitFn).getStatus();
if (status == ErrorCodes::IndexAlreadyExists) {
@@ -345,6 +348,7 @@ Status IndexBuildBase::createIndex(const BSONObj& indexSpec) {
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
+ abortOnExit.dismiss();
return Status::OK();
}
diff --git a/src/mongo/dbtests/querytests.cpp b/src/mongo/dbtests/querytests.cpp
index 6f7d96ef33f..d017dcd2311 100644
--- a/src/mongo/dbtests/querytests.cpp
+++ b/src/mongo/dbtests/querytests.cpp
@@ -104,8 +104,8 @@ protected:
auto specObj = builder.obj();
MultiIndexBlock indexer;
- ON_BLOCK_EXIT([&] {
- indexer.cleanUpAfterBuild(&_opCtx, _collection, MultiIndexBlock::kNoopOnCleanUpFn);
+ auto abortOnExit = makeGuard([&] {
+ indexer.abortIndexBuild(&_opCtx, _collection, MultiIndexBlock::kNoopOnCleanUpFn);
});
{
WriteUnitOfWork wunit(&_opCtx);
@@ -127,6 +127,7 @@ protected:
MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
}
+ abortOnExit.dismiss();
}
void insert(const char* s) {
diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp
index 67be7eb3eb8..58925890921 100644
--- a/src/mongo/dbtests/storage_timestamp_tests.cpp
+++ b/src/mongo/dbtests/storage_timestamp_tests.cpp
@@ -265,8 +265,8 @@ public:
// Build an index.
MultiIndexBlock indexer;
- ON_BLOCK_EXIT(
- [&] { indexer.cleanUpAfterBuild(_opCtx, coll, MultiIndexBlock::kNoopOnCleanUpFn); });
+ auto abortOnExit = makeGuard(
+ [&] { indexer.abortIndexBuild(_opCtx, coll, MultiIndexBlock::kNoopOnCleanUpFn); });
BSONObj indexInfoObj;
{
@@ -297,6 +297,7 @@ public:
// MultiIndexBlock.
wuow.commit();
}
+ abortOnExit.dismiss();
}
std::int32_t itCount(Collection* coll) {
@@ -1872,8 +1873,8 @@ public:
// Build an index on `{a: 1}`. This index will be multikey.
MultiIndexBlock indexer;
- ON_BLOCK_EXIT([&] {
- indexer.cleanUpAfterBuild(
+ auto abortOnExit = makeGuard([&] {
+ indexer.abortIndexBuild(
_opCtx, autoColl.getCollection(), MultiIndexBlock::kNoopOnCleanUpFn);
});
const LogicalTime beforeIndexBuild = _clock->reserveTicks(2);
@@ -1930,6 +1931,7 @@ public:
MultiIndexBlock::kNoopOnCommitFn));
wuow.commit();
}
+ abortOnExit.dismiss();
const Timestamp afterIndexBuild = _clock->reserveTicks(1).asTimestamp();
@@ -1984,8 +1986,8 @@ public:
// Build an index on `{a: 1}`.
MultiIndexBlock indexer;
- ON_BLOCK_EXIT([&] {
- indexer.cleanUpAfterBuild(
+ auto abortOnExit = makeGuard([&] {
+ indexer.abortIndexBuild(
_opCtx, autoColl.getCollection(), MultiIndexBlock::kNoopOnCleanUpFn);
});
const LogicalTime beforeIndexBuild = _clock->reserveTicks(2);
@@ -2116,6 +2118,7 @@ public:
MultiIndexBlock::kNoopOnCommitFn));
wuow.commit();
}
+ abortOnExit.dismiss();
}
};
@@ -2653,8 +2656,8 @@ public:
const IndexCatalogEntry* buildingIndex = nullptr;
MultiIndexBlock indexer;
- ON_BLOCK_EXIT([&] {
- indexer.cleanUpAfterBuild(_opCtx, collection, MultiIndexBlock::kNoopOnCleanUpFn);
+ auto abortOnExit = makeGuard([&] {
+ indexer.abortIndexBuild(_opCtx, collection, MultiIndexBlock::kNoopOnCleanUpFn);
});
// Provide a build UUID, indicating that this is a two-phase index build.
@@ -2767,6 +2770,7 @@ public:
MultiIndexBlock::kNoopOnCommitFn));
wuow.commit();
}
+ abortOnExit.dismiss();
}
};
diff --git a/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp b/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp
index a0b079255d8..679a39ed254 100644
--- a/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp
+++ b/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp
@@ -200,8 +200,8 @@ protected:
auto coll = autoColl.getCollection();
MultiIndexBlock indexer;
- ON_BLOCK_EXIT(
- [&] { indexer.cleanUpAfterBuild(opCtx(), coll, MultiIndexBlock::kNoopOnCleanUpFn); });
+ auto abortOnExit = makeGuard(
+ [&] { indexer.abortIndexBuild(opCtx(), coll, MultiIndexBlock::kNoopOnCleanUpFn); });
// Initialize the index builder and add all documents currently in the collection.
ASSERT_OK(
@@ -212,6 +212,7 @@ protected:
WriteUnitOfWork wunit(opCtx());
ASSERT_OK(indexer.commit(
opCtx(), coll, MultiIndexBlock::kNoopOnCreateEachFn, MultiIndexBlock::kNoopOnCommitFn));
+ abortOnExit.dismiss();
wunit.commit();
}