summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLouis Williams <louis.williams@mongodb.com>2020-04-13 13:12:31 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-04-20 19:58:33 +0000
commit4d1d90ebd4809c252a103399a0d5a9db06c9317c (patch)
tree6df400a174767c6fe946d8a293eb25e095fc7502
parentdea8b034f14f20737df28be440305d20b64ed354 (diff)
downloadmongo-4d1d90ebd4809c252a103399a0d5a9db06c9317c.tar.gz
SERVER-46989 Index builds should hold RSTL to prevent replication state changes after deciding to commit or abort.
This also eliminates deadlocks caused by an index builder holding an RSTL IX lock while waiting for a collection X lock or holding a collection X lock and waiting for an RSTL IX lock. (cherry picked from commit fca4421ffab68ae0b62c36a7505e6045428ce2e5)
-rw-r--r--jstests/noPassthrough/index_secondary_commit_after_scan_error.js10
-rw-r--r--jstests/noPassthrough/index_stepdown_abort_prepare_conflict.js113
-rw-r--r--jstests/noPassthrough/index_stepdown_commit_prepare_conflict.js112
-rw-r--r--jstests/noPassthrough/index_stepdown_failover.js2
-rw-r--r--jstests/noPassthrough/index_stepdown_prepare_conflict.js101
-rw-r--r--src/mongo/db/catalog/multi_index_block.cpp2
-rw-r--r--src/mongo/db/index_builds_coordinator.cpp259
-rw-r--r--src/mongo/db/index_builds_coordinator.h44
-rw-r--r--src/mongo/db/index_builds_coordinator_mongod.cpp129
-rw-r--r--src/mongo/db/index_builds_coordinator_mongod.h5
-rw-r--r--src/mongo/embedded/index_builds_coordinator_embedded.cpp8
-rw-r--r--src/mongo/embedded/index_builds_coordinator_embedded.h5
12 files changed, 603 insertions, 187 deletions
diff --git a/jstests/noPassthrough/index_secondary_commit_after_scan_error.js b/jstests/noPassthrough/index_secondary_commit_after_scan_error.js
index d502e297fa1..5fe718004f3 100644
--- a/jstests/noPassthrough/index_secondary_commit_after_scan_error.js
+++ b/jstests/noPassthrough/index_secondary_commit_after_scan_error.js
@@ -1,7 +1,7 @@
/**
- * When an index build on a secondary fails during the first two phases, we expect to receive a
- * abortIndexBuild oplog entry from the primary eventually. If we get a commitIndexBuild oplog entry
- * instead, the secondary should crash.
+ * When an index build on a secondary fails, we expect to receive an abortIndexBuild oplog entry
+ * from the primary eventually. If we get a commitIndexBuild oplog entry instead, the secondary
+ * should crash.
* @tags: [
* requires_replication,
* ]
@@ -49,6 +49,8 @@ const secondary = rst.getSecondary();
const secondaryDB = secondary.getDB(testDB.getName());
assert.commandWorked(secondaryDB.adminCommand(
{configureFailPoint: 'hangAfterStartingIndexBuildUnlocked', mode: 'alwaysOn'}));
+assert.commandWorked(
+ secondaryDB.adminCommand({configureFailPoint: 'failIndexBuildOnCommit', mode: 'alwaysOn'}));
const createIdx = IndexBuildTest.startIndexBuild(primary, coll.getFullName(), {a: 1});
@@ -86,7 +88,7 @@ assert.soon(function() {
// Secondary should crash on receiving the unexpected commitIndexBuild oplog entry.
const fassertProcessExitCode = _isWindows() ? MongoRunner.EXIT_ABRUPT : MongoRunner.EXIT_ABORT;
assert.eq(fassertProcessExitCode, res.exitCode);
-assert(rawMongoProgramOutput().match('Fatal assertion.*51101.*OperationFailed: Index build:'),
+assert(rawMongoProgramOutput().match('Fatal assertion.*4698902'),
'Index build should have aborted secondary due to unexpected commitIndexBuild oplog entry.');
// Check indexes on primary.
diff --git a/jstests/noPassthrough/index_stepdown_abort_prepare_conflict.js b/jstests/noPassthrough/index_stepdown_abort_prepare_conflict.js
new file mode 100644
index 00000000000..b79f3ed2868
--- /dev/null
+++ b/jstests/noPassthrough/index_stepdown_abort_prepare_conflict.js
@@ -0,0 +1,113 @@
+/*
+ * Tests that we don't hit a 3-way deadlock between an index builder, prepared transaction, and
+ * step down.
+ *
+ * This tests the following scenario:
+ * 1) Starts an index build.
+ * 2) Prepares a transaction which holds the collection lock in IX mode.
+ * 3) Waits for the index build to attempt to acquire the collection lock in X mode to commit, but
+ * blocks behind the prepared transaction due to a collection lock conflict.
+ * 4) Steps down the primary, which enqueues the RSTL in X mode.
+ * 5) Ensures the index build has released its RSTL lock before taking the X lock, and does not
+ * block stepDown. Since commit must acquire the RSTL to write its oplog entry, ensures that the
+ * index build is able to retry after failing once due to a stepDown.
+ * 6) Steps up a new primary. Ensure that the blocked index build on the secondary does not prevent
+ * step-up from occurring.
+ *
+ * @tags: [
+ * uses_transactions,
+ * uses_prepare_transaction,
+ * ]
+ */
+load('jstests/noPassthrough/libs/index_build.js');
+load("jstests/replsets/rslib.js");
+load("jstests/core/txns/libs/prepare_helpers.js");
+
+(function() {
+
+"use strict";
+
+const dbName = "test";
+const collName = "coll";
+
+const rst = new ReplSetTest({nodes: 2});
+rst.startSet();
+rst.initiate();
+
+const primary = rst.getPrimary();
+const primaryDB = primary.getDB(dbName);
+const primaryColl = primaryDB[collName];
+
+if (!IndexBuildTest.supportsTwoPhaseIndexBuild(primary)) {
+ jsTestLog("Skipping test because two-phase index builds are not enabled");
+ rst.stopSet();
+ return;
+}
+
+// This will cause the index build to fail with a CannotIndexParallelArrays error.
+assert.commandWorked(
+ primaryColl.insert({_id: 1, x: [1, 2], y: [1, 2]}, {"writeConcern": {"w": 1}}));
+
+// Enable the fail point that makes the hybrid index build hang.
+const failPoint = "hangAfterIndexBuildSecondDrain";
+let res =
+ assert.commandWorked(primary.adminCommand({configureFailPoint: failPoint, mode: "alwaysOn"}));
+let timesEntered = res.count;
+
+const indexName = 'myidx';
+const indexThread = IndexBuildTest.startIndexBuild(primary,
+ primaryColl.getFullName(),
+ {x: 1, y: 1},
+ {name: indexName},
+ ErrorCodes.InterruptedDueToReplStateChange);
+
+jsTestLog("Waiting for index build to hit failpoint");
+assert.commandWorked(primary.adminCommand({
+ waitForFailPoint: failPoint,
+ timesEntered: timesEntered + 1,
+ maxTimeMS: kDefaultWaitForFailPointTimeout
+}));
+
+jsTestLog("Start txn");
+const session = primary.startSession();
+const sessionDB = session.getDatabase(dbName);
+const sessionColl = sessionDB.getCollection(collName);
+session.startTransaction();
+assert.commandWorked(sessionColl.insert({x: 1}, {$set: {y: 1}}));
+
+jsTestLog("Prepare txn");
+PrepareHelpers.prepareTransaction(session);
+
+// Unblock index build, which will cause it to hang acquiring the X lock to commit.
+assert.commandWorked(primary.adminCommand({configureFailPoint: failPoint, mode: "off"}));
+
+let newPrimary = rst.getSecondary();
+
+jsTestLog("Make primary step down");
+const stepDownThread = startParallelShell(() => {
+ assert.commandWorked(db.adminCommand({"replSetStepDown": 60}));
+}, primary.port);
+
+// Wait for threads to join.
+indexThread();
+stepDownThread();
+
+waitForState(primary, ReplSetTest.State.SECONDARY);
+assert.neq(primary.port, newPrimary.port);
+
+jsTestLog("Stepping-up new primary");
+rst.stepUp(newPrimary);
+waitForState(newPrimary, ReplSetTest.State.PRIMARY);
+
+jsTestLog("Aborting transaction and waiting for index build to finish");
+const newSession = new _DelegatingDriverSession(newPrimary, session);
+assert.commandWorked(newSession.abortTransaction_forTesting());
+
+IndexBuildTest.waitForIndexBuildToStop(newPrimary.getDB(dbName), collName, indexName);
+IndexBuildTest.waitForIndexBuildToStop(primary.getDB(dbName), collName, indexName);
+
+IndexBuildTest.assertIndexes(newPrimary.getDB(dbName).getCollection(collName), 1, ["_id_"], []);
+IndexBuildTest.assertIndexes(primaryColl, 1, ["_id_"], []);
+
+rst.stopSet();
+})(); \ No newline at end of file
diff --git a/jstests/noPassthrough/index_stepdown_commit_prepare_conflict.js b/jstests/noPassthrough/index_stepdown_commit_prepare_conflict.js
new file mode 100644
index 00000000000..6291c5ce361
--- /dev/null
+++ b/jstests/noPassthrough/index_stepdown_commit_prepare_conflict.js
@@ -0,0 +1,112 @@
+/*
+ * Tests that we don't hit a 3-way deadlock between an index builder, prepared transaction, and
+ * step down.
+ *
+ * This tests the following scenario:
+ * 1) Starts an index build.
+ * 2) Prepares a transaction which holds the collection lock in IX mode.
+ * 3) Waits for the index build to attempt to acquire the collection lock in X mode to commit, but
+ * blocks behind the prepared transaction due to a collection lock conflict.
+ * 4) Steps down the primary, which enqueues the RSTL in X mode.
+ * 5) Ensures the index build has released its RSTL lock before taking the X lock, and does not
+ * block stepDown. Since commit must acquire the RSTL to write its oplog entry, ensures that the
+ * index build is able to retry after failing once due to a stepDown.
+ * 6) Steps up a new primary. Ensure that the blocked index build on the secondary does not prevent
+ * step-up from occurring.
+ *
+ * @tags: [
+ * uses_transactions,
+ * uses_prepare_transaction,
+ * ]
+ */
+load('jstests/noPassthrough/libs/index_build.js');
+load("jstests/replsets/rslib.js");
+load("jstests/core/txns/libs/prepare_helpers.js");
+
+(function() {
+
+"use strict";
+
+const dbName = "test";
+const collName = "coll";
+
+const rst = new ReplSetTest({nodes: 2});
+rst.startSet();
+rst.initiate();
+
+const primary = rst.getPrimary();
+const primaryDB = primary.getDB(dbName);
+const primaryColl = primaryDB[collName];
+
+assert.commandWorked(primaryColl.insert({_id: 1, x: 1}));
+
+// Clear the log.
+assert.commandWorked(primary.adminCommand({clearLog: 'global'}));
+
+// Enable the fail point that makes the hybrid index build hang.
+const failPoint = "hangAfterIndexBuildSecondDrain";
+let res =
+ assert.commandWorked(primary.adminCommand({configureFailPoint: failPoint, mode: "alwaysOn"}));
+let timesEntered = res.count;
+
+const indexThread = IndexBuildTest.startIndexBuild(
+ primary, primaryColl.getFullName(), {x: 1}, {}, ErrorCodes.InterruptedDueToReplStateChange);
+
+jsTestLog("Waiting for index build to hit failpoint");
+assert.commandWorked(primary.adminCommand({
+ waitForFailPoint: failPoint,
+ timesEntered: timesEntered + 1,
+ maxTimeMS: kDefaultWaitForFailPointTimeout
+}));
+
+jsTestLog("Start txn");
+const session = primary.startSession();
+const sessionDB = session.getDatabase(dbName);
+const sessionColl = sessionDB.getCollection(collName);
+session.startTransaction();
+assert.commandWorked(sessionColl.insert({x: 1}, {$set: {y: 1}}));
+
+jsTestLog("Prepare txn");
+PrepareHelpers.prepareTransaction(session);
+
+// Unblock index build, which will cause it to hang acquiring the X lock to commit.
+assert.commandWorked(primary.adminCommand({configureFailPoint: failPoint, mode: "off"}));
+
+let newPrimary = rst.getSecondary();
+
+jsTestLog("Make primary step down");
+const stepDownThread = startParallelShell(() => {
+ assert.commandWorked(db.adminCommand({"replSetStepDown": 60}));
+}, primary.port);
+
+// Wait for threads to join.
+indexThread();
+stepDownThread();
+
+waitForState(primary, ReplSetTest.State.SECONDARY);
+assert.neq(primary.port, newPrimary.port);
+
+jsTestLog("Stepping-up new primary");
+// assert.commandWorked(primary.adminCommand({replSetFreeze: 0}));
+rst.stepUp(newPrimary);
+waitForState(newPrimary, ReplSetTest.State.PRIMARY);
+
+jsTestLog("Aborting transaction and waiting for index build to finish");
+const newSession = new _DelegatingDriverSession(newPrimary, session);
+assert.commandWorked(newSession.abortTransaction_forTesting());
+
+IndexBuildTest.waitForIndexBuildToStop(newPrimary.getDB(dbName), collName, "x_1");
+IndexBuildTest.waitForIndexBuildToStop(primary.getDB(dbName), collName, "x_1");
+
+if (IndexBuildTest.supportsTwoPhaseIndexBuild(primary)) {
+ IndexBuildTest.assertIndexes(
+ newPrimary.getDB(dbName).getCollection(collName), 2, ["_id_", "x_1"]);
+ IndexBuildTest.assertIndexes(primaryColl, 2, ["_id_", "x_1"]);
+} else {
+ // A single-phase index build will get aborted from the state transition.
+ IndexBuildTest.assertIndexes(newPrimary.getDB(dbName).getCollection(collName), 1, ["_id_"]);
+ IndexBuildTest.assertIndexes(primaryColl, 1, ["_id_"]);
+}
+
+rst.stopSet();
+})(); \ No newline at end of file
diff --git a/jstests/noPassthrough/index_stepdown_failover.js b/jstests/noPassthrough/index_stepdown_failover.js
index 28f0f8e2a2d..ed133fc7561 100644
--- a/jstests/noPassthrough/index_stepdown_failover.js
+++ b/jstests/noPassthrough/index_stepdown_failover.js
@@ -56,7 +56,7 @@ checkLog.containsJson(primary, 20441);
// This index build will not complete because it has to wait for a commitIndexBuild oplog
// entry.
IndexBuildTest.resumeIndexBuilds(primary);
-checkLog.contains(primary, 'Index build waiting for next action before completing final phase: ');
+checkLog.containsJson(primary, 3856203);
// Step up the new primary.
rst.stepUp(newPrimary);
diff --git a/jstests/noPassthrough/index_stepdown_prepare_conflict.js b/jstests/noPassthrough/index_stepdown_prepare_conflict.js
new file mode 100644
index 00000000000..3fff120e9f9
--- /dev/null
+++ b/jstests/noPassthrough/index_stepdown_prepare_conflict.js
@@ -0,0 +1,101 @@
+/*
+ * Tests that we don't hit a 3-way deadlock between an index builder, prepared transaction, and
+ * step down.
+ *
+ * This tests the following scenario:
+ * 1) Starts an index build.
+ * 2) Prepares a transaction which holds the collection lock in IX mode.
+ * 3) Waits for the index build to attempt to acquire the collection lock in S mode to stop writes,
+ * but blocks behind the prepared transaction due to a collection lock conflict.
+ * 4) Steps down the primary, which enqueues the RSTL in X mode.
+ * 5) Ensures the index build has released its RSTL lock before taking the MODE_S lock, and does not
+ * block stepDown.
+ *
+ * @tags: [
+ * uses_transactions,
+ * uses_prepare_transaction,
+ * ]
+ */
+load('jstests/noPassthrough/libs/index_build.js');
+load("jstests/replsets/rslib.js");
+load("jstests/core/txns/libs/prepare_helpers.js");
+
+(function() {
+
+"use strict";
+
+const dbName = "test";
+const collName = "coll";
+
+const rst = new ReplSetTest({nodes: 1});
+rst.startSet();
+rst.initiate();
+
+const primary = rst.getPrimary();
+
+const primaryDB = primary.getDB(dbName);
+const primaryColl = primaryDB[collName];
+
+jsTestLog("Do a document write");
+assert.commandWorked(primaryColl.insert({_id: 1, x: 1}, {"writeConcern": {"w": 1}}));
+
+// Clear the log.
+assert.commandWorked(primary.adminCommand({clearLog: 'global'}));
+
+// Enable the fail point that makes the index build hang before taking a MODE_S lock to block
+// writes.
+const failPoint = "hangAfterIndexBuildDumpsInsertsFromBulk";
+let res =
+ assert.commandWorked(primary.adminCommand({configureFailPoint: failPoint, mode: "alwaysOn"}));
+let timesEntered = res.count;
+
+const indexThread = IndexBuildTest.startIndexBuild(
+ primary, primaryColl.getFullName(), {x: 1}, {}, ErrorCodes.InterruptedDueToReplStateChange);
+
+jsTestLog("Waiting for index build to hit failpoint");
+assert.commandWorked(primary.adminCommand({
+ waitForFailPoint: failPoint,
+ timesEntered: timesEntered + 1,
+ maxTimeMS: kDefaultWaitForFailPointTimeout
+}));
+
+jsTestLog("Start txn");
+const session = primary.startSession();
+const sessionDB = session.getDatabase(dbName);
+const sessionColl = sessionDB.getCollection(collName);
+session.startTransaction();
+assert.commandWorked(sessionColl.insert({x: 1}, {$set: {y: 1}}));
+
+jsTestLog("Prepare txn");
+PrepareHelpers.prepareTransaction(session);
+
+// Unblock the index build, which will cause it to hang acquiring the collection S lock.
+assert.commandWorked(primary.adminCommand({configureFailPoint: failPoint, mode: "off"}));
+
+const stepDownThread = startParallelShell(() => {
+ jsTestLog("Make primary step down");
+ assert.commandWorked(db.adminCommand({"replSetStepDown": 60 * 60, "force": true}));
+}, primary.port);
+
+jsTestLog("Waiting for stepdown to complete");
+indexThread();
+stepDownThread();
+
+waitForState(primary, ReplSetTest.State.SECONDARY);
+// Allow the primary to be re-elected, and wait for it.
+assert.commandWorked(primary.adminCommand({replSetFreeze: 0}));
+rst.getPrimary();
+
+jsTestLog("Aborting transaction and waiting for index build to finish");
+assert.commandWorked(session.abortTransaction_forTesting());
+IndexBuildTest.waitForIndexBuildToStop(primaryDB, primaryColl.getFullName(), "x_1");
+
+// A single-phase index build will get aborted from the state transition.
+if (IndexBuildTest.supportsTwoPhaseIndexBuild(primary)) {
+ IndexBuildTest.assertIndexes(primaryColl, 2, ["_id_", "x_1"], []);
+} else {
+ IndexBuildTest.assertIndexes(primaryColl, 1, ["_id_"], []);
+}
+
+rst.stopSet();
+})();
diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp
index dd3ab0a5957..6a9c297d127 100644
--- a/src/mongo/db/catalog/multi_index_block.cpp
+++ b/src/mongo/db/catalog/multi_index_block.cpp
@@ -544,8 +544,6 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(OperationContext* opCtx,
if (isBackgroundBuilding()) {
opCtx->lockState()->restoreLockState(opCtx, lockInfo);
opCtx->recoveryUnit()->abandonSnapshot();
- return Status(ErrorCodes::OperationFailed,
- "background index build aborted due to failpoint");
} else {
invariant(!"the hangAfterStartingIndexBuildUnlocked failpoint can't be turned off for foreground index builds");
}
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index cfee3a82016..ef33409a41c 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -41,6 +41,7 @@
#include "mongo/db/catalog/uncommitted_collections.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/concurrency/locker.h"
+#include "mongo/db/concurrency/replication_state_transition_lock_guard.h"
#include "mongo/db/concurrency/write_conflict_exception.h"
#include "mongo/db/curop.h"
#include "mongo/db/db_raii.h"
@@ -69,6 +70,7 @@ MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildFirstDrain);
MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildSecondDrain);
MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildDumpsInsertsFromBulk);
MONGO_FAIL_POINT_DEFINE(hangAfterInitializingIndexBuild);
+MONGO_FAIL_POINT_DEFINE(failIndexBuildOnCommit);
namespace {
@@ -228,7 +230,7 @@ void onAbortIndexBuild(OperationContext* opCtx,
* acquire the RSTL in mode X.
*/
void unlockRSTL(OperationContext* opCtx) {
- opCtx->lockState()->unlockRSTLforPrepare();
+ invariant(opCtx->lockState()->unlockRSTLforPrepare());
invariant(!opCtx->lockState()->isRSTLLocked());
}
@@ -876,7 +878,10 @@ IndexBuildsCoordinator::TryAbortResult IndexBuildsCoordinator::_tryAbort(
return TryAbortResult::kRetry;
}
- LOGV2(4656003, "aborting index build", "buildUUID"_attr = replState->buildUUID);
+ LOGV2(4656003,
+ "Aborting index build",
+ "buildUUID"_attr = replState->buildUUID,
+ "reason"_attr = reason);
// Set the state on replState. Once set, the calling thread must complete the abort process.
auto abortTimestamp =
@@ -885,6 +890,21 @@ IndexBuildsCoordinator::TryAbortResult IndexBuildsCoordinator::_tryAbort(
auto skipCheck = shouldSkipIndexBuildStateTransitionCheck(opCtx, replState->protocol);
replState->indexBuildState.setState(
IndexBuildState::kAborted, skipCheck, abortTimestamp, reason);
+
+ // Interrupt the builder thread so that it can no longer acquire locks or make progress.
+ auto serviceContext = opCtx->getServiceContext();
+ auto target = serviceContext->getLockedClient(replState->opId);
+ if (!target) {
+ LOGV2_FATAL(4656001,
+ "Index builder thread did not appear to be running while aborting",
+ "buildUUID"_attr = replState->buildUUID,
+ "opId"_attr = replState->opId);
+ }
+ serviceContext->killOperation(
+ target, target->getOperationContext(), ErrorCodes::IndexBuildAborted);
+
+ // Set the signal. Because we have already interrupted the index build, it will not observe
+ // this signal. We do this so that other observers do not also try to abort the index build.
setSignalAndCancelVoteRequestCbkIfActive(lk, opCtx, replState, signalAction);
}
return TryAbortResult::kContinueAbort;
@@ -978,22 +998,6 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx,
"reason"_attr = e.toString());
}
- {
- stdx::unique_lock<Latch> lk(replState->mutex);
-
- // Interrupt the builder thread so that it can no longer acquire locks or make progress.
- auto serviceContext = opCtx->getServiceContext();
- auto target = serviceContext->getLockedClient(replState->opId);
- if (!target) {
- LOGV2_FATAL(4656001,
- "Index builder thread did not appear to be running while aborting",
- "buildUUID"_attr = replState->buildUUID,
- "opId"_attr = replState->opId);
- }
- serviceContext->killOperation(
- target, target->getOperationContext(), ErrorCodes::IndexBuildAborted);
- }
-
// Wait for the builder thread to receive the signal before unregistering. Don't release the
// Collection lock until this happens, guaranteeing the thread has stopped making progress
// and has exited.
@@ -1021,10 +1025,19 @@ void IndexBuildsCoordinator::_completeAbort(OperationContext* opCtx,
auto coll =
CollectionCatalog::get(opCtx).lookupCollectionByUUID(opCtx, replState->collectionUUID);
auto nss = coll->ns();
-
+ auto replCoord = repl::ReplicationCoordinator::get(opCtx);
switch (signalAction) {
// Replicates an abortIndexBuild oplog entry and deletes the index from the durable catalog.
case IndexBuildAction::kPrimaryAbort: {
+ // Single-phase builds are aborted on step-down, so it's possible to no longer be
+ // primary after we process an abort. We must continue with the abort, but since
+ // single-phase builds do not replicate abort oplog entries, this write will use a ghost
+ // timestamp.
+ bool isPrimaryOrSinglePhase = replState->protocol == IndexBuildProtocol::kSinglePhase ||
+ replCoord->canAcceptWritesFor(opCtx, nss);
+ invariant(isPrimaryOrSinglePhase,
+ str::stream() << "singlePhase: "
+ << (IndexBuildProtocol::kSinglePhase == replState->protocol));
auto onCleanUpFn = [&] { onAbortIndexBuild(opCtx, coll->ns(), *replState, reason); };
_indexBuildsManager.abortIndexBuild(opCtx, coll, replState->buildUUID, onCleanUpFn);
break;
@@ -1032,6 +1045,24 @@ void IndexBuildsCoordinator::_completeAbort(OperationContext* opCtx,
// Deletes the index from the durable catalog.
case IndexBuildAction::kOplogAbort: {
invariant(IndexBuildProtocol::kTwoPhase == replState->protocol);
+ // This signal can be received during primary (drain phase), secondary,
+ // startup (startup recovery) and startup2 (initial sync).
+ bool isMaster = replCoord->canAcceptWritesFor(opCtx, nss);
+ invariant(!isMaster, str::stream() << "Index build: " << replState->buildUUID);
+ invariant(replState->indexBuildState.isAborted(),
+ str::stream()
+ << "Index build: " << replState->buildUUID
+ << ", index build state: " << replState->indexBuildState.toString());
+ invariant(replState->indexBuildState.getTimestamp() &&
+ replState->indexBuildState.getAbortReason(),
+ replState->buildUUID.toString());
+ LOGV2(3856206,
+ "Aborting index build from oplog entry",
+ "buildUUID"_attr = replState->buildUUID,
+ "abortTimestamp"_attr = replState->indexBuildState.getTimestamp().get(),
+ "abortReason"_attr = replState->indexBuildState.getAbortReason().get(),
+ "collectionUUID"_attr = replState->collectionUUID);
+
_indexBuildsManager.abortIndexBuild(
opCtx, coll, replState->buildUUID, MultiIndexBlock::kNoopOnCleanUpFn);
break;
@@ -1039,6 +1070,8 @@ void IndexBuildsCoordinator::_completeAbort(OperationContext* opCtx,
// No locks are required when aborting due to rollback. This performs no storage engine
// writes, only cleans up the remaining in-memory state.
case IndexBuildAction::kRollbackAbort: {
+ invariant(replState->protocol == IndexBuildProtocol::kTwoPhase);
+ invariant(replCoord->getMemberState().rollback());
_indexBuildsManager.abortIndexBuildWithoutCleanup(
opCtx, coll, replState->buildUUID, reason.reason());
break;
@@ -1796,11 +1829,6 @@ void IndexBuildsCoordinator::_runIndexBuild(OperationContext* opCtx,
locker->setDebugInfo(ss);
}
- while (MONGO_unlikely(hangAfterInitializingIndexBuild.shouldFail())) {
- opCtx->runWithoutInterruptionExceptAtGlobalShutdown(
- [&] { opCtx->sleepFor(Milliseconds(100)); });
- }
-
auto status = [&]() {
try {
_runIndexBuildInner(opCtx, replState, indexBuildOptions);
@@ -1849,11 +1877,6 @@ void IndexBuildsCoordinator::_cleanUpSinglePhaseAfterFailure(
return;
}
- // An external caller already cleaned up our state.
- if (status == ErrorCodes::IndexBuildAborted) {
- return;
- }
-
if (indexBuildOptions.replSetAndNotPrimaryAtStart) {
// This build started and failed as a secondary. Single-phase index builds started on
// secondaries may not fail. Do not clean up the index build. It must remain unfinished
@@ -1863,10 +1886,8 @@ void IndexBuildsCoordinator::_cleanUpSinglePhaseAfterFailure(
<< "; Database: " << replState->dbName));
}
- // The index builder thread can abort on its own when it fails due to an indexing error or its
- // deadline expires.
- // The current operation's deadline may have expired, which would prevent us from taking
- // locks. Use a new OperationContext to abort the index build.
+ // The index builder thread can abort on its own if it is interrupted by a user killop. This
+ // would prevent us from taking locks. Use a new OperationContext to abort the index build.
runOnAlternateContext(
opCtx, "self-abort", [this, replState, status](OperationContext* abortCtx) {
ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(abortCtx->lockState());
@@ -1894,15 +1915,8 @@ void IndexBuildsCoordinator::_cleanUpTwoPhaseAfterFailure(
return;
}
- // An external caller interrupted us and will be responsible for cleaning up our state.
- if (status == ErrorCodes::IndexBuildAborted) {
- return;
- }
-
- // The index builder thread can abort on its own when it fails due to an indexing error or its
- // deadline expires.
- // The current operation's deadline may have expired, which would prevent us from taking locks.
- // Use a new OperationContext to abort the index build.
+ // The index builder thread can abort on its own if it is interrupted by a user killop. This
+ // would prevent us from taking locks. Use a new OperationContext to abort the index build.
runOnAlternateContext(
opCtx, "self-abort", [this, replState, status](OperationContext* abortCtx) {
ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(abortCtx->lockState());
@@ -1934,6 +1948,10 @@ void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx,
// This Status stays unchanged unless we catch an exception in the following try-catch block.
auto status = Status::OK();
try {
+ while (MONGO_unlikely(hangAfterInitializingIndexBuild.shouldFail())) {
+ hangAfterInitializingIndexBuild.pauseWhileSet(opCtx);
+ }
+
_buildIndex(opCtx, replState, indexBuildOptions);
} catch (const DBException& ex) {
status = ex.toStatus();
@@ -1956,11 +1974,31 @@ void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx,
NamespaceString nss = collection->ns();
logFailure(status, nss, replState);
+ {
+ // If the index build has already been cleaned-up because it encountered an error at
+ // commit-time, there is no work to do. This is the most routine case, since index
+ // constraint checking happens at commit-time for two phase index builds.
+ stdx::unique_lock<Latch> lk(replState->mutex);
+ if (replState->indexBuildState.isAborted()) {
+ uassertStatusOK(status);
+ }
+ }
+
+ // If we received an external abort, the caller should have already set our state to kAborted.
+ invariant(status.code() != ErrorCodes::IndexBuildAborted);
+
if (IndexBuildProtocol::kSinglePhase == replState->protocol) {
_cleanUpSinglePhaseAfterFailure(opCtx, collection, replState, indexBuildOptions, status);
} else {
invariant(IndexBuildProtocol::kTwoPhase == replState->protocol,
str::stream() << replState->buildUUID);
+ // Two-phase index builds only check index constraints when committing. If an error occurs
+ // at that point, then the build is cleaned up while still holding the appropriate locks.
+ // The only errors that we cannot anticipate are user interrupts and shutdown errors.
+ invariant(status.isA<ErrorCategory::Interruption>() ||
+ status.isA<ErrorCategory::ShutdownError>(),
+ str::stream() << "Unnexpected error code during two-phase index build cleanup: "
+ << status);
_cleanUpTwoPhaseAfterFailure(opCtx, collection, replState, indexBuildOptions, status);
}
@@ -1975,13 +2013,7 @@ void IndexBuildsCoordinator::_buildIndex(OperationContext* opCtx,
_insertKeysFromSideTablesWithoutBlockingWrites(opCtx, replState);
_signalPrimaryForCommitReadiness(opCtx, replState);
_insertKeysFromSideTablesBlockingWrites(opCtx, replState, indexBuildOptions);
- auto commitIndexBuildTimestamp = _waitForNextIndexBuildAction(opCtx, replState);
- invariant(commitIndexBuildTimestamp.isNull() ||
- replState->protocol != IndexBuildProtocol::kSinglePhase,
- str::stream() << "buildUUID: " << replState->buildUUID
- << "commitTs: " << commitIndexBuildTimestamp.toString());
- _insertKeysFromSideTablesAndCommit(
- opCtx, replState, indexBuildOptions, commitIndexBuildTimestamp);
+ _waitForNextIndexBuildActionAndCommit(opCtx, replState, indexBuildOptions);
}
void IndexBuildsCoordinator::_scanCollectionAndInsertKeysIntoSorter(
@@ -2056,10 +2088,7 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesBlockingWrites(
// Unlock RSTL to avoid deadlocks with prepare conflicts and state transitions. See
// SERVER-42621.
- if (IndexBuildProtocol::kSinglePhase == replState->protocol &&
- indexBuildOptions.replSetAndNotPrimaryAtStart) {
- unlockRSTL(opCtx);
- }
+ unlockRSTL(opCtx);
Lock::CollectionLock collLock(opCtx, dbAndUUID, MODE_S);
uassertStatusOK(_indexBuildsManager.drainBackgroundWrites(
@@ -2079,24 +2108,59 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesBlockingWrites(
* Third phase is catching up on all the writes that occurred during the first two phases.
* Accepts a commit timestamp for the index (null if not available).
*/
-void IndexBuildsCoordinator::_insertKeysFromSideTablesAndCommit(
+IndexBuildsCoordinator::CommitResult IndexBuildsCoordinator::_insertKeysFromSideTablesAndCommit(
OperationContext* opCtx,
std::shared_ptr<ReplIndexBuildState> replState,
+ IndexBuildAction action,
const IndexBuildOptions& indexBuildOptions,
const Timestamp& commitIndexBuildTimestamp) {
+
AutoGetDb autoDb(opCtx, replState->dbName, MODE_IX);
- // Unlock RSTL to avoid deadlocks with prepare conflicts and state transitions caused by taking
- // a strong collection lock. See SERVER-42621.
- if (IndexBuildProtocol::kSinglePhase == replState->protocol &&
- indexBuildOptions.replSetAndNotPrimaryAtStart) {
- unlockRSTL(opCtx);
- }
+ // Unlock RSTL to avoid deadlocks with prepare conflicts and state transitions caused by waiting
+ // for a strong collection lock. See SERVER-42621.
+ unlockRSTL(opCtx);
// Need to return the collection lock back to exclusive mode to complete the index build.
const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
Lock::CollectionLock collLock(opCtx, dbAndUUID, MODE_X);
+ // If we can't acquire the RSTL within a given time period, there is an active state transition
+ // and we should release our locks and try again. We would otherwise introduce a deadlock with
+ // step-up by holding the Collection lock in exclusive mode. After it has enqueued its RSTL X
+ // lock, step-up tries to reacquire the Collection locks for prepared transactions, which will
+ // conflict with the X lock we currently hold.
+ repl::ReplicationStateTransitionLockGuard rstl(
+ opCtx, MODE_IX, repl::ReplicationStateTransitionLockGuard::EnqueueOnly());
+ auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ try {
+ // Since this thread is not killable by state transitions, this deadline is effectively the
+ // longest period of time we can block a step-up. State transitions are infrequent, but
+ // need to happen quickly. It should be okay to set this to a low value because the RSTL is
+ // rarely contended, and if this times out, we will retry and reacquire the RSTL again
+ // without a deadline at the beginning of this function.
+ auto deadline = Date_t::now() + Milliseconds(10);
+ rstl.waitForLockUntil(deadline);
+ } catch (const ExceptionFor<ErrorCodes::LockTimeout>&) {
+ return CommitResult::kLockTimeout;
+ }
+
+ // If we are no longer primary after receiving a commit quorum, we must restart and wait for a
+ // new signal from a new primary because we cannot commit.
+ bool isMaster = replCoord->canAcceptWritesFor(opCtx, dbAndUUID);
+ if (!isMaster && IndexBuildAction::kCommitQuorumSatisfied == action) {
+ return CommitResult::kNoLongerPrimary;
+ }
+
+ if (IndexBuildAction::kOplogCommit == action) {
+ // This signal can be received during primary (drain phase), secondary, startup (startup
+ // recovery) and startup2 (initial sync).
+ invariant(!isMaster && replState->indexBuildState.isCommitPrepared(),
+ str::stream() << "Index build: " << replState->buildUUID
+ << ", index build state: "
+ << replState->indexBuildState.toString());
+ }
+
// The collection object should always exist while an index build is registered.
auto collection =
CollectionCatalog::get(opCtx).lookupCollectionByUUID(opCtx, replState->collectionUUID);
@@ -2119,28 +2183,59 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesAndCommit(
RecoveryUnit::ReadSource::kUnset,
IndexBuildInterceptor::DrainYieldPolicy::kNoYield));
- // Retry indexing records that failed key generation while relaxing constraints (i.e. while
- // a secondary node), but only if we are primary and committing the index build and during
- // two-phase builds. Single-phase index builds are not resilient to state transitions and do not
- // track skipped records. Secondaries rely on the primary's decision to commit as assurance that
- // it has checked all key generation errors on its behalf.
- auto replCoord = repl::ReplicationCoordinator::get(opCtx);
- if (IndexBuildProtocol::kTwoPhase == replState->protocol &&
- replCoord->canAcceptWritesFor(opCtx, collection->ns())) {
- uassertStatusOK(
- _indexBuildsManager.retrySkippedRecords(opCtx, replState->buildUUID, collection));
- }
+ try {
+ if (MONGO_unlikely(failIndexBuildOnCommit.shouldFail())) {
+ uasserted(4698903, "index build aborted due to failpoint");
+ }
- // Duplicate key constraint checking phase. Duplicate key errors are tracked for single-phase
- // builds on primaries and two-phase builds in all replication states. Single-phase builds on
- // secondaries don't track duplicates so this call is a no-op. This can be called for two-phase
- // builds in all replication states except during initial sync when this node is not guaranteed
- // to be consistent.
- bool twoPhaseAndNotInitialSyncing = IndexBuildProtocol::kTwoPhase == replState->protocol &&
- !replCoord->getMemberState().startup2();
- if (IndexBuildProtocol::kSinglePhase == replState->protocol || twoPhaseAndNotInitialSyncing) {
- uassertStatusOK(
- _indexBuildsManager.checkIndexConstraintViolations(opCtx, replState->buildUUID));
+ // Retry indexing records that failed key generation while relaxing constraints (i.e. while
+ // a secondary node), but only if we are primary and committing the index build and during
+ // two-phase builds. Single-phase index builds are not resilient to state transitions and do
+ // not track skipped records. Secondaries rely on the primary's decision to commit as
+ // assurance that it has checked all key generation errors on its behalf.
+ if (IndexBuildProtocol::kTwoPhase == replState->protocol &&
+ replCoord->canAcceptWritesFor(opCtx, collection->ns())) {
+ uassertStatusOK(
+ _indexBuildsManager.retrySkippedRecords(opCtx, replState->buildUUID, collection));
+ }
+
+ // Duplicate key constraint checking phase. Duplicate key errors are tracked for
+ // single-phase builds on primaries and two-phase builds in all replication states.
+ // Single-phase builds on secondaries don't track duplicates so this call is a no-op. This
+ // can be called for two-phase builds in all replication states except during initial sync
+ // when this node is not guaranteed to be consistent.
+ bool twoPhaseAndNotInitialSyncing = IndexBuildProtocol::kTwoPhase == replState->protocol &&
+ !replCoord->getMemberState().startup2();
+ if (IndexBuildProtocol::kSinglePhase == replState->protocol ||
+ twoPhaseAndNotInitialSyncing) {
+ uassertStatusOK(
+ _indexBuildsManager.checkIndexConstraintViolations(opCtx, replState->buildUUID));
+ }
+ } catch (const ExceptionForCat<ErrorCategory::ShutdownError>&) {
+ _completeAbortForShutdown(opCtx, replState, collection);
+ throw;
+ } catch (const DBException& e) {
+ // It is illegal to abort the index build at this point. Note that Interruption exceptions
+ // are allowed because we cannot control them as they bypass the routine abort machinery.
+ invariant(e.code() != ErrorCodes::IndexBuildAborted);
+
+ // Index builds may not fail on secondaries at this point. If a primary replicated an
+ // abortIndexBuild oplog entry, then this index build would have been interrupted before
+ // committing with an IndexBuildAborted error code.
+ auto status = e.toStatus();
+ if (!isMaster) {
+ LOGV2_FATAL(4698902,
+ "Index build failed while not primary",
+ "buildUUID"_attr = replState->buildUUID,
+ "collectionUUID"_attr = replState->collectionUUID,
+ "db"_attr = replState->dbName,
+ "reason"_attr = status);
+ }
+
+ // This index build failed due to an indexing error in normal circumstances. Abort while
+ // still holding the RSTL and collection locks.
+ _completeSelfAbort(opCtx, replState, status);
+ throw;
}
// If two phase index builds is enabled, index build will be coordinated using
@@ -2186,7 +2281,7 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesAndCommit(
"indexesBuilt"_attr = replState->indexSpecs.size(),
"numIndexesBefore"_attr = replState->stats.numIndexesBefore,
"numIndexesAfter"_attr = replState->stats.numIndexesAfter);
- return;
+ return CommitResult::kSuccess;
}
StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::_runIndexRebuildForRecovery(
diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h
index 88c3590d286..c665a433659 100644
--- a/src/mongo/db/index_builds_coordinator.h
+++ b/src/mongo/db/index_builds_coordinator.h
@@ -643,29 +643,45 @@ protected:
* - Commit signal can be sent only by oplog applier.
* - Abort signal on secondaries can be sent by oplog applier, bgSync on rollback.
*
- * On completion, this function returns a timestamp, which may be null, that may be used to
- * update the mdb catalog as we commit the index build. The commit index build timestamp is
- * obtained from a commitIndexBuild oplog entry during secondary oplog application.
- * This function returns a null timestamp on receiving a abortIndexBuild oplog entry; or if we
- * are currently a primary, in which case we do not need to wait any external signal to commit
- * the index build.
- */
- virtual Timestamp _waitForNextIndexBuildAction(
- OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) = 0;
+ * On completion, this function will commit the index build.
+ */
+ virtual void _waitForNextIndexBuildActionAndCommit(
+ OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ const IndexBuildOptions& indexBuildOptions) = 0;
std::string _indexBuildActionToString(IndexBuildAction action);
+
/**
* Third phase is catching up on all the writes that occurred during the first two phases.
* Accepts a commit timestamp for the index, which could be null. See
* _waitForNextIndexBuildAction() comments. This timestamp is used only for committing the
* index, which sets the ready flag to true, to the catalog; it is not used for the catch-up
* writes during the final drain phase.
- */
- void _insertKeysFromSideTablesAndCommit(OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- const IndexBuildOptions& indexBuildOptions,
- const Timestamp& commitIndexBuildTimestamp);
+ *
+ * This operation temporarily releases the RSTL to acquire the collection X lock to prevent
+ * deadlocks. It must reacquire the RSTL to commit, but it's possible for the node's state to
+ * have changed in that period of time. If the replication state has changed or the lock
+ * acquisition times out, a non-success CommitResult will be returned and the caller must retry.
+ *
+ * Returns a CommitResult that indicates whether or not the commit was successful.
+ */
+ enum class CommitResult {
+ /** The index build was able to commit successfully. */
+ kSuccess,
+ /** After reacquiring the RSTL to commit, this node was no longer primary. The caller must
+ reset and wait for the next IndexBuildAction again. */
+ kNoLongerPrimary,
+ /** Reacquiring the RSTL timed out, indicating that a conflicting state transition was in
+ progress. The caller must try again. */
+ kLockTimeout
+ };
+ CommitResult _insertKeysFromSideTablesAndCommit(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ IndexBuildAction action,
+ const IndexBuildOptions& indexBuildOptions,
+ const Timestamp& commitIndexBuildTimestamp);
/**
* Runs the index build.
diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp
index ca4a6dc22e6..a4061b24b4f 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.cpp
+++ b/src/mongo/db/index_builds_coordinator_mongod.cpp
@@ -630,12 +630,12 @@ IndexBuildAction IndexBuildsCoordinatorMongod::_drainSideWritesUntilNextActionIs
return nextAction;
}
-Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction(
- OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) {
- Timestamp commitIndexBuildTimestamp;
-
+void IndexBuildsCoordinatorMongod::_waitForNextIndexBuildActionAndCommit(
+ OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ const IndexBuildOptions& indexBuildOptions) {
LOGV2(3856203,
- "Index build waiting for next action before completing final phase: {buildUUID}",
+ "Index build waiting for next action before completing final phase",
"buildUUID"_attr = replState->buildUUID);
while (true) {
@@ -647,97 +647,75 @@ Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction(
const auto nextAction = _drainSideWritesUntilNextActionIsAvailable(opCtx, replState);
LOGV2(3856204,
- "Index build received signal for build uuid: {buildUUID} , action: {action}",
+ "Index build received signal",
"buildUUID"_attr = replState->buildUUID,
"action"_attr = _indexBuildActionToString(nextAction));
- bool needsToRetryWait = false;
-
- // Reacquire RSTL lock to check replication state.
- repl::ReplicationStateTransitionLockGuard rstl(opCtx, MODE_IX);
+ // If the index build was aborted, this serves as a final interruption point. Since the
+ // index builder thread is interrupted before the action is set, this must fail if the build
+ // was aborted.
+ opCtx->checkForInterrupt();
- const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
- auto replCoord = repl::ReplicationCoordinator::get(opCtx);
- auto isMaster = replCoord->canAcceptWritesFor(opCtx, dbAndUUID);
+ bool needsToRetryWait = false;
- stdx::unique_lock<Latch> lk(replState->mutex);
switch (nextAction) {
- case IndexBuildAction::kNoAction:
- break;
- case IndexBuildAction::kOplogCommit:
+ case IndexBuildAction::kOplogCommit: {
invariant(replState->protocol == IndexBuildProtocol::kTwoPhase);
-
- // Sanity check
- // This signal can be received during primary (drain phase), secondary,
- // startup( startup recovery) and startup2 (initial sync).
- invariant(!isMaster && replState->indexBuildState.isCommitPrepared(),
- str::stream()
- << "Index build: " << replState->buildUUID
- << ", index build state: " << replState->indexBuildState.toString());
invariant(replState->indexBuildState.getTimestamp(),
replState->buildUUID.toString());
- // set the commit timestamp
- commitIndexBuildTimestamp = replState->indexBuildState.getTimestamp().get();
LOGV2(3856205,
- "Committing index build",
+ "Committing index build from oplog entry",
"buildUUID"_attr = replState->buildUUID,
"commitTimestamp"_attr = replState->indexBuildState.getTimestamp().get(),
"collectionUUID"_attr = replState->collectionUUID);
break;
- case IndexBuildAction::kOplogAbort:
- invariant(replState->protocol == IndexBuildProtocol::kTwoPhase);
- // Sanity check
- // This signal can be received during primary (drain phase), secondary,
- // startup( startup recovery) and startup2 (initial sync).
- invariant(!isMaster, str::stream() << "Index build: " << replState->buildUUID);
- invariant(replState->indexBuildState.isAborted(),
- str::stream()
- << "Index build: " << replState->buildUUID
- << ", index build state: " << replState->indexBuildState.toString());
- invariant(replState->indexBuildState.getTimestamp() &&
- replState->indexBuildState.getAbortReason(),
- replState->buildUUID.toString());
- // The calling thread will interrupt our OperationContext and we will exit.
- LOGV2(3856206,
- "Aborting index build",
- "buildUUID"_attr = replState->buildUUID,
- "abortTimestamp"_attr = replState->indexBuildState.getTimestamp().get(),
- "abortReason"_attr = replState->indexBuildState.getAbortReason().get(),
- "collectionUUID"_attr = replState->collectionUUID);
+ }
+ case IndexBuildAction::kCommitQuorumSatisfied: {
+ invariant(!replState->indexBuildState.getTimestamp());
break;
- case IndexBuildAction::kRollbackAbort:
- invariant(replState->protocol == IndexBuildProtocol::kTwoPhase);
- invariant(replCoord->getMemberState().rollback());
- // The calling thread will interrupt our OperationContext and we will exit.
+ }
+ case IndexBuildAction::kSinglePhaseCommit:
+ invariant(replState->protocol == IndexBuildProtocol::kSinglePhase);
break;
+ case IndexBuildAction::kOplogAbort:
+ case IndexBuildAction::kRollbackAbort:
case IndexBuildAction::kPrimaryAbort:
- // The thread aborting a two-phase index build must hold the RSTL so that the
- // replication state does not change. They will interrupt our OperationContext and
- // we will exit. Single-phase builds do not replicate abort oplog entries. We do
- // not need to be primary to abort the index build, and we must continue aborting
- // even in the event of a state transition because this build will not receive
- // another signal.
- invariant(isMaster || IndexBuildProtocol::kSinglePhase == replState->protocol,
- str::stream()
- << "isMaster: " << isMaster << ", singlePhase: "
- << (IndexBuildProtocol::kSinglePhase == replState->protocol));
- break;
- case IndexBuildAction::kCommitQuorumSatisfied:
- if (!isMaster) {
- // Reset the promise as the node has stepped down,
- // wait for the new primary to coordinate the index build and send the new
- // signal/action.
- LOGV2(3856207,
- "No longer primary, so will be waiting again for next action before "
- "completing final phase: {buildUUID}",
- "buildUUID"_attr = replState->buildUUID);
+ // The calling thread should have interrupted us before signaling an abort action.
+ LOGV2_FATAL(4698901, "Index build abort should have interrupted this operation");
+ case IndexBuildAction::kNoAction:
+ return;
+ }
+
+ Timestamp commitTimestamp = replState->indexBuildState.getTimestamp()
+ ? replState->indexBuildState.getTimestamp().get()
+ : Timestamp();
+
+ auto result = _insertKeysFromSideTablesAndCommit(
+ opCtx, replState, nextAction, indexBuildOptions, commitTimestamp);
+ switch (result) {
+ case CommitResult::kNoLongerPrimary:
+ invariant(nextAction != IndexBuildAction::kOplogCommit);
+ // Reset the promise as the node has stepped down. Wait for the new primary to
+ // coordinate the index build and send the new signal/action.
+ LOGV2(3856207,
+ "No longer primary while attempting to commit. Waiting again for next action "
+ "before completing final phase",
+ "buildUUID"_attr = replState->buildUUID);
+ {
+ stdx::unique_lock<Latch> lk(replState->mutex);
replState->waitForNextAction =
std::make_unique<SharedPromise<IndexBuildAction>>();
- needsToRetryWait = true;
}
+ needsToRetryWait = true;
break;
- case IndexBuildAction::kSinglePhaseCommit:
- invariant(replState->protocol == IndexBuildProtocol::kSinglePhase);
+ case CommitResult::kLockTimeout:
+ LOGV2(4698900,
+ "Unable to acquire RSTL for commit within deadline. Releasing locks and "
+ "trying again",
+ "buildUUID"_attr = replState->buildUUID);
+ needsToRetryWait = true;
+ break;
+ case CommitResult::kSuccess:
break;
}
@@ -745,7 +723,6 @@ Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction(
break;
}
}
- return commitIndexBuildTimestamp;
}
Status IndexBuildsCoordinatorMongod::setCommitQuorum(OperationContext* opCtx,
diff --git a/src/mongo/db/index_builds_coordinator_mongod.h b/src/mongo/db/index_builds_coordinator_mongod.h
index 9e688c23639..c9bf15b6b3a 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.h
+++ b/src/mongo/db/index_builds_coordinator_mongod.h
@@ -155,8 +155,9 @@ private:
IndexBuildAction _drainSideWritesUntilNextActionIsAvailable(
OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) override;
- Timestamp _waitForNextIndexBuildAction(OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState) override;
+ void _waitForNextIndexBuildActionAndCommit(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ const IndexBuildOptions& indexBuildOptions) override;
// Thread pool on which index builds are run.
ThreadPool _threadPool;
diff --git a/src/mongo/embedded/index_builds_coordinator_embedded.cpp b/src/mongo/embedded/index_builds_coordinator_embedded.cpp
index bc18e625f93..d89da980cf8 100644
--- a/src/mongo/embedded/index_builds_coordinator_embedded.cpp
+++ b/src/mongo/embedded/index_builds_coordinator_embedded.cpp
@@ -81,10 +81,10 @@ IndexBuildsCoordinatorEmbedded::startIndexBuild(OperationContext* opCtx,
void IndexBuildsCoordinatorEmbedded::_signalPrimaryForCommitReadiness(
OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) {}
-Timestamp IndexBuildsCoordinatorEmbedded::_waitForNextIndexBuildAction(
- OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) {
- return Timestamp();
-}
+void IndexBuildsCoordinatorEmbedded::_waitForNextIndexBuildActionAndCommit(
+ OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ const IndexBuildOptions& indexBuildOptions) {}
void IndexBuildsCoordinatorEmbedded::setSignalAndCancelVoteRequestCbkIfActive(
WithLock ReplIndexBuildStateLk,
diff --git a/src/mongo/embedded/index_builds_coordinator_embedded.h b/src/mongo/embedded/index_builds_coordinator_embedded.h
index 62e44411287..84c9d30ded7 100644
--- a/src/mongo/embedded/index_builds_coordinator_embedded.h
+++ b/src/mongo/embedded/index_builds_coordinator_embedded.h
@@ -96,8 +96,9 @@ private:
return {};
};
- Timestamp _waitForNextIndexBuildAction(OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState) override;
+ void _waitForNextIndexBuildActionAndCommit(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState,
+ const IndexBuildOptions& indexBuildOptions) override;
};
} // namespace mongo