summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--jstests/noPassthrough/index_build_external_and_internal_abort.js70
-rw-r--r--src/mongo/db/index_builds_coordinator.cpp3
-rw-r--r--src/mongo/db/index_builds_coordinator_mongod.cpp18
-rw-r--r--src/mongo/db/repl_index_build_state.cpp8
-rw-r--r--src/mongo/db/repl_index_build_state.h6
5 files changed, 102 insertions, 3 deletions
diff --git a/jstests/noPassthrough/index_build_external_and_internal_abort.js b/jstests/noPassthrough/index_build_external_and_internal_abort.js
new file mode 100644
index 00000000000..b3b3e671abd
--- /dev/null
+++ b/jstests/noPassthrough/index_build_external_and_internal_abort.js
@@ -0,0 +1,70 @@
+/**
+ * Validate a scenario where an external index build abort (e.g. collection drop) races with an
+ * internal index build abort (e.g. build failed due to invalid keys).
+ *
+ * @tags: [
+ * featureFlagIndexBuildGracefulErrorHandling,
+ * requires_replication,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/feature_flag_util.js");
+load('jstests/noPassthrough/libs/index_build.js');
+
+const rst = new ReplSetTest({
+ nodes: [
+ {},
+ ]
+});
+rst.startSet();
+rst.initiate();
+
+const primary = rst.getPrimary();
+const testDB = primary.getDB('test');
+const coll = testDB.getCollection('test');
+
+assert.commandWorked(coll.insert({point: {x: -15.0, y: "abc"}}));
+
+let indexBuilderThreadFP =
+ configureFailPoint(testDB, 'hangIndexBuildBeforeTransitioningReplStateTokAwaitPrimaryAbort');
+let connThreadFP = configureFailPoint(testDB, 'hangInRemoveIndexBuildEntryAfterCommitOrAbort');
+
+// Will fail with error code 13026: "geo values must be 'legacy coordinate pairs' for 2d indexes"
+const waitForIndexBuild =
+ IndexBuildTest.startIndexBuild(primary, coll.getFullName(), {point: "2d"}, {}, 13026);
+
+// Wait for the index builder to detect the malformed geo value and to hang before transitioning
+// the index build state to kAwaitPrimaryAbort.
+indexBuilderThreadFP.wait();
+
+// Drop the underlying collection.
+const awaitDropCollection =
+ startParallelShell(funWithArgs(function(collName) {
+ assert.commandWorked(db.runCommand({drop: collName}));
+ }, coll.getName()), primary.port);
+
+// Wait for the 'drop' command to hang while tearing down the index build, just after setting the
+// index build state to kAborted.
+connThreadFP.wait();
+
+// Resume the index builder thread, which would now try to abort an index that's already in kAbort
+// state.
+indexBuilderThreadFP.off();
+
+// Wait for the log to confirm the index builder won't attempt to abort the build, because it's
+// already in aborted state.
+checkLog.containsJson(primary, 7530800);
+
+// Resume the collection drop and wait for its completion.
+connThreadFP.off();
+awaitDropCollection();
+
+waitForIndexBuild();
+
+// The collection does not exist.
+assert.eq(testDB.getCollectionNames().indexOf(coll.getName()), -1, "collection still exists.");
+
+rst.stopSet();
+})();
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 2212b975414..cedf0227954 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -92,6 +92,7 @@ MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeWaitingUntilMajorityOpTime);
MONGO_FAIL_POINT_DEFINE(hangBeforeUnregisteringAfterCommit);
MONGO_FAIL_POINT_DEFINE(failSetUpResumeIndexBuild);
MONGO_FAIL_POINT_DEFINE(failIndexBuildWithError);
+MONGO_FAIL_POINT_DEFINE(hangInRemoveIndexBuildEntryAfterCommitOrAbort);
IndexBuildsCoordinator::IndexBuildsSSS::IndexBuildsSSS()
: ServerStatusSection("indexBuilds"),
@@ -191,6 +192,8 @@ void removeIndexBuildEntryAfterCommitOrAbort(OperationContext* opCtx,
return;
}
+ hangInRemoveIndexBuildEntryAfterCommitOrAbort.pauseWhileSet();
+
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
if (!replCoord->canAcceptWritesFor(opCtx, dbAndUUID)) {
return;
diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp
index 48d582dafb8..3e4aa686c64 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.cpp
+++ b/src/mongo/db/index_builds_coordinator_mongod.cpp
@@ -70,6 +70,7 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeInitializingIndexBuild);
MONGO_FAIL_POINT_DEFINE(hangIndexBuildAfterSignalPrimaryForCommitReadiness);
MONGO_FAIL_POINT_DEFINE(hangBeforeRunningIndexBuild);
MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeSignalingPrimaryForAbort);
+MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeTransitioningReplStateTokAwaitPrimaryAbort);
const StringData kMaxNumActiveUserIndexBuildsServerParameterName = "maxNumActiveUserIndexBuilds"_sd;
@@ -681,13 +682,28 @@ bool IndexBuildsCoordinatorMongod::_signalIfCommitQuorumNotEnabled(
void IndexBuildsCoordinatorMongod::_signalPrimaryForAbortAndWaitForExternalAbort(
OperationContext* opCtx, ReplIndexBuildState* replState, const Status& abortStatus) {
+
+ hangIndexBuildBeforeTransitioningReplStateTokAwaitPrimaryAbort.pauseWhileSet(opCtx);
+
LOGV2(7419402,
"Index build: signaling primary to abort index build",
"buildUUID"_attr = replState->buildUUID,
logAttrs(replState->dbName),
"collectionUUID"_attr = replState->collectionUUID,
"reason"_attr = abortStatus);
- replState->requestAbortFromPrimary(abortStatus);
+ const auto transitionedToWaitForAbort = replState->requestAbortFromPrimary(abortStatus);
+
+ if (!transitionedToWaitForAbort) {
+ // The index build has likely been aborted externally (e.g. its underlying collection was
+ // dropped), and it's in the midst of tearing down. There's nothing else to do here.
+ LOGV2(7530800,
+ "Index build: the build is already in aborted state; not signaling primary to abort",
+ "buildUUID"_attr = replState->buildUUID,
+ "db"_attr = replState->dbName,
+ "collectionUUID"_attr = replState->collectionUUID,
+ "reason"_attr = abortStatus);
+ return;
+ }
hangIndexBuildBeforeSignalingPrimaryForAbort.pauseWhileSet(opCtx);
diff --git a/src/mongo/db/repl_index_build_state.cpp b/src/mongo/db/repl_index_build_state.cpp
index ebc1d0e72f0..29bd6d22fab 100644
--- a/src/mongo/db/repl_index_build_state.cpp
+++ b/src/mongo/db/repl_index_build_state.cpp
@@ -229,7 +229,7 @@ void ReplIndexBuildState::commit(OperationContext* opCtx) {
});
}
-void ReplIndexBuildState::requestAbortFromPrimary(const Status& abortStatus) {
+bool ReplIndexBuildState::requestAbortFromPrimary(const Status& abortStatus) {
invariant(protocol == IndexBuildProtocol::kTwoPhase);
stdx::lock_guard lk(_mutex);
@@ -245,8 +245,14 @@ void ReplIndexBuildState::requestAbortFromPrimary(const Status& abortStatus) {
"buildUUID"_attr = buildUUID);
}
+ if (_indexBuildState.isAborted()) {
+ return false;
+ }
+
_indexBuildState.setState(
IndexBuildState::kAwaitPrimaryAbort, false /* skipCheck */, boost::none, abortStatus);
+
+ return true;
}
Timestamp ReplIndexBuildState::getCommitTimestamp() const {
diff --git a/src/mongo/db/repl_index_build_state.h b/src/mongo/db/repl_index_build_state.h
index eff7b0c976e..a1b4a1fe2bc 100644
--- a/src/mongo/db/repl_index_build_state.h
+++ b/src/mongo/db/repl_index_build_state.h
@@ -309,8 +309,12 @@ public:
/**
* Only for two-phase index builds. Requests the primary to abort the build, and transitions
* into a waiting state.
+ *
+ * Returns true if the thread has transitioned into the waiting state.
+ * Returns false if the build is already in abort state. This can happen if the build detected
+ * an error while an external operation (e.g. a collection drop) is concurrently aborting it.
*/
- void requestAbortFromPrimary(const Status& abortStatus);
+ bool requestAbortFromPrimary(const Status& abortStatus);
/**
* Returns timestamp for committing this index build.