summary | refs | log | tree | commit | diff
path: root/src/mongo
diff options
context:
space:
mode:
author: Josef Ahmad <josef.ahmad@mongodb.com> 2023-04-04 12:15:17 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2023-04-04 13:07:44 +0000
commit: 806b58d5fac1b17d848b3c7c997b67b68440b2ba (patch)
tree: 59bd4047a8b07b7491549ad57b1598b9f763d176 /src/mongo
parent: b7a17172c80bf156595ad3e9d92ea9ec900c03e2 (diff)
download: mongo-806b58d5fac1b17d848b3c7c997b67b68440b2ba.tar.gz
SERVER-75308 Fix race between external and internal index build aborts
Diffstat (limited to 'src/mongo')
-rw-r--r-- src/mongo/db/index_builds_coordinator.cpp 3
-rw-r--r-- src/mongo/db/index_builds_coordinator_mongod.cpp 18
-rw-r--r-- src/mongo/db/repl_index_build_state.cpp 8
-rw-r--r-- src/mongo/db/repl_index_build_state.h 6
4 files changed, 32 insertions, 3 deletions
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 2212b975414..cedf0227954 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -92,6 +92,7 @@ MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeWaitingUntilMajorityOpTime);
MONGO_FAIL_POINT_DEFINE(hangBeforeUnregisteringAfterCommit);
MONGO_FAIL_POINT_DEFINE(failSetUpResumeIndexBuild);
MONGO_FAIL_POINT_DEFINE(failIndexBuildWithError);
+MONGO_FAIL_POINT_DEFINE(hangInRemoveIndexBuildEntryAfterCommitOrAbort);
IndexBuildsCoordinator::IndexBuildsSSS::IndexBuildsSSS()
: ServerStatusSection("indexBuilds"),
@@ -191,6 +192,8 @@ void removeIndexBuildEntryAfterCommitOrAbort(OperationContext* opCtx,
return;
}
+ hangInRemoveIndexBuildEntryAfterCommitOrAbort.pauseWhileSet();
+
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
if (!replCoord->canAcceptWritesFor(opCtx, dbAndUUID)) {
return;
diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp
index 48d582dafb8..3e4aa686c64 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.cpp
+++ b/src/mongo/db/index_builds_coordinator_mongod.cpp
@@ -70,6 +70,7 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeInitializingIndexBuild);
MONGO_FAIL_POINT_DEFINE(hangIndexBuildAfterSignalPrimaryForCommitReadiness);
MONGO_FAIL_POINT_DEFINE(hangBeforeRunningIndexBuild);
MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeSignalingPrimaryForAbort);
+MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeTransitioningReplStateTokAwaitPrimaryAbort);
const StringData kMaxNumActiveUserIndexBuildsServerParameterName = "maxNumActiveUserIndexBuilds"_sd;
@@ -681,13 +682,28 @@ bool IndexBuildsCoordinatorMongod::_signalIfCommitQuorumNotEnabled(
void IndexBuildsCoordinatorMongod::_signalPrimaryForAbortAndWaitForExternalAbort(
OperationContext* opCtx, ReplIndexBuildState* replState, const Status& abortStatus) {
+
+ hangIndexBuildBeforeTransitioningReplStateTokAwaitPrimaryAbort.pauseWhileSet(opCtx);
+
LOGV2(7419402,
"Index build: signaling primary to abort index build",
"buildUUID"_attr = replState->buildUUID,
logAttrs(replState->dbName),
"collectionUUID"_attr = replState->collectionUUID,
"reason"_attr = abortStatus);
- replState->requestAbortFromPrimary(abortStatus);
+ const auto transitionedToWaitForAbort = replState->requestAbortFromPrimary(abortStatus);
+
+ if (!transitionedToWaitForAbort) {
+ // The index build has likely been aborted externally (e.g. its underlying collection was
+ // dropped), and it's in the midst of tearing down. There's nothing else to do here.
+ LOGV2(7530800,
+ "Index build: the build is already in aborted state; not signaling primary to abort",
+ "buildUUID"_attr = replState->buildUUID,
+ "db"_attr = replState->dbName,
+ "collectionUUID"_attr = replState->collectionUUID,
+ "reason"_attr = abortStatus);
+ return;
+ }
hangIndexBuildBeforeSignalingPrimaryForAbort.pauseWhileSet(opCtx);
diff --git a/src/mongo/db/repl_index_build_state.cpp b/src/mongo/db/repl_index_build_state.cpp
index ebc1d0e72f0..29bd6d22fab 100644
--- a/src/mongo/db/repl_index_build_state.cpp
+++ b/src/mongo/db/repl_index_build_state.cpp
@@ -229,7 +229,7 @@ void ReplIndexBuildState::commit(OperationContext* opCtx) {
});
}
-void ReplIndexBuildState::requestAbortFromPrimary(const Status& abortStatus) {
+bool ReplIndexBuildState::requestAbortFromPrimary(const Status& abortStatus) {
invariant(protocol == IndexBuildProtocol::kTwoPhase);
stdx::lock_guard lk(_mutex);
@@ -245,8 +245,14 @@ void ReplIndexBuildState::requestAbortFromPrimary(const Status& abortStatus) {
"buildUUID"_attr = buildUUID);
}
+ if (_indexBuildState.isAborted()) {
+ return false;
+ }
+
_indexBuildState.setState(
IndexBuildState::kAwaitPrimaryAbort, false /* skipCheck */, boost::none, abortStatus);
+
+ return true;
}
Timestamp ReplIndexBuildState::getCommitTimestamp() const {
diff --git a/src/mongo/db/repl_index_build_state.h b/src/mongo/db/repl_index_build_state.h
index eff7b0c976e..a1b4a1fe2bc 100644
--- a/src/mongo/db/repl_index_build_state.h
+++ b/src/mongo/db/repl_index_build_state.h
@@ -309,8 +309,12 @@ public:
/**
* Only for two-phase index builds. Requests the primary to abort the build, and transitions
* into a waiting state.
+ *
+ * Returns true if the thread has transitioned into the waiting state.
+ * Returns false if the build is already in abort state. This can happen if the build detected
+ * an error while an external operation (e.g. a collection drop) is concurrently aborting it.
*/
- void requestAbortFromPrimary(const Status& abortStatus);
+ bool requestAbortFromPrimary(const Status& abortStatus);
/**
* Returns timestamp for committing this index build.