diff options
author | Josef Ahmad <josef.ahmad@mongodb.com> | 2023-04-04 12:15:17 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-04 13:07:44 +0000 |
commit | 806b58d5fac1b17d848b3c7c997b67b68440b2ba (patch) | |
tree | 59bd4047a8b07b7491549ad57b1598b9f763d176 /src/mongo | |
parent | b7a17172c80bf156595ad3e9d92ea9ec900c03e2 (diff) | |
download | mongo-806b58d5fac1b17d848b3c7c997b67b68440b2ba.tar.gz |
SERVER-75308 Fix race between external and internal index build aborts
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/index_builds_coordinator.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator_mongod.cpp | 18 | ||||
-rw-r--r-- | src/mongo/db/repl_index_build_state.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/repl_index_build_state.h | 6 |
4 files changed, 32 insertions, 3 deletions
```diff
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 2212b975414..cedf0227954 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -92,6 +92,7 @@ MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeWaitingUntilMajorityOpTime);
 MONGO_FAIL_POINT_DEFINE(hangBeforeUnregisteringAfterCommit);
 MONGO_FAIL_POINT_DEFINE(failSetUpResumeIndexBuild);
 MONGO_FAIL_POINT_DEFINE(failIndexBuildWithError);
+MONGO_FAIL_POINT_DEFINE(hangInRemoveIndexBuildEntryAfterCommitOrAbort);
 
 IndexBuildsCoordinator::IndexBuildsSSS::IndexBuildsSSS()
     : ServerStatusSection("indexBuilds"),
@@ -191,6 +192,8 @@ void removeIndexBuildEntryAfterCommitOrAbort(OperationContext* opCtx,
         return;
     }
 
+    hangInRemoveIndexBuildEntryAfterCommitOrAbort.pauseWhileSet();
+
     auto replCoord = repl::ReplicationCoordinator::get(opCtx);
     if (!replCoord->canAcceptWritesFor(opCtx, dbAndUUID)) {
         return;
diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp
index 48d582dafb8..3e4aa686c64 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.cpp
+++ b/src/mongo/db/index_builds_coordinator_mongod.cpp
@@ -70,6 +70,7 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeInitializingIndexBuild);
 MONGO_FAIL_POINT_DEFINE(hangIndexBuildAfterSignalPrimaryForCommitReadiness);
 MONGO_FAIL_POINT_DEFINE(hangBeforeRunningIndexBuild);
 MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeSignalingPrimaryForAbort);
+MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeTransitioningReplStateTokAwaitPrimaryAbort);
 
 const StringData kMaxNumActiveUserIndexBuildsServerParameterName = "maxNumActiveUserIndexBuilds"_sd;
 
@@ -681,13 +682,28 @@ bool IndexBuildsCoordinatorMongod::_signalIfCommitQuorumNotEnabled(
 
 void IndexBuildsCoordinatorMongod::_signalPrimaryForAbortAndWaitForExternalAbort(
     OperationContext* opCtx, ReplIndexBuildState* replState, const Status& abortStatus) {
+
+    hangIndexBuildBeforeTransitioningReplStateTokAwaitPrimaryAbort.pauseWhileSet(opCtx);
+
     LOGV2(7419402,
           "Index build: signaling primary to abort index build",
           "buildUUID"_attr = replState->buildUUID,
           logAttrs(replState->dbName),
           "collectionUUID"_attr = replState->collectionUUID,
           "reason"_attr = abortStatus);
-    replState->requestAbortFromPrimary(abortStatus);
+    const auto transitionedToWaitForAbort = replState->requestAbortFromPrimary(abortStatus);
+
+    if (!transitionedToWaitForAbort) {
+        // The index build has likely been aborted externally (e.g. its underlying collection was
+        // dropped), and it's in the midst of tearing down. There's nothing else to do here.
+        LOGV2(7530800,
+              "Index build: the build is already in aborted state; not signaling primary to abort",
+              "buildUUID"_attr = replState->buildUUID,
+              "db"_attr = replState->dbName,
+              "collectionUUID"_attr = replState->collectionUUID,
+              "reason"_attr = abortStatus);
+        return;
+    }
 
     hangIndexBuildBeforeSignalingPrimaryForAbort.pauseWhileSet(opCtx);
 
diff --git a/src/mongo/db/repl_index_build_state.cpp b/src/mongo/db/repl_index_build_state.cpp
index ebc1d0e72f0..29bd6d22fab 100644
--- a/src/mongo/db/repl_index_build_state.cpp
+++ b/src/mongo/db/repl_index_build_state.cpp
@@ -229,7 +229,7 @@ void ReplIndexBuildState::commit(OperationContext* opCtx) {
     });
 }
 
-void ReplIndexBuildState::requestAbortFromPrimary(const Status& abortStatus) {
+bool ReplIndexBuildState::requestAbortFromPrimary(const Status& abortStatus) {
     invariant(protocol == IndexBuildProtocol::kTwoPhase);
     stdx::lock_guard lk(_mutex);
 
@@ -245,8 +245,14 @@ void ReplIndexBuildState::requestAbortFromPrimary(const Status& abortStatus) {
                     "buildUUID"_attr = buildUUID);
     }
 
+    if (_indexBuildState.isAborted()) {
+        return false;
+    }
+
     _indexBuildState.setState(
         IndexBuildState::kAwaitPrimaryAbort, false /* skipCheck */, boost::none, abortStatus);
+
+    return true;
 }
 
 Timestamp ReplIndexBuildState::getCommitTimestamp() const {
diff --git a/src/mongo/db/repl_index_build_state.h b/src/mongo/db/repl_index_build_state.h
index eff7b0c976e..a1b4a1fe2bc 100644
--- a/src/mongo/db/repl_index_build_state.h
+++ b/src/mongo/db/repl_index_build_state.h
@@ -309,8 +309,12 @@ public:
     /**
      * Only for two-phase index builds. Requests the primary to abort the build, and transitions
      * into a waiting state.
+     *
+     * Returns true if the thread has transitioned into the waiting state.
+     * Returns false if the build is already in abort state. This can happen if the build detected
+     * an error while an external operation (e.g. a collection drop) is concurrently aborting it.
      */
-    void requestAbortFromPrimary(const Status& abortStatus);
+    bool requestAbortFromPrimary(const Status& abortStatus);
 
     /**
      * Returns timestamp for committing this index build.
```