diff options
author | Yu Jin Kang Park <yujin.kang@mongodb.com> | 2023-05-09 14:06:33 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-05-09 15:02:37 +0000 |
commit | 696d8924ff38cca476da7865b39ee7bd6971208b (patch) | |
tree | 5f4e796c55f895779d856c34a1ae1bebcac2ce9d | |
parent | ef9da916d39ac0ea7c6aa3426dfdfbabe3d2cd5f (diff) | |
download | mongo-696d8924ff38cca476da7865b39ee7bd6971208b.tar.gz |
SERVER-76915 Unregister index build if no cleanup is required in shutdown path
-rw-r--r-- | src/mongo/db/index_builds_coordinator.cpp | 61 |
1 file changed, 26 insertions, 35 deletions
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 8051f89ec36..de5004df03d 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -2588,6 +2588,21 @@ void IndexBuildsCoordinator::_cleanUpAfterFailure(OperationContext* opCtx, const IndexBuildOptions& indexBuildOptions, const Status& status) { + if (!replState->isAbortCleanUpRequired()) { + // The index build aborted at an early stage before the 'startIndexBuild' oplog entry is + // replicated: members replicating from this sync source are not aware of this index + // build, nor has any build state been persisted locally. Unregister the index build + // locally. In two phase index builds, any conditions causing secondaries to fail setting up + // an index build (which must have succeeded in the primary) are assumed to eventually cause + // the node to crash, so we do not attempt to verify this is a primary. + LOGV2(7564400, + "Index build: unregistering without cleanup", + "buildUUD"_attr = replState->buildUUID, + "error"_attr = status); + activeIndexBuilds.unregisterIndexBuild(&_indexBuildsManager, replState); + return; + } + if (!status.isA<ErrorCategory::ShutdownError>()) { try { // It is still possible to get a shutdown request while trying to clean-up. All shutdown @@ -2627,6 +2642,8 @@ void IndexBuildsCoordinator::_cleanUpSinglePhaseAfterNonShutdownFailure( const IndexBuildOptions& indexBuildOptions, const Status& status) { + invariant(replState->isAbortCleanUpRequired()); + // The index builder thread can abort on its own if it is interrupted by a user killop. This // would prevent us from taking locks. Use a new OperationContext to abort the index build. 
runOnAlternateContext( @@ -2653,26 +2670,7 @@ void IndexBuildsCoordinator::_cleanUpTwoPhaseAfterNonShutdownFailure( const IndexBuildOptions& indexBuildOptions, const Status& status) { - // We can only get here when there is no external abort, after a failure. If the operation has - // been killed, it must have been from a killop. In which case we cannot continue and try to - // vote, because we want the voting itself to be killable. Continue and try to abort as primary - // or crash. - if (!opCtx->isKillPending() && - feature_flags::gIndexBuildGracefulErrorHandling.isEnabled( - serverGlobalParams.featureCompatibility)) { - if (!replState->isAbortCleanUpRequired()) { - // The index build aborted at an early stage before the 'startIndexBuild' oplog entry is - // replicated: members replicating from this sync source are not aware of this index - // build, nor has any build state been persisted locally. Unregister the index build - // locally without voting to abort the build. - LOGV2(7564400, - "Index build: unregistering without voting for abort", - "buildUUD"_attr = replState->buildUUID, - "error"_attr = status); - activeIndexBuilds.unregisterIndexBuild(&_indexBuildsManager, replState); - return; - } - } + invariant(replState->isAbortCleanUpRequired()); // Use a new OperationContext to abort the index build since our current opCtx may be // interrupted. This is still susceptible to shutdown interrupts, but in that case, on server @@ -2717,21 +2715,14 @@ void IndexBuildsCoordinator::_cleanUpTwoPhaseAfterNonShutdownFailure( const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID); auto replCoord = repl::ReplicationCoordinator::get(abortCtx); if (!replCoord->canAcceptWritesFor(abortCtx, dbAndUUID)) { - if (replState->isSettingUp()) { - // Clean up if the error happens before StartIndexBuild oplog entry - // is replicated during startup or stepdown. 
- activeIndexBuilds.unregisterIndexBuild(&_indexBuildsManager, replState); - return; - } else { - // Index builds may not fail on secondaries. If a primary replicated - // an abortIndexBuild oplog entry, then this index build would have - // received an IndexBuildAborted error code. - fassert(51101, - status.withContext(str::stream() - << "Index build: " << replState->buildUUID - << "; Database: " - << replState->dbName.toStringForErrorMsg())); - } + // Index builds may not fail on secondaries. If a primary replicated + // an abortIndexBuild oplog entry, then this index build would have + // received an IndexBuildAborted error code. + fassert(51101, + status.withContext(str::stream() + << "Index build: " << replState->buildUUID + << "; Database: " + << replState->dbName.toStringForErrorMsg())); } AutoGetCollection indexBuildEntryColl( |