summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Yu Jin Kang Park <yujin.kang@mongodb.com> 2023-05-09 14:06:33 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2023-05-09 15:02:37 +0000
commit: 696d8924ff38cca476da7865b39ee7bd6971208b (patch)
tree: 5f4e796c55f895779d856c34a1ae1bebcac2ce9d
parent: ef9da916d39ac0ea7c6aa3426dfdfbabe3d2cd5f (diff)
download: mongo-696d8924ff38cca476da7865b39ee7bd6971208b.tar.gz
SERVER-76915 Unregister index build if no cleanup is required in shutdown path
-rw-r--r-- src/mongo/db/index_builds_coordinator.cpp 61
1 files changed, 26 insertions, 35 deletions
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 8051f89ec36..de5004df03d 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -2588,6 +2588,21 @@ void IndexBuildsCoordinator::_cleanUpAfterFailure(OperationContext* opCtx,
const IndexBuildOptions& indexBuildOptions,
const Status& status) {
+ if (!replState->isAbortCleanUpRequired()) {
+ // The index build aborted at an early stage before the 'startIndexBuild' oplog entry is
+ // replicated: members replicating from this sync source are not aware of this index
+ // build, nor has any build state been persisted locally. Unregister the index build
+ // locally. In two phase index builds, any conditions causing secondaries to fail setting up
+ // an index build (which must have succeeded in the primary) are assumed to eventually cause
+ // the node to crash, so we do not attempt to verify this is a primary.
+ LOGV2(7564400,
+ "Index build: unregistering without cleanup",
+ "buildUUD"_attr = replState->buildUUID,
+ "error"_attr = status);
+ activeIndexBuilds.unregisterIndexBuild(&_indexBuildsManager, replState);
+ return;
+ }
+
if (!status.isA<ErrorCategory::ShutdownError>()) {
try {
// It is still possible to get a shutdown request while trying to clean-up. All shutdown
@@ -2627,6 +2642,8 @@ void IndexBuildsCoordinator::_cleanUpSinglePhaseAfterNonShutdownFailure(
const IndexBuildOptions& indexBuildOptions,
const Status& status) {
+ invariant(replState->isAbortCleanUpRequired());
+
// The index builder thread can abort on its own if it is interrupted by a user killop. This
// would prevent us from taking locks. Use a new OperationContext to abort the index build.
runOnAlternateContext(
@@ -2653,26 +2670,7 @@ void IndexBuildsCoordinator::_cleanUpTwoPhaseAfterNonShutdownFailure(
const IndexBuildOptions& indexBuildOptions,
const Status& status) {
- // We can only get here when there is no external abort, after a failure. If the operation has
- // been killed, it must have been from a killop. In which case we cannot continue and try to
- // vote, because we want the voting itself to be killable. Continue and try to abort as primary
- // or crash.
- if (!opCtx->isKillPending() &&
- feature_flags::gIndexBuildGracefulErrorHandling.isEnabled(
- serverGlobalParams.featureCompatibility)) {
- if (!replState->isAbortCleanUpRequired()) {
- // The index build aborted at an early stage before the 'startIndexBuild' oplog entry is
- // replicated: members replicating from this sync source are not aware of this index
- // build, nor has any build state been persisted locally. Unregister the index build
- // locally without voting to abort the build.
- LOGV2(7564400,
- "Index build: unregistering without voting for abort",
- "buildUUD"_attr = replState->buildUUID,
- "error"_attr = status);
- activeIndexBuilds.unregisterIndexBuild(&_indexBuildsManager, replState);
- return;
- }
- }
+ invariant(replState->isAbortCleanUpRequired());
// Use a new OperationContext to abort the index build since our current opCtx may be
// interrupted. This is still susceptible to shutdown interrupts, but in that case, on server
@@ -2717,21 +2715,14 @@ void IndexBuildsCoordinator::_cleanUpTwoPhaseAfterNonShutdownFailure(
const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
auto replCoord = repl::ReplicationCoordinator::get(abortCtx);
if (!replCoord->canAcceptWritesFor(abortCtx, dbAndUUID)) {
- if (replState->isSettingUp()) {
- // Clean up if the error happens before StartIndexBuild oplog entry
- // is replicated during startup or stepdown.
- activeIndexBuilds.unregisterIndexBuild(&_indexBuildsManager, replState);
- return;
- } else {
- // Index builds may not fail on secondaries. If a primary replicated
- // an abortIndexBuild oplog entry, then this index build would have
- // received an IndexBuildAborted error code.
- fassert(51101,
- status.withContext(str::stream()
- << "Index build: " << replState->buildUUID
- << "; Database: "
- << replState->dbName.toStringForErrorMsg()));
- }
+ // Index builds may not fail on secondaries. If a primary replicated
+ // an abortIndexBuild oplog entry, then this index build would have
+ // received an IndexBuildAborted error code.
+ fassert(51101,
+ status.withContext(str::stream()
+ << "Index build: " << replState->buildUUID
+ << "; Database: "
+ << replState->dbName.toStringForErrorMsg()));
}
AutoGetCollection indexBuildEntryColl(