diff options
author | Louis Williams <louis.williams@mongodb.com> | 2020-03-03 16:15:23 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-03-10 18:03:42 +0000 |
commit | 2fd22e51e07c93a78a67cbb8d01289b96cb7f60a (patch) | |
tree | dfba42ee14bdfd2a0a480ddb152a4970df29fa65 | |
parent | f042509f1c46e292cc14af7c7ba23b9cc97c5185 (diff) | |
download | mongo-2fd22e51e07c93a78a67cbb8d01289b96cb7f60a.tar.gz |
SERVER-46397 Only report an index build as aborted if it is currently aborting, not committing
This expands the concurrency control features used by two-phase index
builds to standalone nodes and single-phase index builds so that
concurrent commits and aborts behave correctly.
-rw-r--r-- | jstests/noPassthrough/drop_indexes_aborts_in_progress_index_builds_simple_name.js | 2 | ||||
-rw-r--r-- | src/mongo/db/catalog/drop_indexes.cpp | 47 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator.cpp | 31 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator_mongod.cpp | 45 | ||||
-rw-r--r-- | src/mongo/db/repl_index_build_state.h | 8 |
5 files changed, 62 insertions, 71 deletions
diff --git a/jstests/noPassthrough/drop_indexes_aborts_in_progress_index_builds_simple_name.js b/jstests/noPassthrough/drop_indexes_aborts_in_progress_index_builds_simple_name.js index ec9894a729a..1f56613966f 100644 --- a/jstests/noPassthrough/drop_indexes_aborts_in_progress_index_builds_simple_name.js +++ b/jstests/noPassthrough/drop_indexes_aborts_in_progress_index_builds_simple_name.js @@ -45,6 +45,7 @@ jsTest.log("Aborting index builder with one index build and simple index spec"); assert.commandWorked(testDB.getCollection(collName).insert({a: 1})); IndexBuildTest.pauseIndexBuilds(testDB.getMongo()); +IndexBuildTest.pauseIndexBuilds(secondaryReadsTest.getSecondaryDB().getMongo()); const awaitIndexBuild = IndexBuildTest.startIndexBuild( testDB.getMongo(), coll.getFullName(), {a: 1}, {}, [ErrorCodes.IndexBuildAborted]); @@ -89,6 +90,7 @@ const awaitDropIndex = startParallelShell(() => { checkLog.contains(testDB.getMongo(), "About to abort index builder"); IndexBuildTest.resumeIndexBuilds(testDB.getMongo()); +IndexBuildTest.resumeIndexBuilds(secondaryReadsTest.getSecondaryDB().getMongo()); awaitIndexBuild(); awaitDropIndex(); diff --git a/src/mongo/db/catalog/drop_indexes.cpp b/src/mongo/db/catalog/drop_indexes.cpp index ddf2185ab42..b76c2d582b8 100644 --- a/src/mongo/db/catalog/drop_indexes.cpp +++ b/src/mongo/db/catalog/drop_indexes.cpp @@ -405,41 +405,26 @@ Status dropIndexes(OperationContext* opCtx, } // If the "*" wildcard was not specified, verify that all the index names belonging to the - // index builder were aborted. If not, they must be ready, so we drop them. + // index builder were aborted. if (!isWildcard && !abortedIndexBuilders.empty()) { invariant(abortedIndexBuilders.size() == 1); - return writeConflictRetry( - opCtx, "dropIndexes", dbAndUUID.toString(), [opCtx, &collection, &indexNames, result] { - WriteUnitOfWork wunit(opCtx); - - // This is necessary to check shard version. - OldClientContext ctx(opCtx, collection->ns().ns()); - - size_t numReady = 0; - const bool includeUnfinished = false; - IndexCatalog* indexCatalog = collection->getIndexCatalog(); - for (const auto& indexName : indexNames) { - const IndexDescriptor* desc = - indexCatalog->findIndexByName(opCtx, indexName, includeUnfinished); - if (!desc) { - // The given index name was successfully aborted. - continue; - } - - Status status = dropIndexByDescriptor(opCtx, collection, indexCatalog, desc); - if (!status.isOK()) { - return status; - } - - numReady++; - } - - invariant(numReady == 0 || numReady == indexNames.size()); + // This is necessary to check shard version. + OldClientContext ctx(opCtx, collection->ns().ns()); + + // Iterate through all aborted indexes and verify none of them are ready. This would + // indicate a flaw with the abort logic that allows indexes to complete despite the + // dropIndexes command reporting they were aborted. + auto indexCatalog = collection->getIndexCatalog(); + const bool includeUnfinished = false; + const bool noneReady = std::none_of(indexNames.begin(), indexNames.end(), [&](auto name) { + return indexCatalog->findIndexByName(opCtx, name, includeUnfinished); + }); - wunit.commit(); - return Status::OK(); - }); + invariant(noneReady, + str::stream() << "Found completed indexes despite aborting index build: " + << abortedIndexBuilders.front()); + return Status::OK(); } if (!abortedIndexBuilders.empty()) { diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index e156245dfdf..a8c2946b6d1 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -229,10 +229,8 @@ void abortIndexBuild(WithLock lk, IndexBuildsManager* indexBuildsManager, std::shared_ptr<ReplIndexBuildState> replIndexBuildState, const std::string& reason) { - auto protocol = replIndexBuildState->protocol; stdx::unique_lock<Latch> replStateLock(replIndexBuildState->mutex); - if (protocol == IndexBuildProtocol::kTwoPhase && - replIndexBuildState->waitForNextAction->getFuture().isReady()) { + if (replIndexBuildState->waitForNextAction->getFuture().isReady()) { const auto nextAction = replIndexBuildState->waitForNextAction->getFuture().get(); invariant(nextAction == IndexBuildAction::kCommitQuorumSatisfied || nextAction == IndexBuildAction::kPrimaryAbort); @@ -252,11 +250,7 @@ void abortIndexBuild(WithLock lk, IndexBuildState::kPrepareAbort, skipCheck, boost::none, reason); indexBuildsManager->abortIndexBuild(replIndexBuildState->buildUUID, reason); - if (protocol == IndexBuildProtocol::kTwoPhase) { - // Only for 2 phase we need to use signaling logic. - // Promise can be set only once. - replIndexBuildState->waitForNextAction->emplaceValue(IndexBuildAction::kPrimaryAbort); - } + replIndexBuildState->waitForNextAction->emplaceValue(IndexBuildAction::kPrimaryAbort); } /** @@ -525,6 +519,8 @@ std::string IndexBuildsCoordinator::_indexBuildActionToString(IndexBuildAction a return "Rollback abort"; } else if (action == IndexBuildAction::kPrimaryAbort) { return "Primary abort"; + } else if (action == IndexBuildAction::kSinglePhaseSecondaryCommit) { + return "Single-phase secondary commit"; } else if (action == IndexBuildAction::kCommitQuorumSatisfied) { return "Commit quorum Satisfied"; } @@ -908,11 +904,9 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUIDNoWait( } auto replState = replStateResult.getValue(); - auto protocol = replState->protocol; stdx::unique_lock<Latch> lk(replState->mutex); - if (protocol == IndexBuildProtocol::kTwoPhase && - replState->waitForNextAction->getFuture().isReady()) { + if (replState->waitForNextAction->getFuture().isReady()) { const auto nextAction = replState->waitForNextAction->getFuture().get(opCtx); invariant(nextAction == IndexBuildAction::kCommitQuorumSatisfied || nextAction == IndexBuildAction::kPrimaryAbort); @@ -929,8 +923,11 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUIDNoWait( // collection lock held in IX mode, So, there are possibilities, we might block the // index build from completing, leading to 3 way deadlocks involving step down, // dropIndexes command, IndexBuildCoordinator thread. - if (signalAction == IndexBuildAction::kPrimaryAbort) - return true; + if (signalAction == IndexBuildAction::kPrimaryAbort) { + // Only return true if the index build is being aborted already, not if it is going + // to commit. + return nextAction == IndexBuildAction::kPrimaryAbort; + } // Retry until the current promise result is consumed by the index builder thread and // a new empty promise got created by the indexBuildscoordinator thread. Or, until the @@ -947,11 +944,7 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUIDNoWait( IndexBuildState::kPrepareAbort, skipCheck, abortTimestamp, reason); _indexBuildsManager.abortIndexBuild(buildUUID, reason.get_value_or("")); - if (protocol == IndexBuildProtocol::kTwoPhase) { - // Only for 2 phase we need to use signaling logic. - // Promise can be set only once. - replState->waitForNextAction->emplaceValue(signalAction); - } + replState->waitForNextAction->emplaceValue(signalAction); break; } @@ -1950,6 +1943,8 @@ void IndexBuildsCoordinator::_buildIndexSinglePhase( boost::optional<Lock::CollectionLock>* exclusiveCollectionLock) { _scanCollectionAndInsertKeysIntoSorter(opCtx, replState, exclusiveCollectionLock); _insertKeysFromSideTablesWithoutBlockingWrites(opCtx, replState); + _signalPrimaryForCommitReadiness(opCtx, replState); + _waitForNextIndexBuildAction(opCtx, replState); _insertKeysFromSideTablesAndCommit( opCtx, replState, indexBuildOptions, exclusiveCollectionLock, {}); } diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp index a136f74d685..4bf885b5d7a 100644 --- a/src/mongo/db/index_builds_coordinator_mongod.cpp +++ b/src/mongo/db/index_builds_coordinator_mongod.cpp @@ -394,19 +394,23 @@ void IndexBuildsCoordinatorMongod::_signalIfCommitQuorumIsSatisfied( bool IndexBuildsCoordinatorMongod::_signalIfCommitQuorumNotEnabled( OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState, bool onStepup) { - // Locking order is important here to avoid deadlocks i.e, rstl followed by ReplIndexBuildState - // mutex. - invariant(opCtx->lockState()->isRSTLLocked()); - // TODO SERVER-46557: Revisit this logic to see if we can check replState->commitQuorum for // value to be zero to determine whether commit quorum is enabled or not for this index build. - if (!enableIndexBuildCommitQuorum) { - auto replCoord = repl::ReplicationCoordinator::get(opCtx); + auto replCoord = repl::ReplicationCoordinator::get(opCtx); + // Single-phase and standalones don't support commit quorum, but they must go through the + // process of updating their state to synchronize with concurrent abort operations. + bool singlePhaseOrStandalone = replState->protocol == IndexBuildProtocol::kSinglePhase || + !replCoord->getSettings().usingReplSets(); + if (!enableIndexBuildCommitQuorum || singlePhaseOrStandalone) { const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID); + repl::ReplicationStateTransitionLockGuard rstl(opCtx, MODE_IX); + stdx::unique_lock<Latch> lk(replState->mutex); if (replCoord->canAcceptWritesFor(opCtx, dbAndUUID) || onStepup) { // Node is primary here. - stdx::unique_lock<Latch> lk(replState->mutex); _sendCommitQuorumSatisfiedSignal(lk, opCtx, replState); + } else if (replState->protocol == IndexBuildProtocol::kSinglePhase) { + replState->waitForNextAction->emplaceValue( + IndexBuildAction::kSinglePhaseSecondaryCommit); } // No-op for secondaries. return true; @@ -431,10 +435,6 @@ bool IndexBuildsCoordinatorMongod::_checkVoteCommitIndexCmdSucceeded(const BSONO void IndexBuildsCoordinatorMongod::_signalPrimaryForCommitReadiness( OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) { auto replCoord = repl::ReplicationCoordinator::get(opCtx); - if (!replCoord->getSettings().usingReplSets()) { - // Standalones does not support commit quorum. - return; - } // Before voting see if we are eligible to skip voting and signal // to commit index build if the node is primary. @@ -549,14 +549,6 @@ Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction( OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) { Timestamp commitIndexBuildTimestamp; - invariant(replState->protocol == IndexBuildProtocol::kTwoPhase); - - // standalones doesn't need to wait for commit or abort index build oplog entry. - auto replCoord = repl::ReplicationCoordinator::get(opCtx); - if (!replCoord->getSettings().usingReplSets()) { - return commitIndexBuildTimestamp; - }; - // Yield locks and storage engine resources before blocking. opCtx->recoveryUnit()->abandonSnapshot(); Lock::TempRelease release(opCtx->lockState()); @@ -584,11 +576,14 @@ Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction( // Reacquire RSTL lock repl::ReplicationStateTransitionLockGuard rstl(opCtx, MODE_IX); const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID); + auto replCoord = repl::ReplicationCoordinator::get(opCtx); auto isMaster = replCoord->canAcceptWritesFor(opCtx, dbAndUUID); stdx::unique_lock<Latch> lk(replState->mutex); switch (nextAction) { case IndexBuildAction::kOplogCommit: + invariant(replState->protocol == IndexBuildProtocol::kTwoPhase); + // Sanity check // This signal can be received during primary (drain phase), secondary, // startup( startup recovery) and startup2 (initial sync). @@ -607,6 +602,7 @@ Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction( "collectionUUID"_attr = replState->collectionUUID); break; case IndexBuildAction::kOplogAbort: + invariant(replState->protocol == IndexBuildProtocol::kTwoPhase); // Sanity check // This signal can be received during primary (drain phase), secondary, // startup( startup recovery) and startup2 (initial sync). @@ -624,6 +620,7 @@ Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction( "abortReason"_attr = replState->indexBuildState.getAbortReason().get(), "collectionUUID"_attr = replState->collectionUUID); case IndexBuildAction::kRollbackAbort: + invariant(replState->protocol == IndexBuildProtocol::kTwoPhase); // Currently, We abort the index build before transitioning the state to rollback. // So, we can check if the node state is rollback. break; @@ -640,6 +637,16 @@ Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction( << " , abort reason:" << replState->indexBuildState.getAbortReason().get_value_or(""))); } + case IndexBuildAction::kSinglePhaseSecondaryCommit: + invariant(replState->protocol == IndexBuildProtocol::kSinglePhase); + if (isMaster) { + LOGV2_FATAL( + 4639700, + "Primary while trying to commit single-phase build that was started on a " + "secondary"); + fassertFailed(4639701); + } + break; case IndexBuildAction::kCommitQuorumSatisfied: if (!isMaster) { // Reset the promise as the node has stepped down, diff --git a/src/mongo/db/repl_index_build_state.h b/src/mongo/db/repl_index_build_state.h index 816eb4a7c11..5bd3552ffb5 100644 --- a/src/mongo/db/repl_index_build_state.h +++ b/src/mongo/db/repl_index_build_state.h @@ -84,6 +84,10 @@ enum class IndexBuildAction { */ kPrimaryAbort, /** + * Commit signal set by an index build on a secondary for a single-phase build. + */ + kSinglePhaseSecondaryCommit, + /** * Commit signal set by "voteCommitIndexBuild" cmd and step up. */ kCommitQuorumSatisfied @@ -230,9 +234,7 @@ struct ReplIndexBuildState { indexSpecs(specs), protocol(protocol), commitQuorum(commitQuorum) { - if (IndexBuildProtocol::kTwoPhase == protocol) { - waitForNextAction = std::make_unique<SharedPromise<IndexBuildAction>>(); - } + waitForNextAction = std::make_unique<SharedPromise<IndexBuildAction>>(); } // Uniquely identifies this index build across replica set members. |