diff options
-rw-r--r-- | src/mongo/db/index_builds_coordinator.cpp | 142 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator.h | 20 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator_mongod.cpp | 34 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator_mongod.h | 5 | ||||
-rw-r--r-- | src/mongo/db/repl_index_build_state.cpp | 188 | ||||
-rw-r--r-- | src/mongo/db/repl_index_build_state.h | 67 | ||||
-rw-r--r-- | src/mongo/embedded/index_builds_coordinator_embedded.cpp | 6 | ||||
-rw-r--r-- | src/mongo/embedded/index_builds_coordinator_embedded.h | 5 |
8 files changed, 268 insertions, 199 deletions
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 1e0e2afe127..76bf934e79e 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -1015,30 +1015,7 @@ void IndexBuildsCoordinator::applyCommitIndexBuild(OperationContext* opCtx, bool IndexBuildsCoordinator::_tryCommit(OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) { - stdx::unique_lock<Latch> lk(replState->mutex); - if (replState->indexBuildState.isSettingUp()) { - // It's possible that the index build thread has not reached the point where it can be - // committed yet. - return false; - } - if (replState->waitForNextAction->getFuture().isReady()) { - // If the future wait were uninterruptible, then shutdown could hang. If the - // IndexBuildsCoordinator thread gets interrupted on shutdown, the oplog applier will hang - // waiting for the promise applying the commitIndexBuild oplog entry. - const auto nextAction = replState->waitForNextAction->getFuture().get(opCtx); - invariant(nextAction == IndexBuildAction::kCommitQuorumSatisfied); - // Retry until the current promise result is consumed by the index builder thread and - // a new empty promise got created by the indexBuildscoordinator thread. - return false; - } - auto skipCheck = shouldSkipIndexBuildStateTransitionCheck(opCtx, replState->protocol); - replState->indexBuildState.setState( - IndexBuildState::kPrepareCommit, skipCheck, opCtx->recoveryUnit()->getCommitTimestamp()); - // Promise can be set only once. - // We can't skip signaling here if a signal is already set because the previous commit or - // abort signal might have been sent to handle for primary case. - setSignalAndCancelVoteRequestCbkIfActive(lk, opCtx, replState, IndexBuildAction::kOplogCommit); - return true; + return replState->tryCommit(opCtx); } void IndexBuildsCoordinator::applyAbortIndexBuild(OperationContext* opCtx, @@ -1132,85 +1109,6 @@ bool IndexBuildsCoordinator::hasIndexBuilder(OperationContext* opCtx, return foundIndexBuilder; } -IndexBuildsCoordinator::TryAbortResult IndexBuildsCoordinator::_tryAbort( - OperationContext* opCtx, - std::shared_ptr<ReplIndexBuildState> replState, - IndexBuildAction signalAction, - std::string reason) { - - { - stdx::unique_lock<Latch> lk(replState->mutex); - // Wait until the build is done setting up. This indicates that all required state is - // initialized to attempt an abort. - if (replState->indexBuildState.isSettingUp()) { - LOGV2_DEBUG(465605, - 2, - "waiting until index build is done setting up before attempting to abort", - "buildUUID"_attr = replState->buildUUID); - return TryAbortResult::kRetry; - } - if (replState->waitForNextAction->getFuture().isReady()) { - const auto nextAction = replState->waitForNextAction->getFuture().get(opCtx); - invariant(nextAction == IndexBuildAction::kSinglePhaseCommit || - nextAction == IndexBuildAction::kCommitQuorumSatisfied || - nextAction == IndexBuildAction::kPrimaryAbort); - - // Index build coordinator already received a signal to commit or abort. So, it's ok - // to return and wait for the index build to complete if we are trying to signal - // 'kPrimaryAbort'. The index build coordinator will not perform the signaled action - // (i.e, will not commit or abort the index build) only when the node steps down. - // When the node steps down, the caller of this function, dropIndexes/createIndexes - // command (user operation) will also get interrupted. So, we no longer need to - // abort the index build on step down. - if (signalAction == IndexBuildAction::kPrimaryAbort) { - // Indicate if the index build is already being committed or aborted. - if (nextAction == IndexBuildAction::kPrimaryAbort) { - return TryAbortResult::kAlreadyAborted; - } else { - return TryAbortResult::kNotAborted; - } - } - - // Retry until the current promise result is consumed by the index builder thread - // and a new empty promise got created by the indexBuildscoordinator thread. Or, - // until the index build got torn down after index build commit. - return TryAbortResult::kRetry; - } - - LOGV2(4656003, - "Aborting index build", - "buildUUID"_attr = replState->buildUUID, - "error"_attr = reason); - - // Set the state on replState. Once set, the calling thread must complete the abort process. - auto abortTimestamp = - boost::make_optional<Timestamp>(!opCtx->recoveryUnit()->getCommitTimestamp().isNull(), - opCtx->recoveryUnit()->getCommitTimestamp()); - auto skipCheck = shouldSkipIndexBuildStateTransitionCheck(opCtx, replState->protocol); - replState->indexBuildState.setState( - IndexBuildState::kAborted, skipCheck, abortTimestamp, reason); - - // Interrupt the builder thread so that it can no longer acquire locks or make progress. - // It is possible that the index build thread may have completed its operation and removed - // itself from the ServiceContext. This may happen in the case of an explicit db.killOp() - // operation or during shutdown. - // During normal operation, the abort logic, initiated through external means such as - // dropIndexes or internally through an indexing error, should have set the state in - // ReplIndexBuildState so that this code would not be reachable as it is no longer necessary - // to interrupt the builder thread here. - auto serviceContext = opCtx->getServiceContext(); - if (auto target = serviceContext->getLockedClient(replState->opId)) { - auto targetOpCtx = target->getOperationContext(); - serviceContext->killOperation(target, targetOpCtx, ErrorCodes::IndexBuildAborted); - } - - // Set the signal. Because we have already interrupted the index build, it will not observe - // this signal. We do this so that other observers do not also try to abort the index build. - setSignalAndCancelVoteRequestCbkIfActive(lk, opCtx, replState, signalAction); - } - return TryAbortResult::kContinueAbort; -} - bool IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx, const UUID& buildUUID, IndexBuildAction signalAction, @@ -1282,23 +1180,23 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx, } } - auto tryAbortResult = _tryAbort(opCtx, replState, signalAction, reason); + auto tryAbortResult = replState->tryAbort(opCtx, signalAction, reason); switch (tryAbortResult) { - case TryAbortResult::kNotAborted: + case ReplIndexBuildState::TryAbortResult::kNotAborted: return false; - case TryAbortResult::kAlreadyAborted: + case ReplIndexBuildState::TryAbortResult::kAlreadyAborted: return true; - case TryAbortResult::kRetry: - case TryAbortResult::kContinueAbort: + case ReplIndexBuildState::TryAbortResult::kRetry: + case ReplIndexBuildState::TryAbortResult::kContinueAbort: break; } - if (TryAbortResult::kRetry == tryAbortResult) { + if (ReplIndexBuildState::TryAbortResult::kRetry == tryAbortResult) { retry = true; continue; } - invariant(TryAbortResult::kContinueAbort == tryAbortResult); + invariant(ReplIndexBuildState::TryAbortResult::kContinueAbort == tryAbortResult); if (MONGO_unlikely(hangBeforeCompletingAbort.shouldFail())) { LOGV2(4806200, "Hanging before completing index build abort"); @@ -1388,17 +1286,13 @@ void IndexBuildsCoordinator::_completeAbort(OperationContext* opCtx, bool isMaster = replCoord->canAcceptWritesFor(opCtx, nss); invariant(!isMaster, str::stream() << "Index build: " << replState->buildUUID); - invariant(replState->indexBuildState.isAborted(), - str::stream() - << "Index build: " << replState->buildUUID - << ", index build state: " << replState->indexBuildState.toString()); - invariant(replState->indexBuildState.getAbortReason(), replState->buildUUID.toString()); + + auto abortReason = replState->getAbortReason(); LOGV2(4665903, "Aborting index build during initial sync", "buildUUID"_attr = replState->buildUUID, - "abortReason"_attr = replState->indexBuildState.getAbortReason().get(), + "abortReason"_attr = abortReason, "collectionUUID"_attr = replState->collectionUUID); - _indexBuildsManager.abortIndexBuild( opCtx, coll, replState->buildUUID, MultiIndexBlock::kNoopOnCleanUpFn); break; @@ -2285,14 +2179,9 @@ void IndexBuildsCoordinator::_runIndexBuild( return; } auto replState = invariant(swReplState); - { - // The index build is now past the setup stage and in progress. This makes it eligible to be - // aborted. Use the current OperationContext's opId as the means for interrupting the index - // build. - stdx::unique_lock<Latch> lk(replState->mutex); - replState->opId = opCtx->getOpID(); - replState->indexBuildState.setState(IndexBuildState::kInProgress, false /* skipCheck */); - } + + // Set index build state to in-progress and save OperationContext's opId. + replState->start(opCtx); // Add build UUID to lock manager diagnostic output. auto locker = opCtx->lockState(); @@ -2440,8 +2329,7 @@ void IndexBuildsCoordinator::_runIndexBuildInner( // If the index build has already been cleaned-up because it encountered an error at // commit-time, there is no work to do. This is the most routine case, since index // constraint checking happens at commit-time for index builds. - stdx::unique_lock<Latch> lk(replState->mutex); - if (replState->indexBuildState.isAborted()) { + if (replState->isAborted()) { uassertStatusOK(status); } } diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h index a673289aa2b..d4156ab81d0 100644 --- a/src/mongo/db/index_builds_coordinator.h +++ b/src/mongo/db/index_builds_coordinator.h @@ -463,18 +463,6 @@ public: */ static int getNumIndexesTotal(OperationContext* opCtx, const CollectionPtr& collection); - - /** - * Sets the index build action 'signal' for the index build pointed by 'replState'. Also, it - * cancels if there is any active remote 'voteCommitIndexBuild' command request callback handle - * for this index build. - */ - virtual void setSignalAndCancelVoteRequestCbkIfActive( - WithLock ReplIndexBuildStateLk, - OperationContext* opCtx, - std::shared_ptr<ReplIndexBuildState> replState, - IndexBuildAction signal) = 0; - bool supportsResumableIndexBuilds() const; private: @@ -612,14 +600,6 @@ protected: const Status& status); /** - * Attempt to abort an index build. Returns a flag indicating how the caller should proceed. - */ - enum class TryAbortResult { kRetry, kAlreadyAborted, kNotAborted, kContinueAbort }; - TryAbortResult _tryAbort(OperationContext* opCtx, - std::shared_ptr<ReplIndexBuildState> replState, - IndexBuildAction signalAction, - std::string reason); - /** * Performs last steps of aborting an index build. */ void _completeAbort(OperationContext* opCtx, diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp index d65581979a5..913234d665a 100644 --- a/src/mongo/db/index_builds_coordinator_mongod.cpp +++ b/src/mongo/db/index_builds_coordinator_mongod.cpp @@ -411,41 +411,9 @@ Status IndexBuildsCoordinatorMongod::voteCommitIndexBuild(OperationContext* opCt return persistStatus; } -void IndexBuildsCoordinatorMongod::setSignalAndCancelVoteRequestCbkIfActive( - WithLock ReplIndexBuildStateLk, - OperationContext* opCtx, - std::shared_ptr<ReplIndexBuildState> replState, - IndexBuildAction signal) { - // set the signal - replState->waitForNextAction->emplaceValue(signal); - // Cancel the callback. - if (replState->voteCmdCbkHandle.isValid()) { - repl::ReplicationCoordinator::get(opCtx)->cancelCbkHandle(replState->voteCmdCbkHandle); - } -} - void IndexBuildsCoordinatorMongod::_sendCommitQuorumSatisfiedSignal( OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) { - stdx::unique_lock<Latch> ReplIndexBuildStateLk(replState->mutex); - if (!replState->waitForNextAction->getFuture().isReady()) { - setSignalAndCancelVoteRequestCbkIfActive( - ReplIndexBuildStateLk, opCtx, replState, IndexBuildAction::kCommitQuorumSatisfied); - } else { - // This implies we already got a commit or abort signal by other ways. This might have - // been signaled earlier with kPrimaryAbort or kCommitQuorumSatisfied. Or, it's also - // possible the node got stepped down and received kOplogCommit/koplogAbort or got - // kRollbackAbort. So, it's ok to skip signaling. - auto action = replState->waitForNextAction->getFuture().get(opCtx); - - LOGV2(3856200, - "Not signaling \"{skippedAction}\" as it was previously signaled with " - "\"{previousAction}\" for index build: {buildUUID}", - "Skipping signaling as it was previously signaled for index build", - "skippedAction"_attr = - _indexBuildActionToString(IndexBuildAction::kCommitQuorumSatisfied), - "previousAction"_attr = _indexBuildActionToString(action), - "buildUUID"_attr = replState->buildUUID); - } + replState->setCommitQuorumSatisfied(opCtx); } void IndexBuildsCoordinatorMongod::_signalIfCommitQuorumIsSatisfied( diff --git a/src/mongo/db/index_builds_coordinator_mongod.h b/src/mongo/db/index_builds_coordinator_mongod.h index a16cbb842e1..cbdb3ab0666 100644 --- a/src/mongo/db/index_builds_coordinator_mongod.h +++ b/src/mongo/db/index_builds_coordinator_mongod.h @@ -100,11 +100,6 @@ public: const std::vector<StringData>& indexNames, const CommitQuorumOptions& newCommitQuorum) override; - void setSignalAndCancelVoteRequestCbkIfActive(WithLock ReplIndexBuildStateLk, - OperationContext* opCtx, - std::shared_ptr<ReplIndexBuildState> replState, - IndexBuildAction signal) override; - private: /** * Keeps track of the relevant replica set member states. Index builds are managed differently diff --git a/src/mongo/db/repl_index_build_state.cpp b/src/mongo/db/repl_index_build_state.cpp index a7617172312..c1fda23d4dc 100644 --- a/src/mongo/db/repl_index_build_state.cpp +++ b/src/mongo/db/repl_index_build_state.cpp @@ -27,10 +27,15 @@ * it in the license file. */ +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kStorage + #include "mongo/platform/basic.h" #include "mongo/db/repl_index_build_state.h" +#include "mongo/db/repl/replication_coordinator.h" +#include "mongo/logv2/log.h" + namespace mongo { namespace { @@ -68,6 +73,27 @@ bool checkIfValidTransition(IndexBuildState::StateFlag currentState, } // namespace +std::string indexBuildActionToString(IndexBuildAction action) { + if (action == IndexBuildAction::kNoAction) { + return "No action"; + } else if (action == IndexBuildAction::kOplogCommit) { + return "Oplog commit"; + } else if (action == IndexBuildAction::kOplogAbort) { + return "Oplog abort"; + } else if (action == IndexBuildAction::kInitialSyncAbort) { + return "Initial sync abort"; + } else if (action == IndexBuildAction::kRollbackAbort) { + return "Rollback abort"; + } else if (action == IndexBuildAction::kPrimaryAbort) { + return "Primary abort"; + } else if (action == IndexBuildAction::kSinglePhaseCommit) { + return "Single-phase commit"; + } else if (action == IndexBuildAction::kCommitQuorumSatisfied) { + return "Commit quorum Satisfied"; + } + MONGO_UNREACHABLE; +} + void IndexBuildState::setState(StateFlag state, bool skipCheck, boost::optional<Timestamp> timestamp, @@ -102,6 +128,148 @@ ReplIndexBuildState::ReplIndexBuildState(const UUID& indexBuildUUID, commitQuorumLock.emplace(indexBuildUUID.toString()); } +void ReplIndexBuildState::start(OperationContext* opCtx) { + stdx::unique_lock<Latch> lk(mutex); + _opId = opCtx->getOpID(); + indexBuildState.setState(IndexBuildState::kInProgress, false /* skipCheck */); +} + +bool ReplIndexBuildState::isAborted() const { + stdx::unique_lock<Latch> lk(mutex); + return indexBuildState.isAborted(); +} + +std::string ReplIndexBuildState::getAbortReason() const { + stdx::unique_lock<Latch> lk(mutex); + invariant(indexBuildState.isAborted(), + str::stream() << "Index build: " << buildUUID + << ", index build state: " << indexBuildState.toString()); + auto reason = indexBuildState.getAbortReason(); + invariant(reason, str::stream() << buildUUID); + return *reason; +} + +void ReplIndexBuildState::setCommitQuorumSatisfied(OperationContext* opCtx) { + stdx::unique_lock<Latch> lk(mutex); + if (!waitForNextAction->getFuture().isReady()) { + _setSignalAndCancelVoteRequestCbkIfActive( + lk, opCtx, IndexBuildAction::kCommitQuorumSatisfied); + } else { + // This implies we already got a commit or abort signal by other ways. This might have + // been signaled earlier with kPrimaryAbort or kCommitQuorumSatisfied. Or, it's also + // possible the node got stepped down and received kOplogCommit/koplogAbort or got + // kRollbackAbort. So, it's ok to skip signaling. + auto action = waitForNextAction->getFuture().get(opCtx); + + LOGV2(3856200, + "Not signaling \"{skippedAction}\" as it was previously signaled with " + "\"{previousAction}\" for index build: {buildUUID}", + "Skipping signaling as it was previously signaled for index build", + "skippedAction"_attr = + indexBuildActionToString(IndexBuildAction::kCommitQuorumSatisfied), + "previousAction"_attr = indexBuildActionToString(action), + "buildUUID"_attr = buildUUID); + } +} + +bool ReplIndexBuildState::tryCommit(OperationContext* opCtx) { + stdx::unique_lock<Latch> lk(mutex); + if (indexBuildState.isSettingUp()) { + // It's possible that the index build thread has not reached the point where it can be + // committed yet. + return false; + } + if (waitForNextAction->getFuture().isReady()) { + // If the future wait were uninterruptible, then shutdown could hang. If the + // IndexBuildsCoordinator thread gets interrupted on shutdown, the oplog applier will hang + // waiting for the promise applying the commitIndexBuild oplog entry. + const auto nextAction = waitForNextAction->getFuture().get(opCtx); + invariant(nextAction == IndexBuildAction::kCommitQuorumSatisfied); + // Retry until the current promise result is consumed by the index builder thread and + // a new empty promise got created by the indexBuildscoordinator thread. + return false; + } + auto skipCheck = _shouldSkipIndexBuildStateTransitionCheck(opCtx); + indexBuildState.setState( + IndexBuildState::kPrepareCommit, skipCheck, opCtx->recoveryUnit()->getCommitTimestamp()); + // Promise can be set only once. + // We can't skip signaling here if a signal is already set because the previous commit or + // abort signal might have been sent to handle for primary case. + _setSignalAndCancelVoteRequestCbkIfActive(lk, opCtx, IndexBuildAction::kOplogCommit); + return true; +} + +ReplIndexBuildState::TryAbortResult ReplIndexBuildState::tryAbort(OperationContext* opCtx, + IndexBuildAction signalAction, + std::string reason) { + stdx::unique_lock<Latch> lk(mutex); + // Wait until the build is done setting up. This indicates that all required state is + // initialized to attempt an abort. + if (indexBuildState.isSettingUp()) { + LOGV2_DEBUG(465605, + 2, + "waiting until index build is done setting up before attempting to abort", + "buildUUID"_attr = buildUUID); + return TryAbortResult::kRetry; + } + if (waitForNextAction->getFuture().isReady()) { + const auto nextAction = waitForNextAction->getFuture().get(opCtx); + invariant(nextAction == IndexBuildAction::kSinglePhaseCommit || + nextAction == IndexBuildAction::kCommitQuorumSatisfied || + nextAction == IndexBuildAction::kPrimaryAbort); + + // Index build coordinator already received a signal to commit or abort. So, it's ok + // to return and wait for the index build to complete if we are trying to signal + // 'kPrimaryAbort'. The index build coordinator will not perform the signaled action + // (i.e, will not commit or abort the index build) only when the node steps down. + // When the node steps down, the caller of this function, dropIndexes/createIndexes + // command (user operation) will also get interrupted. So, we no longer need to + // abort the index build on step down. + if (signalAction == IndexBuildAction::kPrimaryAbort) { + // Indicate if the index build is already being committed or aborted. + if (nextAction == IndexBuildAction::kPrimaryAbort) { + return TryAbortResult::kAlreadyAborted; + } else { + return TryAbortResult::kNotAborted; + } + } + + // Retry until the current promise result is consumed by the index builder thread + // and a new empty promise got created by the indexBuildscoordinator thread. Or, + // until the index build got torn down after index build commit. + return TryAbortResult::kRetry; + } + + LOGV2(4656003, "Aborting index build", "buildUUID"_attr = buildUUID, "error"_attr = reason); + + // Set the state on replState. Once set, the calling thread must complete the abort process. + auto abortTimestamp = + boost::make_optional<Timestamp>(!opCtx->recoveryUnit()->getCommitTimestamp().isNull(), + opCtx->recoveryUnit()->getCommitTimestamp()); + auto skipCheck = _shouldSkipIndexBuildStateTransitionCheck(opCtx); + indexBuildState.setState(IndexBuildState::kAborted, skipCheck, abortTimestamp, reason); + + // Interrupt the builder thread so that it can no longer acquire locks or make progress. + // It is possible that the index build thread may have completed its operation and removed + // itself from the ServiceContext. This may happen in the case of an explicit db.killOp() + // operation or during shutdown. + // During normal operation, the abort logic, initiated through external means such as + // dropIndexes or internally through an indexing error, should have set the state in + // ReplIndexBuildState so that this code would not be reachable as it is no longer necessary + // to interrupt the builder thread here. + auto serviceContext = opCtx->getServiceContext(); + if (auto target = serviceContext->getLockedClient(*_opId)) { + auto targetOpCtx = target->getOperationContext(); + serviceContext->killOperation(target, targetOpCtx, ErrorCodes::IndexBuildAborted); + } + + // Set the signal. Because we have already interrupted the index build, it will not observe + // this signal. We do this so that other observers do not also try to abort the index build. + _setSignalAndCancelVoteRequestCbkIfActive(lk, opCtx, signalAction); + + return TryAbortResult::kContinueAbort; +} + bool ReplIndexBuildState::isResumable() const { stdx::unique_lock<Latch> lk(mutex); return !_lastOpTimeBeforeInterceptors.isNull(); @@ -122,4 +290,24 @@ void ReplIndexBuildState::clearLastOpTimeBeforeInterceptors() { _lastOpTimeBeforeInterceptors = {}; } +bool ReplIndexBuildState::_shouldSkipIndexBuildStateTransitionCheck(OperationContext* opCtx) const { + const auto replCoord = repl::ReplicationCoordinator::get(opCtx); + if (replCoord->getSettings().usingReplSets() && protocol == IndexBuildProtocol::kTwoPhase) { + return false; + } + return true; +} + +void ReplIndexBuildState::_setSignalAndCancelVoteRequestCbkIfActive(WithLock lk, + OperationContext* opCtx, + IndexBuildAction signal) { + // set the signal + waitForNextAction->emplaceValue(signal); + // Cancel the callback. + if (voteCmdCbkHandle.isValid()) { + auto replCoord = repl::ReplicationCoordinator::get(opCtx); + replCoord->cancelCbkHandle(voteCmdCbkHandle); + } +} + } // namespace mongo diff --git a/src/mongo/db/repl_index_build_state.h b/src/mongo/db/repl_index_build_state.h index fde82963b1f..acc1c28c377 100644 --- a/src/mongo/db/repl_index_build_state.h +++ b/src/mongo/db/repl_index_build_state.h @@ -40,6 +40,7 @@ #include "mongo/db/concurrency/d_concurrency.h" #include "mongo/db/index/index_descriptor.h" #include "mongo/db/namespace_string.h" +#include "mongo/db/operation_context.h" #include "mongo/db/repl/optime.h" #include "mongo/executor/task_executor.h" #include "mongo/stdx/condition_variable.h" @@ -100,6 +101,11 @@ enum class IndexBuildAction { }; /** + * Returns string representation of IndexBuildAction. + */ +std::string indexBuildActionToString(IndexBuildAction action); + +/** * Represents the index build state. * Valid State transition for primary: * =================================== @@ -225,6 +231,46 @@ public: IndexBuildProtocol protocol); /** + * The index build is now past the setup stage and in progress. This makes it eligible to be + * aborted. Use the current OperationContext's opId as the means for interrupting the index + * build. + */ + void start(OperationContext* opCtx); + + /** + * Returns true if this index build has been aborted. + */ + bool isAborted() const; + + /** + * Returns abort reason. Invariants if not in aborted state. + */ + std::string getAbortReason() const; + + /** + * Called when commit quorum is satisfied. + * Invokes 'onCommitQuorumSatisfied' if state is successfully transitioned to commit quorum + * satisfied. + */ + void setCommitQuorumSatisfied(OperationContext* opCtx); + + /** + * Attempt to signal the index build to commit and advance the index build to the kPrepareCommit + * state. + * Returns true if successful and false if the attempt was unnecessful and the caller should + * retry. + */ + bool tryCommit(OperationContext* opCtx); + + /** + * Attempt to abort an index build. Returns a flag indicating how the caller should proceed. + */ + enum class TryAbortResult { kRetry, kAlreadyAborted, kNotAborted, kContinueAbort }; + TryAbortResult tryAbort(OperationContext* opCtx, + IndexBuildAction signalAction, + std::string reason); + + /** * Accessor and mutator for last optime in the oplog before the interceptors were installed. * This supports resumable index builds. */ @@ -233,7 +279,6 @@ public: void setLastOpTimeBeforeInterceptors(repl::OpTime opTime); void clearLastOpTimeBeforeInterceptors(); - // Uniquely identifies this index build across replica set members. const UUID buildUUID; @@ -283,6 +328,22 @@ public: // SharedSemiFuture(s). SharedPromise<IndexCatalogStats> sharedPromise; + /* + * Determines whether to skip the index build state transition check. + * Index builder not using ReplIndexBuildState::waitForNextAction to signal primary and + * secondaries to commit or abort signal will violate index build state transition. So, we + * should skip state transition verification. Otherwise, we would invariant. + */ + bool _shouldSkipIndexBuildStateTransitionCheck(OperationContext* opCtx) const; + + /** + * Updates the next action signal and cancels the vote request under lock. + * Used by IndexBuildsCoordinatorMongod only. + */ + void _setSignalAndCancelVoteRequestCbkIfActive(WithLock lk, + OperationContext* opCtx, + IndexBuildAction signal); + // Protects the state below. mutable Mutex mutex = MONGO_MAKE_LATCH("ReplIndexBuildState::mutex"); @@ -296,8 +357,8 @@ public: executor::TaskExecutor::CallbackHandle voteCmdCbkHandle; // The OperationId of the index build. This allows external callers to interrupt the index build - // thread. - OperationId opId = 0; + // thread. Initialized in start() as we transition from setup to in-progress. + boost::optional<OperationId> _opId; private: // The last optime in the oplog before the interceptors were installed. If this is a single diff --git a/src/mongo/embedded/index_builds_coordinator_embedded.cpp b/src/mongo/embedded/index_builds_coordinator_embedded.cpp index ffbcaccd0e6..7f2ca76690e 100644 --- a/src/mongo/embedded/index_builds_coordinator_embedded.cpp +++ b/src/mongo/embedded/index_builds_coordinator_embedded.cpp @@ -93,12 +93,6 @@ void IndexBuildsCoordinatorEmbedded::_waitForNextIndexBuildActionAndCommit( std::shared_ptr<ReplIndexBuildState> replState, const IndexBuildOptions& indexBuildOptions) {} -void IndexBuildsCoordinatorEmbedded::setSignalAndCancelVoteRequestCbkIfActive( - WithLock ReplIndexBuildStateLk, - OperationContext* opCtx, - std::shared_ptr<ReplIndexBuildState> replState, - IndexBuildAction signal) {} - Status IndexBuildsCoordinatorEmbedded::voteCommitIndexBuild(OperationContext* opCtx, const UUID& buildUUID, const HostAndPort& hostAndPort) { diff --git a/src/mongo/embedded/index_builds_coordinator_embedded.h b/src/mongo/embedded/index_builds_coordinator_embedded.h index 81b02ddada9..c2ec6b77b88 100644 --- a/src/mongo/embedded/index_builds_coordinator_embedded.h +++ b/src/mongo/embedded/index_builds_coordinator_embedded.h @@ -73,11 +73,6 @@ public: const UUID& buildUUID, const ResumeIndexInfo& resumeInfo) override; - void setSignalAndCancelVoteRequestCbkIfActive(WithLock ReplIndexBuildStateLk, - OperationContext* opCtx, - std::shared_ptr<ReplIndexBuildState> replState, - IndexBuildAction signal) override; - /** * None of the following functions should ever be called on an embedded server node. */ |