summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenety Goh <benety@mongodb.com>2020-10-16 11:33:27 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-10-16 16:06:01 +0000
commit8bc619e4656e966d207838f8a45707912d8e3579 (patch)
tree15c251192aefbf104789315c065781200a6acd74
parent77a42ecdd91ae10d2953d592b899e05d5aeca0a4 (diff)
downloadmongo-8bc619e4656e966d207838f8a45707912d8e3579.tar.gz
SERVER-46995 move pre-commit and abort logic from IndexBuildsCoordinator into ReplIndexBuildState
-rw-r--r--src/mongo/db/index_builds_coordinator.cpp142
-rw-r--r--src/mongo/db/index_builds_coordinator.h20
-rw-r--r--src/mongo/db/index_builds_coordinator_mongod.cpp34
-rw-r--r--src/mongo/db/index_builds_coordinator_mongod.h5
-rw-r--r--src/mongo/db/repl_index_build_state.cpp188
-rw-r--r--src/mongo/db/repl_index_build_state.h67
-rw-r--r--src/mongo/embedded/index_builds_coordinator_embedded.cpp6
-rw-r--r--src/mongo/embedded/index_builds_coordinator_embedded.h5
8 files changed, 268 insertions, 199 deletions
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 1e0e2afe127..76bf934e79e 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -1015,30 +1015,7 @@ void IndexBuildsCoordinator::applyCommitIndexBuild(OperationContext* opCtx,
bool IndexBuildsCoordinator::_tryCommit(OperationContext* opCtx,
std::shared_ptr<ReplIndexBuildState> replState) {
- stdx::unique_lock<Latch> lk(replState->mutex);
- if (replState->indexBuildState.isSettingUp()) {
- // It's possible that the index build thread has not reached the point where it can be
- // committed yet.
- return false;
- }
- if (replState->waitForNextAction->getFuture().isReady()) {
- // If the future wait were uninterruptible, then shutdown could hang. If the
- // IndexBuildsCoordinator thread gets interrupted on shutdown, the oplog applier will hang
- // waiting for the promise applying the commitIndexBuild oplog entry.
- const auto nextAction = replState->waitForNextAction->getFuture().get(opCtx);
- invariant(nextAction == IndexBuildAction::kCommitQuorumSatisfied);
- // Retry until the current promise result is consumed by the index builder thread and
- // a new empty promise got created by the indexBuildscoordinator thread.
- return false;
- }
- auto skipCheck = shouldSkipIndexBuildStateTransitionCheck(opCtx, replState->protocol);
- replState->indexBuildState.setState(
- IndexBuildState::kPrepareCommit, skipCheck, opCtx->recoveryUnit()->getCommitTimestamp());
- // Promise can be set only once.
- // We can't skip signaling here if a signal is already set because the previous commit or
- // abort signal might have been sent to handle for primary case.
- setSignalAndCancelVoteRequestCbkIfActive(lk, opCtx, replState, IndexBuildAction::kOplogCommit);
- return true;
+ return replState->tryCommit(opCtx);
}
void IndexBuildsCoordinator::applyAbortIndexBuild(OperationContext* opCtx,
@@ -1132,85 +1109,6 @@ bool IndexBuildsCoordinator::hasIndexBuilder(OperationContext* opCtx,
return foundIndexBuilder;
}
-IndexBuildsCoordinator::TryAbortResult IndexBuildsCoordinator::_tryAbort(
- OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- IndexBuildAction signalAction,
- std::string reason) {
-
- {
- stdx::unique_lock<Latch> lk(replState->mutex);
- // Wait until the build is done setting up. This indicates that all required state is
- // initialized to attempt an abort.
- if (replState->indexBuildState.isSettingUp()) {
- LOGV2_DEBUG(465605,
- 2,
- "waiting until index build is done setting up before attempting to abort",
- "buildUUID"_attr = replState->buildUUID);
- return TryAbortResult::kRetry;
- }
- if (replState->waitForNextAction->getFuture().isReady()) {
- const auto nextAction = replState->waitForNextAction->getFuture().get(opCtx);
- invariant(nextAction == IndexBuildAction::kSinglePhaseCommit ||
- nextAction == IndexBuildAction::kCommitQuorumSatisfied ||
- nextAction == IndexBuildAction::kPrimaryAbort);
-
- // Index build coordinator already received a signal to commit or abort. So, it's ok
- // to return and wait for the index build to complete if we are trying to signal
- // 'kPrimaryAbort'. The index build coordinator will not perform the signaled action
- // (i.e, will not commit or abort the index build) only when the node steps down.
- // When the node steps down, the caller of this function, dropIndexes/createIndexes
- // command (user operation) will also get interrupted. So, we no longer need to
- // abort the index build on step down.
- if (signalAction == IndexBuildAction::kPrimaryAbort) {
- // Indicate if the index build is already being committed or aborted.
- if (nextAction == IndexBuildAction::kPrimaryAbort) {
- return TryAbortResult::kAlreadyAborted;
- } else {
- return TryAbortResult::kNotAborted;
- }
- }
-
- // Retry until the current promise result is consumed by the index builder thread
- // and a new empty promise got created by the indexBuildscoordinator thread. Or,
- // until the index build got torn down after index build commit.
- return TryAbortResult::kRetry;
- }
-
- LOGV2(4656003,
- "Aborting index build",
- "buildUUID"_attr = replState->buildUUID,
- "error"_attr = reason);
-
- // Set the state on replState. Once set, the calling thread must complete the abort process.
- auto abortTimestamp =
- boost::make_optional<Timestamp>(!opCtx->recoveryUnit()->getCommitTimestamp().isNull(),
- opCtx->recoveryUnit()->getCommitTimestamp());
- auto skipCheck = shouldSkipIndexBuildStateTransitionCheck(opCtx, replState->protocol);
- replState->indexBuildState.setState(
- IndexBuildState::kAborted, skipCheck, abortTimestamp, reason);
-
- // Interrupt the builder thread so that it can no longer acquire locks or make progress.
- // It is possible that the index build thread may have completed its operation and removed
- // itself from the ServiceContext. This may happen in the case of an explicit db.killOp()
- // operation or during shutdown.
- // During normal operation, the abort logic, initiated through external means such as
- // dropIndexes or internally through an indexing error, should have set the state in
- // ReplIndexBuildState so that this code would not be reachable as it is no longer necessary
- // to interrupt the builder thread here.
- auto serviceContext = opCtx->getServiceContext();
- if (auto target = serviceContext->getLockedClient(replState->opId)) {
- auto targetOpCtx = target->getOperationContext();
- serviceContext->killOperation(target, targetOpCtx, ErrorCodes::IndexBuildAborted);
- }
-
- // Set the signal. Because we have already interrupted the index build, it will not observe
- // this signal. We do this so that other observers do not also try to abort the index build.
- setSignalAndCancelVoteRequestCbkIfActive(lk, opCtx, replState, signalAction);
- }
- return TryAbortResult::kContinueAbort;
-}
-
bool IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx,
const UUID& buildUUID,
IndexBuildAction signalAction,
@@ -1282,23 +1180,23 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx,
}
}
- auto tryAbortResult = _tryAbort(opCtx, replState, signalAction, reason);
+ auto tryAbortResult = replState->tryAbort(opCtx, signalAction, reason);
switch (tryAbortResult) {
- case TryAbortResult::kNotAborted:
+ case ReplIndexBuildState::TryAbortResult::kNotAborted:
return false;
- case TryAbortResult::kAlreadyAborted:
+ case ReplIndexBuildState::TryAbortResult::kAlreadyAborted:
return true;
- case TryAbortResult::kRetry:
- case TryAbortResult::kContinueAbort:
+ case ReplIndexBuildState::TryAbortResult::kRetry:
+ case ReplIndexBuildState::TryAbortResult::kContinueAbort:
break;
}
- if (TryAbortResult::kRetry == tryAbortResult) {
+ if (ReplIndexBuildState::TryAbortResult::kRetry == tryAbortResult) {
retry = true;
continue;
}
- invariant(TryAbortResult::kContinueAbort == tryAbortResult);
+ invariant(ReplIndexBuildState::TryAbortResult::kContinueAbort == tryAbortResult);
if (MONGO_unlikely(hangBeforeCompletingAbort.shouldFail())) {
LOGV2(4806200, "Hanging before completing index build abort");
@@ -1388,17 +1286,13 @@ void IndexBuildsCoordinator::_completeAbort(OperationContext* opCtx,
bool isMaster = replCoord->canAcceptWritesFor(opCtx, nss);
invariant(!isMaster, str::stream() << "Index build: " << replState->buildUUID);
- invariant(replState->indexBuildState.isAborted(),
- str::stream()
- << "Index build: " << replState->buildUUID
- << ", index build state: " << replState->indexBuildState.toString());
- invariant(replState->indexBuildState.getAbortReason(), replState->buildUUID.toString());
+
+ auto abortReason = replState->getAbortReason();
LOGV2(4665903,
"Aborting index build during initial sync",
"buildUUID"_attr = replState->buildUUID,
- "abortReason"_attr = replState->indexBuildState.getAbortReason().get(),
+ "abortReason"_attr = abortReason,
"collectionUUID"_attr = replState->collectionUUID);
-
_indexBuildsManager.abortIndexBuild(
opCtx, coll, replState->buildUUID, MultiIndexBlock::kNoopOnCleanUpFn);
break;
@@ -2285,14 +2179,9 @@ void IndexBuildsCoordinator::_runIndexBuild(
return;
}
auto replState = invariant(swReplState);
- {
- // The index build is now past the setup stage and in progress. This makes it eligible to be
- // aborted. Use the current OperationContext's opId as the means for interrupting the index
- // build.
- stdx::unique_lock<Latch> lk(replState->mutex);
- replState->opId = opCtx->getOpID();
- replState->indexBuildState.setState(IndexBuildState::kInProgress, false /* skipCheck */);
- }
+
+ // Set index build state to in-progress and save OperationContext's opId.
+ replState->start(opCtx);
// Add build UUID to lock manager diagnostic output.
auto locker = opCtx->lockState();
@@ -2440,8 +2329,7 @@ void IndexBuildsCoordinator::_runIndexBuildInner(
// If the index build has already been cleaned-up because it encountered an error at
// commit-time, there is no work to do. This is the most routine case, since index
// constraint checking happens at commit-time for index builds.
- stdx::unique_lock<Latch> lk(replState->mutex);
- if (replState->indexBuildState.isAborted()) {
+ if (replState->isAborted()) {
uassertStatusOK(status);
}
}
diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h
index a673289aa2b..d4156ab81d0 100644
--- a/src/mongo/db/index_builds_coordinator.h
+++ b/src/mongo/db/index_builds_coordinator.h
@@ -463,18 +463,6 @@ public:
*/
static int getNumIndexesTotal(OperationContext* opCtx, const CollectionPtr& collection);
-
- /**
- * Sets the index build action 'signal' for the index build pointed by 'replState'. Also, it
- * cancels if there is any active remote 'voteCommitIndexBuild' command request callback handle
- * for this index build.
- */
- virtual void setSignalAndCancelVoteRequestCbkIfActive(
- WithLock ReplIndexBuildStateLk,
- OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- IndexBuildAction signal) = 0;
-
bool supportsResumableIndexBuilds() const;
private:
@@ -612,14 +600,6 @@ protected:
const Status& status);
/**
- * Attempt to abort an index build. Returns a flag indicating how the caller should proceed.
- */
- enum class TryAbortResult { kRetry, kAlreadyAborted, kNotAborted, kContinueAbort };
- TryAbortResult _tryAbort(OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- IndexBuildAction signalAction,
- std::string reason);
- /**
* Performs last steps of aborting an index build.
*/
void _completeAbort(OperationContext* opCtx,
diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp
index d65581979a5..913234d665a 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.cpp
+++ b/src/mongo/db/index_builds_coordinator_mongod.cpp
@@ -411,41 +411,9 @@ Status IndexBuildsCoordinatorMongod::voteCommitIndexBuild(OperationContext* opCt
return persistStatus;
}
-void IndexBuildsCoordinatorMongod::setSignalAndCancelVoteRequestCbkIfActive(
- WithLock ReplIndexBuildStateLk,
- OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- IndexBuildAction signal) {
- // set the signal
- replState->waitForNextAction->emplaceValue(signal);
- // Cancel the callback.
- if (replState->voteCmdCbkHandle.isValid()) {
- repl::ReplicationCoordinator::get(opCtx)->cancelCbkHandle(replState->voteCmdCbkHandle);
- }
-}
-
void IndexBuildsCoordinatorMongod::_sendCommitQuorumSatisfiedSignal(
OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) {
- stdx::unique_lock<Latch> ReplIndexBuildStateLk(replState->mutex);
- if (!replState->waitForNextAction->getFuture().isReady()) {
- setSignalAndCancelVoteRequestCbkIfActive(
- ReplIndexBuildStateLk, opCtx, replState, IndexBuildAction::kCommitQuorumSatisfied);
- } else {
- // This implies we already got a commit or abort signal by other ways. This might have
- // been signaled earlier with kPrimaryAbort or kCommitQuorumSatisfied. Or, it's also
- // possible the node got stepped down and received kOplogCommit/koplogAbort or got
- // kRollbackAbort. So, it's ok to skip signaling.
- auto action = replState->waitForNextAction->getFuture().get(opCtx);
-
- LOGV2(3856200,
- "Not signaling \"{skippedAction}\" as it was previously signaled with "
- "\"{previousAction}\" for index build: {buildUUID}",
- "Skipping signaling as it was previously signaled for index build",
- "skippedAction"_attr =
- _indexBuildActionToString(IndexBuildAction::kCommitQuorumSatisfied),
- "previousAction"_attr = _indexBuildActionToString(action),
- "buildUUID"_attr = replState->buildUUID);
- }
+ replState->setCommitQuorumSatisfied(opCtx);
}
void IndexBuildsCoordinatorMongod::_signalIfCommitQuorumIsSatisfied(
diff --git a/src/mongo/db/index_builds_coordinator_mongod.h b/src/mongo/db/index_builds_coordinator_mongod.h
index a16cbb842e1..cbdb3ab0666 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.h
+++ b/src/mongo/db/index_builds_coordinator_mongod.h
@@ -100,11 +100,6 @@ public:
const std::vector<StringData>& indexNames,
const CommitQuorumOptions& newCommitQuorum) override;
- void setSignalAndCancelVoteRequestCbkIfActive(WithLock ReplIndexBuildStateLk,
- OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- IndexBuildAction signal) override;
-
private:
/**
* Keeps track of the relevant replica set member states. Index builds are managed differently
diff --git a/src/mongo/db/repl_index_build_state.cpp b/src/mongo/db/repl_index_build_state.cpp
index a7617172312..c1fda23d4dc 100644
--- a/src/mongo/db/repl_index_build_state.cpp
+++ b/src/mongo/db/repl_index_build_state.cpp
@@ -27,10 +27,15 @@
* it in the license file.
*/
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kStorage
+
#include "mongo/platform/basic.h"
#include "mongo/db/repl_index_build_state.h"
+#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/logv2/log.h"
+
namespace mongo {
namespace {
@@ -68,6 +73,27 @@ bool checkIfValidTransition(IndexBuildState::StateFlag currentState,
} // namespace
+std::string indexBuildActionToString(IndexBuildAction action) {
+ if (action == IndexBuildAction::kNoAction) {
+ return "No action";
+ } else if (action == IndexBuildAction::kOplogCommit) {
+ return "Oplog commit";
+ } else if (action == IndexBuildAction::kOplogAbort) {
+ return "Oplog abort";
+ } else if (action == IndexBuildAction::kInitialSyncAbort) {
+ return "Initial sync abort";
+ } else if (action == IndexBuildAction::kRollbackAbort) {
+ return "Rollback abort";
+ } else if (action == IndexBuildAction::kPrimaryAbort) {
+ return "Primary abort";
+ } else if (action == IndexBuildAction::kSinglePhaseCommit) {
+ return "Single-phase commit";
+ } else if (action == IndexBuildAction::kCommitQuorumSatisfied) {
+ return "Commit quorum Satisfied";
+ }
+ MONGO_UNREACHABLE;
+}
+
void IndexBuildState::setState(StateFlag state,
bool skipCheck,
boost::optional<Timestamp> timestamp,
@@ -102,6 +128,148 @@ ReplIndexBuildState::ReplIndexBuildState(const UUID& indexBuildUUID,
commitQuorumLock.emplace(indexBuildUUID.toString());
}
+void ReplIndexBuildState::start(OperationContext* opCtx) {
+ stdx::unique_lock<Latch> lk(mutex);
+ _opId = opCtx->getOpID();
+ indexBuildState.setState(IndexBuildState::kInProgress, false /* skipCheck */);
+}
+
+bool ReplIndexBuildState::isAborted() const {
+ stdx::unique_lock<Latch> lk(mutex);
+ return indexBuildState.isAborted();
+}
+
+std::string ReplIndexBuildState::getAbortReason() const {
+ stdx::unique_lock<Latch> lk(mutex);
+ invariant(indexBuildState.isAborted(),
+ str::stream() << "Index build: " << buildUUID
+ << ", index build state: " << indexBuildState.toString());
+ auto reason = indexBuildState.getAbortReason();
+ invariant(reason, str::stream() << buildUUID);
+ return *reason;
+}
+
+void ReplIndexBuildState::setCommitQuorumSatisfied(OperationContext* opCtx) {
+ stdx::unique_lock<Latch> lk(mutex);
+ if (!waitForNextAction->getFuture().isReady()) {
+ _setSignalAndCancelVoteRequestCbkIfActive(
+ lk, opCtx, IndexBuildAction::kCommitQuorumSatisfied);
+ } else {
+ // This implies we already got a commit or abort signal by other ways. This might have
+ // been signaled earlier with kPrimaryAbort or kCommitQuorumSatisfied. Or, it's also
+ // possible the node got stepped down and received kOplogCommit/kOplogAbort or got
+ // kRollbackAbort. So, it's ok to skip signaling.
+ auto action = waitForNextAction->getFuture().get(opCtx);
+
+ LOGV2(3856200,
+ "Not signaling \"{skippedAction}\" as it was previously signaled with "
+ "\"{previousAction}\" for index build: {buildUUID}",
+ "Skipping signaling as it was previously signaled for index build",
+ "skippedAction"_attr =
+ indexBuildActionToString(IndexBuildAction::kCommitQuorumSatisfied),
+ "previousAction"_attr = indexBuildActionToString(action),
+ "buildUUID"_attr = buildUUID);
+ }
+}
+
+bool ReplIndexBuildState::tryCommit(OperationContext* opCtx) {
+ stdx::unique_lock<Latch> lk(mutex);
+ if (indexBuildState.isSettingUp()) {
+ // It's possible that the index build thread has not reached the point where it can be
+ // committed yet.
+ return false;
+ }
+ if (waitForNextAction->getFuture().isReady()) {
+ // If the future wait were uninterruptible, then shutdown could hang. If the
+ // IndexBuildsCoordinator thread gets interrupted on shutdown, the oplog applier will hang
+ // waiting for the promise applying the commitIndexBuild oplog entry.
+ const auto nextAction = waitForNextAction->getFuture().get(opCtx);
+ invariant(nextAction == IndexBuildAction::kCommitQuorumSatisfied);
+ // Retry until the current promise result is consumed by the index builder thread and
+ // a new empty promise is created by the IndexBuildsCoordinator thread.
+ return false;
+ }
+ auto skipCheck = _shouldSkipIndexBuildStateTransitionCheck(opCtx);
+ indexBuildState.setState(
+ IndexBuildState::kPrepareCommit, skipCheck, opCtx->recoveryUnit()->getCommitTimestamp());
+ // Promise can be set only once.
+ // We can't skip signaling here if a signal is already set because the previous commit or
+ // abort signal might have been sent to handle for primary case.
+ _setSignalAndCancelVoteRequestCbkIfActive(lk, opCtx, IndexBuildAction::kOplogCommit);
+ return true;
+}
+
+ReplIndexBuildState::TryAbortResult ReplIndexBuildState::tryAbort(OperationContext* opCtx,
+ IndexBuildAction signalAction,
+ std::string reason) {
+ stdx::unique_lock<Latch> lk(mutex);
+ // Wait until the build is done setting up. This indicates that all required state is
+ // initialized to attempt an abort.
+ if (indexBuildState.isSettingUp()) {
+ LOGV2_DEBUG(465605,
+ 2,
+ "waiting until index build is done setting up before attempting to abort",
+ "buildUUID"_attr = buildUUID);
+ return TryAbortResult::kRetry;
+ }
+ if (waitForNextAction->getFuture().isReady()) {
+ const auto nextAction = waitForNextAction->getFuture().get(opCtx);
+ invariant(nextAction == IndexBuildAction::kSinglePhaseCommit ||
+ nextAction == IndexBuildAction::kCommitQuorumSatisfied ||
+ nextAction == IndexBuildAction::kPrimaryAbort);
+
+ // Index build coordinator already received a signal to commit or abort. So, it's ok
+ // to return and wait for the index build to complete if we are trying to signal
+ // 'kPrimaryAbort'. The index build coordinator will not perform the signaled action
+ // (i.e, will not commit or abort the index build) only when the node steps down.
+ // When the node steps down, the caller of this function, dropIndexes/createIndexes
+ // command (user operation) will also get interrupted. So, we no longer need to
+ // abort the index build on step down.
+ if (signalAction == IndexBuildAction::kPrimaryAbort) {
+ // Indicate if the index build is already being committed or aborted.
+ if (nextAction == IndexBuildAction::kPrimaryAbort) {
+ return TryAbortResult::kAlreadyAborted;
+ } else {
+ return TryAbortResult::kNotAborted;
+ }
+ }
+
+ // Retry until the current promise result is consumed by the index builder thread
+ // and a new empty promise is created by the IndexBuildsCoordinator thread. Or,
+ // until the index build got torn down after index build commit.
+ return TryAbortResult::kRetry;
+ }
+
+ LOGV2(4656003, "Aborting index build", "buildUUID"_attr = buildUUID, "error"_attr = reason);
+
+ // Set the state on replState. Once set, the calling thread must complete the abort process.
+ auto abortTimestamp =
+ boost::make_optional<Timestamp>(!opCtx->recoveryUnit()->getCommitTimestamp().isNull(),
+ opCtx->recoveryUnit()->getCommitTimestamp());
+ auto skipCheck = _shouldSkipIndexBuildStateTransitionCheck(opCtx);
+ indexBuildState.setState(IndexBuildState::kAborted, skipCheck, abortTimestamp, reason);
+
+ // Interrupt the builder thread so that it can no longer acquire locks or make progress.
+ // It is possible that the index build thread may have completed its operation and removed
+ // itself from the ServiceContext. This may happen in the case of an explicit db.killOp()
+ // operation or during shutdown.
+ // During normal operation, the abort logic, initiated through external means such as
+ // dropIndexes or internally through an indexing error, should have set the state in
+ // ReplIndexBuildState so that this code would not be reachable as it is no longer necessary
+ // to interrupt the builder thread here.
+ auto serviceContext = opCtx->getServiceContext();
+ if (auto target = serviceContext->getLockedClient(*_opId)) {
+ auto targetOpCtx = target->getOperationContext();
+ serviceContext->killOperation(target, targetOpCtx, ErrorCodes::IndexBuildAborted);
+ }
+
+ // Set the signal. Because we have already interrupted the index build, it will not observe
+ // this signal. We do this so that other observers do not also try to abort the index build.
+ _setSignalAndCancelVoteRequestCbkIfActive(lk, opCtx, signalAction);
+
+ return TryAbortResult::kContinueAbort;
+}
+
bool ReplIndexBuildState::isResumable() const {
stdx::unique_lock<Latch> lk(mutex);
return !_lastOpTimeBeforeInterceptors.isNull();
@@ -122,4 +290,24 @@ void ReplIndexBuildState::clearLastOpTimeBeforeInterceptors() {
_lastOpTimeBeforeInterceptors = {};
}
+bool ReplIndexBuildState::_shouldSkipIndexBuildStateTransitionCheck(OperationContext* opCtx) const {
+ const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ if (replCoord->getSettings().usingReplSets() && protocol == IndexBuildProtocol::kTwoPhase) {
+ return false;
+ }
+ return true;
+}
+
+void ReplIndexBuildState::_setSignalAndCancelVoteRequestCbkIfActive(WithLock lk,
+ OperationContext* opCtx,
+ IndexBuildAction signal) {
+ // set the signal
+ waitForNextAction->emplaceValue(signal);
+ // Cancel the callback.
+ if (voteCmdCbkHandle.isValid()) {
+ auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ replCoord->cancelCbkHandle(voteCmdCbkHandle);
+ }
+}
+
} // namespace mongo
diff --git a/src/mongo/db/repl_index_build_state.h b/src/mongo/db/repl_index_build_state.h
index fde82963b1f..acc1c28c377 100644
--- a/src/mongo/db/repl_index_build_state.h
+++ b/src/mongo/db/repl_index_build_state.h
@@ -40,6 +40,7 @@
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/namespace_string.h"
+#include "mongo/db/operation_context.h"
#include "mongo/db/repl/optime.h"
#include "mongo/executor/task_executor.h"
#include "mongo/stdx/condition_variable.h"
@@ -100,6 +101,11 @@ enum class IndexBuildAction {
};
/**
+ * Returns string representation of IndexBuildAction.
+ */
+std::string indexBuildActionToString(IndexBuildAction action);
+
+/**
* Represents the index build state.
* Valid State transition for primary:
* ===================================
@@ -225,6 +231,46 @@ public:
IndexBuildProtocol protocol);
/**
+ * The index build is now past the setup stage and in progress. This makes it eligible to be
+ * aborted. Use the current OperationContext's opId as the means for interrupting the index
+ * build.
+ */
+ void start(OperationContext* opCtx);
+
+ /**
+ * Returns true if this index build has been aborted.
+ */
+ bool isAborted() const;
+
+ /**
+ * Returns abort reason. Invariants if not in aborted state.
+ */
+ std::string getAbortReason() const;
+
+ /**
+ * Called when commit quorum is satisfied.
+ * Signals IndexBuildAction::kCommitQuorumSatisfied to the index build unless a commit or abort
+ * signal has already been set, in which case signaling is skipped and a message is logged.
+ */
+ void setCommitQuorumSatisfied(OperationContext* opCtx);
+
+ /**
+ * Attempt to signal the index build to commit and advance the index build to the kPrepareCommit
+ * state.
+ * Returns true if successful and false if the attempt was unsuccessful and the caller should
+ * retry.
+ */
+ bool tryCommit(OperationContext* opCtx);
+
+ /**
+ * Attempt to abort an index build. Returns a flag indicating how the caller should proceed.
+ */
+ enum class TryAbortResult { kRetry, kAlreadyAborted, kNotAborted, kContinueAbort };
+ TryAbortResult tryAbort(OperationContext* opCtx,
+ IndexBuildAction signalAction,
+ std::string reason);
+
+ /**
* Accessor and mutator for last optime in the oplog before the interceptors were installed.
* This supports resumable index builds.
*/
@@ -233,7 +279,6 @@ public:
void setLastOpTimeBeforeInterceptors(repl::OpTime opTime);
void clearLastOpTimeBeforeInterceptors();
-
// Uniquely identifies this index build across replica set members.
const UUID buildUUID;
@@ -283,6 +328,22 @@ public:
// SharedSemiFuture(s).
SharedPromise<IndexCatalogStats> sharedPromise;
+ /*
+ * Determines whether to skip the index build state transition check.
+ * Index builds that do not use ReplIndexBuildState::waitForNextAction to signal the primary
+ * and secondaries to commit or abort would violate the index build state transitions. So, we
+ * should skip state transition verification for them. Otherwise, we would invariant.
+ */
+ bool _shouldSkipIndexBuildStateTransitionCheck(OperationContext* opCtx) const;
+
+ /**
+ * Updates the next action signal and cancels the vote request under lock.
+ * Used by IndexBuildsCoordinatorMongod only.
+ */
+ void _setSignalAndCancelVoteRequestCbkIfActive(WithLock lk,
+ OperationContext* opCtx,
+ IndexBuildAction signal);
+
// Protects the state below.
mutable Mutex mutex = MONGO_MAKE_LATCH("ReplIndexBuildState::mutex");
@@ -296,8 +357,8 @@ public:
executor::TaskExecutor::CallbackHandle voteCmdCbkHandle;
// The OperationId of the index build. This allows external callers to interrupt the index build
- // thread.
- OperationId opId = 0;
+ // thread. Initialized in start() as we transition from setup to in-progress.
+ boost::optional<OperationId> _opId;
private:
// The last optime in the oplog before the interceptors were installed. If this is a single
diff --git a/src/mongo/embedded/index_builds_coordinator_embedded.cpp b/src/mongo/embedded/index_builds_coordinator_embedded.cpp
index ffbcaccd0e6..7f2ca76690e 100644
--- a/src/mongo/embedded/index_builds_coordinator_embedded.cpp
+++ b/src/mongo/embedded/index_builds_coordinator_embedded.cpp
@@ -93,12 +93,6 @@ void IndexBuildsCoordinatorEmbedded::_waitForNextIndexBuildActionAndCommit(
std::shared_ptr<ReplIndexBuildState> replState,
const IndexBuildOptions& indexBuildOptions) {}
-void IndexBuildsCoordinatorEmbedded::setSignalAndCancelVoteRequestCbkIfActive(
- WithLock ReplIndexBuildStateLk,
- OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- IndexBuildAction signal) {}
-
Status IndexBuildsCoordinatorEmbedded::voteCommitIndexBuild(OperationContext* opCtx,
const UUID& buildUUID,
const HostAndPort& hostAndPort) {
diff --git a/src/mongo/embedded/index_builds_coordinator_embedded.h b/src/mongo/embedded/index_builds_coordinator_embedded.h
index 81b02ddada9..c2ec6b77b88 100644
--- a/src/mongo/embedded/index_builds_coordinator_embedded.h
+++ b/src/mongo/embedded/index_builds_coordinator_embedded.h
@@ -73,11 +73,6 @@ public:
const UUID& buildUUID,
const ResumeIndexInfo& resumeInfo) override;
- void setSignalAndCancelVoteRequestCbkIfActive(WithLock ReplIndexBuildStateLk,
- OperationContext* opCtx,
- std::shared_ptr<ReplIndexBuildState> replState,
- IndexBuildAction signal) override;
-
/**
* None of the following functions should ever be called on an embedded server node.
*/