summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorLouis Williams <louis.williams@mongodb.com>2020-03-11 18:09:33 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-04-03 14:06:26 +0000
commitca49965d8d68cc853e466ba741df08bb248d46f6 (patch)
treec5cde966da4485d4fa515f6db1de5194eaf3bb02 /src/mongo/db
parentc91e0bd2524552561c0e1aec3f9aeac8b0173764 (diff)
downloadmongo-ca49965d8d68cc853e466ba741df08bb248d46f6.tar.gz
SERVER-39458 Continuously drain side writes while waiting for next index build action
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/index_builds_coordinator.cpp7
-rw-r--r--src/mongo/db/index_builds_coordinator.h8
-rw-r--r--src/mongo/db/index_builds_coordinator_mongod.cpp59
-rw-r--r--src/mongo/db/index_builds_coordinator_mongod.h3
4 files changed, 63 insertions, 14 deletions
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 38d3d517725..c1a978cdf16 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -1973,6 +1973,7 @@ void IndexBuildsCoordinator::_buildIndexSinglePhase(
boost::optional<Lock::CollectionLock>* exclusiveCollectionLock) {
_scanCollectionAndInsertKeysIntoSorter(opCtx, replState, exclusiveCollectionLock);
_insertKeysFromSideTablesWithoutBlockingWrites(opCtx, replState);
+ _insertKeysFromSideTablesBlockingWrites(opCtx, replState);
_signalPrimaryForCommitReadiness(opCtx, replState);
_waitForNextIndexBuildAction(opCtx, replState);
_insertKeysFromSideTablesAndCommit(
@@ -1987,6 +1988,7 @@ void IndexBuildsCoordinator::_buildIndexTwoPhase(
_scanCollectionAndInsertKeysIntoSorter(opCtx, replState, exclusiveCollectionLock);
_insertKeysFromSideTablesWithoutBlockingWrites(opCtx, replState);
+ _insertKeysFromSideTablesBlockingWrites(opCtx, replState);
_signalPrimaryForCommitReadiness(opCtx, replState);
auto commitIndexBuildTimestamp = _waitForNextIndexBuildAction(opCtx, replState);
@@ -2064,7 +2066,10 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesWithoutBlockingWrites(
LOGV2(20666, "Hanging after index build first drain");
hangAfterIndexBuildFirstDrain.pauseWhileSet();
}
-
+}
+void IndexBuildsCoordinator::_insertKeysFromSideTablesBlockingWrites(
+ OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) {
+ const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
// Perform the second drain while stopping writes on the collection.
{
opCtx->recoveryUnit()->abandonSnapshot();
diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h
index 175f4fb3c27..46cb0ff005c 100644
--- a/src/mongo/db/index_builds_coordinator.h
+++ b/src/mongo/db/index_builds_coordinator.h
@@ -609,6 +609,8 @@ protected:
*/
void _insertKeysFromSideTablesWithoutBlockingWrites(
OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState);
+ void _insertKeysFromSideTablesBlockingWrites(OperationContext* opCtx,
+ std::shared_ptr<ReplIndexBuildState> replState);
/**
* Reads the commit ready members list for index build UUID in 'replState' from
@@ -638,6 +640,12 @@ protected:
OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) = 0;
/**
+ * Drains the side-writes table periodically while waiting for the IndexBuildAction to be ready.
+ */
+ virtual IndexBuildAction _drainSideWritesUntilNextActionIsAvailable(
+ OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) = 0;
+
+ /**
* Both primary and secondaries will wait on 'ReplIndexBuildState::waitForNextAction' future for
* commit or abort index build signal.
* On primary:
diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp
index 48be752b76f..8d204fd7d9f 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.cpp
+++ b/src/mongo/db/index_builds_coordinator_mongod.cpp
@@ -604,27 +604,57 @@ void IndexBuildsCoordinatorMongod::_signalPrimaryForCommitReadiness(
return;
}
+IndexBuildAction IndexBuildsCoordinatorMongod::_drainSideWritesUntilNextActionIsAvailable(
+ OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) {
+ auto future = [&] {  // Obtain the future while holding replState->mutex.
+ stdx::unique_lock<Latch> lk(replState->mutex);
+ invariant(replState->waitForNextAction);  // The promise must be set before we wait on it.
+ return replState->waitForNextAction->getFuture();
+ }();
+
+ // Waits for the future under a 1000 ms deadline: true if fulfilled, false if the wait timed out.
+ IndexBuildAction nextAction;
+ auto waitUntilNextActionIsReady = [&]() {
+ // Don't perform a blocking wait while holding locks or storage engine resources.
+ opCtx->recoveryUnit()->abandonSnapshot();
+ Lock::TempRelease release(opCtx->lockState());
+
+ auto deadline = Date_t::now() + Milliseconds(1000);
+ auto timeoutError = opCtx->getTimeoutError();
+
+ try {
+ nextAction =
+ opCtx->runWithDeadline(deadline, timeoutError, [&] { return future.get(opCtx); });
+ } catch (const ExceptionForCat<ErrorCategory::ExceededTimeLimitError>& e) {
+ if (e.code() == timeoutError) {
+ return false;  // Timed out with the code given to runWithDeadline: not ready yet.
+ }
+ throw;  // Any other time-limit error propagates to the caller.
+ }
+ return true;  // 'nextAction' now holds the value produced by the future.
+ };
+
+ // Continuously drain incoming writes until the future is ready. This is an optimization that
+ // allows the critical section of committing, which must drain the remainder of the side writes,
+ // to be as short as possible.
+ while (!waitUntilNextActionIsReady()) {
+ _insertKeysFromSideTablesWithoutBlockingWrites(opCtx, replState);  // Drain between waits.
+ }
+ return nextAction;
+}
+
Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction(
OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) {
Timestamp commitIndexBuildTimestamp;
- // Yield locks and storage engine resources before blocking.
- opCtx->recoveryUnit()->abandonSnapshot();
- Lock::TempRelease release(opCtx->lockState());
-
LOGV2(3856203,
"Index build waiting for next action before completing final phase: {buildUUID}",
"buildUUID"_attr = replState->buildUUID);
while (true) {
- // Future wait should ignore state transition.
- invariant(!opCtx->lockState()->isRSTLLocked(),
- str::stream()
- << "failed to yield locks for index build while waiting for commit or abort: "
- << replState->buildUUID);
-
- // future wait should get interrupted if the node shutdowns.
- const auto nextAction = replState->waitForNextAction->getFuture().get(opCtx);
+ // Future wait can be interrupted. This function will yield locks while waiting for the
+ // future to be fulfilled.
+ const auto nextAction = _drainSideWritesUntilNextActionIsAvailable(opCtx, replState);
LOGV2(3856204,
"Index build received signal for build uuid: {buildUUID} , action: {action}",
"buildUUID"_attr = replState->buildUUID,
@@ -632,8 +662,11 @@ Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction(
bool needsToRetryWait = false;
- // Reacquire RSTL lock
+ // Ensure RSTL is acquired before checking replication state. This is only necessary for
+ // single-phase builds on secondaries. Everywhere else, the RSTL is already held and this
+ // should never block.
repl::ReplicationStateTransitionLockGuard rstl(opCtx, MODE_IX);
+
const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
auto isMaster = replCoord->canAcceptWritesFor(opCtx, dbAndUUID);
diff --git a/src/mongo/db/index_builds_coordinator_mongod.h b/src/mongo/db/index_builds_coordinator_mongod.h
index 7596a4ddb97..6c2a89a7c72 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.h
+++ b/src/mongo/db/index_builds_coordinator_mongod.h
@@ -154,6 +154,9 @@ private:
void _signalPrimaryForCommitReadiness(OperationContext* opCtx,
std::shared_ptr<ReplIndexBuildState> replState) override;
+ IndexBuildAction _drainSideWritesUntilNextActionIsAvailable(
+ OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) override;
+
Timestamp _waitForNextIndexBuildAction(OperationContext* opCtx,
std::shared_ptr<ReplIndexBuildState> replState) override;