diff options
author | Louis Williams <louis.williams@mongodb.com> | 2020-03-11 18:09:33 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-04-10 19:12:40 +0000 |
commit | 98be6002347d03644c7a68259ac5b0930edc6ba7 (patch) | |
tree | df538bb86b452099143e00bd2cceacab188a8048 | |
parent | c01d93166f824ecc0aeb57d3f02003d833769fa5 (diff) | |
download | mongo-98be6002347d03644c7a68259ac5b0930edc6ba7.tar.gz |
SERVER-39458 Continuously drain side writes while waiting for next index build action
(cherry picked from commit ca49965d8d68cc853e466ba741df08bb248d46f6)
-rw-r--r-- | jstests/noPassthrough/index_build_continuous_drain_secondary.js | 90 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator.h | 8 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator_mongod.cpp | 59 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator_mongod.h | 3 | ||||
-rw-r--r-- | src/mongo/embedded/index_builds_coordinator_embedded.h | 5 |
6 files changed, 158 insertions, 14 deletions
diff --git a/jstests/noPassthrough/index_build_continuous_drain_secondary.js b/jstests/noPassthrough/index_build_continuous_drain_secondary.js new file mode 100644 index 00000000000..edfbc35db22 --- /dev/null +++ b/jstests/noPassthrough/index_build_continuous_drain_secondary.js @@ -0,0 +1,90 @@ +/** + * Tests that secondaries drain side writes while waiting for the primary to commit an index build. + * + * This test does not make very many correctness assertions because this exercises a performance + * optimization. Instead we log the time difference between how long the primary and secondary took + * to complete the index builds. The expectation is that these values are close to each other. + * + * @tags: [requires_replication] + * + */ +(function() { +load("jstests/noPassthrough/libs/index_build.js"); + +const replSet = new ReplSetTest({ + nodes: [ + {}, + { + // Disallow elections on secondary. + rsConfig: { + priority: 0, + votes: 0, + }, + }, + ] +}); + +replSet.startSet(); +replSet.initiate(); + +const primary = replSet.getPrimary(); +if (!IndexBuildTest.supportsTwoPhaseIndexBuild(primary)) { + jsTestLog('Skipping test because two phase index builds are not supported.'); + replSet.stopSet(); + return; +} + +const dbName = 'test'; +const primaryDB = primary.getDB(dbName); +const coll = primaryDB.test; + +let insertDocs = function(numDocs) { + const bulk = coll.initializeUnorderedBulkOp(); + for (let i = 0; i < numDocs; i++) { + bulk.insert({a: i, b: i}); + } + assert.commandWorked(bulk.execute()); +}; +insertDocs(10000); +replSet.awaitReplication(); + +// Start and pause the index build on the primary so that it does not start collection scanning. +IndexBuildTest.pauseIndexBuilds(primary); +const createIdx = IndexBuildTest.startIndexBuild(primary, coll.getFullName(), {a: 1, b: 1}); + +const secondary = replSet.getSecondary(); +const secondaryDB = secondary.getDB(dbName); + +// Wait until the secondary reports that it is ready to commit. 
+// "Index build waiting for next action before completing final phase" +checkLog.containsJson(secondary, 3856203); + +// Insert a high volume of documents. Since the secondary has reported that it is ready to commit, +// the expectation is that the secondary will intercept and drain these writes as they are +// replicated from primary. +insertDocs(50000); +// "index build: drained side writes" +checkLog.containsJson(secondary, 20689); + +// Record how long it takes for the index build to complete from this point onward. +let start = new Date(); +IndexBuildTest.resumeIndexBuilds(primary); + +// Wait for index build to finish on primary. +createIdx(); +let primaryEnd = new Date(); + +// Wait for the index build to complete on the secondary. +IndexBuildTest.waitForIndexBuildToStop(secondaryDB); +let secondaryEnd = new Date(); + +// We don't make any assertions about these times, just report them for informational purposes. The +// expectation is that they are as close to each other as possible, which would suggest that the +// secondary does not spend more time completing the index than the primary. 
+jsTestLog("these values should be similar:"); +jsTestLog("elapsed on primary: " + (primaryEnd - start)); +jsTestLog("elapsed on secondary: " + (secondaryEnd - start)); + +IndexBuildTest.assertIndexes(coll, 2, ['_id_', 'a_1_b_1']); +replSet.stopSet(); +})(); diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 93e5c2c6c18..896f6930aa9 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -2000,6 +2000,7 @@ void IndexBuildsCoordinator::_buildIndexSinglePhase( boost::optional<Lock::CollectionLock>* exclusiveCollectionLock) { _scanCollectionAndInsertKeysIntoSorter(opCtx, replState, exclusiveCollectionLock); _insertKeysFromSideTablesWithoutBlockingWrites(opCtx, replState); + _insertKeysFromSideTablesBlockingWrites(opCtx, replState); _signalPrimaryForCommitReadiness(opCtx, replState); _waitForNextIndexBuildAction(opCtx, replState); _insertKeysFromSideTablesAndCommit( @@ -2014,6 +2015,7 @@ void IndexBuildsCoordinator::_buildIndexTwoPhase( _scanCollectionAndInsertKeysIntoSorter(opCtx, replState, exclusiveCollectionLock); _insertKeysFromSideTablesWithoutBlockingWrites(opCtx, replState); + _insertKeysFromSideTablesBlockingWrites(opCtx, replState); _signalPrimaryForCommitReadiness(opCtx, replState); auto commitIndexBuildTimestamp = _waitForNextIndexBuildAction(opCtx, replState); @@ -2091,7 +2093,10 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesWithoutBlockingWrites( LOGV2(20666, "Hanging after index build first drain"); hangAfterIndexBuildFirstDrain.pauseWhileSet(); } - +} +void IndexBuildsCoordinator::_insertKeysFromSideTablesBlockingWrites( + OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) { + const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID); // Perform the second drain while stopping writes on the collection. 
{ opCtx->recoveryUnit()->abandonSnapshot(); diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h index 9f9a250ea0d..20b4e44e544 100644 --- a/src/mongo/db/index_builds_coordinator.h +++ b/src/mongo/db/index_builds_coordinator.h @@ -611,6 +611,8 @@ protected: */ void _insertKeysFromSideTablesWithoutBlockingWrites( OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState); + void _insertKeysFromSideTablesBlockingWrites(OperationContext* opCtx, + std::shared_ptr<ReplIndexBuildState> replState); /** * Reads the commit ready members list for index build UUID in 'replState' from @@ -640,6 +642,12 @@ protected: OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) = 0; /** + * Drains the side-writes table periodically while waiting for the IndexBuildAction to be ready. + */ + virtual IndexBuildAction _drainSideWritesUntilNextActionIsAvailable( + OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) = 0; + + /** * Both primary and secondaries will wait on 'ReplIndexBuildState::waitForNextAction' future for * commit or abort index build signal. * On primary: diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp index 47c26e4b53a..7a75f2d61f4 100644 --- a/src/mongo/db/index_builds_coordinator_mongod.cpp +++ b/src/mongo/db/index_builds_coordinator_mongod.cpp @@ -610,27 +610,57 @@ void IndexBuildsCoordinatorMongod::_signalPrimaryForCommitReadiness( return; } +IndexBuildAction IndexBuildsCoordinatorMongod::_drainSideWritesUntilNextActionIsAvailable( + OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) { + auto future = [&] { + stdx::unique_lock<Latch> lk(replState->mutex); + invariant(replState->waitForNextAction); + return replState->waitForNextAction->getFuture(); + }(); + + // Waits until the promise is fulfilled or the deadline expires. 
+ IndexBuildAction nextAction; + auto waitUntilNextActionIsReady = [&]() { + // Don't perform a blocking wait while holding locks or storage engine resources. + opCtx->recoveryUnit()->abandonSnapshot(); + Lock::TempRelease release(opCtx->lockState()); + + auto deadline = Date_t::now() + Milliseconds(1000); + auto timeoutError = opCtx->getTimeoutError(); + + try { + nextAction = + opCtx->runWithDeadline(deadline, timeoutError, [&] { return future.get(opCtx); }); + } catch (const ExceptionForCat<ErrorCategory::ExceededTimeLimitError>& e) { + if (e.code() == timeoutError) { + return false; + } + throw; + } + return true; + }; + + // Continuously drain incoming writes until the future is ready. This is an optimization that + // allows the critical section of committing, which must drain the remainder of the side writes, + // to be as short as possible. + while (!waitUntilNextActionIsReady()) { + _insertKeysFromSideTablesWithoutBlockingWrites(opCtx, replState); + } + return nextAction; +} + Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction( OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) { Timestamp commitIndexBuildTimestamp; - // Yield locks and storage engine resources before blocking. - opCtx->recoveryUnit()->abandonSnapshot(); - Lock::TempRelease release(opCtx->lockState()); - LOGV2(3856203, "Index build waiting for next action before completing final phase: {buildUUID}", "buildUUID"_attr = replState->buildUUID); while (true) { - // Future wait should ignore state transition. - invariant(!opCtx->lockState()->isRSTLLocked(), - str::stream() - << "failed to yield locks for index build while waiting for commit or abort: " - << replState->buildUUID); - - // future wait should get interrupted if the node shutdowns. - const auto nextAction = replState->waitForNextAction->getFuture().get(opCtx); + // Future wait can be interrupted. This function will yield locks while waiting for the + // future to be fulfilled. 
+ const auto nextAction = _drainSideWritesUntilNextActionIsAvailable(opCtx, replState); LOGV2(3856204, "Index build received signal for build uuid: {buildUUID} , action: {action}", "buildUUID"_attr = replState->buildUUID, @@ -638,8 +668,11 @@ Timestamp IndexBuildsCoordinatorMongod::_waitForNextIndexBuildAction( bool needsToRetryWait = false; - // Reacquire RSTL lock + // Ensure RSTL is acquired before checking replication state. This is only necessary for + // single-phase builds on secondaries. Everywhere else, the RSTL is already held and this + // should never block. repl::ReplicationStateTransitionLockGuard rstl(opCtx, MODE_IX); + const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID); auto replCoord = repl::ReplicationCoordinator::get(opCtx); auto isMaster = replCoord->canAcceptWritesFor(opCtx, dbAndUUID); diff --git a/src/mongo/db/index_builds_coordinator_mongod.h b/src/mongo/db/index_builds_coordinator_mongod.h index 7596a4ddb97..6c2a89a7c72 100644 --- a/src/mongo/db/index_builds_coordinator_mongod.h +++ b/src/mongo/db/index_builds_coordinator_mongod.h @@ -154,6 +154,9 @@ private: void _signalPrimaryForCommitReadiness(OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) override; + IndexBuildAction _drainSideWritesUntilNextActionIsAvailable( + OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) override; + Timestamp _waitForNextIndexBuildAction(OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) override; diff --git a/src/mongo/embedded/index_builds_coordinator_embedded.h b/src/mongo/embedded/index_builds_coordinator_embedded.h index 3ebe78c4e83..06f39bba115 100644 --- a/src/mongo/embedded/index_builds_coordinator_embedded.h +++ b/src/mongo/embedded/index_builds_coordinator_embedded.h @@ -92,6 +92,11 @@ private: void _signalPrimaryForCommitReadiness(OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) override; + IndexBuildAction 
_drainSideWritesUntilNextActionIsAvailable( + OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) { + return {}; + }; + Timestamp _waitForNextIndexBuildAction(OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) override; }; |