diff options
author | Louis Williams <louis.williams@mongodb.com> | 2020-06-02 13:12:32 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-06-02 21:23:56 +0000 |
commit | 41dac937f818a5d10977347cae5884bac4a10a74 (patch) | |
tree | 2793c1b91e65bb2aba57f37053ccd2e275eebd6f | |
parent | f841e415fa8fb7b057c1973f6ff8fc0460de1f08 (diff) | |
download | mongo-41dac937f818a5d10977347cae5884bac4a10a74.tar.gz |
SERVER-48062 Index build abort should not be interrupted by stepdown
(cherry picked from commit baf75dd579bfec6554fa881b73cc427f0bbb3309)
-rw-r--r-- | jstests/noPassthrough/index_stepdown_abort.js | 76 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator.cpp | 12 |
2 files changed, 88 insertions, 0 deletions
diff --git a/jstests/noPassthrough/index_stepdown_abort.js b/jstests/noPassthrough/index_stepdown_abort.js new file mode 100644 index 00000000000..8ebcdb664f9 --- /dev/null +++ b/jstests/noPassthrough/index_stepdown_abort.js @@ -0,0 +1,76 @@ +/** + * Confirms that aborting an index build on a primary succeeds despite a concurrent stepDown + * attempting to interrupt the operation. + * + * @tags: [requires_replication] + */ +(function() { +"use strict"; + +load('jstests/noPassthrough/libs/index_build.js'); +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/logv2_helpers.js"); + +const rst = new ReplSetTest({ + nodes: [ + {}, + { + // Disallow elections on secondary. + rsConfig: { + priority: 0, + votes: 0, + }, + }, + ] +}); +rst.startSet(); +rst.initiate(); + +const primary = rst.getPrimary(); +const primaryDB = primary.getDB('test'); +const primaryColl = primaryDB.getCollection('test'); + +assert.commandWorked(primaryColl.insert({a: 1})); + +let hangAfterInitFailPoint = configureFailPoint(primaryDB, 'hangAfterInitializingIndexBuild'); + +jsTestLog("Waiting for index build to start"); +const createIdx = IndexBuildTest.startIndexBuild( + primary, primaryColl.getFullName(), {a: 1}, null, [ErrorCodes.IndexBuildAborted]); +IndexBuildTest.waitForIndexBuildToStart(primaryDB, primaryColl.getName(), 'a_1'); + +jsTestLog("Attempting to abort the index build and blocking before it completes"); +let hangBeforeAbortFailPoint = configureFailPoint(primaryDB, 'hangBeforeCompletingAbort'); +const abortIndexThread = startParallelShell(() => { + // We can't assert that this succeeds because it may return an Interrupted error even after it + // successfully aborts the index build. 
+ db.getSiblingDB('test').test.dropIndex('a_1'); +}, primary.port); +hangBeforeAbortFailPoint.wait(); +hangAfterInitFailPoint.off(); + +jsTestLog("Stepping down the primary"); +const stepDown = startParallelShell(() => { + assert.commandWorked(db.adminCommand({"replSetStepDown": 60, 'force': true})); +}, primary.port); + +jsTestLog("Waiting for primary to kill operations"); +checkLog.containsJson(primary, 21579); +hangBeforeAbortFailPoint.off(); + +jsTestLog("Waiting for threads to join"); +abortIndexThread(); +createIdx(); +stepDown(); + +// Allow primary to step back up. +assert.commandWorked(primaryDB.adminCommand({replSetFreeze: 0})); + +rst.awaitReplication(); +IndexBuildTest.assertIndexes(primaryColl, 1, ['_id_']); + +const secondaryColl = rst.getSecondary().getCollection(primaryColl.getFullName()); +IndexBuildTest.assertIndexes(secondaryColl, 1, ['_id_']); + +rst.stopSet(); +})(); diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 4ad8ccfdd68..18c4ae1df54 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -70,6 +70,7 @@ MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildFirstDrain); MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildSecondDrain); MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildDumpsInsertsFromBulk); MONGO_FAIL_POINT_DEFINE(hangAfterInitializingIndexBuild); +MONGO_FAIL_POINT_DEFINE(hangBeforeCompletingAbort); MONGO_FAIL_POINT_DEFINE(failIndexBuildOnCommit); namespace { @@ -1028,8 +1029,19 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx, invariant(TryAbortResult::kContinueAbort == tryAbortResult); + if (MONGO_unlikely(hangBeforeCompletingAbort.shouldFail())) { + LOGV2(4806200, "Hanging before completing index build abort"); + hangBeforeCompletingAbort.pauseWhileSet(); + } + // At this point we must continue aborting the index build. 
try { + // We are holding the RSTL and an exclusive collection lock, so we will block stepdown + // and be targeted for being killed. In addition to writing to the catalog, we need to + // acquire an IX lock to write to the config.system.indexBuilds collection. Since + // we must perform these final writes, but we expect them not to block, we can safely, + // temporarily disable interrupts. + UninterruptibleLockGuard noInterrupt(opCtx->lockState()); _completeAbort(opCtx, replState, signalAction, {ErrorCodes::IndexBuildAborted, reason}); } catch (const DBException& e) { LOGV2_FATAL( |