summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLouis Williams <louis.williams@mongodb.com>2020-06-02 13:12:32 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-06-02 21:23:56 +0000
commit41dac937f818a5d10977347cae5884bac4a10a74 (patch)
tree2793c1b91e65bb2aba57f37053ccd2e275eebd6f
parentf841e415fa8fb7b057c1973f6ff8fc0460de1f08 (diff)
downloadmongo-41dac937f818a5d10977347cae5884bac4a10a74.tar.gz
SERVER-48062 Index build abort should not be interrupted by stepdown
(cherry picked from commit baf75dd579bfec6554fa881b73cc427f0bbb3309)
-rw-r--r--jstests/noPassthrough/index_stepdown_abort.js76
-rw-r--r--src/mongo/db/index_builds_coordinator.cpp12
2 files changed, 88 insertions, 0 deletions
diff --git a/jstests/noPassthrough/index_stepdown_abort.js b/jstests/noPassthrough/index_stepdown_abort.js
new file mode 100644
index 00000000000..8ebcdb664f9
--- /dev/null
+++ b/jstests/noPassthrough/index_stepdown_abort.js
@@ -0,0 +1,76 @@
+/**
+ * Confirms that aborting an index build on a primary succeeds despite a concurrent stepDown
+ * attempting to interrupt the operation.
+ *
+ * @tags: [requires_replication]
+ */
+(function() {
+"use strict";
+
+load('jstests/noPassthrough/libs/index_build.js');
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/logv2_helpers.js");
+
+const rst = new ReplSetTest({
+ nodes: [
+ {},
+ {
+ // Disallow elections on secondary.
+ rsConfig: {
+ priority: 0,
+ votes: 0,
+ },
+ },
+ ]
+});
+rst.startSet();
+rst.initiate();
+
+const primary = rst.getPrimary();
+const primaryDB = primary.getDB('test');
+const primaryColl = primaryDB.getCollection('test');
+
+assert.commandWorked(primaryColl.insert({a: 1}));
+
+let hangAfterInitFailPoint = configureFailPoint(primaryDB, 'hangAfterInitializingIndexBuild');
+
+jsTestLog("Waiting for index build to start");
+const createIdx = IndexBuildTest.startIndexBuild(
+ primary, primaryColl.getFullName(), {a: 1}, null, [ErrorCodes.IndexBuildAborted]);
+IndexBuildTest.waitForIndexBuildToStart(primaryDB, primaryColl.getName(), 'a_1');
+
+jsTestLog("Attempting to abort the index build and blocking before it completes");
+let hangBeforeAbortFailPoint = configureFailPoint(primaryDB, 'hangBeforeCompletingAbort');
+const abortIndexThread = startParallelShell(() => {
+ // We can't assert that this succeeds because it may return an Interrupted error even after it
+ // successfully aborts the index build.
+ db.getSiblingDB('test').test.dropIndex('a_1');
+}, primary.port);
+hangBeforeAbortFailPoint.wait();
+hangAfterInitFailPoint.off();
+
+jsTestLog("Stepping down the primary");
+const stepDown = startParallelShell(() => {
+ assert.commandWorked(db.adminCommand({"replSetStepDown": 60, 'force': true}));
+}, primary.port);
+
+jsTestLog("Waiting for primary to kill operations");
+checkLog.containsJson(primary, 21579);
+hangBeforeAbortFailPoint.off();
+
+jsTestLog("Waiting for threads to join");
+abortIndexThread();
+createIdx();
+stepDown();
+
+// Allow primary to step back up.
+assert.commandWorked(primaryDB.adminCommand({replSetFreeze: 0}));
+
+rst.awaitReplication();
+IndexBuildTest.assertIndexes(primaryColl, 1, ['_id_']);
+
+const secondaryColl = rst.getSecondary().getCollection(primaryColl.getFullName());
+IndexBuildTest.assertIndexes(secondaryColl, 1, ['_id_']);
+
+rst.stopSet();
+})();
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 4ad8ccfdd68..18c4ae1df54 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -70,6 +70,7 @@ MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildFirstDrain);
MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildSecondDrain);
MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildDumpsInsertsFromBulk);
MONGO_FAIL_POINT_DEFINE(hangAfterInitializingIndexBuild);
+MONGO_FAIL_POINT_DEFINE(hangBeforeCompletingAbort);
MONGO_FAIL_POINT_DEFINE(failIndexBuildOnCommit);
namespace {
@@ -1028,8 +1029,19 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx,
invariant(TryAbortResult::kContinueAbort == tryAbortResult);
+ if (MONGO_unlikely(hangBeforeCompletingAbort.shouldFail())) {
+ LOGV2(4806200, "Hanging before completing index build abort");
+ hangBeforeCompletingAbort.pauseWhileSet();
+ }
+
// At this point we must continue aborting the index build.
try {
+ // We are holding the RSTL and an exclusive collection lock, so we will block stepdown
+ // and be targeted for being killed. In addition to writing to the catalog, we need to
+ // acquire an IX lock to write to the config.system.indexBuilds collection. Since
+ // we must perform these final writes, and we expect them not to block, it is safe to
+ // temporarily disable interrupts.
+ UninterruptibleLockGuard noInterrupt(opCtx->lockState());
_completeAbort(opCtx, replState, signalAction, {ErrorCodes::IndexBuildAborted, reason});
} catch (const DBException& e) {
LOGV2_FATAL(