summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJosef Ahmad <josef.ahmad@mongodb.com>2023-05-12 12:47:02 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-05-12 13:27:50 +0000
commitbe1c14d1f945a1ca55d7311c54bccae8d27edd84 (patch)
tree80bb3502e0bff7c44167b50bede008b463cd25f7
parent75ef59b570f937c0a14944187d43bf0707543980 (diff)
downloadmongo-be1c14d1f945a1ca55d7311c54bccae8d27edd84.tar.gz
SERVER-77025 Wait for index builds to complete on conflicting command application
-rw-r--r--jstests/noPassthrough/index_build_stepdown_dropCollection_during_early_setup.js79
-rw-r--r--src/mongo/db/index_builds_coordinator.cpp5
-rw-r--r--src/mongo/db/repl/oplog.cpp59
3 files changed, 130 insertions, 13 deletions
diff --git a/jstests/noPassthrough/index_build_stepdown_dropCollection_during_early_setup.js b/jstests/noPassthrough/index_build_stepdown_dropCollection_during_early_setup.js
new file mode 100644
index 00000000000..778bfa89e05
--- /dev/null
+++ b/jstests/noPassthrough/index_build_stepdown_dropCollection_during_early_setup.js
@@ -0,0 +1,79 @@
+/**
+ * Starts an index build, steps down the primary before the index build has completed its setup (and
+ * made other replicas aware of the index build), and drops the collection the index is being
+ * built on. This exercises a path described in SERVER-77025 whereby applying a DDL operation (like
+ * dropCollection) on the secondary conflicts with the ongoing index build. This test confirms that
+ * replication waits until the index build is not present anymore, and then retries dropCollection
+ * and succeeds.
+ *
+ * @tags: [
+ * requires_replication,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js"); // For "configureFailPoint()"
+load("jstests/libs/parallelTester.js"); // For "startParallelShell()"
+load("jstests/noPassthrough/libs/index_build.js"); // For "IndexBuildTest"
+
+const rst = new ReplSetTest({nodes: 2});
+rst.startSet();
+rst.initiate();
+
+const primary = rst.getPrimary();
+const primaryDB = primary.getDB("test");
+const primaryColl = primaryDB.getCollection("coll");
+assert.commandWorked(primaryDB.setLogLevel(1, "replication"));
+
+assert.commandWorked(primaryColl.insert({_id: 1, a: 1}));
+rst.awaitReplication();
+
+// Enable fail point which makes index build hang during setup, simulating a condition where the
+// index build is registered, but not yet replicated.
+const fp = configureFailPoint(primary, "hangIndexBuildOnSetupBeforeTakingLocks");
+
+const waitForIndexBuildToErrorOut = IndexBuildTest.startIndexBuild(
+ primary, primaryColl.getFullName(), {a: 1}, {}, [ErrorCodes.InterruptedDueToReplStateChange]);
+
+fp.wait();
+
+// Step down the node, while the index build is set up in memory but the "startIndexBuild" entry
+// hasn't replicated.
+assert.commandWorked(primaryDB.adminCommand({"replSetStepDown": 5 * 60, "force": true}));
+
+rst.waitForPrimary();
+
+// Drop the collection on the new primary. The new primary is not aware of the index build, because
+// the old primary hadn't been able to replicate the "startIndexBuild" oplog entry.
+const waitForDropCollection = startParallelShell(function() {
+ db.getCollection("coll").drop();
+}, rst.getPrimary().port);
+
+// Confirm that the old primary, now secondary, waits until the index build is not in progress any
+// longer before retrying the drop.
+// "Waiting for index build(s) to complete on the namespace before retrying the conflicting
+// operation"
+assert.soon(() => checkLog.checkContainsOnceJson(rst.getSecondary(), 7702500));
+
+// Resume the index build so it can fail due to InterruptedDueToReplStateChange.
+fp.off();
+
+// Confirm that the old primary, now secondary, can retry the dropCollection.
+// "Acceptable error during oplog application: background operation in progress for namespace"
+assert.soon(() => checkLog.checkContainsOnceJson(rst.getSecondary(), 51775));
+
+// dropCollection now succeeds, and the command completes on the primary.
+waitForDropCollection();
+
+rst.awaitReplication();
+
+// The index build fails with InterruptedDueToReplStateChange.
+waitForIndexBuildToErrorOut();
+
+// Collection doesn't exist.
+assert(!rst.getPrimary().getDB("test").getCollectionNames().includes("coll"));
+assert(!rst.getSecondary().getDB("test").getCollectionNames().includes("coll"));
+
+rst.stopSet();
+})();
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 851f35d8a3f..d11df7344d9 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -96,9 +96,9 @@ MONGO_FAIL_POINT_DEFINE(failSetUpResumeIndexBuild);
MONGO_FAIL_POINT_DEFINE(failIndexBuildWithError);
MONGO_FAIL_POINT_DEFINE(failIndexBuildWithErrorInSecondDrain);
MONGO_FAIL_POINT_DEFINE(hangInRemoveIndexBuildEntryAfterCommitOrAbort);
+MONGO_FAIL_POINT_DEFINE(hangIndexBuildOnSetupBeforeTakingLocks);
MONGO_FAIL_POINT_DEFINE(hangAbortIndexBuildByBuildUUIDAfterLocks);
-
IndexBuildsCoordinator::IndexBuildsSSS::IndexBuildsSSS()
: ServerStatusSection("indexBuilds"),
registered(0),
@@ -2319,6 +2319,9 @@ IndexBuildsCoordinator::PostSetupAction IndexBuildsCoordinator::_setUpIndexBuild
std::shared_ptr<ReplIndexBuildState> replState,
Timestamp startTimestamp,
const IndexBuildOptions& indexBuildOptions) {
+
+ hangIndexBuildOnSetupBeforeTakingLocks.pauseWhileSet(opCtx);
+
auto [dbLock, collLock, rstl] =
std::move(_acquireExclusiveLockWithRSTLRetry(opCtx, replState.get()).getValue());
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 01137d570b9..24a8d4d8b25 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -2345,19 +2345,54 @@ Status applyCommand_inlock(OperationContext* opCtx,
auto ns = cmd->parse(opCtx, OpMsgRequest::fromDBAndBody(nss.db(), o))->ns();
- // This error is only possible during initial sync mode.
- invariant(mode == OplogApplication::Mode::kInitialSync);
+ // TODO (SERVER-74953): Turn the 'mode == OplogApplication::Mode::kInitialSync' if
+ // condition below into an invariant and remove the trailing 'else' block.
+ if (mode == OplogApplication::Mode::kInitialSync) {
+ // Aborting an index build involves writing to the catalog. This write needs to
+ // be timestamped. It will be given 'writeTime' as the commit timestamp.
+ TimestampBlock tsBlock(opCtx, writeTime);
+ abortIndexBuilds(opCtx,
+ op->getCommandType(),
+ ns,
+ "Aborting index builds during initial sync");
+ LOGV2_DEBUG(4665901,
+ 1,
+ "Conflicting DDL operation encountered during initial sync; "
+ "aborting index build and retrying",
+ logAttrs(ns));
+ } else {
+ invariant(!opCtx->lockState()->isLocked());
+
+ auto swUUID = op->getUuid();
+ if (!swUUID) {
+ LOGV2_ERROR(21261,
+ "Failed command during oplog application. Expected a UUID",
+ "command"_attr = redact(o),
+ logAttrs(ns));
+ }
- // Aborting an index build involves writing to the catalog. This write needs to be
- // timestamped. It will be given 'writeTime' as the commit timestamp.
- TimestampBlock tsBlock(opCtx, writeTime);
- abortIndexBuilds(
- opCtx, op->getCommandType(), ns, "Aborting index builds during initial sync");
- LOGV2_DEBUG(4665901,
- 1,
- "Conflicting DDL operation encountered during initial sync; "
- "aborting index build and retrying",
- logAttrs(ns));
+ LOGV2_DEBUG(
+ 7702500,
+ 1,
+ "Waiting for index build(s) to complete on the namespace before retrying "
+ "the conflicting operation",
+ logAttrs(ns),
+ "oplogEntry"_attr = redact(op->toBSONForLogging()));
+
+ IndexBuildsCoordinator::get(opCtx)->awaitNoIndexBuildInProgressForCollection(
+ opCtx, swUUID.get());
+
+ opCtx->recoveryUnit()->abandonSnapshot();
+ opCtx->checkForInterrupt();
+
+ LOGV2_DEBUG(
+ 51775,
+ 1,
+ "Acceptable error during oplog application: background operation in "
+ "progress for namespace",
+ logAttrs(ns),
+ "oplogEntry"_attr = redact(op->toBSONForLogging()));
+ }
break;
}