diff options
-rw-r--r-- | jstests/noPassthrough/index_build_stepdown_dropCollection_during_early_setup.js | 79 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog.cpp | 59 |
3 files changed, 130 insertions, 13 deletions
diff --git a/jstests/noPassthrough/index_build_stepdown_dropCollection_during_early_setup.js b/jstests/noPassthrough/index_build_stepdown_dropCollection_during_early_setup.js new file mode 100644 index 00000000000..778bfa89e05 --- /dev/null +++ b/jstests/noPassthrough/index_build_stepdown_dropCollection_during_early_setup.js @@ -0,0 +1,79 @@ +/** + * Starts an index build, steps down the primary before the index build has completed its setup (and + * made other replicas aware of the index build), and drops the collection the index is being built + * on. This exercises a path described in SERVER-77025 whereby applying a DDL operation (like + * dropCollection) on the secondary conflicts with the ongoing index build. This test confirms that + * replication waits until the index build is not present anymore, and then retries dropCollection + * and succeeds. + * + * @tags: [ + * requires_replication, + * ] + */ +(function() { +"use strict"; + +load("jstests/libs/fail_point_util.js"); // For "configureFailPoint()" +load("jstests/libs/parallelTester.js"); // For "startParallelShell()" +load("jstests/noPassthrough/libs/index_build.js"); // For "IndexBuildTest" + +const rst = new ReplSetTest({nodes: 2}); +rst.startSet(); +rst.initiate(); + +const primary = rst.getPrimary(); +const primaryDB = primary.getDB("test"); +const primaryColl = primaryDB.getCollection("coll"); +assert.commandWorked(primaryDB.setLogLevel(1, "replication")); + +assert.commandWorked(primaryColl.insert({_id: 1, a: 1})); +rst.awaitReplication(); + +// Enable fail point which makes index build hang during setup, simulating a condition where the +// index build is registered, but not yet replicated. 
+const fp = configureFailPoint(primary, "hangIndexBuildOnSetupBeforeTakingLocks"); + +const waitForIndexBuildToErrorOut = IndexBuildTest.startIndexBuild( + primary, primaryColl.getFullName(), {a: 1}, {}, [ErrorCodes.InterruptedDueToReplStateChange]); + +fp.wait(); + +// Step down the node, while the index build is set up in memory but the "startIndexBuild" entry +// hasn't replicated. +assert.commandWorked(primaryDB.adminCommand({"replSetStepDown": 5 * 60, "force": true})); + +rst.waitForPrimary(); + +// Drop the collection on the new primary. The new primary is not aware of the index build, because +// the old primary hadn't been able to replicate the "startIndexBuild" oplog entry. +const waitForDropCollection = startParallelShell(function() { + db.getCollection("coll").drop(); +}, rst.getPrimary().port); + +// Confirm that the old primary, now secondary, waits until the index build is not in progress any +// longer before retrying the drop. +// "Waiting for index build(s) to complete on the namespace before retrying the conflicting +// operation" +assert.soon(() => checkLog.checkContainsOnceJson(rst.getSecondary(), 7702500)); + +// Resume the index build so it can fail due to InterruptedDueToReplStateChange. +fp.off(); + +// Confirm that the old primary, now secondary, can retry the dropCollection. +// "Acceptable error during oplog application: background operation in progress for namespace" +assert.soon(() => checkLog.checkContainsOnceJson(rst.getSecondary(), 51775)); + +// dropCollection now succeeds, and the command completes on the primary. +waitForDropCollection(); + +rst.awaitReplication(); + +// The index build fails with InterruptedDueToReplStateChange. +waitForIndexBuildToErrorOut(); + +// Collection doesn't exist. 
+assert(!rst.getPrimary().getDB("test").getCollectionNames().includes("coll")); +assert(!rst.getSecondary().getDB("test").getCollectionNames().includes("coll")); + +rst.stopSet(); +})(); diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 851f35d8a3f..d11df7344d9 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -96,9 +96,9 @@ MONGO_FAIL_POINT_DEFINE(failSetUpResumeIndexBuild); MONGO_FAIL_POINT_DEFINE(failIndexBuildWithError); MONGO_FAIL_POINT_DEFINE(failIndexBuildWithErrorInSecondDrain); MONGO_FAIL_POINT_DEFINE(hangInRemoveIndexBuildEntryAfterCommitOrAbort); +MONGO_FAIL_POINT_DEFINE(hangIndexBuildOnSetupBeforeTakingLocks); MONGO_FAIL_POINT_DEFINE(hangAbortIndexBuildByBuildUUIDAfterLocks); - IndexBuildsCoordinator::IndexBuildsSSS::IndexBuildsSSS() : ServerStatusSection("indexBuilds"), registered(0), @@ -2319,6 +2319,9 @@ IndexBuildsCoordinator::PostSetupAction IndexBuildsCoordinator::_setUpIndexBuild std::shared_ptr<ReplIndexBuildState> replState, Timestamp startTimestamp, const IndexBuildOptions& indexBuildOptions) { + + hangIndexBuildOnSetupBeforeTakingLocks.pauseWhileSet(opCtx); + auto [dbLock, collLock, rstl] = std::move(_acquireExclusiveLockWithRSTLRetry(opCtx, replState.get()).getValue()); diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index 01137d570b9..24a8d4d8b25 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -2345,19 +2345,54 @@ Status applyCommand_inlock(OperationContext* opCtx, auto ns = cmd->parse(opCtx, OpMsgRequest::fromDBAndBody(nss.db(), o))->ns(); - // This error is only possible during initial sync mode. - invariant(mode == OplogApplication::Mode::kInitialSync); + // TODO (SERVER-74953): Turn the 'mode == OplogApplication::Mode::kInitialSync' if + // condition below into an invariant and remove the trailing 'else' block. 
+ if (mode == OplogApplication::Mode::kInitialSync) { + // Aborting an index build involves writing to the catalog. This write needs to + // be timestamped. It will be given 'writeTime' as the commit timestamp. + TimestampBlock tsBlock(opCtx, writeTime); + abortIndexBuilds(opCtx, + op->getCommandType(), + ns, + "Aborting index builds during initial sync"); + LOGV2_DEBUG(4665901, + 1, + "Conflicting DDL operation encountered during initial sync; " + "aborting index build and retrying", + logAttrs(ns)); + } else { + invariant(!opCtx->lockState()->isLocked()); + + auto swUUID = op->getUuid(); + if (!swUUID) { + LOGV2_ERROR(21261, + "Failed command during oplog application. Expected a UUID", + "command"_attr = redact(o), + logAttrs(ns)); + } - // Aborting an index build involves writing to the catalog. This write needs to be - // timestamped. It will be given 'writeTime' as the commit timestamp. - TimestampBlock tsBlock(opCtx, writeTime); - abortIndexBuilds( - opCtx, op->getCommandType(), ns, "Aborting index builds during initial sync"); - LOGV2_DEBUG(4665901, - 1, - "Conflicting DDL operation encountered during initial sync; " - "aborting index build and retrying", - logAttrs(ns)); + LOGV2_DEBUG( + 7702500, + 1, + "Waiting for index build(s) to complete on the namespace before retrying " + "the conflicting operation", + logAttrs(ns), + "oplogEntry"_attr = redact(op->toBSONForLogging())); + + IndexBuildsCoordinator::get(opCtx)->awaitNoIndexBuildInProgressForCollection( + opCtx, swUUID.get()); + + opCtx->recoveryUnit()->abandonSnapshot(); + opCtx->checkForInterrupt(); + + LOGV2_DEBUG( + 51775, + 1, + "Acceptable error during oplog application: background operation in " + "progress for namespace", + logAttrs(ns), + "oplogEntry"_attr = redact(op->toBSONForLogging())); + } break; } |