diff options
-rw-r--r-- | jstests/noPassthrough/index_build_killed_disk_space.js | 160 | ||||
-rw-r--r-- | jstests/noPassthrough/index_build_killed_disk_space_secondary.js | 215 | ||||
-rw-r--r-- | jstests/noPassthrough/index_build_killop_primary.js | 107 | ||||
-rw-r--r-- | jstests/noPassthrough/index_build_killop_secondary_after_commit.js (renamed from jstests/noPassthrough/indexbg_killop_secondary.js) | 0 | ||||
-rw-r--r-- | jstests/noPassthrough/index_build_killop_secondary_before_commit.js | 144 | ||||
-rw-r--r-- | jstests/noPassthrough/index_build_vote_abort_while_vote_commit.js | 21 | ||||
-rw-r--r-- | jstests/noPassthrough/indexbg_killop_primary.js | 88 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator.cpp | 16 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator_mongod.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/repl_index_build_state.cpp | 19 | ||||
-rw-r--r-- | src/mongo/db/repl_index_build_state.h | 17 |
11 files changed, 532 insertions, 263 deletions
diff --git a/jstests/noPassthrough/index_build_killed_disk_space.js b/jstests/noPassthrough/index_build_killed_disk_space.js index f07753e639e..ab9b318f996 100644 --- a/jstests/noPassthrough/index_build_killed_disk_space.js +++ b/jstests/noPassthrough/index_build_killed_disk_space.js @@ -1,6 +1,6 @@ /** * Ensures that index builds are killed on primaries when the available disk space drops below a - * limit. + * limit,only if the primary has not yet voted for commit. * * @tags: [ * requires_fcv_71, @@ -13,67 +13,131 @@ load('jstests/noPassthrough/libs/index_build.js'); load("jstests/libs/fail_point_util.js"); -const rst = new ReplSetTest({ - nodes: [ - {}, - { - // Disallow elections on secondary. - rsConfig: { - priority: 0, - }, - }, - ] -}); -rst.startSet(); -rst.initiate(); +function killBeforeVoteCommitSucceeds(rst) { + const primary = rst.getPrimary(); + const primaryDB = primary.getDB('test'); + const primaryColl = primaryDB.getCollection('test'); -const primary = rst.getPrimary(); -const primaryDB = primary.getDB('test'); -const primaryColl = primaryDB.getCollection('test'); + primaryColl.drop(); + assert.commandWorked(primaryColl.insert({a: 1})); -assert.commandWorked(primaryColl.insert({a: 1})); + const hangAfterInitFailPoint = configureFailPoint(primaryDB, 'hangAfterInitializingIndexBuild'); -let hangAfterInitFailPoint = configureFailPoint(primaryDB, 'hangAfterInitializingIndexBuild'); + let serverStatus = primaryDB.serverStatus(); + const tookActionCountBefore = serverStatus.metrics.diskSpaceMonitor.tookAction; + const killedDueToInsufficientDiskSpaceBefore = + serverStatus.indexBuilds.killedDueToInsufficientDiskSpace; -const tookActionCountBefore = primaryDB.serverStatus().metrics.diskSpaceMonitor.tookAction; + jsTestLog("Waiting for index build to start"); + const createIdx = IndexBuildTest.startIndexBuild( + primary, primaryColl.getFullName(), {a: 1}, null, [ErrorCodes.Interrupted]); + IndexBuildTest.waitForIndexBuildToStart(primaryDB, primaryColl.getName(), 'a_1'); -jsTestLog("Waiting for index build to start"); -const createIdx = IndexBuildTest.startIndexBuild( - primary, primaryColl.getFullName(), {a: 1}, null, [ErrorCodes.Interrupted]); -IndexBuildTest.waitForIndexBuildToStart(primaryDB, primaryColl.getName(), 'a_1'); + // Ensure the index build is in an abortable state before the DiskSpaceMonitor runs. + hangAfterInitFailPoint.wait(); -// Ensure the index build is in an abortable state before the DiskSpaceMonitor runs. -hangAfterInitFailPoint.wait(); + // Default indexBuildMinAvailableDiskSpaceMB is 500 MB. + // Simulate a remaining disk space of 450MB. + const simulateDiskSpaceFp = + configureFailPoint(primaryDB, 'simulateAvailableDiskSpace', {bytes: 450 * 1024 * 1024}); -// Default indexBuildMinAvailableDiskSpaceMB is 500 MB. -// Simulate a remaining disk space of 450MB. -const simulateDiskSpaceFp = - configureFailPoint(primaryDB, 'simulateAvailableDiskSpace', {bytes: 450 * 1024 * 1024}); + jsTestLog("Waiting for the disk space monitor to take action"); + assert.soon(() => { + return primaryDB.serverStatus().metrics.diskSpaceMonitor.tookAction > tookActionCountBefore; + }); -jsTestLog("Waiting for the disk space monitor to take action"); -assert.soon(() => { - return primaryDB.serverStatus().metrics.diskSpaceMonitor.tookAction > tookActionCountBefore; -}); -hangAfterInitFailPoint.off(); + jsTestLog("Waiting for the index build to be killed"); + // "Index build: joined after abort". + checkLog.containsJson(primary, 20655); + + jsTestLog("Waiting for threads to join"); + createIdx(); + simulateDiskSpaceFp.off(); + hangAfterInitFailPoint.off(); + + // "Index build: aborted due to insufficient disk space" + checkLog.containsJson(primary, 7333601); + + assert.eq(killedDueToInsufficientDiskSpaceBefore + 1, + primaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace); + + rst.awaitReplication(); + IndexBuildTest.assertIndexes(primaryColl, 1, ['_id_']); + + const secondaryColl = rst.getSecondary().getCollection(primaryColl.getFullName()); + IndexBuildTest.assertIndexes(secondaryColl, 1, ['_id_']); +} + +function killAfterVoteCommitFails(rst) { + const primary = rst.getPrimary(); + const primaryDB = primary.getDB('test'); + const primaryColl = primaryDB.getCollection('test'); + + primaryColl.drop(); + assert.commandWorked(primaryColl.insert({a: 1})); -jsTestLog("Waiting for the index build to be killed"); -// "Index build: joined after abort". -checkLog.containsJson(primary, 20655); + const hangAfterVoteCommit = + configureFailPoint(primaryDB, 'hangIndexBuildAfterSignalPrimaryForCommitReadiness'); -jsTestLog("Waiting for threads to join"); -createIdx(); -simulateDiskSpaceFp.off(); + let serverStatus = primaryDB.serverStatus(); + const tookActionCountBefore = serverStatus.metrics.diskSpaceMonitor.tookAction; + const killedDueToInsufficientDiskSpaceBefore = + serverStatus.indexBuilds.killedDueToInsufficientDiskSpace; -// "Index build: aborted due to insufficient disk space" -checkLog.containsJson(primary, 7333601); + jsTestLog("Waiting for index build to start"); + const createIdx = IndexBuildTest.startIndexBuild( + primary, primaryColl.getFullName(), {a: 1}, null, [ErrorCodes.Interrupted]); + IndexBuildTest.waitForIndexBuildToStart(primaryDB, primaryColl.getName(), 'a_1'); -assert.eq(1, primaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace); + // Ensure the index build has voted commit before the DiskSpaceMonitor runs. + hangAfterVoteCommit.wait(); -rst.awaitReplication(); -IndexBuildTest.assertIndexes(primaryColl, 1, ['_id_']); + // Default indexBuildMinAvailableDiskSpaceMB is 500 MB. + // Simulate a remaining disk space of 450MB. + const simulateDiskSpaceFp = + configureFailPoint(primaryDB, 'simulateAvailableDiskSpace', {bytes: 450 * 1024 * 1024}); + + jsTestLog("Waiting for the disk space monitor to take action"); + assert.soon(() => { + return primaryDB.serverStatus().metrics.diskSpaceMonitor.tookAction > tookActionCountBefore; + }); + + jsTestLog("Waiting for the index build kill attempt to fail"); + // "Index build: cannot force abort". + checkLog.containsJson(primary, 7617000); + + hangAfterVoteCommit.off(); + simulateDiskSpaceFp.off(); + + jsTestLog("Waiting for threads to join"); + createIdx(); + + assert.eq(killedDueToInsufficientDiskSpaceBefore, + primaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace); + + rst.awaitReplication(); + IndexBuildTest.assertIndexes(primaryColl, 2, ['_id_', 'a_1']); + + const secondaryColl = rst.getSecondary().getCollection(primaryColl.getFullName()); + IndexBuildTest.assertIndexes(secondaryColl, 2, ['_id_', 'a_1']); +} + +const rst = new ReplSetTest({ + nodes: [ + {}, + { + // Disallow elections on secondary. + rsConfig: { + priority: 0, + }, + }, + ] +}); +rst.startSet(); +rst.initiate(); -const secondaryColl = rst.getSecondary().getCollection(primaryColl.getFullName()); -IndexBuildTest.assertIndexes(secondaryColl, 1, ['_id_']); +killBeforeVoteCommitSucceeds(rst); +killAfterVoteCommitFails(rst); rst.stopSet(); })(); diff --git a/jstests/noPassthrough/index_build_killed_disk_space_secondary.js b/jstests/noPassthrough/index_build_killed_disk_space_secondary.js index 3bb4ba7a75d..cec690236a6 100644 --- a/jstests/noPassthrough/index_build_killed_disk_space_secondary.js +++ b/jstests/noPassthrough/index_build_killed_disk_space_secondary.js @@ -1,6 +1,6 @@ /** * Ensures that index builds are cancelled by secondaries when the available disk space drops below - * a limit. + * a limit, only if the secondary has not yet voted for commit. * * @tags: [ * requires_fcv_71, @@ -13,6 +13,158 @@ load('jstests/noPassthrough/libs/index_build.js'); load("jstests/libs/fail_point_util.js"); +function killBeforeVoteCommitSucceeds(rst) { + jsTestLog( + "Index build in a secondary can be killed by the DiskSpaceMonitor before it has voted for commit."); + + const dbName = 'test'; + const collName = 'coll'; + const primary = rst.getPrimary(); + const primaryDB = primary.getDB(dbName); + const primaryColl = primaryDB.getCollection(collName); + + primaryColl.drop(); + assert.commandWorked(primaryColl.insert({a: 1})); + + rst.awaitReplication(); + + const secondary = rst.getSecondary(); + const secondaryDB = secondary.getDB(dbName); + const secondaryColl = secondaryDB.getCollection(collName); + + const primaryKilledDueToDiskSpaceBefore = + primaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace; + const secondaryKilledDueToDiskSpaceBefore = + secondaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace; + + // Pause the index build on the primary after it replicates the startIndexBuild oplog entry, + // effectively pausing the index build on the secondary too as it will wait for the primary to + // commit or abort. + IndexBuildTest.pauseIndexBuilds(primary); + + const tookActionCountBefore = secondaryDB.serverStatus().metrics.diskSpaceMonitor.tookAction; + + jsTestLog("Waiting for index build to start on secondary"); + const hangAfterInitFailPoint = + configureFailPoint(secondaryDB, 'hangAfterInitializingIndexBuild'); + const createIdx = IndexBuildTest.startIndexBuild( + primary, primaryColl.getFullName(), {a: 1}, null, [ErrorCodes.IndexBuildAborted]); + IndexBuildTest.waitForIndexBuildToStart(secondaryDB, secondaryColl.getName(), 'a_1'); + + // Ensure the index build is in an abortable state before the DiskSpaceMonitor runs. + hangAfterInitFailPoint.wait(); + + // Default indexBuildMinAvailableDiskSpaceMB is 500 MB. + // Simulate a remaining disk space of 450MB on the secondary node. + const simulateDiskSpaceFp = + configureFailPoint(secondaryDB, 'simulateAvailableDiskSpace', {bytes: 450 * 1024 * 1024}); + + jsTestLog("Waiting for the disk space monitor to take action on secondary"); + assert.soon(() => { + return secondaryDB.serverStatus().metrics.diskSpaceMonitor.tookAction > + tookActionCountBefore; + }); + IndexBuildTest.resumeIndexBuilds(primary); + + jsTestLog("Waiting for the index build to be killed"); + // "Index build: joined after abort". + checkLog.containsJson(secondary, 20655); + + jsTestLog("Waiting for threads to join"); + createIdx(); + simulateDiskSpaceFp.off(); + + // "Index build: aborted due to insufficient disk space" + checkLog.containsJson(secondaryDB, 7333601); + + // Disable failpoint only after we know the build is aborted. We want the build to be aborted + // before it has voted for commit, and this ensures that is the case. + hangAfterInitFailPoint.off(); + + assert.eq(primaryKilledDueToDiskSpaceBefore, + primaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace); + assert.eq(secondaryKilledDueToDiskSpaceBefore + 1, + secondaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace); + + rst.awaitReplication(); + IndexBuildTest.assertIndexes(primaryColl, 1, ['_id_']); + IndexBuildTest.assertIndexes(secondaryColl, 1, ['_id_']); +} + +function killAfterVoteCommitFails(rst) { + jsTestLog( + "Index build in a secondary cannot killed by the DiskSpaceMonitor after it has voted for commit"); + + const dbName = 'test'; + const collName = 'coll'; + const primary = rst.getPrimary(); + const primaryDB = primary.getDB(dbName); + const primaryColl = primaryDB.getCollection(collName); + + primaryColl.drop(); + assert.commandWorked(primaryColl.insert({a: 1})); + + rst.awaitReplication(); + + const secondary = rst.getSecondary(); + const secondaryDB = secondary.getDB(dbName); + const secondaryColl = secondaryDB.getCollection(collName); + + const primaryKilledDueToDiskSpaceBefore = + primaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace; + const secondaryKilledDueToDiskSpaceBefore = + secondaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace; + + // Pause the index build on the primary after it replicates the startIndexBuild oplog entry, + // effectively pausing the index build on the secondary too as it will wait for the primary to + // commit or abort. + IndexBuildTest.pauseIndexBuilds(primary); + + const tookActionCountBefore = secondaryDB.serverStatus().metrics.diskSpaceMonitor.tookAction; + + jsTestLog("Waiting for index build to start on secondary"); + const hangAfterVoteCommit = + configureFailPoint(secondaryDB, 'hangIndexBuildAfterSignalPrimaryForCommitReadiness'); + const createIdx = + IndexBuildTest.startIndexBuild(primary, primaryColl.getFullName(), {a: 1}, null); + IndexBuildTest.waitForIndexBuildToStart(secondaryDB, secondaryColl.getName(), 'a_1'); + + // Ensure the index build is in an abortable state before the DiskSpaceMonitor runs. + hangAfterVoteCommit.wait(); + + // Default indexBuildMinAvailableDiskSpaceMB is 500 MB. + // Simulate a remaining disk space of 450MB on the secondary node. + const simulateDiskSpaceFp = + configureFailPoint(secondaryDB, 'simulateAvailableDiskSpace', {bytes: 450 * 1024 * 1024}); + + jsTestLog("Waiting for the disk space monitor to take action on secondary"); + assert.soon(() => { + return secondaryDB.serverStatus().metrics.diskSpaceMonitor.tookAction > + tookActionCountBefore; + }); + IndexBuildTest.resumeIndexBuilds(primary); + + jsTestLog("Waiting for the index build kill attempt to fail"); + // "Index build: cannot force abort". + checkLog.containsJson(secondary, 7617000); + + // Disable failpoint only after the abort attempt. + hangAfterVoteCommit.off(); + + jsTestLog("Waiting for threads to join"); + createIdx(); + simulateDiskSpaceFp.off(); + + assert.eq(primaryKilledDueToDiskSpaceBefore, + primaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace); + assert.eq(secondaryKilledDueToDiskSpaceBefore, + secondaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace); + + rst.awaitReplication(); + IndexBuildTest.assertIndexes(primaryColl, 2, ['_id_', 'a_1']); + IndexBuildTest.assertIndexes(secondaryColl, 2, ['_id_', 'a_1']); +} + const rst = new ReplSetTest({ nodes: [ {}, @@ -27,65 +179,8 @@ const rst = new ReplSetTest({ rst.startSet(); rst.initiate(); -const dbName = 'test'; -const collName = 'coll'; -const primary = rst.getPrimary(); -const primaryDB = primary.getDB(dbName); -const primaryColl = primaryDB.getCollection(collName); - -assert.commandWorked(primaryColl.insert({a: 1})); - -rst.awaitReplication(); - -const secondary = rst.getSecondary(); -const secondaryDB = secondary.getDB(dbName); -const secondaryColl = secondaryDB.getCollection(collName); - -// Pause the index build on the primary after it replicates the startIndexBuild oplog entry, -// effectively pausing the index build on the secondary too as it will wait for the primary to -// commit or abort. -IndexBuildTest.pauseIndexBuilds(primary); - -const tookActionCountBefore = secondaryDB.serverStatus().metrics.diskSpaceMonitor.tookAction; - -jsTestLog("Waiting for index build to start on secondary"); -const hangAfterInitFailPoint = configureFailPoint(secondaryDB, 'hangAfterInitializingIndexBuild'); -const createIdx = IndexBuildTest.startIndexBuild( - primary, primaryColl.getFullName(), {a: 1}, null, [ErrorCodes.IndexBuildAborted]); -IndexBuildTest.waitForIndexBuildToStart(secondaryDB, secondaryColl.getName(), 'a_1'); - -// Ensure the index build is in an abortable state before the DiskSpaceMonitor runs. -hangAfterInitFailPoint.wait(); -hangAfterInitFailPoint.off(); - -// Default indexBuildMinAvailableDiskSpaceMB is 500 MB. -// Simulate a remaining disk space of 450MB on the secondary node. -const simulateDiskSpaceFp = - configureFailPoint(secondaryDB, 'simulateAvailableDiskSpace', {bytes: 450 * 1024 * 1024}); - -jsTestLog("Waiting for the disk space monitor to take action on secondary"); -assert.soon(() => { - return secondaryDB.serverStatus().metrics.diskSpaceMonitor.tookAction > tookActionCountBefore; -}); -IndexBuildTest.resumeIndexBuilds(primary); - -jsTestLog("Waiting for the index build to be killed"); -// "Index build: joined after abort". -checkLog.containsJson(secondary, 20655); - -jsTestLog("Waiting for threads to join"); -createIdx(); -simulateDiskSpaceFp.off(); - -// "Index build: aborted due to insufficient disk space" -checkLog.containsJson(secondaryDB, 7333601); - -assert.eq(0, primaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace); -assert.eq(1, secondaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace); - -rst.awaitReplication(); -IndexBuildTest.assertIndexes(primaryColl, 1, ['_id_']); -IndexBuildTest.assertIndexes(secondaryColl, 1, ['_id_']); +killBeforeVoteCommitSucceeds(rst); +killAfterVoteCommitFails(rst); rst.stopSet(); })(); diff --git a/jstests/noPassthrough/index_build_killop_primary.js b/jstests/noPassthrough/index_build_killop_primary.js new file mode 100644 index 00000000000..2da1ee477c8 --- /dev/null +++ b/jstests/noPassthrough/index_build_killop_primary.js @@ -0,0 +1,107 @@ +/** + * Confirms that background index builds on a primary can be aborted using killop. + * @tags: [ + * requires_replication, + * ] + */ +(function() { +"use strict"; + +load('jstests/noPassthrough/libs/index_build.js'); +load("jstests/libs/fail_point_util.js"); + +function killopOnFailpoint(rst, failpointName, collName) { + const primary = rst.getPrimary(); + const testDB = primary.getDB('test'); + const coll = testDB.getCollection(collName); + + assert.commandWorked(coll.insert({a: 1})); + + const fp = configureFailPoint(testDB, failpointName); + IndexBuildTest.pauseIndexBuilds(primary); + + const createIdx = + IndexBuildTest.startIndexBuild(primary, coll.getFullName(), {a: 1}, {background: true}); + + // When the index build starts, find its op id. + const opId = IndexBuildTest.waitForIndexBuildToScanCollection(testDB, coll.getName(), 'a_1'); + + IndexBuildTest.assertIndexBuildCurrentOpContents(testDB, opId, (op) => { + jsTestLog('Inspecting db.currentOp() entry for index build: ' + tojson(op)); + assert.eq( + undefined, + op.connectionId, + 'Was expecting IndexBuildsCoordinator op; found db.currentOp() for connection thread instead: ' + + tojson(op)); + assert.eq( + coll.getFullName(), + op.ns, + 'Unexpected ns field value in db.currentOp() result for index build: ' + tojson(op)); + }); + + // Index build should be present in the config.system.indexBuilds collection. + const indexMap = + IndexBuildTest.assertIndexes(coll, 2, ["_id_"], ["a_1"], {includeBuildUUIDs: true}); + const indexBuildUUID = indexMap['a_1'].buildUUID; + assert(primary.getCollection('config.system.indexBuilds').findOne({_id: indexBuildUUID})); + + IndexBuildTest.resumeIndexBuilds(primary); + + // Kill the index builder thread. + fp.wait(); + assert.commandWorked(testDB.killOp(opId)); + fp.off(); + + // Wait for the index build to stop from the killop signal. + try { + IndexBuildTest.waitForIndexBuildToStop(testDB); + } finally { + IndexBuildTest.resumeIndexBuilds(primary); + } + + const exitCode = createIdx({checkExitSuccess: false}); + assert.neq( + 0, exitCode, 'expected shell to exit abnormally due to index build being terminated'); + + // Check that no new index has been created. This verifies that the index build was aborted + // rather than successfully completed. + IndexBuildTest.assertIndexes(coll, 1, ['_id_']); + + const cmdNs = testDB.getCollection('$cmd').getFullName(); + let ops = rst.dumpOplog(primary, {op: 'c', ns: cmdNs, 'o.startIndexBuild': coll.getName()}); + assert.eq(1, ops.length, 'incorrect number of startIndexBuild oplog entries: ' + tojson(ops)); + ops = rst.dumpOplog(primary, {op: 'c', ns: cmdNs, 'o.abortIndexBuild': coll.getName()}); + assert.eq(1, ops.length, 'incorrect number of abortIndexBuild oplog entries: ' + tojson(ops)); + ops = rst.dumpOplog(primary, {op: 'c', ns: cmdNs, 'o.commitIndexBuild': coll.getName()}); + assert.eq(0, ops.length, 'incorrect number of commitIndexBuild oplog entries: ' + tojson(ops)); + + // Index build should be removed from the config.system.indexBuilds collection. + assert.isnull( + primary.getCollection('config.system.indexBuilds').findOne({_id: indexBuildUUID})); +} + +const rst = new ReplSetTest({ + nodes: [ + {}, + { + // Disallow elections on secondary. + rsConfig: { + priority: 0, + votes: 0, + }, + }, + ] +}); +rst.startSet(); +rst.initiate(); + +// Kill the build before it has voted for commit. +jsTestLog("killOp index build on primary before vote for commit readiness"); +killopOnFailpoint(rst, 'hangAfterIndexBuildFirstDrain', 'beforeVoteCommit'); + +// Kill the build after it has voted for commit. +jsTestLog("killOp index build on primary after vote for commit readiness"); +killopOnFailpoint(rst, 'hangIndexBuildAfterSignalPrimaryForCommitReadiness', 'afterVoteCommit'); + +rst.stopSet(); +})(); diff --git a/jstests/noPassthrough/indexbg_killop_secondary.js b/jstests/noPassthrough/index_build_killop_secondary_after_commit.js index a404e6acd92..a404e6acd92 100644 --- a/jstests/noPassthrough/indexbg_killop_secondary.js +++ b/jstests/noPassthrough/index_build_killop_secondary_after_commit.js diff --git a/jstests/noPassthrough/index_build_killop_secondary_before_commit.js b/jstests/noPassthrough/index_build_killop_secondary_before_commit.js index c0679086f80..35b9fac0495 100644 --- a/jstests/noPassthrough/index_build_killop_secondary_before_commit.js +++ b/jstests/noPassthrough/index_build_killop_secondary_before_commit.js @@ -1,6 +1,7 @@ /** - * Sends a killop to an index build on a secondary node before it commits and confirms that the - * index build is canceled on all nodes. + * Sends a killop to an index build on a secondary node before it commits and confirms that: + * - the index build is canceled on all nodes if killop is before voting for commit. + * - the killop results in the secondary crashing if the killop is after voting for commit. * * @tags: [ * requires_fcv_71, @@ -13,6 +14,91 @@ load("jstests/libs/feature_flag_util.js"); load('jstests/noPassthrough/libs/index_build.js'); +TestData.skipEnforceFastCountOnValidate = true; + +function killopIndexBuildOnSecondaryOnFailpoint(rst, failpointName, shouldSucceed) { + const primary = rst.getPrimary(); + const testDB = primary.getDB('test'); + const coll = testDB.getCollection('test'); + let secondary = rst.getSecondary(); + let secondaryDB = secondary.getDB(testDB.getName()); + + coll.drop(); + assert.commandWorked(coll.insert({a: 1})); + + // Pause the index build on the primary so that it does not commit. + IndexBuildTest.pauseIndexBuilds(primary); + IndexBuildTest.pauseIndexBuilds(secondary); + + let expectedErrors = shouldSucceed ? ErrorCodes.IndexBuildAborted : []; + + const fp = configureFailPoint(secondary, failpointName); + const createIdx = + IndexBuildTest.startIndexBuild(primary, coll.getFullName(), {a: 1}, {}, expectedErrors); + + // When the index build starts, find its op id. + const opId = IndexBuildTest.waitForIndexBuildToStart(secondaryDB); + + IndexBuildTest.assertIndexBuildCurrentOpContents(secondaryDB, opId, (op) => { + jsTestLog('Inspecting db.currentOp() entry for index build: ' + tojson(op)); + assert.eq( + coll.getFullName(), + op.ns, + 'Unexpected ns field value in db.currentOp() result for index build: ' + tojson(op)); + }); + + // Resume index build to the desired failpoint, and kill it. + IndexBuildTest.resumeIndexBuilds(secondary); + fp.wait(); + assert.commandWorked(secondaryDB.killOp(opId)); + fp.off(); + + if (shouldSucceed) { + // "attempting to abort index build". + checkLog.containsJson(primary, 4656010); + + IndexBuildTest.resumeIndexBuilds(primary); + // "Index build: joined after abort". + checkLog.containsJson(primary, 20655); + + // Wait for the index build abort to replicate. + rst.awaitReplication(); + + // Expect the index build to fail and for the index to not exist on either node. + createIdx(); + + IndexBuildTest.assertIndexes(coll, 1, ['_id_']); + + const secondaryColl = secondaryDB.getCollection(coll.getName()); + IndexBuildTest.assertIndexes(secondaryColl, 1, ['_id_']); + } else { + // We expect this to crash the secondary because this error is not recoverable. + assert.soon(function() { + return rawMongoProgramOutput().search(/Fatal assertion.*(51101)/) >= 0; + }); + + // After restarting the secondary, expect that the index build completes successfully. + rst.stop(secondary.nodeId, + undefined, + {forRestart: true, allowedExitCode: MongoRunner.EXIT_ABORT}); + rst.start(secondary.nodeId, undefined, true /* restart */); + + secondary = rst.getSecondary(); + secondaryDB = secondary.getDB(testDB.getName()); + + IndexBuildTest.resumeIndexBuilds(primary); + // Expect the index build to succeed. + createIdx(); + + // Wait for the index build commit to replicate. + rst.awaitReplication(); + IndexBuildTest.assertIndexes(coll, 2, ['_id_', 'a_1']); + + const secondaryColl = secondaryDB.getCollection(coll.getName()); + IndexBuildTest.assertIndexes(secondaryColl, 2, ['_id_', 'a_1']); + } +} + const rst = new ReplSetTest({ nodes: [ {}, @@ -28,54 +114,14 @@ const rst = new ReplSetTest({ rst.startSet(); rst.initiate(); -const primary = rst.getPrimary(); -const testDB = primary.getDB('test'); -const coll = testDB.getCollection('test'); - -assert.commandWorked(coll.insert({a: 1})); - -// Pause the index build on the primary so that it does not commit. -IndexBuildTest.pauseIndexBuilds(primary); - -const secondary = rst.getSecondary(); -IndexBuildTest.pauseIndexBuilds(secondary); - -const createIdx = IndexBuildTest.startIndexBuild( - primary, coll.getFullName(), {a: 1}, {}, ErrorCodes.IndexBuildAborted); - -// When the index build starts, find its op id. -const secondaryDB = secondary.getDB(testDB.getName()); -const opId = IndexBuildTest.waitForIndexBuildToStart(secondaryDB); - -IndexBuildTest.assertIndexBuildCurrentOpContents(secondaryDB, opId, (op) => { - jsTestLog('Inspecting db.currentOp() entry for index build: ' + tojson(op)); - assert.eq(coll.getFullName(), - op.ns, - 'Unexpected ns field value in db.currentOp() result for index build: ' + tojson(op)); -}); - -// Kill the index build. -assert.commandWorked(secondaryDB.killOp(opId)); - -// Resume index build, allowing it to cancel. -IndexBuildTest.resumeIndexBuilds(secondary); -// "attempting to abort index build". -checkLog.containsJson(primary, 4656010); - -IndexBuildTest.resumeIndexBuilds(primary); -// "Index build: joined after abort". -checkLog.containsJson(primary, 20655); - -// Wait for the index build abort to replicate. -rst.awaitReplication(); - -// Expect the index build to fail and for the index to not exist on either node. -createIdx(); - -IndexBuildTest.assertIndexes(coll, 1, ['_id_']); +// Kill the build before it has voted for commit. +jsTestLog("killOp index build on secondary before vote for commit readiness"); +killopIndexBuildOnSecondaryOnFailpoint( + rst, 'hangAfterIndexBuildFirstDrain', /*shouldSucceed*/ true); -const secondaryColl = secondaryDB.getCollection(coll.getName()); -IndexBuildTest.assertIndexes(secondaryColl, 1, ['_id_']); +jsTestLog("killOp index build on secondary after vote for commit readiness"); +killopIndexBuildOnSecondaryOnFailpoint( + rst, 'hangIndexBuildAfterSignalPrimaryForCommitReadiness', /*shouldSucceed*/ false); rst.stopSet(); })(); diff --git a/jstests/noPassthrough/index_build_vote_abort_while_vote_commit.js b/jstests/noPassthrough/index_build_vote_abort_while_vote_commit.js index b847024ff51..a7689f083d6 100644 --- a/jstests/noPassthrough/index_build_vote_abort_while_vote_commit.js +++ b/jstests/noPassthrough/index_build_vote_abort_while_vote_commit.js @@ -1,6 +1,5 @@ /** - * Ensures that index builds can safely be aborted, for instance by the DiskSpaceMonitor, while a - * voteCommitIndexBuild is in progress. + * Ensures that index builds cannot be aborted after voting for commit. * * @tags: [ * requires_fcv_71, @@ -45,7 +44,7 @@ const secondaryColl = secondaryDB.getCollection(collName); // effectively pausing the index build on the secondary too as it will wait for the primary to // commit or abort. IndexBuildTest.pauseIndexBuilds(primary); -const hangVoteCommit = configureFailPoint(primary, 'hangBeforeVoteCommitIndexBuild'); +const hangBeforeVoteCommit = configureFailPoint(primary, 'hangBeforeVoteCommitIndexBuild'); const tookActionCountBefore = secondaryDB.serverStatus().metrics.diskSpaceMonitor.tookAction; @@ -55,7 +54,7 @@ const createIdx = IndexBuildTest.startIndexBuild( IndexBuildTest.waitForIndexBuildToStart(secondaryDB, secondaryColl.getName(), 'a_1'); // Wait until secondary is voting for commit. -hangVoteCommit.wait(); +hangBeforeVoteCommit.wait(); // Default indexBuildMinAvailableDiskSpaceMB is 500 MB. // Simulate a remaining disk space of 450MB on the secondary node. @@ -68,20 +67,20 @@ assert.soon(() => { }); IndexBuildTest.resumeIndexBuilds(primary); -jsTestLog("Waiting for the index build to be killed"); -// "Index build: joined after abort". -checkLog.containsJson(secondary, 20655); +jsTestLog("Waiting for the index build kill attempt to fail"); +// "Index build: cannot force abort". +checkLog.containsJson(secondary, 7617000); +hangBeforeVoteCommit.off(); jsTestLog("Waiting for threads to join"); createIdx(); simulateDiskSpaceFp.off(); assert.eq(0, primaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace); -assert.eq(1, secondaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace); +assert.eq(0, secondaryDB.serverStatus().indexBuilds.killedDueToInsufficientDiskSpace); -rst.awaitReplication(); -IndexBuildTest.assertIndexes(primaryColl, 1, ['_id_']); -IndexBuildTest.assertIndexes(secondaryColl, 1, ['_id_']); +IndexBuildTest.assertIndexesSoon(primaryColl, 2, ['_id_', 'a_1']); +IndexBuildTest.assertIndexesSoon(secondaryColl, 2, ['_id_', 'a_1']); rst.stopSet(); })(); diff --git a/jstests/noPassthrough/indexbg_killop_primary.js b/jstests/noPassthrough/indexbg_killop_primary.js deleted file mode 100644 index de0f1c66e86..00000000000 --- a/jstests/noPassthrough/indexbg_killop_primary.js +++ /dev/null @@ -1,88 +0,0 @@ -/** - * Confirms that background index builds on a primary can be aborted using killop. - * @tags: [ - * requires_replication, - * ] - */ -(function() { -"use strict"; - -load('jstests/noPassthrough/libs/index_build.js'); - -const rst = new ReplSetTest({ - nodes: [ - {}, - { - // Disallow elections on secondary. - rsConfig: { - priority: 0, - votes: 0, - }, - }, - ] -}); -const nodes = rst.startSet(); -rst.initiate(); - -const primary = rst.getPrimary(); -const testDB = primary.getDB('test'); -const coll = testDB.getCollection('test'); - -assert.commandWorked(coll.insert({a: 1})); - -IndexBuildTest.pauseIndexBuilds(primary); - -const createIdx = - IndexBuildTest.startIndexBuild(primary, coll.getFullName(), {a: 1}, {background: true}); - -// When the index build starts, find its op id. -const opId = IndexBuildTest.waitForIndexBuildToScanCollection(testDB, coll.getName(), 'a_1'); - -IndexBuildTest.assertIndexBuildCurrentOpContents(testDB, opId, (op) => { - jsTestLog('Inspecting db.currentOp() entry for index build: ' + tojson(op)); - assert.eq( - undefined, - op.connectionId, - 'Was expecting IndexBuildsCoordinator op; found db.currentOp() for connection thread instead: ' + - tojson(op)); - assert.eq(coll.getFullName(), - op.ns, - 'Unexpected ns field value in db.currentOp() result for index build: ' + tojson(op)); -}); - -// Index build should be present in the config.system.indexBuilds collection. -const indexMap = - IndexBuildTest.assertIndexes(coll, 2, ["_id_"], ["a_1"], {includeBuildUUIDs: true}); -const indexBuildUUID = indexMap['a_1'].buildUUID; -assert(primary.getCollection('config.system.indexBuilds').findOne({_id: indexBuildUUID})); - -// Kill the index builder thread. -assert.commandWorked(testDB.killOp(opId)); - -// Wait for the index build to stop from the killop signal. -try { - IndexBuildTest.waitForIndexBuildToStop(testDB); -} finally { - IndexBuildTest.resumeIndexBuilds(primary); -} - -const exitCode = createIdx({checkExitSuccess: false}); -assert.neq(0, exitCode, 'expected shell to exit abnormally due to index build being terminated'); - -// Check that no new index has been created. This verifies that the index build was aborted -// rather than successfully completed. -IndexBuildTest.assertIndexes(coll, 1, ['_id_']); - -const cmdNs = testDB.getCollection('$cmd').getFullName(); -let ops = rst.dumpOplog(primary, {op: 'c', ns: cmdNs, 'o.startIndexBuild': coll.getName()}); -assert.eq(1, ops.length, 'incorrect number of startIndexBuild oplog entries: ' + tojson(ops)); -ops = rst.dumpOplog(primary, {op: 'c', ns: cmdNs, 'o.abortIndexBuild': coll.getName()}); -assert.eq(1, ops.length, 'incorrect number of abortIndexBuild oplog entries: ' + tojson(ops)); -ops = rst.dumpOplog(primary, {op: 'c', ns: cmdNs, 'o.commitIndexBuild': coll.getName()}); -assert.eq(0, ops.length, 'incorrect number of commitIndexBuild oplog entries: ' + tojson(ops)); - -// Index build should be removed from the config.system.indexBuilds collection. -assert.isnull(primary.getCollection('config.system.indexBuilds').findOne({_id: indexBuildUUID})); - -rst.stopSet(); -})(); diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 0a426d21e8b..e95b94c33d3 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -2656,9 +2656,13 @@ void IndexBuildsCoordinator::_cleanUpTwoPhaseAfterNonShutdownFailure( opCtx, "self-abort", [this, replState, status](OperationContext* abortCtx) { // The index builder thread will need to reach out to the current primary to abort on // its own. This can happen if an error is thrown, it is interrupted by a user killop, - // or is killed internally by something like the DiskSpaceMonitor. + // or is killed internally by something like the DiskSpaceMonitor. Voting for abort is + // only allowed if the node did not previously attempt to vote for commit. + + // (Ignore FCV check): This feature flag doesn't have any upgrade/downgrade concerns. if (feature_flags::gIndexBuildGracefulErrorHandling.isEnabled( - serverGlobalParams.featureCompatibility)) { + serverGlobalParams.featureCompatibility) && + replState->canVoteForAbort()) { // If we were interrupted by a caller internally who set a status, use that // status instead of the generic interruption error status. auto abortStatus = @@ -2679,9 +2683,10 @@ void IndexBuildsCoordinator::_cleanUpTwoPhaseAfterNonShutdownFailure( ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict( abortCtx->lockState()); - // Take RSTL (implicitly by DBLock) to observe and prevent replication state - // from changing. - Lock::DBLock dbLock(abortCtx, replState->dbName, MODE_IX); + // Take RSTL to observe and prevent replication state from changing. This is done + // with the release/reacquire strategy to avoid deadlock with prepared txns. + auto [dbLock, collLock, rstl] = std::move( + _acquireExclusiveLockWithRSTLRetry(abortCtx, replState.get()).getValue()); const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID); auto replCoord = repl::ReplicationCoordinator::get(abortCtx); @@ -2703,7 +2708,6 @@ void IndexBuildsCoordinator::_cleanUpTwoPhaseAfterNonShutdownFailure( } } - CollectionNamespaceOrUUIDLock collLock(abortCtx, dbAndUUID, MODE_X); AutoGetCollection indexBuildEntryColl( abortCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX); _completeSelfAbort(abortCtx, replState, *indexBuildEntryColl, status); diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp index 013d1dbd527..5a5773b1877 100644 --- a/src/mongo/db/index_builds_coordinator_mongod.cpp +++ b/src/mongo/db/index_builds_coordinator_mongod.cpp @@ -798,6 +798,14 @@ void IndexBuildsCoordinatorMongod::_signalPrimaryForCommitReadiness( logAttrs(replState->dbName), "collectionUUID"_attr = replState->collectionUUID); + // Indicate that the index build in this node has already tried to vote for commit readiness. + // We do not try to determine whether the vote has actually succeeded or not, as it is + // challenging due to the asynchronous request and potential concurrent interrupts. After this + // point, the node cannot vote to abort this index build, and if it needs to abort the index + // build it must try to do so independently. Meaning, as a primary it will succeed, but as a + // secondary it will fassert. + replState->setVotedForCommitReadiness(opCtx); + const auto generateCmd = [](const UUID& uuid, const std::string& address) { return BSON("voteCommitIndexBuild" << uuid << "hostAndPort" << address << "writeConcern" << BSON("w" diff --git a/src/mongo/db/repl_index_build_state.cpp b/src/mongo/db/repl_index_build_state.cpp index 528abe95848..a6f7c97ad57 100644 --- a/src/mongo/db/repl_index_build_state.cpp +++ b/src/mongo/db/repl_index_build_state.cpp @@ -226,6 +226,18 @@ Status ReplIndexBuildState::tryStart(OperationContext* opCtx) { return interruptCheck; } +void ReplIndexBuildState::setVotedForCommitReadiness(OperationContext* opCtx) { + stdx::lock_guard lk(_mutex); + invariant(!_votedForCommitReadiness); + opCtx->checkForInterrupt(); + _votedForCommitReadiness = true; +} + +bool ReplIndexBuildState::canVoteForAbort() const { + stdx::lock_guard lk(_mutex); + return !_votedForCommitReadiness; +} + void ReplIndexBuildState::commit(OperationContext* opCtx) { auto skipCheck = _shouldSkipIndexBuildStateTransitionCheck(opCtx); opCtx->recoveryUnit()->onCommit( @@ -517,9 +529,14 @@ bool ReplIndexBuildState::forceSelfAbort(OperationContext* opCtx, const Status& stdx::lock_guard lk(_mutex); if (_indexBuildState.isSettingUp() || _indexBuildState.isAborted() || _indexBuildState.isCommitted() || _indexBuildState.isAwaitingPrimaryAbort() || - _indexBuildState.isApplyingCommitOplogEntry()) { + _indexBuildState.isApplyingCommitOplogEntry() || _votedForCommitReadiness) { // If the build is setting up, it is not yet abortable. If the index build has already // passed a point of no return, interrupting will not be productive. + LOGV2(7617000, + "Index build: cannot force abort", + "buildUUID"_attr = buildUUID, + "state"_attr = _indexBuildState, + "votedForCommit"_attr = _votedForCommitReadiness); return false; } diff --git a/src/mongo/db/repl_index_build_state.h b/src/mongo/db/repl_index_build_state.h index e085cb92206..2eebed6de14 100644 --- a/src/mongo/db/repl_index_build_state.h +++ b/src/mongo/db/repl_index_build_state.h @@ -303,6 +303,20 @@ public: Status tryStart(OperationContext* opCtx); /** + * Indicate that the index build has attempted to vote for commit readiness. After calling this, + * the index build cannot vote for abort. Performs an interrupt check, in case the build was + * concurrently forced to self abort or received a killop, in which case the vote for abort is + * necessary. + */ + void setVotedForCommitReadiness(OperationContext* opCtx); + + /** + * Returns true if this index build can still vote for abort. Voting for abort is not possible + * after the index build has voted for commit. + */ + bool canVoteForAbort() const; + + /** * This index build has completed successfully and there is no further work to be done. */ void commit(OperationContext* opCtx); @@ -568,6 +582,9 @@ private: // Set once setup is complete, indicating that a clean up is required in case of abort. bool _cleanUpRequired = false; + + // Set once before attempting to vote for commit readiness. + bool _votedForCommitReadiness = false; }; } // namespace mongo |