diff options
author | mathisbessamdb <mathis.bessa@mongodb.com> | 2022-06-16 14:07:33 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-06-16 14:36:07 +0000 |
commit | 98374072b1cbaa5c4ffae1325958179d5019a796 (patch) | |
tree | f8b557b59f19a965654a4897ea4670bdf91b96c4 /jstests/serverless | |
parent | c5b3c193d802c5618db349af4efdadbca5e59125 (diff) | |
download | mongo-98374072b1cbaa5c4ffae1325958179d5019a796.tar.gz |
SERVER-66365 Copy missing applicable JSTests for shard split
Diffstat (limited to 'jstests/serverless')
14 files changed, 1698 insertions, 0 deletions
diff --git a/jstests/serverless/libs/basic_serverless_test.js b/jstests/serverless/libs/basic_serverless_test.js index 6017c17c756..131aa394c46 100644 --- a/jstests/serverless/libs/basic_serverless_test.js +++ b/jstests/serverless/libs/basic_serverless_test.js @@ -528,6 +528,33 @@ class BasicServerlessTest { } /** + * Asserts that the TenantMigrationAccessBlocker for the given tenant on the given node has the + * expected statistics. + */ + static checkShardSplitAccessBlocker(node, tenantId, { + numBlockedWrites = 0, + numBlockedReads = 0, + numTenantMigrationCommittedErrors = 0, + numTenantMigrationAbortedErrors = 0 + }) { + const mtab = BasicServerlessTest.getTenantMigrationAccessBlocker({node, tenantId}).donor; + if (!mtab) { + assert.eq(0, numBlockedWrites); + assert.eq(0, numTenantMigrationCommittedErrors); + assert.eq(0, numTenantMigrationAbortedErrors); + return; + } + + assert.eq(mtab.numBlockedReads, numBlockedReads, tojson(mtab)); + assert.eq(mtab.numBlockedWrites, numBlockedWrites, tojson(mtab)); + assert.eq(mtab.numTenantMigrationCommittedErrors, + numTenantMigrationCommittedErrors, + tojson(mtab)); + assert.eq( + mtab.numTenantMigrationAbortedErrors, numTenantMigrationAbortedErrors, tojson(mtab)); + } + + /** * Get the current donor primary by ignoring all the recipient nodes from the current donor set. */ getDonorPrimary() { diff --git a/jstests/serverless/shard_split_concurrent_reads_on_donor_aborted.js b/jstests/serverless/shard_split_concurrent_reads_on_donor_aborted.js new file mode 100644 index 00000000000..1c5af907e79 --- /dev/null +++ b/jstests/serverless/shard_split_concurrent_reads_on_donor_aborted.js @@ -0,0 +1,108 @@ +/** + * Tests that the donor + * - does not rejects reads with atClusterTime/afterClusterTime >= blockTimestamp reads and + * linearizable reads after the split aborts. 
+ * + * @tags: [ + * incompatible_with_eft, + * incompatible_with_macos, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * requires_fcv_52, + * featureFlagShardSplit + * ] + */ + +(function() { +'use strict'; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/parallelTester.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/serverless/libs/basic_serverless_test.js"); +load("jstests/serverless/shard_split_concurrent_reads_on_donor_util.js"); + +const kCollName = "testColl"; +const kTenantDefinedDbName = "0"; + +/** + * Tests that after the split abort, the donor does not reject linearizable reads or reads with + * atClusterTime/afterClusterTime >= blockTimestamp. + */ +function testDoNotRejectReadsAfterMigrationAborted(testCase, dbName, collName) { + const tenantId = dbName.split('_')[0]; + const donorDoc = findSplitOperation(donorPrimary, operation.migrationId); + const nodes = testCase.isSupportedOnSecondaries ? 
donorRst.nodes : [donorPrimary]; + nodes.forEach(node => { + const db = node.getDB(dbName); + if (testCase.requiresReadTimestamp) { + runCommandForConcurrentReadTest(db, + testCase.command(collName, donorDoc.blockTimestamp), + null, + testCase.isTransaction); + runCommandForConcurrentReadTest( + db, + testCase.command(collName, donorDoc.commitOrAbortOpTime.ts), + null, + testCase.isTransaction); + BasicServerlessTest.checkShardSplitAccessBlocker( + node, tenantId, {numTenantMigrationAbortedErrors: 0}); + } else { + runCommandForConcurrentReadTest( + db, testCase.command(collName), null, testCase.isTransaction); + BasicServerlessTest.checkShardSplitAccessBlocker( + node, tenantId, {numTenantMigrationAbortedErrors: 0}); + } + }); +} + +const testCases = shardSplitConcurrentReadTestCases; + +const test = new BasicServerlessTest({ + recipientTagName: "recipientTag", + recipientSetName: "recipientSet", + quickGarbageCollection: true +}); +test.addRecipientNodes(); + +const tenantId = "tenantId"; + +const donorRst = test.donor; +const donorPrimary = test.getDonorPrimary(); + +// Force the donor to preserve all snapshot history to ensure that transactional reads do not +// fail with TransientTransactionError "Read timestamp is older than the oldest available +// timestamp". +donorRst.nodes.forEach(node => { + configureFailPoint(node, "WTPreserveSnapshotHistoryIndefinitely"); +}); + +let blockFp = configureFailPoint(donorPrimary, "pauseShardSplitAfterBlocking"); + +const operation = test.createSplitOperation([tenantId]); +const splitThread = operation.commitAsync(); + +blockFp.wait(); +operation.abort(); + +blockFp.off(); + +splitThread.join(); +assert.commandFailed(splitThread.returnData()); +assertMigrationState(donorPrimary, operation.migrationId, "aborted"); + +// Wait for the last oplog entry on the primary to be visible in the committed snapshot view of +// the oplog on all the secondaries. 
This is to ensure that snapshot reads on secondaries with +// unspecified atClusterTime have read timestamp >= abortTimestamp. +donorRst.awaitLastOpCommitted(); + +for (const [testCaseName, testCase] of Object.entries(testCases)) { + jsTest.log(`Testing inAborted with testCase ${testCaseName}`); + const dbName = `${tenantId}_${testCaseName}-inAborted-${kTenantDefinedDbName}`; + testDoNotRejectReadsAfterMigrationAborted(testCase, dbName, kCollName); +} + +test.stop(); +})(); diff --git a/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js new file mode 100644 index 00000000000..d3bb1d3e85b --- /dev/null +++ b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js @@ -0,0 +1,112 @@ +/** + * Tests that the donor + * - blocks reads with atClusterTime/afterClusterTime >= blockTimestamp that are executed while the + * split is in the blocking state but does not block linearizable reads. + * + * @tags: [ + * incompatible_with_eft, + * incompatible_with_macos, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * requires_fcv_52, + * featureFlagShardSplit + * ] + */ + +(function() { +'use strict'; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/parallelTester.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/serverless/libs/basic_serverless_test.js"); +load("jstests/serverless/shard_split_concurrent_reads_on_donor_util.js"); + +const kCollName = "testColl"; +const kTenantDefinedDbName = "0"; + +const kMaxTimeMS = 1 * 1000; + +/** + * Tests that in the blocking state, the donor blocks reads with atClusterTime/afterClusterTime >= + * blockTimestamp but does not block linearizable reads. 
+ */ +let countBlockedReadsPrimary = 0; +let countBlockedReadsSecondaries = 0; +function testBlockReadsAfterMigrationEnteredBlocking(testCase, primary, dbName, collName) { + const donorDoc = findSplitOperation(primary, operation.migrationId); + const command = testCase.requiresReadTimestamp + ? testCase.command(collName, donorDoc.blockTimestamp) + : testCase.command(collName); + const shouldBlock = !testCase.isLinearizableRead; + if (shouldBlock) { + command.maxTimeMS = kMaxTimeMS; + countBlockedReadsPrimary += 1; + } + let nodes = [primary]; + if (testCase.isSupportedOnSecondaries) { + nodes = donorRst.nodes; + + if (shouldBlock) { + countBlockedReadsSecondaries += 1; + } + } + nodes.forEach(node => { + const db = node.getDB(dbName); + runCommandForConcurrentReadTest( + db, command, shouldBlock ? ErrorCodes.MaxTimeMSExpired : null, testCase.isTransaction); + }); +} + +const testCases = shardSplitConcurrentReadTestCases; + +const tenantId = "tenantId"; +const test = new BasicServerlessTest({ + recipientTagName: "recipientTag", + recipientSetName: "recipientSet", + quickGarbageCollection: true +}); +test.addRecipientNodes(); + +const donorRst = test.donor; +const donorPrimary = donorRst.getPrimary(); + +let blockingFp = configureFailPoint(donorPrimary, "pauseShardSplitAfterBlocking"); + +const operation = test.createSplitOperation([tenantId]); +const splitThread = operation.commitAsync(); + +// Wait for the split to enter the blocking state. +blockingFp.wait(); + +// Wait for the last oplog entry on the primary to be visible in the committed snapshot view of +// the oplog on all secondaries to ensure that snapshot reads on the secondaries with +// unspecified atClusterTime have read timestamp >= blockTimestamp. 
+donorRst.awaitLastOpCommitted(); + +for (const [testCaseName, testCase] of Object.entries(testCases)) { + jsTest.log(`Testing inBlocking with testCase ${testCaseName}`); + const dbName = `${tenantId}_${testCaseName}-inBlocking-${kTenantDefinedDbName}`; + testBlockReadsAfterMigrationEnteredBlocking(testCase, donorPrimary, dbName, kCollName); +} + +// check on primary +BasicServerlessTest.checkShardSplitAccessBlocker( + donorPrimary, tenantId, {numBlockedReads: countBlockedReadsPrimary}); + +// check on secondaries +const secondaries = donorRst.getSecondaries(); +secondaries.forEach(node => { + BasicServerlessTest.checkShardSplitAccessBlocker( + node, tenantId, {numBlockedReads: countBlockedReadsSecondaries}); +}); + +blockingFp.off(); + +splitThread.join(); +assert.commandWorked(splitThread.returnData()); + +test.stop(); +})(); diff --git a/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking_then_aborted.js b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking_then_aborted.js new file mode 100644 index 00000000000..bb76d0aa4aa --- /dev/null +++ b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking_then_aborted.js @@ -0,0 +1,133 @@ +/** + * Tests that the donor + * - blocks reads with atClusterTime/afterClusterTime >= blockTimestamp that are executed while the + * split is in the blocking state but does not block linearizable reads. + * - does not reject reads with atClusterTime/afterClusterTime >= blockTimestamp and linearizable + * reads after the split aborts. 
+ * + * @tags: [ + * incompatible_with_eft, + * incompatible_with_macos, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * requires_fcv_52, + * featureFlagShardSplit + * ] + */ + +(function() { +'use strict'; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/parallelTester.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/serverless/libs/basic_serverless_test.js"); +load("jstests/serverless/shard_split_concurrent_reads_on_donor_util.js"); + +const kCollName = "testColl"; +const kTenantDefinedDbName = "0"; + +function getTenantId(dbName) { + return dbName.split('_')[0]; +} + +/** + * To be used to resume a split that is paused after entering the blocking state. Waits for the + * number of blocked reads to reach 'targetNumBlockedReads' and unpauses the split. + */ +function resumeMigrationAfterBlockingRead(host, tenantId, targetNumBlockedReads) { + load("jstests/libs/fail_point_util.js"); + load("jstests/serverless/libs/basic_serverless_test.js"); + const primary = new Mongo(host); + + assert.soon(() => BasicServerlessTest.getNumBlockedReads(primary, tenantId) == + targetNumBlockedReads); + + assert.commandWorked( + primary.adminCommand({configureFailPoint: "pauseShardSplitAfterBlocking", mode: "off"})); +} + +/** + * Tests that the donor unblocks blocked reads (reads with atClusterTime/afterClusterTime >= + * blockingTimestamp) once the split aborts. + */ +function testUnblockBlockedReadsAfterMigrationAborted(testCase, dbName, collName) { + if (testCase.isLinearizableRead) { + // Linearizable reads are not blocked. 
+ return; + } + + const tenantId = getTenantId(dbName); + const test = new BasicServerlessTest({ + recipientTagName: "recipientTag", + recipientSetName: "recipientSet", + quickGarbageCollection: true + }); + test.addRecipientNodes(); + + const donorRst = test.donor; + const donorPrimary = test.getDonorPrimary(); + + let blockingFp = configureFailPoint(donorPrimary, "pauseShardSplitAfterBlocking"); + let abortFp = configureFailPoint(donorPrimary, "abortShardSplitBeforeLeavingBlockingState"); + const operation = test.createSplitOperation([tenantId]); + + // Run the commands after the split enters the blocking state. + const splitThread = operation.commitAsync(); + + let resumeMigrationThread = + new Thread(resumeMigrationAfterBlockingRead, donorPrimary.host, tenantId, 1); + + // Run the commands after the split enters the blocking state. + resumeMigrationThread.start(); + blockingFp.wait(); + + // Wait for the last oplog entry on the primary to be visible in the committed snapshot view of + // the oplog on all secondaries to ensure that snapshot reads on the secondaries with + // unspecified atClusterTime have read timestamp >= blockTimestamp. + donorRst.awaitLastOpCommitted(); + + const donorDoc = findSplitOperation(donorPrimary, operation.migrationId); + const command = testCase.requiresReadTimestamp + ? testCase.command(collName, donorDoc.blockTimestamp) + : testCase.command(collName); + + // The split should unpause and abort after the read is blocked. Verify that the read + // unblocks. 
+ const db = donorPrimary.getDB(dbName); + runCommandForConcurrentReadTest(db, command, null, testCase.isTransaction); + if (testCase.isSupportedOnSecondaries) { + const primaryPort = String(donorPrimary).split(":")[1]; + const secondaries = donorRst.nodes.filter(node => node.port != primaryPort); + secondaries.forEach(node => { + const db = node.getDB(dbName); + runCommandForConcurrentReadTest(db, command, null, testCase.isTransaction); + }); + } + + const shouldBlock = !testCase.isLinearizableRead; + BasicServerlessTest.checkShardSplitAccessBlocker(donorPrimary, tenantId, { + numBlockedReads: shouldBlock ? 1 : 0, + // Reads just get unblocked if the split aborts. + numTenantMigrationAbortedErrors: 0 + }); + + jsTestLog("Joining"); + splitThread.join(); + assert.commandFailed(splitThread.returnData()); + + resumeMigrationThread.join(); + abortFp.off(); + test.stop(); +} + +const testCases = shardSplitConcurrentReadTestCases; + +for (const [testCaseName, testCase] of Object.entries(testCases)) { + jsTest.log(`Testing inBlockingThenAborted with testCase ${testCaseName}`); + const dbName = `${testCaseName}-inBlockingThenAborted_${kTenantDefinedDbName}`; + testUnblockBlockedReadsAfterMigrationAborted(testCase, dbName, kCollName); +} +})(); diff --git a/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking_then_committed.js b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking_then_committed.js new file mode 100644 index 00000000000..6505439c7cb --- /dev/null +++ b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking_then_committed.js @@ -0,0 +1,127 @@ +/** + * Tests that the donor + * - rejects reads with atClusterTime/afterClusterTime >= blockTimestamp reads and linearizable + * reads after the split commits. 
+ * + * @tags: [ + * incompatible_with_eft, + * incompatible_with_macos, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * requires_fcv_52, + * featureFlagShardSplit + * ] + */ + +(function() { +'use strict'; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/parallelTester.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/serverless/libs/basic_serverless_test.js"); +load("jstests/serverless/shard_split_concurrent_reads_on_donor_util.js"); + +const kCollName = "testColl"; +const kTenantDefinedDbName = "0"; + +function getTenantId(dbName) { + return dbName.split('_')[0]; +} + +/** + * To be used to resume a split that is paused after entering the blocking state. Waits for the + * number of blocked reads to reach 'targetNumBlockedReads' and unpauses the split. + */ +function resumeMigrationAfterBlockingRead(host, tenantId, targetNumBlockedReads) { + load("jstests/libs/fail_point_util.js"); + load("jstests/serverless/libs/basic_serverless_test.js"); + const primary = new Mongo(host); + + assert.soon(() => BasicServerlessTest.getNumBlockedReads(primary, tenantId) == + targetNumBlockedReads); + + assert.commandWorked( + primary.adminCommand({configureFailPoint: "pauseShardSplitAfterBlocking", mode: "off"})); +} + +/** + * Tests that the donor rejects the blocked reads (reads with atClusterTime/afterClusterTime >= + * blockingTimestamp) once the split commits. + */ +function testRejectBlockedReadsAfterMigrationCommitted(testCase, dbName, collName) { + if (testCase.isLinearizableRead) { + // Linearizable reads are not blocked. 
+ return; + } + + const tenantId = getTenantId(dbName); + const test = new BasicServerlessTest({ + recipientTagName: "recipientTag", + recipientSetName: "recipientSet", + quickGarbageCollection: true + }); + test.addRecipientNodes(); + + const donorRst = test.donor; + const donorPrimary = test.getDonorPrimary(); + + let blockingFp = configureFailPoint(donorPrimary, "pauseShardSplitAfterBlocking"); + + const operation = test.createSplitOperation([tenantId]); + + let resumeMigrationThread = + new Thread(resumeMigrationAfterBlockingRead, donorPrimary.host, tenantId, 1); + resumeMigrationThread.start(); + + // Run the commands after the split enters the blocking state. + const splitThread = operation.commitAsync(); + blockingFp.wait(); + + // Wait for the last oplog entry on the primary to be visible in the committed snapshot view of + // the oplog on all secondaries to ensure that snapshot reads on the secondaries with + // unspecified atClusterTime have read timestamp >= blockTimestamp. + donorRst.awaitLastOpCommitted(); + + const donorDoc = findSplitOperation(donorPrimary, operation.migrationId); + const command = testCase.requiresReadTimestamp + ? testCase.command(collName, donorDoc.blockTimestamp) + : testCase.command(collName); + + // The split should unpause and commit after the read is blocked. Verify that the read + // is rejected on donor nodes. 
+ const db = donorPrimary.getDB(dbName); + runCommandForConcurrentReadTest( + db, command, ErrorCodes.TenantMigrationCommitted, testCase.isTransaction); + if (testCase.isSupportedOnSecondaries) { + const primaryPort = String(donorPrimary).split(":")[1]; + const secondaries = donorRst.nodes.filter(node => node.port != primaryPort); + secondaries.filter(node => !test.recipientNodes.includes(node)).forEach(node => { + const db = node.getDB(dbName); + runCommandForConcurrentReadTest( + db, command, ErrorCodes.TenantMigrationCommitted, testCase.isTransaction); + }); + } + + BasicServerlessTest.checkShardSplitAccessBlocker( + donorPrimary, tenantId, {numBlockedReads: 1, numTenantMigrationCommittedErrors: 1}); + + resumeMigrationThread.join(); + // Verify that the split succeeded. + splitThread.join(); + assert.commandWorked(splitThread.returnData()); + test.removeAndStopRecipientNodes(); + + test.stop(); +} + +const testCases = shardSplitConcurrentReadTestCases; + +for (const [testCaseName, testCase] of Object.entries(testCases)) { + jsTest.log(`Testing inBlockingThenCommitted with testCase ${testCaseName}`); + const dbName = `${testCaseName}-inBlockingThenCommitted_${kTenantDefinedDbName}`; + testRejectBlockedReadsAfterMigrationCommitted(testCase, dbName, kCollName); +} +})(); diff --git a/jstests/serverless/shard_split_concurrent_reads_on_donor_committed.js b/jstests/serverless/shard_split_concurrent_reads_on_donor_committed.js new file mode 100644 index 00000000000..16e16799fe6 --- /dev/null +++ b/jstests/serverless/shard_split_concurrent_reads_on_donor_committed.js @@ -0,0 +1,128 @@ +/** + * Tests that the donor + * - rejects reads with atClusterTime/afterClusterTime >= blockTimestamp reads and linearizable + * reads after the split commits. 
+ * + * @tags: [ + * incompatible_with_eft, + * incompatible_with_macos, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * requires_fcv_52, + * featureFlagShardSplit + * ] + */ + +(function() { +'use strict'; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/parallelTester.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/serverless/libs/basic_serverless_test.js"); +load("jstests/serverless/shard_split_concurrent_reads_on_donor_util.js"); + +const kCollName = "testColl"; +const kTenantDefinedDbName = "0"; + +/** + * Tests that after the split commits, the donor rejects linearizable reads and reads with + * atClusterTime/afterClusterTime >= blockTimestamp. + */ +let countTenantMigrationCommittedErrorsPrimary = 0; +let countTenantMigrationCommittedErrorsSecondaries = 0; +function testRejectReadsAfterMigrationCommitted(testCase, primary, dbName, collName, migrationId) { + const donorDoc = findSplitOperation(primary, migrationId); + + let nodes = [primary]; + if (testCase.isSupportedOnSecondaries) { + nodes = donorRst.nodes; + + if (testCase.requiresReadTimestamp) { + countTenantMigrationCommittedErrorsSecondaries += 2; + } else { + countTenantMigrationCommittedErrorsSecondaries += 1; + } + } + + if (testCase.requiresReadTimestamp) { + countTenantMigrationCommittedErrorsPrimary += 2; + } else { + countTenantMigrationCommittedErrorsPrimary += 1; + } + + nodes.forEach(node => { + const db = node.getDB(dbName); + if (testCase.requiresReadTimestamp) { + runCommandForConcurrentReadTest(db, + testCase.command(collName, donorDoc.blockTimestamp), + ErrorCodes.TenantMigrationCommitted, + testCase.isTransaction); + runCommandForConcurrentReadTest( + db, + testCase.command(collName, donorDoc.commitOrAbortOpTime.ts), + ErrorCodes.TenantMigrationCommitted, + testCase.isTransaction); + } else { + runCommandForConcurrentReadTest(db, + testCase.command(collName), + 
ErrorCodes.TenantMigrationCommitted, + testCase.isTransaction); + } + }); +} + +const testCases = shardSplitConcurrentReadTestCases; + +const test = new BasicServerlessTest({ + recipientTagName: "recipientTag", + recipientSetName: "recipientSet", + quickGarbageCollection: true +}); +test.addRecipientNodes(); + +const tenantId = "tenantId"; + +let donorRst = test.donor; +const donorPrimary = test.getDonorPrimary(); + +// Force the donor to preserve all snapshot history to ensure that transactional reads do not +// fail with TransientTransactionError "Read timestamp is older than the oldest available +// timestamp". +donorRst.nodes.forEach(node => { + configureFailPoint(node, "WTPreserveSnapshotHistoryIndefinitely"); +}); + +const operation = test.createSplitOperation([tenantId]); +assert.commandWorked(operation.commit()); + +test.removeRecipientNodesFromDonor(); + +// Wait for the last oplog entry on the primary to be visible in the committed snapshot view of +// the oplog on all the secondaries. This is to ensure that snapshot reads on secondaries with +// unspecified atClusterTime have read timestamp >= commitTimestamp. 
+donorRst.awaitLastOpCommitted(); + +for (const [testCaseName, testCase] of Object.entries(testCases)) { + jsTest.log(`Testing inCommitted with testCase ${testCaseName}`); + const dbName = `${tenantId}_${testCaseName}-inCommitted-${kTenantDefinedDbName}`; + testRejectReadsAfterMigrationCommitted( + testCase, donorPrimary, dbName, kCollName, operation.migrationId); +} + +// check on primary +BasicServerlessTest.checkShardSplitAccessBlocker(donorPrimary, tenantId, { + numTenantMigrationCommittedErrors: countTenantMigrationCommittedErrorsPrimary +}); +let secondaries = donorRst.getSecondaries(); +// check on secondaries +secondaries.forEach(node => { + BasicServerlessTest.checkShardSplitAccessBlocker(node, tenantId, { + numTenantMigrationCommittedErrors: countTenantMigrationCommittedErrorsSecondaries + }); +}); + +test.stop(); +})(); diff --git a/jstests/serverless/shard_split_concurrent_reads_on_donor_util.js b/jstests/serverless/shard_split_concurrent_reads_on_donor_util.js new file mode 100644 index 00000000000..2403d21aecd --- /dev/null +++ b/jstests/serverless/shard_split_concurrent_reads_on_donor_util.js @@ -0,0 +1,126 @@ +/** + * This utility file is used to list the different test cases needed for the + * shard_split_concurrent_reads_on_donor*tests. + */ + +'use strict'; + +function runCommandForConcurrentReadTest(db, cmd, expectedError, isTransaction) { + const res = db.runCommand(cmd); + + if (expectedError) { + assert.commandFailedWithCode(res, expectedError, tojson(cmd)); + // The 'TransientTransactionError' label is attached only in a scope of a transaction. 
+ if (isTransaction && + (expectedError == ErrorCodes.TenantMigrationAborted || + expectedError == ErrorCodes.TenantMigrationCommitted)) { + assert(res["errorLabels"] != null, "Error labels are absent from " + tojson(res)); + const expectedErrorLabels = ['TransientTransactionError']; + assert.sameMembers(res["errorLabels"], + expectedErrorLabels, + "Error labels " + tojson(res["errorLabels"]) + + " are different from expected " + expectedErrorLabels); + } + } else { + assert.commandWorked(res); + } + + if (cmd.lsid) { + assert.commandWorked(db.runCommand({killSessions: [cmd.lsid]})); + } +} + +const shardSplitConcurrentReadTestCases = { + snapshotReadWithAtClusterTime: { + isSupportedOnSecondaries: true, + requiresReadTimestamp: true, + command: function(collName, readTimestamp) { + return { + find: collName, + readConcern: { + level: "snapshot", + atClusterTime: readTimestamp, + } + }; + }, + }, + snapshotReadWithoutAtClusterTime: { + isSupportedOnSecondaries: true, + command: function(collName) { + return { + find: collName, + readConcern: { + level: "snapshot", + } + }; + }, + }, + snapshotReadWithAtClusterTimeInTxn: { + isSupportedOnSecondaries: false, + requiresReadTimestamp: true, + isTransaction: true, + command: function(collName, readTimestamp) { + return { + find: collName, + lsid: {id: UUID()}, + txnNumber: NumberLong(0), + startTransaction: true, + autocommit: false, + readConcern: {level: "snapshot", atClusterTime: readTimestamp} + }; + } + }, + snapshotReadWithoutAtClusterTimeInTxn: { + isSupportedOnSecondaries: false, + isTransaction: true, + command: function(collName) { + return { + find: collName, + lsid: {id: UUID()}, + txnNumber: NumberLong(0), + startTransaction: true, + autocommit: false, + readConcern: {level: "snapshot"} + }; + } + }, + readWithAfterClusterTime: { + isSupportedOnSecondaries: true, + requiresReadTimestamp: true, + command: function(collName, readTimestamp) { + return { + find: collName, + readConcern: { + afterClusterTime: 
readTimestamp, + } + }; + }, + }, + readWithAfterClusterTimeInTxn: { + isSupportedOnSecondaries: false, + requiresReadTimestamp: true, + isTransaction: true, + command: function(collName, readTimestamp) { + return { + find: collName, + lsid: {id: UUID()}, + txnNumber: NumberLong(0), + startTransaction: true, + autocommit: false, + readConcern: { + afterClusterTime: readTimestamp, + } + }; + }, + }, + linearizableRead: { + isSupportedOnSecondaries: false, + isLinearizableRead: true, + command: function(collName) { + return { + find: collName, + readConcern: {level: "linearizable"}, + }; + } + } +}; diff --git a/jstests/serverless/shard_split_concurrent_writes_on_donor_aborted.js b/jstests/serverless/shard_split_concurrent_writes_on_donor_aborted.js new file mode 100644 index 00000000000..b7b59387df7 --- /dev/null +++ b/jstests/serverless/shard_split_concurrent_writes_on_donor_aborted.js @@ -0,0 +1,154 @@ +/** + * Tests that the donor accepts writes after the shard split aborts. + * + * @tags: [ + * incompatible_with_macos, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * requires_fcv_52, + * featureFlagShardSplit + * ] + */ +(function() { +'use strict'; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/parallelTester.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/replsets/libs/tenant_migration_test.js"); +load("jstests/replsets/tenant_migration_concurrent_writes_on_donor_util.js"); +load("jstests/serverless/libs/basic_serverless_test.js"); + +TestData.skipCheckDBHashes = true; +const recipientTagName = "recipientNode"; +const recipientSetName = "recipient"; +const tenantMigrationTest = new BasicServerlessTest({ + recipientTagName, + recipientSetName, + quickGarbageCollection: true, + allowStaleReadsOnDonor: true, + initiateWithShortElectionTimeout: true +}); + +const donorPrimary = tenantMigrationTest.getDonorPrimary(); + +const kCollName = "testColl"; +const 
kTenantDefinedDbName = "0"; + +const testCases = TenantMigrationConcurrentWriteUtil.testCases; +const kTenantID = "tenantId"; + +function setupTest(testCase, collName, testOpts) { + if (testCase.explicitlyCreateCollection) { + createCollectionAndInsertDocsForConcurrentWritesTest( + testOpts.primaryDB, collName, testCase.isCapped); + } + + if (testCase.setUp) { + testCase.setUp(testOpts.primaryDB, collName, testOpts.testInTransaction); + } +} + +/** + * Tests that the donor does not reject writes after the migration aborts. + */ +function testDoNotRejectWritesAfterMigrationAborted(testCase, testOpts) { + const tenantId = testOpts.dbName.split('_')[0]; + + // Wait until the in-memory migration state is updated after the migration has majority + // committed the abort decision. Otherwise, the command below is expected to block and then get + // rejected. + assert.soon(() => { + const mtab = BasicServerlessTest.getTenantMigrationAccessBlocker( + {node: testOpts.primaryDB, tenantId}); + return mtab.donor.state === TenantMigrationTest.DonorAccessState.kAborted; + }); + + runCommandForConcurrentWritesTest(testOpts); + testCase.assertCommandSucceeded(testOpts.primaryDB, testOpts.dbName, testOpts.collName); + BasicServerlessTest.checkShardSplitAccessBlocker( + testOpts.primaryDB, tenantId, {numTenantMigrationAbortedErrors: 0}); +} + +const testOptsMap = {}; + +/** + * run the setup for each cases before the migration starts + */ +function setupTestsBeforeMigration() { + for (const [commandName, testCase] of Object.entries(testCases)) { + let baseDbName = kTenantID + "_" + commandName + "-inCommitted0"; + + if (testCase.skip) { + print("Skipping " + commandName + ": " + testCase.skip); + continue; + } + + let basicFullDb = baseDbName + "Basic-" + kTenantDefinedDbName; + const basicTestOpts = makeTestOptionsForConcurrentWritesTest( + donorPrimary, testCase, basicFullDb, kCollName, false, false); + testOptsMap[basicFullDb] = basicTestOpts; + setupTest(testCase, kCollName, 
basicTestOpts); + + if (testCase.testInTransaction) { + let TxnFullDb = baseDbName + "Txn-" + kTenantDefinedDbName; + const txnTestOpts = makeTestOptionsForConcurrentWritesTest( + donorPrimary, testCase, TxnFullDb, kCollName, true, false); + testOptsMap[TxnFullDb] = txnTestOpts; + setupTest(testCase, kCollName, txnTestOpts); + } + + if (testCase.testAsRetryableWrite) { + let retryableFullDb = baseDbName + "Retryable-" + kTenantDefinedDbName; + const retryableTestOpts = makeTestOptionsForConcurrentWritesTest( + donorPrimary, testCase, retryableFullDb, kCollName, false, true); + testOptsMap[retryableFullDb] = retryableTestOpts; + setupTest(testCase, kCollName, retryableTestOpts); + } + } +} + +/** + * Run the test cases after the migration has aborted. + */ +function runTestsAfterMigration() { + for (const [commandName, testCase] of Object.entries(testCases)) { + let baseDbName = kTenantID + "_" + commandName + "-inCommitted0"; + if (testCase.skip) { + continue; + } + + const basicTesTOpts = testOptsMap[baseDbName + "Basic-" + kTenantDefinedDbName]; + testDoNotRejectWritesAfterMigrationAborted(testCase, basicTesTOpts); + + if (testCase.testInTransaction) { + const txnTesTOpts = testOptsMap[baseDbName + "Txn-" + kTenantDefinedDbName]; + testDoNotRejectWritesAfterMigrationAborted(testCase, txnTesTOpts); + } + + if (testCase.testAsRetryableWrite) { + const retryableTestOpts = testOptsMap[baseDbName + "Retryable-" + kTenantDefinedDbName]; + testDoNotRejectWritesAfterMigrationAborted(testCase, retryableTestOpts); + } + } +} + +const abortFp = configureFailPoint(donorPrimary, "abortShardSplitBeforeLeavingBlockingState"); + +tenantMigrationTest.addRecipientNodes(); +const tenantIds = [kTenantID]; +const operation = tenantMigrationTest.createSplitOperation(tenantIds); + +setupTestsBeforeMigration(); + +operation.commit({retryOnRetryableErrors: false}, {enableDonorStartMigrationFsync: true}); +assertMigrationState(tenantMigrationTest.getDonorPrimary(), operation.migrationId, 
"aborted"); + +abortFp.off(); + +runTestsAfterMigration(); + +tenantMigrationTest.stop(); +})(); diff --git a/jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js b/jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js new file mode 100644 index 00000000000..18d22487df2 --- /dev/null +++ b/jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js @@ -0,0 +1,194 @@ +/** + * Tests that the donor blocks writes that are executed while the shard split in the blocking state, + * then rejects the writes when the migration completes. + * + * @tags: [ + * incompatible_with_macos, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * requires_fcv_52, + * featureFlagShardSplit + * ] + */ +(function() { +'use strict'; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/parallelTester.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/replsets/libs/tenant_migration_test.js"); +load("jstests/replsets/tenant_migration_concurrent_writes_on_donor_util.js"); +load("jstests/serverless/libs/basic_serverless_test.js"); + +TestData.skipCheckDBHashes = true; +const recipientTagName = "recipientNode"; +const recipientSetName = "recipient"; +const tenantMigrationTest = new BasicServerlessTest({ + recipientTagName, + recipientSetName, + quickGarbageCollection: true, + allowStaleReadsOnDonor: true, + initiateWithShortElectionTimeout: true +}); + +const donorPrimary = tenantMigrationTest.getDonorPrimary(); + +const kCollName = "testColl"; +const kTenantDefinedDbName = "0"; + +const testCases = TenantMigrationConcurrentWriteUtil.testCases; +const kTenantID = "tenantId"; + +const kMaxTimeMS = 1 * 1000; + +let countBlockedWrites = 0; + +/** + * Tests that the donor blocks writes that are executed in the blocking state and increase the + * countBlockedWrites count. 
+ */ +function testBlockWritesAfterMigrationEnteredBlocking(testOpts) { + testOpts.command.maxTimeMS = kMaxTimeMS; + runCommandForConcurrentWritesTest(testOpts, ErrorCodes.MaxTimeMSExpired); +} + +function setupTest(testCase, collName, testOpts) { + if (testCase.explicitlyCreateCollection) { + createCollectionAndInsertDocsForConcurrentWritesTest( + testOpts.primaryDB, collName, testCase.isCapped); + } + + if (testCase.setUp) { + testCase.setUp(testOpts.primaryDB, collName, testOpts.testInTransaction); + } +} + +const testOptsMap = {}; + +/** + * Run the setup for each case before the migration starts. + */ +function setupTestsBeforeMigration() { + for (const [commandName, testCase] of Object.entries(testCases)) { + let baseDbName = kTenantID + "_" + commandName + "-inCommitted0"; + + if (testCase.skip) { + print("Skipping " + commandName + ": " + testCase.skip); + continue; + } + + let basicFullDb = baseDbName + "Basic-" + kTenantDefinedDbName; + const basicTestOpts = makeTestOptionsForConcurrentWritesTest( + donorPrimary, testCase, basicFullDb, kCollName, false, false); + testOptsMap[basicFullDb] = basicTestOpts; + setupTest(testCase, kCollName, basicTestOpts); + + if (testCase.testInTransaction) { + let TxnFullDb = baseDbName + "Txn-" + kTenantDefinedDbName; + const txnTestOpts = makeTestOptionsForConcurrentWritesTest( + donorPrimary, testCase, TxnFullDb, kCollName, true, false); + testOptsMap[TxnFullDb] = txnTestOpts; + setupTest(testCase, kCollName, txnTestOpts); + } + + if (testCase.testAsRetryableWrite) { + let retryableFullDb = baseDbName + "Retryable-" + kTenantDefinedDbName; + const retryableTestOpts = makeTestOptionsForConcurrentWritesTest( + donorPrimary, testCase, retryableFullDb, kCollName, false, true); + testOptsMap[retryableFullDb] = retryableTestOpts; + setupTest(testCase, kCollName, retryableTestOpts); + } + } +} + +/** + * Run the test cases while the migration is in the blocking state. + */ +function runTestsWhileBlocking() { + for (const [commandName, 
testCase] of Object.entries(testCases)) { + let baseDbName = kTenantID + "_" + commandName + "-inCommitted0"; + if (testCase.skip) { + continue; + } + + testBlockWritesAfterMigrationEnteredBlocking( + testOptsMap[baseDbName + "Basic-" + kTenantDefinedDbName]); + countBlockedWrites += 1; + + if (testCase.testInTransaction) { + testBlockWritesAfterMigrationEnteredBlocking( + testOptsMap[baseDbName + "Txn-" + kTenantDefinedDbName]); + countBlockedWrites += 1; + } + + if (testCase.testAsRetryableWrite) { + testBlockWritesAfterMigrationEnteredBlocking( + testOptsMap[baseDbName + "Retryable-" + kTenantDefinedDbName]); + countBlockedWrites += 1; + } + } +} + +/** + * Run the test cases after the migration has committed + */ +function runTestsAfterMigrationCommitted() { + for (const [commandName, testCase] of Object.entries(testCases)) { + let baseDbName = kTenantID + "_" + commandName + "-inCommitted0"; + if (testCase.skip) { + continue; + } + + const basicTesTOpts = testOptsMap[baseDbName + "Basic-" + kTenantDefinedDbName]; + testCase.assertCommandFailed( + basicTesTOpts.primaryDB, basicTesTOpts.dbName, basicTesTOpts.collName); + + if (testCase.testInTransaction) { + const txnTesTOpts = testOptsMap[baseDbName + "Txn-" + kTenantDefinedDbName]; + testCase.assertCommandFailed( + txnTesTOpts.primaryDB, txnTesTOpts.dbName, txnTesTOpts.collName); + } + + if (testCase.testAsRetryableWrite) { + const retryableTestOpts = testOptsMap[baseDbName + "Retryable-" + kTenantDefinedDbName]; + testCase.assertCommandFailed( + retryableTestOpts.primaryDB, retryableTestOpts.dbName, retryableTestOpts.collName); + } + } +} + +tenantMigrationTest.addRecipientNodes(); +const tenantIds = [kTenantID]; +const operation = tenantMigrationTest.createSplitOperation(tenantIds); + +setupTestsBeforeMigration(); + +let blockFp = configureFailPoint(donorPrimary, "pauseShardSplitAfterBlocking"); + +// start a shard split operation asynchronously. 
+const splitThread = operation.commitAsync(); + +// Run the command after the migration enters the blocking state. +blockFp.wait(); + +// Run test cases while the migration is in blocking state. +runTestsWhileBlocking(); + +// Allow the migration to complete. +blockFp.off(); +splitThread.join(); + +const data = splitThread.returnData(); +assert.commandWorked(data); +assert.eq(data.state, "committed"); + +// Run tests after blocking is over and the migration has committed. +runTestsAfterMigrationCommitted(); + +BasicServerlessTest.checkShardSplitAccessBlocker( + donorPrimary, kTenantID, {numBlockedWrites: countBlockedWrites}); + +tenantMigrationTest.stop(); +})(); diff --git a/jstests/serverless/shard_split_concurrent_writes_on_donor_committed.js b/jstests/serverless/shard_split_concurrent_writes_on_donor_committed.js new file mode 100644 index 00000000000..19db6591205 --- /dev/null +++ b/jstests/serverless/shard_split_concurrent_writes_on_donor_committed.js @@ -0,0 +1,145 @@ +/** + * Tests that the donor rejects writes that are executed after the shard split has reached the + * committed state. 
+ * @tags: [ + * incompatible_with_macos, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * requires_fcv_52, + * featureFlagShardSplit + * ] + */ +(function() { +'use strict'; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/parallelTester.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/replsets/libs/tenant_migration_test.js"); +load("jstests/replsets/tenant_migration_concurrent_writes_on_donor_util.js"); +load("jstests/serverless/libs/basic_serverless_test.js"); + +TestData.skipCheckDBHashes = true; +const recipientTagName = "recipientNode"; +const recipientSetName = "recipient"; +const tenantMigrationTest = new BasicServerlessTest({ + recipientTagName, + recipientSetName, + quickGarbageCollection: true, + allowStaleReadsOnDonor: true, + initiateWithShortElectionTimeout: true +}); + +const donorPrimary = tenantMigrationTest.getDonorPrimary(); + +const kCollName = "testColl"; +const kTenantDefinedDbName = "0"; + +const testCases = TenantMigrationConcurrentWriteUtil.testCases; +const kTenantID = "tenantId"; + +let countTenantMigrationCommittedErrors = 0; + +function setupTest(testCase, collName, testOpts) { + if (testCase.explicitlyCreateCollection) { + createCollectionAndInsertDocsForConcurrentWritesTest( + testOpts.primaryDB, collName, testCase.isCapped); + } + + if (testCase.setUp) { + testCase.setUp(testOpts.primaryDB, collName, testOpts.testInTransaction); + } +} + +/** + * Tests that the donor rejects writes after a migration has already committed. 
+ */ +function testRejectWritesAfterMigrationCommitted(testCase, testOpts) { + runCommandForConcurrentWritesTest(testOpts, ErrorCodes.TenantMigrationCommitted); + testCase.assertCommandFailed(testOpts.primaryDB, testOpts.dbName, testOpts.collName); +} + +const testOptsMap = {}; + +/** + * run the setup for each cases before the migration starts + */ +function setupTestsBeforeMigration() { + for (const [commandName, testCase] of Object.entries(testCases)) { + let baseDbName = kTenantID + "_" + commandName + "-inCommitted0"; + + if (testCase.skip) { + print("Skipping " + commandName + ": " + testCase.skip); + continue; + } + + let basicFullDb = baseDbName + "Basic-" + kTenantDefinedDbName; + const basicTestOpts = makeTestOptionsForConcurrentWritesTest( + donorPrimary, testCase, basicFullDb, kCollName, false, false); + testOptsMap[basicFullDb] = basicTestOpts; + setupTest(testCase, kCollName, basicTestOpts); + + if (testCase.testInTransaction) { + let TxnFullDb = baseDbName + "Txn-" + kTenantDefinedDbName; + const txnTestOpts = makeTestOptionsForConcurrentWritesTest( + donorPrimary, testCase, TxnFullDb, kCollName, true, false); + testOptsMap[TxnFullDb] = txnTestOpts; + setupTest(testCase, kCollName, txnTestOpts); + } + + if (testCase.testAsRetryableWrite) { + let retryableFullDb = baseDbName + "Retryable-" + kTenantDefinedDbName; + const retryableTestOpts = makeTestOptionsForConcurrentWritesTest( + donorPrimary, testCase, retryableFullDb, kCollName, false, true); + testOptsMap[retryableFullDb] = retryableTestOpts; + setupTest(testCase, kCollName, retryableTestOpts); + } + } +} + +/** + * Run the test cases after the migration has committed + */ +function runTestsAfterMigration() { + for (const [commandName, testCase] of Object.entries(testCases)) { + let baseDbName = kTenantID + "_" + commandName + "-inCommitted0"; + if (testCase.skip) { + continue; + } + + const basicTesTOpts = testOptsMap[baseDbName + "Basic-" + kTenantDefinedDbName]; + 
testRejectWritesAfterMigrationCommitted(testCase, basicTesTOpts); + countTenantMigrationCommittedErrors += 1; + + if (testCase.testInTransaction) { + const txnTesTOpts = testOptsMap[baseDbName + "Txn-" + kTenantDefinedDbName]; + testRejectWritesAfterMigrationCommitted(testCase, txnTesTOpts); + countTenantMigrationCommittedErrors += 1; + } + + if (testCase.testAsRetryableWrite) { + const retryableTestOpts = testOptsMap[baseDbName + "Retryable-" + kTenantDefinedDbName]; + testRejectWritesAfterMigrationCommitted(testCase, retryableTestOpts); + countTenantMigrationCommittedErrors += 1; + } + } +} + +tenantMigrationTest.addRecipientNodes(); +const tenantIds = [kTenantID]; +const operation = tenantMigrationTest.createSplitOperation(tenantIds); + +setupTestsBeforeMigration(); + +assert.commandWorked( + operation.commit({retryOnRetryableErrors: false}, {enableDonorStartMigrationFsync: true})); + +runTestsAfterMigration(); +BasicServerlessTest.checkShardSplitAccessBlocker(donorPrimary, kTenantID, { + numTenantMigrationCommittedErrors: countTenantMigrationCommittedErrors +}); + +tenantMigrationTest.stop(); +})(); diff --git a/jstests/serverless/shard_split_drop_state_doc_collection_aborted.js b/jstests/serverless/shard_split_drop_state_doc_collection_aborted.js new file mode 100644 index 00000000000..7f23f2840d0 --- /dev/null +++ b/jstests/serverless/shard_split_drop_state_doc_collection_aborted.js @@ -0,0 +1,114 @@ +/** + * Tests dropping the donor state doc collections after the shard split has aborted. 
+ * + * @tags: [ + * incompatible_with_eft, + * incompatible_with_macos, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * requires_fcv_52, + * featureFlagShardSplit + * ] + */ + +(function() { +"use strict"; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/serverless/libs/basic_serverless_test.js"); + +const recipientTagName = "recipientNode"; +const recipientSetName = "recipient"; + +TestData.skipCheckDBHashes = true; + +function testDroppingStateDocCollections( + test, + fpName, + {dropDonorsCollection = false, retryWithDifferentMigrationId = false, expectedAbortReason}) { + jsTest.log(`Testing with failpoint: ${fpName} dropDonorsCollection: ${ + dropDonorsCollection}, retryWithDifferentMigrationId: ${retryWithDifferentMigrationId}`); + + test.addRecipientNodes(); + let donorPrimary = test.donor.getPrimary(); + + const tenantIds = ["tenant1", "tenant2"]; + + const operation = test.createSplitOperation(tenantIds); + let migrationId = operation.migrationId; + + let fp = configureFailPoint(donorPrimary.getDB("admin"), fpName); + let commitShardSplitThread = operation.commitAsync(); + fp.wait(); + + if (dropDonorsCollection) { + assert(donorPrimary.getCollection(BasicServerlessTest.kConfigSplitDonorsNS).drop()); + let donorDoc = findSplitOperation(donorPrimary, migrationId); + assert.eq(donorDoc, null); + + const currOpDonor = assert.commandWorked( + donorPrimary.adminCommand({currentOp: true, desc: "shard split operation"})); + assert.eq(currOpDonor.inprog.length, 0); + + // Trigger stepup to allow the donor service to rebuild. 
+ assert.commandWorked(donorPrimary.adminCommand({replSetStepDown: 30, force: true})); + } + + fp.off(); + commitShardSplitThread.join(); + if (expectedAbortReason) { + const data = commitShardSplitThread.returnData(); + assert.commandFailedWithCode(data, expectedAbortReason); + assert.eq(data.code, expectedAbortReason); + } + test.removeRecipientNodesFromDonor(); + if (!dropDonorsCollection) { + operation.forget(); + test.waitForGarbageCollection(migrationId, tenantIds); + } + test.removeAndStopRecipientNodes(); + test.reconfigDonorSetAfterSplit(); + + test.addRecipientNodes(); + + const operation2 = + retryWithDifferentMigrationId ? test.createSplitOperation(tenantIds) : operation; + migrationId = operation2.migrationId; + const runMigrationRes = operation2.commit(); + + assert.commandWorked(runMigrationRes); + assert.eq(runMigrationRes.state, "committed"); + + operation2.forget(); + + test.cleanupSuccesfulCommitted(migrationId, tenantIds); +} + +jsTest.log("Test dropping donor and recipient state doc collections during a shard split."); +const test = new BasicServerlessTest({ + recipientTagName, + recipientSetName, + quickGarbageCollection: true, + initiateWithShortElectionTimeout: true +}); + +const fpName = "abortShardSplitBeforeLeavingBlockingState"; +testDroppingStateDocCollections(test, fpName, {dropDonorsCollection: true}); + +testDroppingStateDocCollections( + test, fpName, {dropDonorsCollection: true, retryWithDifferentMigrationId: true}); + +testDroppingStateDocCollections(test, fpName, {dropDonorsCollection: false}); + +testDroppingStateDocCollections(test, fpName, { + dropDonorsCollection: false, + expectedAbortReason: (fpName == "abortShardSplitBeforeLeavingBlockingState") + ? 
ErrorCodes.TenantMigrationAborted + : null +}); + +test.stop(); +})(); diff --git a/jstests/serverless/shard_split_drop_state_doc_collection_blocking.js b/jstests/serverless/shard_split_drop_state_doc_collection_blocking.js new file mode 100644 index 00000000000..91a6a8df296 --- /dev/null +++ b/jstests/serverless/shard_split_drop_state_doc_collection_blocking.js @@ -0,0 +1,118 @@ +/** + * Tests dropping the donor state doc collections in the middle of a shard split in blocking state. + * + * @tags: [ + * incompatible_with_eft, + * incompatible_with_macos, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * requires_fcv_52, + * featureFlagShardSplit + * ] + */ + +(function() { +"use strict"; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/serverless/libs/basic_serverless_test.js"); + +const recipientTagName = "recipientNode"; +const recipientSetName = "recipient"; + +TestData.skipCheckDBHashes = true; + +function testDroppingStateDocCollections( + test, + fpName, + {dropDonorsCollection = false, retryWithDifferentMigrationId = false, expectedAbortReason}) { + jsTest.log(`Testing with failpoint: ${fpName} dropDonorsCollection: ${ + dropDonorsCollection}, retryWithDifferentMigrationId: ${retryWithDifferentMigrationId}`); + + test.addRecipientNodes(); + let donorPrimary = test.donor.getPrimary(); + + const tenantIds = ["tenant1", "tenant2"]; + + const operation = test.createSplitOperation(tenantIds); + let migrationId = operation.migrationId; + + let fp = configureFailPoint(donorPrimary.getDB("admin"), fpName); + let commitShardSplitThread = operation.commitAsync(); + fp.wait(); + + if (dropDonorsCollection) { + assert(donorPrimary.getCollection(BasicServerlessTest.kConfigSplitDonorsNS).drop()); + let donorDoc = findSplitOperation(donorPrimary, migrationId); + assert.eq(donorDoc, null); + + const currOpDonor = assert.commandWorked( + 
donorPrimary.adminCommand({currentOp: true, desc: "shard split operation"})); + assert.eq(currOpDonor.inprog.length, 0); + + // Trigger stepup to allow the donor service to rebuild. + assert.commandWorked(donorPrimary.adminCommand({replSetStepDown: 30, force: true})); + } + + fp.off(); + commitShardSplitThread.join(); + if (expectedAbortReason) { + const data = commitShardSplitThread.returnData(); + assert.commandFailedWithCode(data, expectedAbortReason); + assert.eq(data.code, expectedAbortReason); + } + test.removeRecipientNodesFromDonor(); + if (!dropDonorsCollection) { + operation.forget(); + test.waitForGarbageCollection(migrationId, tenantIds); + } + test.removeAndStopRecipientNodes(); + test.reconfigDonorSetAfterSplit(); + + test.addRecipientNodes(); + + const operation2 = + retryWithDifferentMigrationId ? test.createSplitOperation(tenantIds) : operation; + migrationId = operation2.migrationId; + const runMigrationRes = operation2.commit(); + + assert.commandWorked(runMigrationRes); + assert.eq(runMigrationRes.state, "committed"); + + operation2.forget(); + + test.cleanupSuccesfulCommitted(migrationId, tenantIds); +} + +jsTest.log("Test dropping donor and recipient state doc collections during a shard split."); +const test = new BasicServerlessTest({ + recipientTagName, + recipientSetName, + quickGarbageCollection: true, + initiateWithShortElectionTimeout: true +}); + +const fpName = "pauseShardSplitAfterBlocking"; +testDroppingStateDocCollections(test, fpName, {dropDonorsCollection: true}); + +testDroppingStateDocCollections( + test, fpName, {dropDonorsCollection: true, retryWithDifferentMigrationId: true}); + +if (fpName) { + // if we do not have a failpoint and we do not drop the donor state doc there is no need + // to run a test. 
+ testDroppingStateDocCollections(test, fpName, {dropDonorsCollection: false}); + + testDroppingStateDocCollections(test, fpName, { + dropDonorsCollection: false, + expectedAbortReason: (fpName == "abortShardSplitBeforeLeavingBlockingState") + ? ErrorCodes.TenantMigrationAborted + : null + }); +} + +test.stop(); +})(); diff --git a/jstests/serverless/shard_split_drop_state_doc_collection_committed.js b/jstests/serverless/shard_split_drop_state_doc_collection_committed.js new file mode 100644 index 00000000000..ba9c5c5bc65 --- /dev/null +++ b/jstests/serverless/shard_split_drop_state_doc_collection_committed.js @@ -0,0 +1,91 @@ +/** + * Tests dropping the donor state doc collections after the shard split has committed. + * + * @tags: [ + * incompatible_with_eft, + * incompatible_with_macos, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * requires_fcv_52, + * featureFlagShardSplit + * ] + */ + +(function() { +"use strict"; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/serverless/libs/basic_serverless_test.js"); + +const recipientTagName = "recipientNode"; +const recipientSetName = "recipient"; + +TestData.skipCheckDBHashes = true; + +function testDroppingStateDocCollections( + test, + fpName, + {dropDonorsCollection = false, retryWithDifferentMigrationId = false, expectedAbortReason}) { + jsTest.log(`Testing with failpoint: ${fpName} dropDonorsCollection: ${ + dropDonorsCollection}, retryWithDifferentMigrationId: ${retryWithDifferentMigrationId}`); + + test.addRecipientNodes(); + let donorPrimary = test.donor.getPrimary(); + + const tenantIds = ["tenant1", "tenant2"]; + + const operation = test.createSplitOperation(tenantIds); + let migrationId = operation.migrationId; + + assert.commandWorked(operation.commit()); + operation.forget(); + + test.cleanupSuccesfulCommitted(migrationId, tenantIds); + + if (dropDonorsCollection) { + 
assert(donorPrimary.getCollection(BasicServerlessTest.kConfigSplitDonorsNS).drop()); + let donorDoc = findSplitOperation(donorPrimary, migrationId); + assert.eq(donorDoc, null); + + const currOpDonor = assert.commandWorked( + donorPrimary.adminCommand({currentOp: true, desc: "shard split operation"})); + assert.eq(currOpDonor.inprog.length, 0); + + // Trigger stepup to allow the donor service to rebuild. + assert.commandWorked(donorPrimary.adminCommand({replSetStepDown: 30, force: true})); + } + + test.addRecipientNodes(); + + const operation2 = + retryWithDifferentMigrationId ? test.createSplitOperation(tenantIds) : operation; + migrationId = operation2.migrationId; + const runMigrationRes = operation2.commit(); + + assert.commandWorked(runMigrationRes); + assert.eq(runMigrationRes.state, "committed"); + + operation2.forget(); + + test.cleanupSuccesfulCommitted(migrationId, tenantIds); +} + +jsTest.log("Test dropping donor and recipient state doc collections during a shard split."); +const test = new BasicServerlessTest({ + recipientTagName, + recipientSetName, + quickGarbageCollection: true, + initiateWithShortElectionTimeout: true +}); + +const fpName = undefined; +testDroppingStateDocCollections(test, fpName, {dropDonorsCollection: true}); + +testDroppingStateDocCollections( + test, fpName, {dropDonorsCollection: true, retryWithDifferentMigrationId: true}); + +test.stop(); +})(); diff --git a/jstests/serverless/shard_split_drop_state_doc_collection_decision_fullfilled.js b/jstests/serverless/shard_split_drop_state_doc_collection_decision_fullfilled.js new file mode 100644 index 00000000000..1c5120d3e95 --- /dev/null +++ b/jstests/serverless/shard_split_drop_state_doc_collection_decision_fullfilled.js @@ -0,0 +1,121 @@ +/** + * Tests dropping the donor state doc collections after the shard split decision promise is + * fulfilled. 
+ * + * @tags: [ + * incompatible_with_eft, + * incompatible_with_macos, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * requires_fcv_52, + * featureFlagShardSplit + * ] + */ + +(function() { +"use strict"; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/serverless/libs/basic_serverless_test.js"); + +const recipientTagName = "recipientNode"; +const recipientSetName = "recipient"; + +TestData.skipCheckDBHashes = true; + +function testDroppingStateDocCollections( + test, + fpName, + {dropDonorsCollection = false, retryWithDifferentMigrationId = false, expectedAbortReason}) { + jsTest.log(`Testing with failpoint: ${fpName} dropDonorsCollection: ${ + dropDonorsCollection}, retryWithDifferentMigrationId: ${retryWithDifferentMigrationId}`); + + test.addRecipientNodes(); + let donorPrimary = test.donor.getPrimary(); + + const tenantIds = ["tenant1", "tenant2"]; + + const operation = test.createSplitOperation(tenantIds); + let migrationId = operation.migrationId; + + let commitShardSplitThread = undefined; + + let fp = configureFailPoint(donorPrimary.getDB("admin"), fpName); + commitShardSplitThread = operation.commitAsync(); + fp.wait(); + + if (dropDonorsCollection) { + assert(donorPrimary.getCollection(BasicServerlessTest.kConfigSplitDonorsNS).drop()); + let donorDoc = findSplitOperation(donorPrimary, migrationId); + assert.eq(donorDoc, null); + + const currOpDonor = assert.commandWorked( + donorPrimary.adminCommand({currentOp: true, desc: "shard split operation"})); + assert.eq(currOpDonor.inprog.length, 0); + + // Trigger stepup to allow the donor service to rebuild. 
+ assert.commandWorked(donorPrimary.adminCommand({replSetStepDown: 30, force: true})); + } + + fp.off(); + commitShardSplitThread.join(); + if (expectedAbortReason) { + const data = commitShardSplitThread.returnData(); + assert.commandFailedWithCode(data, expectedAbortReason); + assert.eq(data.code, expectedAbortReason); + } + test.removeRecipientNodesFromDonor(); + if (!dropDonorsCollection) { + operation.forget(); + test.waitForGarbageCollection(migrationId, tenantIds); + } + test.removeAndStopRecipientNodes(); + test.reconfigDonorSetAfterSplit(); + + test.addRecipientNodes(); + + const operation2 = + retryWithDifferentMigrationId ? test.createSplitOperation(tenantIds) : operation; + migrationId = operation2.migrationId; + const runMigrationRes = operation2.commit(); + + assert.commandWorked(runMigrationRes); + assert.eq(runMigrationRes.state, "committed"); + + operation2.forget(); + + test.cleanupSuccesfulCommitted(migrationId, tenantIds); +} + +jsTest.log("Test dropping donor and recipient state doc collections during a shard split."); +const test = new BasicServerlessTest({ + recipientTagName, + recipientSetName, + quickGarbageCollection: true, + initiateWithShortElectionTimeout: true +}); + +const fpName = "pauseShardSplitAfterDecision"; +testDroppingStateDocCollections(test, fpName, {dropDonorsCollection: true}); + +testDroppingStateDocCollections( + test, fpName, {dropDonorsCollection: true, retryWithDifferentMigrationId: true}); + +if (fpName) { + // if we do not have a failpoint and we do not drop the donor state doc there is no need + // to run a test. + testDroppingStateDocCollections(test, fpName, {dropDonorsCollection: false}); + + testDroppingStateDocCollections(test, fpName, { + dropDonorsCollection: false, + expectedAbortReason: (fpName == "abortShardSplitBeforeLeavingBlockingState") + ? ErrorCodes.TenantMigrationAborted + : null + }); +} + +test.stop(); +})(); |