diff options
author | A. Jesse Jiryu Davis <jesse@mongodb.com> | 2021-11-29 15:10:53 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-11-29 15:35:37 +0000 |
commit | 5bdd93592aa5199ab52d5392737b8eab74ca409b (patch) | |
tree | 28a7bb9c0cbb1c2ee64a0bdf4a8e83c234b9b7ed /jstests/replsets | |
parent | aaa87a2ffa3485a5cdbd12d7bb00abe727cab119 (diff) | |
download | mongo-5bdd93592aa5199ab52d5392737b8eab74ca409b.tar.gz |
SERVER-61128 voteCommitMigrationProgress command
Diffstat (limited to 'jstests/replsets')
18 files changed, 169 insertions, 5 deletions
diff --git a/jstests/replsets/db_reads_while_recovering_all_commands.js b/jstests/replsets/db_reads_while_recovering_all_commands.js index 615e4a0a4ab..cc3df9adf90 100644 --- a/jstests/replsets/db_reads_while_recovering_all_commands.js +++ b/jstests/replsets/db_reads_while_recovering_all_commands.js @@ -341,6 +341,7 @@ const allCommands = { }, voteCommitImportCollection: {skip: isNotAUserDataRead}, voteCommitIndexBuild: {skip: isNotAUserDataRead}, + voteCommitMigrationProgress: {skip: isNotAUserDataRead}, waitForFailPoint: {skip: isNotAUserDataRead}, waitForOngoingChunkSplits: {skip: isNotAUserDataRead}, whatsmysni: {skip: isNotAUserDataRead}, diff --git a/jstests/replsets/libs/tenant_migration_test.js b/jstests/replsets/libs/tenant_migration_test.js index 8c9c779bb75..0c87a193aea 100644 --- a/jstests/replsets/libs/tenant_migration_test.js +++ b/jstests/replsets/libs/tenant_migration_test.js @@ -138,14 +138,22 @@ function TenantMigrationTest({ * * Returns the result of the last 'donorStartMigration' command executed. */ - this.waitForMigrationToComplete = function(migrationOpts, retryOnRetryableErrors = false) { + this.waitForMigrationToComplete = function( + migrationOpts, retryOnRetryableErrors = false, forgetMigration = false) { // Assert that the migration has already been started. const tenantId = migrationOpts.tenantId; assert(this.getDonorPrimary() .getCollection(TenantMigrationTest.kConfigDonorsNS) .findOne({tenantId})); - return this.runDonorStartMigration( + + const donorStartReply = this.runDonorStartMigration( migrationOpts, true /* waitForMigrationToComplete */, retryOnRetryableErrors); + if (!forgetMigration) { + return donorStartReply; + } + + this.forgetMigration(migrationOpts.migrationIdString, retryOnRetryableErrors); + return donorStartReply; }; /** diff --git a/jstests/replsets/tenant_migration_cluster_time_keys_cloning.js b/jstests/replsets/tenant_migration_cluster_time_keys_cloning.js index c6b51595df1..0adcb568bcf 100644 --- a/jstests/replsets/tenant_migration_cluster_time_keys_cloning.js +++ b/jstests/replsets/tenant_migration_cluster_time_keys_cloning.js @@ -57,6 +57,7 @@ function runMigrationAndAssertExternalKeysCopied(tenantMigrationTest, tenantId) }; TenantMigrationTest.assertCommitted(tenantMigrationTest.runMigration(migrationOpts)); assertCopiedExternalKeys(tenantMigrationTest, migrationId); + tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString); } function assertHasExternalKeys(conn, migrationId) { @@ -129,6 +130,11 @@ const migrationX509Options = TenantMigrationUtil.makeX509OptionsForTest(); new ReplSetTest({nodes: 3, name: "donorRst", nodeOptions: migrationX509Options.donor}); donorRst.startSet(); donorRst.initiate(); + if (TenantMigrationUtil.isShardMergeEnabled(donorRst.getPrimary().getDB("adminDB"))) { + jsTestLog("Skip: featureFlagShardMerge enabled, but shard merge does not survive failover"); + donorRst.stopSet(); + return; + } const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), donorRst}); @@ -169,6 +175,11 @@ const migrationX509Options = TenantMigrationUtil.makeX509OptionsForTest(); {nodes: 3, name: "recipientRst", nodeOptions: migrationX509Options.recipient}); recipientRst.startSet(); recipientRst.initiate(); + if (TenantMigrationUtil.isShardMergeEnabled(recipientRst.getPrimary().getDB("adminDB"))) { + jsTestLog("Skip: featureFlagShardMerge enabled, but shard merge does not survive failover"); + recipientRst.stopSet(); + return; + } const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), recipientRst}); @@ -217,6 +228,14 @@ const migrationX509Options = TenantMigrationUtil.makeX509OptionsForTest(); const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), donorRst}); function runTest(tenantId, withFailover) { + if (withFailover && + TenantMigrationUtil.isShardMergeEnabled(donorRst.getPrimary().getDB("adminDB"))) { + jsTestLog( + "Skip: featureFlagShardMerge enabled, but shard merge does not survive failover"); + tenantMigrationTest.stop(); + return; + } + const migrationId = UUID(); const migrationOpts = { migrationIdString: extractUUIDFromObject(migrationId), diff --git a/jstests/replsets/tenant_migration_concurrent_bulk_writes.js b/jstests/replsets/tenant_migration_concurrent_bulk_writes.js index 8ab8f6b56b5..beda0b68c67 100644 --- a/jstests/replsets/tenant_migration_concurrent_bulk_writes.js +++ b/jstests/replsets/tenant_migration_concurrent_bulk_writes.js @@ -244,6 +244,7 @@ function bulkMultiUpdateDocsUnordered(primaryHost, dbName, collName, numDocs) { migrationThread.join(); TenantMigrationTest.assertCommitted(migrationThread.returnData()); + tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString); let bulkWriteRes = bulkWriteThread.returnData(); let writeErrors = bulkWriteRes.res.writeErrors; @@ -312,6 +313,7 @@ function bulkMultiUpdateDocsUnordered(primaryHost, dbName, collName, numDocs) { abortFp.off(); TenantMigrationTest.assertAborted(migrationThread.returnData()); + tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString); const bulkWriteRes = bulkWriteThread.returnData(); const writeErrors = bulkWriteRes.res.writeErrors; @@ -414,6 +416,7 @@ function bulkMultiUpdateDocsUnordered(primaryHost, dbName, collName, numDocs) { migrationThread.join(); TenantMigrationTest.assertCommitted(migrationThread.returnData()); + tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString); const bulkWriteRes = bulkWriteThread.returnData(); const writeErrors = bulkWriteRes.res.writeErrors; @@ -474,6 +477,7 @@ function bulkMultiUpdateDocsUnordered(primaryHost, dbName, collName, numDocs) { abortFp.off(); TenantMigrationTest.assertAborted(migrationThread.returnData()); + tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString); const bulkWriteRes = bulkWriteThread.returnData(); const writeErrors = bulkWriteRes.res.writeErrors; @@ -528,6 +532,7 @@ function bulkMultiUpdateDocsUnordered(primaryHost, dbName, collName, numDocs) { migrationThread.join(); TenantMigrationTest.assertCommitted(migrationThread.returnData()); + tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString); let bulkWriteRes = bulkWriteThread.returnData(); assert.eq(bulkWriteRes.res.code, ErrorCodes.Interrupted, tojson(bulkWriteRes)); @@ -577,6 +582,7 @@ function bulkMultiUpdateDocsUnordered(primaryHost, dbName, collName, numDocs) { migrationThread.join(); TenantMigrationTest.assertCommitted(migrationThread.returnData()); + tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString); let bulkWriteRes = bulkWriteThread.returnData(); assert.eq(bulkWriteRes.res.code, ErrorCodes.Interrupted, tojson(bulkWriteRes)); diff --git a/jstests/replsets/tenant_migration_concurrent_migrations.js b/jstests/replsets/tenant_migration_concurrent_migrations.js index 2996a24155d..a26c4ab7f15 100644 --- a/jstests/replsets/tenant_migration_concurrent_migrations.js +++ b/jstests/replsets/tenant_migration_concurrent_migrations.js @@ -6,9 +6,12 @@ * migration state to "committed" and "aborted" to be majority committed but it cannot do that on * ephemeralForTest. * + * Incompatible with shard merge, which can't handle concurrent migrations. + * * @tags: [ * incompatible_with_eft, * incompatible_with_macos, + * incompatible_with_shard_merge, * incompatible_with_windows_tls, * requires_majority_read_concern, * requires_persistence, diff --git a/jstests/replsets/tenant_migration_concurrent_migrations_recipient.js b/jstests/replsets/tenant_migration_concurrent_migrations_recipient.js index ef6e02e5b83..8d4dc9a0bd9 100644 --- a/jstests/replsets/tenant_migration_concurrent_migrations_recipient.js +++ b/jstests/replsets/tenant_migration_concurrent_migrations_recipient.js @@ -1,9 +1,13 @@ /** * Tests running 50 concurrent migrations against the same recipient. + * + * Incompatible with shard merge, which can't handle concurrent migrations. + * * @tags: [ * incompatible_with_amazon_linux, * incompatible_with_eft, * incompatible_with_macos, + * incompatible_with_shard_merge, * incompatible_with_windows_tls, * requires_majority_read_concern, * requires_persistence, diff --git a/jstests/replsets/tenant_migration_concurrent_writes_on_donor.js b/jstests/replsets/tenant_migration_concurrent_writes_on_donor.js index 3c607241244..96dbe3089a3 100644 --- a/jstests/replsets/tenant_migration_concurrent_writes_on_donor.js +++ b/jstests/replsets/tenant_migration_concurrent_writes_on_donor.js @@ -995,6 +995,7 @@ const testCases = { usersInfo: {skip: isNotRunOnUserDatabase}, validate: {skip: isNotWriteCommand}, voteCommitIndexBuild: {skip: isNotRunOnUserDatabase}, + voteCommitMigrationProgress: {skip: isNotRunOnUserDatabase}, waitForFailPoint: {skip: isNotRunOnUserDatabase}, waitForOngoingChunkSplits: {skip: isNotRunOnUserDatabase}, whatsmysni: {skip: isNotRunOnUserDatabase}, diff --git a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js index 7decac8c310..e83e54d62d7 100644 --- a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js +++ b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js @@ -1,9 +1,12 @@ /** * Tests that tenant migrations resume successfully on donor stepup and restart. * + * Incompatible with shard merge, which can't handle restart. + * * @tags: [ * incompatible_with_eft, * incompatible_with_macos, + * incompatible_with_shard_merge, * incompatible_with_windows_tls, * requires_majority_read_concern, * requires_persistence, diff --git a/jstests/replsets/tenant_migration_donor_rollback_during_cloning.js b/jstests/replsets/tenant_migration_donor_rollback_during_cloning.js index c94521120ac..f9fb206e094 100644 --- a/jstests/replsets/tenant_migration_donor_rollback_during_cloning.js +++ b/jstests/replsets/tenant_migration_donor_rollback_during_cloning.js @@ -3,9 +3,12 @@ * tenant cloner performs (such as 'listCollections' and 'listDatabases') account for donor * rollback. * + * Incompatible with shard merge, which can't handle rollback. + * * @tags: [ * incompatible_with_eft, * incompatible_with_macos, + * incompatible_with_shard_merge, * incompatible_with_windows_tls, * requires_majority_read_concern, * requires_persistence, diff --git a/jstests/replsets/tenant_migration_donor_rollback_recovery.js b/jstests/replsets/tenant_migration_donor_rollback_recovery.js index 67c9f651897..570874ed0d8 100644 --- a/jstests/replsets/tenant_migration_donor_rollback_recovery.js +++ b/jstests/replsets/tenant_migration_donor_rollback_recovery.js @@ -1,9 +1,12 @@ /** * Tests that tenant migrations that go through donor rollback are recovered correctly. * + * Incompatible with shard merge, which can't handle rollback. + * * @tags: [ * incompatible_with_eft, * incompatible_with_macos, + * incompatible_with_shard_merge, * incompatible_with_windows_tls, * requires_majority_read_concern, * requires_persistence, diff --git a/jstests/replsets/tenant_migration_donor_startup_recovery.js b/jstests/replsets/tenant_migration_donor_startup_recovery.js index 37d2bf6f751..d599b061a8a 100644 --- a/jstests/replsets/tenant_migration_donor_startup_recovery.js +++ b/jstests/replsets/tenant_migration_donor_startup_recovery.js @@ -3,10 +3,12 @@ * randomly selects a point during the migration to shutdown the donor. * * Tenant migrations are not expected to be run on servers with ephemeralForTest. + * Incompatible with shard merge, which can't handle restart. * * @tags: [ * incompatible_with_eft, * incompatible_with_macos, + * incompatible_with_shard_merge, * incompatible_with_windows_tls, * requires_majority_read_concern, * requires_persistence, diff --git a/jstests/replsets/tenant_migration_external_keys_ttl.js b/jstests/replsets/tenant_migration_external_keys_ttl.js index 3971b7136ee..65f06b338ac 100644 --- a/jstests/replsets/tenant_migration_external_keys_ttl.js +++ b/jstests/replsets/tenant_migration_external_keys_ttl.js @@ -2,9 +2,12 @@ * Tests that tenant migrations correctly set the TTL values for keys in the * config.external_validation_keys collection. * + * TODO SERVER-61231: shard merge can't handle concurrent migrations, adapt this test. + * * @tags: [ * incompatible_with_eft, * incompatible_with_macos, + * incompatible_with_shard_merge, * incompatible_with_windows_tls, * requires_majority_read_concern, * requires_persistence, diff --git a/jstests/replsets/tenant_migration_fetch_committed_transactions_retry.js b/jstests/replsets/tenant_migration_fetch_committed_transactions_retry.js index 02844db2648..286c84da686 100644 --- a/jstests/replsets/tenant_migration_fetch_committed_transactions_retry.js +++ b/jstests/replsets/tenant_migration_fetch_committed_transactions_retry.js @@ -6,9 +6,12 @@ * 3) Retrying while the migration is updating, and the donor starts a new transaction on an * existing session. * + * TODO SERVER-61231: shard merge can't handle restart, adapt this test. + * * @tags: [ * incompatible_with_eft, * incompatible_with_macos, + * incompatible_with_shard_merge, * incompatible_with_windows_tls, * requires_majority_read_concern, * requires_persistence, diff --git a/jstests/replsets/tenant_migration_network_error_via_rollback.js b/jstests/replsets/tenant_migration_network_error_via_rollback.js index 0382fbbca15..6a2f84fc892 100644 --- a/jstests/replsets/tenant_migration_network_error_via_rollback.js +++ b/jstests/replsets/tenant_migration_network_error_via_rollback.js @@ -3,9 +3,12 @@ * connection errors between the recipient primary and the sync source at various stages in the * process. (Replica set members close connections as part of rollback.) * + * TODO SERVER-61231: shard merge can't handle concurrent rollback, adapt this test. + * * @tags: [ * incompatible_with_eft, * incompatible_with_macos, + * incompatible_with_shard_merge, * incompatible_with_windows_tls, * requires_majority_read_concern, * requires_persistence, diff --git a/jstests/replsets/tenant_migration_recipient_resumes_on_donor_failover.js b/jstests/replsets/tenant_migration_recipient_resumes_on_donor_failover.js index 445321f7253..3cf797e7b32 100644 --- a/jstests/replsets/tenant_migration_recipient_resumes_on_donor_failover.js +++ b/jstests/replsets/tenant_migration_recipient_resumes_on_donor_failover.js @@ -6,9 +6,12 @@ * - donor shuts down after cloning is finished but the recipient has yet to declare that the data * is consistent * + * Incompatible with shard merge, which can't handle restart. + * * @tags: [ * incompatible_with_eft, * incompatible_with_macos, + * incompatible_with_shard_merge, * incompatible_with_windows_tls, * requires_majority_read_concern, * requires_persistence, diff --git a/jstests/replsets/tenant_migration_sync_source_too_stale.js b/jstests/replsets/tenant_migration_sync_source_too_stale.js index 60abaa24ccf..1ce602c3c35 100644 --- a/jstests/replsets/tenant_migration_sync_source_too_stale.js +++ b/jstests/replsets/tenant_migration_sync_source_too_stale.js @@ -1,5 +1,5 @@ /** - * Tests that a migration will retry if the oplog fetcher discoveres that its sync source is too + * Tests that a migration will retry if the oplog fetcher discovers that its sync source is too * stale. We test this with a donor replica set that has two secondaries, 'donorSecondary' and * 'delayedSecondary'. We force the recipient to sync from 'donorSecondary'. Then, after the * recipient has set its 'startFetchingDonorOpTime', we stop replication on 'delayedSecondary' and @@ -9,9 +9,12 @@ * 'delayedSecondary', it should see that it is too stale. As a result, it should retry sync source * selection until it finds a sync source that is no longer too stale. * + * TODO SERVER-61231: shard merge can't handle restart, adapt this test. + * * @tags: [ * incompatible_with_eft, * incompatible_with_macos, + * incompatible_with_shard_merge, * incompatible_with_windows_tls, * requires_majority_read_concern, * requires_persistence, diff --git a/jstests/replsets/tenant_migration_vote_progress.js b/jstests/replsets/tenant_migration_vote_progress.js new file mode 100644 index 00000000000..41f7dfe7e78 --- /dev/null +++ b/jstests/replsets/tenant_migration_vote_progress.js @@ -0,0 +1,94 @@ +/** + * Tests the voteCommitMigrationProgress command. + * + * @tags: [ + * incompatible_with_eft, + * incompatible_with_macos, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * requires_fcv_52, + * ] + */ + +(function() { +"use strict"; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/parallelTester.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/replsets/libs/tenant_migration_test.js"); +load("jstests/replsets/libs/tenant_migration_util.js"); + +const tenantMigrationTest = new TenantMigrationTest({name: jsTestName()}); + +const kTenantId = "testTenantId1"; +const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); + +function runVoteCmd(migrationId, step) { + return recipientPrimary.adminCommand({ + voteCommitMigrationProgress: 1, + migrationId: migrationId, + from: tenantMigrationTest.getRecipientPrimary().host, + step: step, + success: true + }); +} + +function voteShouldFail(migrationId, steps) { + for (let step of steps) { + const reply = runVoteCmd(migrationId, step); + jsTestLog(`Vote with migrationId ${migrationId}, step '${step}', reply` + + ` (should fail): ${tojson(reply)}`); + assert.commandFailed(reply); + } +} + +function voteShouldSucceed(migrationId, steps) { + for (let step of steps) { + assert.commandWorked(runVoteCmd(migrationId, step)); + } +} + +const migrationId = UUID(); +const migrationOpts = { + migrationIdString: extractUUIDFromObject(migrationId), + recipientConnString: tenantMigrationTest.getRecipientConnString(), + tenantId: kTenantId, +}; + +const donorRstArgs = TenantMigrationUtil.createRstArgs(tenantMigrationTest.getDonorRst()); + +jsTestLog("Test that voteCommitMigrationProgress fails with no migration in flight"); +voteShouldFail(migrationId, ["copied files", "imported files"]); + +jsTestLog("Start a migration and pause after cloning"); +const fpAfterCollectionClonerDone = + configureFailPoint(recipientPrimary, "fpAfterCollectionClonerDone", {action: "hang"}); +const fpAfterDataConsistentMigrationRecipientInstance = configureFailPoint( + recipientPrimary, "fpAfterDataConsistentMigrationRecipientInstance", {action: "hang"}); +const migrationThread = + new Thread(TenantMigrationUtil.runMigrationAsync, migrationOpts, donorRstArgs); +migrationThread.start(); +fpAfterCollectionClonerDone.wait(); +fpAfterCollectionClonerDone.off(); + +if (TenantMigrationUtil.isShardMergeEnabled(recipientPrimary.getDB("admin"))) { + jsTestLog("Test that voteCommitMigrationProgress succeeds with step 'copied files'"); + voteShouldSucceed(migrationId, ["copied files"]); +} else { + jsTestLog("Test that voteCommitMigrationProgress fails with shard merge disabled"); + voteShouldFail(migrationId, ["copied files"]); +} + +jsTestLog("Test that voteCommitMigrationProgress fails with wrong 'step'"); +voteShouldFail(migrationId, ["imported files"]); + +fpAfterDataConsistentMigrationRecipientInstance.wait(); +fpAfterDataConsistentMigrationRecipientInstance.off(); + +TenantMigrationTest.assertCommitted(migrationThread.returnData()); +assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); +voteShouldFail(migrationId, ["copied files", "imported files"]); +tenantMigrationTest.stop(); +})(); diff --git a/jstests/replsets/tenant_migrations_noop_writes.js b/jstests/replsets/tenant_migrations_noop_writes.js index 687b71b980e..abae2350f4c 100644 --- a/jstests/replsets/tenant_migrations_noop_writes.js +++ b/jstests/replsets/tenant_migrations_noop_writes.js @@ -130,7 +130,8 @@ const recipientPrimary = tmt.getRecipientPrimary(); tojson(donorRes)); fp.off(); - TenantMigrationTest.assertCommitted(tmt.waitForMigrationToComplete(migrationOpts)); + TenantMigrationTest.assertCommitted(tmt.waitForMigrationToComplete( + migrationOpts, false /* retryOnRetryableErrors */, true /* forgetMigration */)); // // Verify reading on the recipient with an afterClusterTime > the block timestamp @@ -174,7 +175,8 @@ const recipientPrimary = tmt.getRecipientPrimary(); stopServerReplication(laggedDonorSecondary); fp.off(); - TenantMigrationTest.assertCommitted(tmt.waitForMigrationToComplete(migrationOpts)); + TenantMigrationTest.assertCommitted(tmt.waitForMigrationToComplete( + migrationOpts, false /* retryOnRetryableErrors */, true /* forgetMigration */)); // // Advance cluster time on the recipient beyond the block timestamp. |