diff options
author | mathisbessamdb <mathis.bessa@mongodb.com> | 2022-06-27 14:15:36 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-31 15:03:13 +0000 |
commit | b0bde1c01a675b9d104def92c49753bb8e6438b8 (patch) | |
tree | c28848eaa757d447dd9c5f7c1b85a204acbfb2ac | |
parent | 5521f98c392f8af2fbb678f3cbf8089b641d9dba (diff) | |
download | mongo-b0bde1c01a675b9d104def92c49753bb8e6438b8.tar.gz |
SERVER-67194 Replace the logic in our jstest to retry when calling replSetStepUp
(cherry picked from commit 3e3aae871cf2213d5288336f95f67c9df343a814)
16 files changed, 23 insertions, 26 deletions
diff --git a/jstests/replsets/tenant_migration_cloner_stats_with_failover.js b/jstests/replsets/tenant_migration_cloner_stats_with_failover.js index 43c4a63826e..e84f77b5d20 100644 --- a/jstests/replsets/tenant_migration_cloner_stats_with_failover.js +++ b/jstests/replsets/tenant_migration_cloner_stats_with_failover.js @@ -124,8 +124,7 @@ jsTestLog("Bytes copied after first batch of second database: " + bytesCopiedInc // original primary to the new primary. Then, step up the new primary. const fpAfterCreatingCollectionOfSecondDB = configureFailPoint(newRecipientPrimary, "tenantCollectionClonerHangAfterCreateCollection"); -tenantMigrationTest.getRecipientRst().awaitReplication(); -newRecipientPrimary.adminCommand({replSetStepUp: 1}); +tenantMigrationTest.getRecipientRst().stepUp(newRecipientPrimary); fpAfterBatchOfSecondDB.off(); jsTestLog("Wait until the new primary creates collection of second database."); diff --git a/jstests/replsets/tenant_migration_donor_rollback_during_cloning.js b/jstests/replsets/tenant_migration_donor_rollback_during_cloning.js index 9385c37d4ca..d781c28330f 100644 --- a/jstests/replsets/tenant_migration_donor_rollback_during_cloning.js +++ b/jstests/replsets/tenant_migration_donor_rollback_during_cloning.js @@ -145,7 +145,7 @@ function runTest(tenantId, // this situation. Allow replication once again. fpAfterListCall.wait(); const newDonorPrimary = otherNodes[0]; - newDonorPrimary.adminCommand({replSetStepUp: 1}); + donorRst.stepUp(newDonorPrimary, {awaitReplicationBeforeStepUp: false}); restartServerReplication(otherNodes); // Advance the cluster time by applying new operations on the new primary. We insert documents diff --git a/jstests/replsets/tenant_migration_network_error_via_rollback.js b/jstests/replsets/tenant_migration_network_error_via_rollback.js index cdb09f63577..1d2ad602941 100644 --- a/jstests/replsets/tenant_migration_network_error_via_rollback.js +++ b/jstests/replsets/tenant_migration_network_error_via_rollback.js @@ -124,7 +124,7 @@ function runTest({failPointName, failPointData = {}, batchSize = 10 * 1000}) { jsTestLog("Failing over to next primary"); assert.commandWorked( donorA.adminCommand({replSetStepDown: ReplSetTest.kDefaultTimeoutMS, force: true})); - assert.commandWorked(nextPrimary.adminCommand({replSetStepUp: ReplSetTest.kDefaultTimeoutMS})); + donorRst.stepUp(nextPrimary, {awaitReplicationBeforeStepUp: false}); assert.eq(nextPrimary, donorRst.getPrimary()); restartServerReplication(nextPrimary); restartServerReplication(donorD); diff --git a/jstests/replsets/tenant_migration_recipient_aborts_merge_on_donor_failure.js b/jstests/replsets/tenant_migration_recipient_aborts_merge_on_donor_failure.js index 95d9710ef0f..77656281600 100644 --- a/jstests/replsets/tenant_migration_recipient_aborts_merge_on_donor_failure.js +++ b/jstests/replsets/tenant_migration_recipient_aborts_merge_on_donor_failure.js @@ -68,7 +68,9 @@ load("jstests/replsets/libs/tenant_migration_util.js"); // step up a secondary so that the migration will complete and the // waitForMigrationToComplete call to the donor primary succeeds - assert.commandWorked(donorSecondary.adminCommand({replSetStepUp: 1})); + assert.soonNoExcept(() => { + return assert.commandWorked(donorSecondary.adminCommand({replSetStepUp: 1})); + }); hangBeforeTaskCompletion.off(); TenantMigrationTest.assertAborted( diff --git a/jstests/replsets/tenant_migration_recipient_does_not_change_sync_source_after_step_down.js b/jstests/replsets/tenant_migration_recipient_does_not_change_sync_source_after_step_down.js index 62c899fa87c..b970f20cce1 100644 --- a/jstests/replsets/tenant_migration_recipient_does_not_change_sync_source_after_step_down.js +++ b/jstests/replsets/tenant_migration_recipient_does_not_change_sync_source_after_step_down.js @@ -102,8 +102,7 @@ assert.soon(() => recipientColl.find().itcount() === batchSize); verifySyncSource(recipientPrimary, migrationId, donorPrimary.host); // Steps down the current donor's primary and wait for the new primary to be discovered. -donorRst.awaitLastOpCommitted(); -assert.commandWorked(donorRst.getSecondary().adminCommand({replSetStepUp: 1})); +donorRst.stepUp(donorRst.getSecondary()); const newDonorPrimary = donorRst.getPrimary(); assert.neq(newDonorPrimary.host, donorPrimary.host); diff --git a/jstests/replsets/tenant_migration_recipient_failover_before_creating_oplog_buffer.js b/jstests/replsets/tenant_migration_recipient_failover_before_creating_oplog_buffer.js index c4ac48d46c8..be99a6ffacb 100644 --- a/jstests/replsets/tenant_migration_recipient_failover_before_creating_oplog_buffer.js +++ b/jstests/replsets/tenant_migration_recipient_failover_before_creating_oplog_buffer.js @@ -48,8 +48,8 @@ jsTestLog("Waiting until the recipient primary is about to create an oplog buffe fpBeforeCreatingOplogBuffer.wait(); jsTestLog("Stepping a new primary up."); -assert.commandWorked(tenantMigrationTest.getRecipientRst().getSecondaries()[0].adminCommand( - {replSetStepUp: ReplSetTest.kForeverSecs, force: true})); +tenantMigrationTest.getRecipientRst().stepUp( + tenantMigrationTest.getRecipientRst().getSecondaries()[0]); fpBeforeCreatingOplogBuffer.off(); diff --git a/jstests/replsets/tenant_migration_recipient_initial_sync_cloning.js b/jstests/replsets/tenant_migration_recipient_initial_sync_cloning.js index e532419c76f..61535095919 100644 --- a/jstests/replsets/tenant_migration_recipient_initial_sync_cloning.js +++ b/jstests/replsets/tenant_migration_recipient_initial_sync_cloning.js @@ -90,7 +90,7 @@ function restartNodeAndCheckStateWithoutOplogApplication( jsTestLog("Stepping up the new node."); // Now step up the new node - assert.commandWorked(initialSyncNode.adminCommand({"replSetStepUp": 1})); + tenantMigrationTest.getRecipientRst().stepUp(initialSyncNode); fpOnRecipient.off(); } @@ -119,7 +119,7 @@ function restartNodeAndCheckStateDuringOplogApplication( jsTestLog("Stepping up the new node."); // Now step up the new node - assert.commandWorked(initialSyncNode.adminCommand({"replSetStepUp": 1})); + tenantMigrationTest.getRecipientRst().stepUp(initialSyncNode); fpPauseOplogApplierOnBatch.off(); fpOnRecipient.off(); } diff --git a/jstests/replsets/tenant_migration_recipient_resumes_on_donor_failover.js b/jstests/replsets/tenant_migration_recipient_resumes_on_donor_failover.js index d47dd189d2c..c917fb6cc29 100644 --- a/jstests/replsets/tenant_migration_recipient_resumes_on_donor_failover.js +++ b/jstests/replsets/tenant_migration_recipient_resumes_on_donor_failover.js @@ -112,7 +112,9 @@ function runTest(failPoint) { 'fpAfterStartingOplogFetcherMigrationRecipientInstance', {action: "hang"}); // Step up a new donor primary. - assert.commandWorked(donorSecondary.adminCommand({replSetStepUp: 1})); + assert.soonNoExcept(() => { + return assert.commandWorked(donorSecondary.adminCommand({replSetStepUp: 1})); + }); hangOnRetry.wait(); res = recipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"}); currOp = res.inprog[0]; diff --git a/jstests/replsets/tenant_migration_recipient_retry_forget_migration.js b/jstests/replsets/tenant_migration_recipient_retry_forget_migration.js index da694d8dc3d..3023a2fb61f 100644 --- a/jstests/replsets/tenant_migration_recipient_retry_forget_migration.js +++ b/jstests/replsets/tenant_migration_recipient_retry_forget_migration.js @@ -85,7 +85,7 @@ const newRecipientPrimary = tenantMigrationTest.getRecipientRst().getSecondary() const newPrimaryFp = configureFailPoint(newRecipientPrimary, "hangBeforeTaskCompletion"); // Step up a new recipient primary before the state doc is truly marked as garbage collectable. -assert.commandWorked(newRecipientPrimary.adminCommand({replSetStepUp: 1})); +tenantMigrationTest.getRecipientRst().stepUp(newRecipientPrimary); fp.off(); // The new primary should skip all tenant migration steps but wait for another diff --git a/jstests/replsets/tenant_migration_recipient_retryable_writes_failover.js b/jstests/replsets/tenant_migration_recipient_retryable_writes_failover.js index 4ecdac15965..68ceae0b709 100644 --- a/jstests/replsets/tenant_migration_recipient_retryable_writes_failover.js +++ b/jstests/replsets/tenant_migration_recipient_retryable_writes_failover.js @@ -94,9 +94,7 @@ const recipientSecondary = recipientRst.getSecondary(); const fpAfterFetchingRetryableWritesEntries = configureFailPoint( recipientSecondary, "fpAfterFetchingRetryableWritesEntriesBeforeStartOpTime", {action: "hang"}); -recipientRst.awaitLastOpCommitted(); -assert.commandWorked( - recipientSecondary.adminCommand({replSetStepUp: ReplSetTest.kForeverSecs, force: true})); +recipientRst.stepUp(recipientSecondary); fpPauseAfterRetrievingRetryableWritesBatch.off(); const newRecipientPrimary = recipientRst.getPrimary(); diff --git a/jstests/replsets/tenant_migration_recipient_stepdown_after_forget.js b/jstests/replsets/tenant_migration_recipient_stepdown_after_forget.js index 7e2ca21b321..b25df0e9d9b 100644 --- a/jstests/replsets/tenant_migration_recipient_stepdown_after_forget.js +++ b/jstests/replsets/tenant_migration_recipient_stepdown_after_forget.js @@ -55,8 +55,8 @@ forgetMigrationThread.start(); fpBeforeDroppingOplogBufferCollection.wait(); jsTestLog("Step up a new recipient primary."); -assert.commandWorked(tenantMigrationTest.getRecipientRst().getSecondaries()[0].adminCommand( - {replSetStepUp: ReplSetTest.kForeverSecs, force: true})); +tenantMigrationTest.getRecipientRst().stepUp( + tenantMigrationTest.getRecipientRst().getSecondaries()[0]); fpBeforeDroppingOplogBufferCollection.off(); diff --git a/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js b/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js index 26fb31e41e8..6a25e0e0c6b 100644 --- a/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js +++ b/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js @@ -90,8 +90,7 @@ const tenantMigrationFailoverTest = function(isTimeSeries, createCollFn, docs) { // Step up a new node in the recipient set and trigger a failover. The new primary should resume // cloning starting from the third document. const newRecipientPrimary = recipientRst.getSecondaries()[0]; - recipientRst.awaitLastOpCommitted(); - assert.commandWorked(newRecipientPrimary.adminCommand({replSetStepUp: 1})); + recipientRst.stepUp(newRecipientPrimary); hangDuringCollectionClone.off(); recipientRst.getPrimary(); diff --git a/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js b/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js index 2fd5aeec36c..45284f9f358 100644 --- a/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js +++ b/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js @@ -95,7 +95,7 @@ const fpPauseAtStartOfMigration = // Step up a new node in the recipient set and trigger a failover. The new primary should resume // cloning starting from the third document. -assert.commandWorked(newRecipientPrimary.adminCommand({replSetStepUp: 1})); +recipientRst.stepUp(newRecipientPrimary); hangDuringCollectionClone.off(); recipientRst.getPrimary(); diff --git a/jstests/replsets/tenant_migration_resume_oplog_application.js b/jstests/replsets/tenant_migration_resume_oplog_application.js index 530c9314574..532908d960b 100644 --- a/jstests/replsets/tenant_migration_resume_oplog_application.js +++ b/jstests/replsets/tenant_migration_resume_oplog_application.js @@ -94,7 +94,7 @@ if (appliedNoOps.count() === 2) { // Step up a new node in the recipient set and trigger a failover. The new primary should resume // fetching starting from the unapplied documents. const newRecipientPrimary = recipientRst.getSecondaries()[0]; -assert.commandWorked(newRecipientPrimary.adminCommand({replSetStepUp: 1})); +recipientRst.stepUp(newRecipientPrimary); waitAfterDatabaseClone.off(); waitInOplogApplier.off(); recipientRst.getPrimary(); diff --git a/jstests/replsets/tenant_migration_retryable_write_retry_on_recipient.js b/jstests/replsets/tenant_migration_retryable_write_retry_on_recipient.js index 7b7292f85a3..ad823e098a4 100644 --- a/jstests/replsets/tenant_migration_retryable_write_retry_on_recipient.js +++ b/jstests/replsets/tenant_migration_retryable_write_retry_on_recipient.js @@ -264,8 +264,7 @@ testRecipientRetryableWrites(recipientDb, beforeWrites); testRecipientRetryableWrites(recipientDb, duringWrites); jsTestLog("Step up secondary"); const recipientRst = tenantMigrationTest.getRecipientRst(); -recipientRst.awaitReplication(); -assert.commandWorked(recipientRst.getSecondary().adminCommand({replSetStepUp: 1})); +recipientRst.stepUp(recipientRst.getSecondary()); jsTestLog("Run retryable write on secondary after the migration"); testRecipientRetryableWrites(recipientRst.getPrimary().getDB(kDbName), beforeWrites); testRecipientRetryableWrites(recipientRst.getPrimary().getDB(kDbName), duringWrites); diff --git a/jstests/replsets/tenant_migration_timeseries_retryable_write_retry_on_recipient.js b/jstests/replsets/tenant_migration_timeseries_retryable_write_retry_on_recipient.js index 5fc0eb8b9ee..0e7a4a3fcf8 100644 --- a/jstests/replsets/tenant_migration_timeseries_retryable_write_retry_on_recipient.js +++ b/jstests/replsets/tenant_migration_timeseries_retryable_write_retry_on_recipient.js @@ -127,8 +127,7 @@ function testRetryOnRecipient(ordered) { jsTestLog("Step up secondary"); const recipientRst = tenantMigrationTest.getRecipientRst(); - recipientRst.awaitReplication(); - assert.commandWorked(recipientRst.getSecondary().adminCommand({replSetStepUp: 1})); + recipientRst.stepUp(recipientRst.getSecondary()); jsTestLog("Run retryable write on secondary after the migration"); testRecipientRetryableWrites(recipientRst.getPrimary().getDB(kDbName), beforeWrites); testRecipientRetryableWrites(recipientRst.getPrimary().getDB(kDbName), duringWrites); |