summary refs log tree commit diff
diff options
context:
space:
mode:
author Andrew Shuvalov <andrew.shuvalov@mongodb.com> 2021-09-30 17:35:37 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-09-30 18:48:54 +0000
commit 05de8eed555cd5beb6c8b9380b28e72d1202dd20 (patch)
tree 55da6bd4a2ed88f2faab2c575857cb38007272b7
parent 88dc3b889025bdd39405f42eb1e53d759d061786 (diff)
download mongo-05de8eed555cd5beb6c8b9380b28e72d1202dd20.tar.gz
SERVER-60045 Fix tenant migrations test to support aborted migrations
-rw-r--r-- jstests/replsets/tenant_migration_concurrent_migrations_stress_test.js 68
1 files changed, 64 insertions, 4 deletions
diff --git a/jstests/replsets/tenant_migration_concurrent_migrations_stress_test.js b/jstests/replsets/tenant_migration_concurrent_migrations_stress_test.js
index 636b22a12a7..17334a5cda6 100644
--- a/jstests/replsets/tenant_migration_concurrent_migrations_stress_test.js
+++ b/jstests/replsets/tenant_migration_concurrent_migrations_stress_test.js
@@ -64,13 +64,60 @@ tenantIds.forEach((tenantId) => {
migrationOptsArray.push(migrationOpts);
});
+// Waits for the migration at index `id` of `migrationOptsArray` to complete (it is expected to be
+// aborted, so the wait should be short), forgets it, and starts a new migration under a new UUID.
+function retryAbortedMigration(id) {
+ let tenantId = migrationOptsArray[id].tenantId;
+ jsTestLog(
+ `Forgetting and restarting aborted migration
+ ${migrationOptsArray[id].migrationIdString} for tenant: ${tenantId}`);
+ let waitState = tenantMigrationTest.waitForMigrationToComplete(migrationOptsArray[id]);
+ assert.commandWorked(waitState);
+ if (waitState.state != TenantMigrationTest.DonorState.kAborted) {
+ // The currentOp() output can lag behind the real migration state, so this branch is
+ // reachable in practice. It is deliberately ignored: log the state and return.
+ // Note: this is not a bug. The test is fast enough to forget a migration, replace its id
+ // and restart it under the same name while currentOp() still reports the previous
+ // attempt with that name. Because the UUID is replaced below, the same migration is
+ // never restarted, so 'aborted' remains a terminal state. The currentOp() result is
+ // stale because forgetting a migration only marks it for garbage collection, which
+ // happens later.
+ jsTestLog(`Migration was supposed to be aborted, got: ${tojson(waitState)}`);
+ return;
+ }
+
+ assert.commandWorked(
+ tenantMigrationTest.forgetMigration(migrationOptsArray[id].migrationIdString));
+
+ // Drop the tenant "testDB" database on the recipient; an exception here is logged, not fatal.
+ const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
+ let db = recipientPrimary.getDB(dbName);
+ try {
+ db.dropDatabase();
+ } catch (err) {
+ jsTestLog(`Dropping recipient DB: ${tojson(err)}`);
+ }
+
+ // Assign a new migration UUID so the retry is a new migration, not a restart of the old one.
+ migrationOptsArray[id].migrationIdString = extractUUIDFromObject(UUID());
+ // The old migration must be garbage collected before the start succeeds, hence the retry loop.
+ assert.soon(function() {
+ let status = tenantMigrationTest.startMigration(migrationOptsArray[id]);
+ if (!status.ok) {
+ jsTestLog(`${tojson(status)}`);
+ }
+ return status.ok;
+ }, 'Failed to start', 60 * 1000, 5 * 1000);
+}
+
// Start the migrations.
let nextMigration = 0;
let runningMigrations = 0;
let setOfCompleteMigrations = new Set();
let didFirstLoopSleep = false;
const regexId = /testTenantId-([0-9]+)/;
-let loggedAbortedMigration = false; // Reduce spam by logging the aborted migration once.
+// Reduce spam by logging the aborted migration only once; this flag also stops the test from aborting more migrations once an aborted one has been seen.
+let seenAbortedMigration = false;
while (setOfCompleteMigrations.size < kMigrationsCount) {
while (runningMigrations < kConcurrentMigrationsCount && nextMigration < kMigrationsCount) {
@@ -112,9 +159,12 @@ while (setOfCompleteMigrations.size < kMigrationsCount) {
}
}
- if (op.lastDurableState === migrationStates.kAborted && !loggedAbortedMigration) {
- loggedAbortedMigration = true;
- jsTestLog(`Found an aborted migration in ${tojson(currentOp)}`);
+ if (op.lastDurableState === migrationStates.kAborted) {
+ if (!seenAbortedMigration) {
+ seenAbortedMigration = true;
+ jsTestLog(`Found an aborted migration in ${tojson(currentOp)}`);
+ }
+ retryAbortedMigration(id);
}
if (!(op.lastDurableState in migrationsByState)) {
@@ -126,6 +176,16 @@ while (setOfCompleteMigrations.size < kMigrationsCount) {
return true;
});
+ // Keep trying to abort a randomly chosen data-sync migration until `currentOp` reports an aborted one.
+ if (!seenAbortedMigration && migrationStates.kDataSync in migrationsByState &&
+ migrationsByState[migrationStates.kDataSync].size > 0) {
+ let items = Array.from(migrationsByState[migrationStates.kDataSync]);
+ let id = items[Math.floor(Math.random() * items.length)];
+ jsTestLog(`${id}`);
+ tenantMigrationTest.tryAbortMigration(
+ {migrationIdString: migrationOptsArray[id].migrationIdString});
+ }
+
jsTestLog("Currently running " + runningMigrations + ", complete count " +
setOfCompleteMigrations.size);