diff options
author | Andrew Shuvalov <andrew.shuvalov@mongodb.com> | 2021-09-30 17:35:37 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-09-30 18:48:54 +0000 |
commit | 05de8eed555cd5beb6c8b9380b28e72d1202dd20 (patch) | |
tree | 55da6bd4a2ed88f2faab2c575857cb38007272b7 | |
parent | 88dc3b889025bdd39405f42eb1e53d759d061786 (diff) | |
download | mongo-05de8eed555cd5beb6c8b9380b28e72d1202dd20.tar.gz |
SERVER-60045 Fix tenant migrations test to support aborted migrations
-rw-r--r-- | jstests/replsets/tenant_migration_concurrent_migrations_stress_test.js | 68 |
1 files changed, 64 insertions, 4 deletions
diff --git a/jstests/replsets/tenant_migration_concurrent_migrations_stress_test.js b/jstests/replsets/tenant_migration_concurrent_migrations_stress_test.js
index 636b22a12a7..17334a5cda6 100644
--- a/jstests/replsets/tenant_migration_concurrent_migrations_stress_test.js
+++ b/jstests/replsets/tenant_migration_concurrent_migrations_stress_test.js
@@ -64,13 +64,60 @@ tenantIds.forEach((tenantId) => {
     migrationOptsArray.push(migrationOpts);
 });

+// Blocks until the migration with index `id` completes (it is supposed to be aborted so
+// the wait should be short) and creates another migration.
+function retryAbortedMigration(id) {
+    let tenantId = migrationOptsArray[id].tenantId;
+    jsTestLog(
+        `Forgetting and restarting aborted migration
+        ${migrationOptsArray[id].migrationIdString} for tenant: ${tenantId}`);
+    let waitState = tenantMigrationTest.waitForMigrationToComplete(migrationOptsArray[id]);
+    assert.commandWorked(waitState);
+    if (waitState.state != TenantMigrationTest.DonorState.kAborted) {
+        // The `currentOp()` seems to be lagging so this condition actually happens.
+        // We simply ignore this condition.
+        // Note: this is not a bug, the code is fast enough to forget, replace Id
+        // and restart the migration with the same name to get a stale currentOp() result
+        // from previous attempt with same name. As we replace UUID() below it is guaranteed
+        // that we do not restart the same migration and 'aborted' state is terminal. The
+        // currentOp() is stale because forgetting the migration only marks it for garbage
+        // collection, which happens later.
+        jsTestLog(`Migration was supposed to be aborted, got: ${tojson(waitState)}`);
+        return;
+    }
+
+    assert.commandWorked(
+        tenantMigrationTest.forgetMigration(migrationOptsArray[id].migrationIdString));
+
+    // Drop recipient DB.
+    const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
+    let db = recipientPrimary.getDB(dbName);
+    try {
+        db.dropDatabase();
+    } catch (err) {
+        jsTestLog(`Dropping recipient DB: ${tojson(err)}`);
+    }
+
+    // Replace migration UUID.
+    migrationOptsArray[id].migrationIdString = extractUUIDFromObject(UUID());
+    // Old migration needs to be garbage collected before this works.
+    assert.soon(function() {
+        let status = tenantMigrationTest.startMigration(migrationOptsArray[id]);
+        if (!status.ok) {
+            jsTestLog(`${tojson(status)}`);
+        }
+        return status.ok;
+    }, 'Failed to start', 60 * 1000, 5 * 1000);
+}
+
 // Start the migrations.
 let nextMigration = 0;
 let runningMigrations = 0;
 let setOfCompleteMigrations = new Set();
 let didFirstLoopSleep = false;
 const regexId = /testTenantId-([0-9]+)/;
-let loggedAbortedMigration = false;  // Reduce spam by logging the aborted migration once.
+// Reduce spam by logging the aborted migration once, also use this flag to abort one migration.
+let seenAbortedMigration = false;

 while (setOfCompleteMigrations.size < kMigrationsCount) {
     while (runningMigrations < kConcurrentMigrationsCount && nextMigration < kMigrationsCount) {
@@ -112,9 +159,12 @@ while (setOfCompleteMigrations.size < kMigrationsCount) {
             }
         }

-        if (op.lastDurableState === migrationStates.kAborted && !loggedAbortedMigration) {
-            loggedAbortedMigration = true;
-            jsTestLog(`Found an aborted migration in ${tojson(currentOp)}`);
+        if (op.lastDurableState === migrationStates.kAborted) {
+            if (!seenAbortedMigration) {
+                seenAbortedMigration = true;
+                jsTestLog(`Found an aborted migration in ${tojson(currentOp)}`);
+            }
+            retryAbortedMigration(id);
         }

         if (!(op.lastDurableState in migrationsByState)) {
@@ -126,6 +176,16 @@ while (setOfCompleteMigrations.size < kMigrationsCount) {
         return true;
     });

+    // Abort a random migration until observed by the `currentOp`.
+    if (!seenAbortedMigration && migrationStates.kDataSync in migrationsByState &&
+        migrationsByState[migrationStates.kDataSync].size > 0) {
+        let items = Array.from(migrationsByState[migrationStates.kDataSync]);
+        let id = items[Math.floor(Math.random() * items.length)];
+        jsTestLog(`${id}`);
+        tenantMigrationTest.tryAbortMigration(
+            {migrationIdString: migrationOptsArray[id].migrationIdString});
+    }
+
     jsTestLog("Currently running " + runningMigrations + ", complete count " +
               setOfCompleteMigrations.size);