diff options
author | Wenbin Zhu <wenbin.zhu@mongodb.com> | 2021-04-02 20:32:34 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-04-02 22:33:13 +0000 |
commit | 424758195f46ed52b41fa9c9f8909e4ef1ca9525 (patch) | |
tree | ed533a6da57910d3d2563d4a50562efbd7b19eda /jstests | |
parent | 5ec706707e6e0bd3c0b4860183f7df268ff83680 (diff) | |
download | mongo-424758195f46ed52b41fa9c9f8909e4ef1ca9525.tar.gz |
SERVER-53872 Support time-series collections in tenant migration failover test.
Diffstat (limited to 'jstests')
-rw-r--r-- | jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js | 227 |
1 files changed, 133 insertions, 94 deletions
/**
 * Tests that in tenant migration, the recipient set can resume collection cloning from the last
 * document cloned after a failover.
 * @tags: [
 *   requires_majority_read_concern,
 *   requires_fcv_49,
 *   incompatible_with_windows_tls,
 *   incompatible_with_eft
 * ]
 */
(function() {
/**
 * Runs one iteration of the resume-after-failover test.
 *
 * @param {boolean} isTimeSeries - whether the donor collection is a time-series collection; when
 *     true, cloning progress on the recipient is observed through the backing
 *     "system.buckets.<collName>" collection.
 * @param {function} createCollFn - (db, collName) callback that creates the test collection on
 *     the donor database.
 * @param {Object[]} docs - documents to insert on the donor before starting the migration.
 */
const tenantMigrationFailoverTest = function(isTimeSeries, createCollFn, docs) {
    "use strict";

    load("jstests/core/timeseries/libs/timeseries.js");
    load("jstests/libs/fail_point_util.js");
    load("jstests/libs/uuid_util.js");  // for 'extractUUIDFromObject'
    load("jstests/replsets/libs/tenant_migration_test.js");
    load("jstests/replsets/libs/tenant_migration_util.js");

    const batchSize = 2;
    const recipientRst = new ReplSetTest({
        nodes: 2,
        name: jsTestName() + "_recipient",
        nodeOptions: Object.assign(TenantMigrationUtil.makeX509OptionsForTest().recipient, {
            setParameter: {
                // Use a batch size of 2 so that collection cloner requires more than a single
                // batch to complete.
                collectionClonerBatchSize: batchSize,
                // Allow reads on recipient before migration completes for testing.
                'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}),
            }
        })
    });

    recipientRst.startSet();
    recipientRst.initiate();

    const tenantMigrationTest =
        new TenantMigrationTest({name: jsTestName(), recipientRst: recipientRst});
    const donorPrimary = tenantMigrationTest.getDonorPrimary();

    if (!TenantMigrationUtil.isFeatureFlagEnabled(donorPrimary)) {
        jsTestLog("Skipping test because the tenant migrations feature flag is disabled");
        tenantMigrationTest.stop();
        recipientRst.stopSet();
        return;
    }

    if (isTimeSeries && !TimeseriesTest.timeseriesCollectionsEnabled(donorPrimary)) {
        jsTestLog("Skipping test because the time-series collection feature flag is disabled");
        tenantMigrationTest.stop();
        recipientRst.stopSet();
        return;
    }

    const tenantId = "testTenantId";
    const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
    const donorDB = donorPrimary.getDB(dbName);
    const collName = "testColl";

    const recipientPrimary = tenantMigrationTest.getRecipientPrimary();

    // Create collection and insert documents.
    assert.commandWorked(createCollFn(donorDB, collName));
    tenantMigrationTest.insertDonorDB(dbName, collName, docs);

    const migrationId = UUID();
    const migrationIdString = extractUUIDFromObject(migrationId);
    const migrationOpts = {
        migrationIdString: migrationIdString,
        recipientConnString: tenantMigrationTest.getRecipientConnString(),
        tenantId: tenantId,
    };

    // Configure a fail point to have the recipient primary hang after cloning 2 documents.
    const recipientDb = recipientPrimary.getDB(dbName);
    let recipientColl = isTimeSeries ? recipientDb.getCollection("system.buckets." + collName)
                                     : recipientDb.getCollection(collName);

    const hangDuringCollectionClone =
        configureFailPoint(recipientDb,
                           "tenantMigrationHangCollectionClonerAfterHandlingBatchResponse",
                           {nss: recipientColl.getFullName()});

    // Start a migration and wait for recipient to hang after cloning 2 documents.
    assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
    hangDuringCollectionClone.wait();
    assert.soon(() => recipientColl.find().itcount() === 2);

    // Insert some documents that will be fetched by the recipient. This is to test that on
    // failover, the fetcher will resume fetching from where it left off. The system is expected
    // to crash if the recipient fetches a duplicate oplog entry upon resuming the migration.
    tenantMigrationTest.insertDonorDB(dbName, "aNewColl", [{_id: "docToBeFetched"}]);
    assert.soon(() => {
        const configDb = recipientPrimary.getDB("config");
        const oplogBuffer = configDb.getCollection("repl.migration.oplog_" + migrationIdString);
        return oplogBuffer.find({"entry.o._id": "docToBeFetched"}).count() === 1;
    });

    // Step up a new node in the recipient set and trigger a failover. The new primary should resume
    // cloning starting from the third document.
    const newRecipientPrimary = recipientRst.getSecondaries()[0];
    recipientRst.awaitLastOpCommitted();
    assert.commandWorked(newRecipientPrimary.adminCommand({replSetStepUp: 1}));
    hangDuringCollectionClone.off();
    recipientRst.getPrimary();

    // The migration should go through after recipient failover.
    const stateRes =
        assert.commandWorked(tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
    assert.eq(stateRes.state, TenantMigrationTest.DonorState.kCommitted);

    // Check that recipient has cloned all documents in the collection.
    recipientColl = newRecipientPrimary.getDB(dbName).getCollection(collName);
    assert.eq(docs.length, recipientColl.find().itcount());
    assert.docEq(recipientColl.find().sort({_id: 1}).toArray(), docs);
    TenantMigrationUtil.checkTenantDBHashes(
        tenantMigrationTest.getDonorRst(), tenantMigrationTest.getRecipientRst(), tenantId);

    tenantMigrationTest.stop();
    recipientRst.stopSet();
};

jsTestLog("Running tenant migration test for time-series collection");
tenantMigrationFailoverTest(true,
                            (db, collName) => db.createCollection(
                                collName, {timeseries: {timeField: "time", metaField: "bucket"}}),
                            [
                                // Group each document in its own bucket in order to work with the
                                // collectionClonerBatchSize we set at the recipient replSet.
                                {_id: 1, time: ISODate(), bucket: "a"},
                                {_id: 2, time: ISODate(), bucket: "b"},
                                {_id: 3, time: ISODate(), bucket: "c"},
                                {_id: 4, time: ISODate(), bucket: "d"}
                            ]);

jsTestLog("Running tenant migration test for regular collection");
tenantMigrationFailoverTest(false,
                            (db, collName) => db.createCollection(collName),
                            [{_id: 0}, {_id: "string"}, {_id: UUID()}, {_id: new Date()}]);
})();