diff options
author | Jack Mulrow <jack.mulrow@mongodb.com> | 2021-06-21 14:34:30 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-06-22 16:18:28 +0000 |
commit | 536090be65b70cc0f73ddbbffe5c501c12143a0d (patch) | |
tree | b2fc9340606d704b1f4c9a0ba107d81a78b46215 /jstests/replsets | |
parent | 3befdc7d70fa56085bbdc9606da0db84b5b48ccd (diff) | |
download | mongo-536090be65b70cc0f73ddbbffe5c501c12143a0d.tar.gz |
SERVER-57769 Allow tenant migration donors to advance cluster time with noops
Diffstat (limited to 'jstests/replsets')
-rw-r--r-- | jstests/replsets/tenant_migrations_noop_writes.js | 218 |
1 files changed, 218 insertions, 0 deletions
/**
 * Verifies that nodes can trigger noop writes to satisfy cluster time reads after a tenant
 * migration.
 *
 * @tags: [requires_fcv_47, requires_majority_read_concern, incompatible_with_windows_tls,
 * incompatible_with_eft, incompatible_with_macos, requires_persistence]
 */

(function() {
"use strict";

load("jstests/libs/fail_point_util.js");
load("jstests/libs/uuid_util.js");
load("jstests/libs/write_concern_util.js");
load("jstests/replsets/libs/tenant_migration_test.js");
load("jstests/libs/parallel_shell_helpers.js");

const kTenantIdPrefix = "testTenantId";
const kUnrelatedDbName = "unrelatedDB";
const collName = "foo";
const migrationX509Options = TenantMigrationUtil.makeX509OptionsForTest();

// Monotonic suffix so every scenario below gets its own tenant id.
let counter = 0;

/**
 * Returns a tenant id of the form "<kTenantIdPrefix>_<n>" that has not been handed out before in
 * this test run.
 */
function makeTenantId() {
    return kTenantIdPrefix + "_" + counter++;
}

/**
 * Builds the parameters for one migration scenario.
 *
 * @returns {Array} [tenantId, migrationId, migrationOpts, dbName], where migrationOpts carries the
 *     migration id as a string plus the tenant id, and dbName is "<tenantId>_db".
 */
function makeTestParams() {
    const tenantId = makeTenantId();
    const migrationId = UUID();
    const migrationOpts = {
        migrationIdString: extractUUIDFromObject(migrationId),
        tenantId: tenantId,
    };
    const dbName = tenantId + "_db";
    return [tenantId, migrationId, migrationOpts, dbName];
}

/**
 * Inserts 200 documents into dbName.collName on 'conn' with a single unordered bulk operation.
 * The test uses these writes to push the node's cluster time forward.
 */
function advanceClusterTime(conn, dbName, collName) {
    const bulk = conn.getDB(dbName)[collName].initializeUnorderedBulkOp();
    for (let i = 0; i < 200; i++) {
        bulk.insert({x: i});
    }
    assert.commandWorked(bulk.execute());
}

/**
 * Returns the block timestamp reported by the donor access blocker for 'tenantId' on 'conn'.
 * Asserts that a block timestamp is present.
 */
function getBlockTimestamp(conn, tenantId) {
    const mtabServerStatus =
        TenantMigrationUtil.getTenantMigrationAccessBlocker(conn, tenantId).donor;
    assert(mtabServerStatus.blockTimestamp, tojson(mtabServerStatus));
    return mtabServerStatus.blockTimestamp;
}

/**
 * Runs a find on dbName.collName with readConcern {afterClusterTime: operationTime}, gossiping
 * 'clusterTime' as $clusterTime.
 *
 * This function is shipped to a parallel shell via funWithArgs(), so it relies on that shell's
 * global 'db' and must not reference anything else from this file.
 *
 * @param expectedCode if truthy, the find must fail with this error code; otherwise the find must
 *     succeed.
 */
function runAfterClusterTimeRead(dbName, collName, operationTime, clusterTime, expectedCode) {
    db.getMongo().setSecondaryOk();
    const res = db.getSiblingDB(dbName).runCommand({
        find: collName,
        readConcern: {afterClusterTime: operationTime},
        $clusterTime: clusterTime
    });
    if (expectedCode) {
        assert.commandFailedWithCode(res, expectedCode);
    } else {
        assert.commandWorked(res);
    }
}

const donorRst = new ReplSetTest({
    nodes: 3,
    name: "donor",
    settings: {chainingAllowed: false},
    nodeOptions: Object.assign(migrationX509Options.donor, {
        setParameter: {
            // To allow after test hooks to run without errors.
            "failpoint.tenantMigrationDonorAllowsNonTimestampedReads": tojson({mode: "alwaysOn"}),
        }
    })
});
donorRst.startSet();
donorRst.initiate();

const recipientRst = new ReplSetTest({
    nodes: 3,
    name: "recipient",
    settings: {chainingAllowed: false},
    nodeOptions: migrationX509Options.recipient
});
recipientRst.startSet();
recipientRst.initiate();

const tmt = new TenantMigrationTest({name: jsTestName(), donorRst, recipientRst});

/**
 * Shuts down both replica sets and the migration fixture. Shared by the skip path and the normal
 * exit so that neither leaves the mongod processes started above running.
 */
function tearDown() {
    donorRst.stopSet();
    recipientRst.stopSet();
    tmt.stop();
}

if (!tmt.isFeatureFlagEnabled()) {
    jsTestLog("Skipping test because the tenant migrations feature flag is disabled");
    // Both replica sets are already up at this point, so they must be stopped before bailing out.
    tearDown();
    return;
}

const donorPrimary = tmt.getDonorPrimary();
const recipientPrimary = tmt.getRecipientPrimary();

{
    jsTestLog("Testing noops on the recipient");

    const [tenantId, , migrationOpts, tenantDbName] = makeTestParams();
    const laggedRecipientSecondary = tmt.getRecipientRst().getSecondary();
    const fp = configureFailPoint(donorPrimary, "pauseTenantMigrationBeforeLeavingBlockingState");

    //
    // Run a migration, pausing after selecting a block timestamp to advance cluster time beyond it
    // on the donor.
    //

    tmt.insertDonorDB(tenantDbName, collName);
    assert.commandWorked(tmt.startMigration(migrationOpts));

    fp.wait();

    // Disable replication on a recipient secondary so it cannot advance its last applied opTime
    // beyond the latest time on the donor from unrelated writes. The block timestamp will have
    // already been replicated by this point.
    stopServerReplication(laggedRecipientSecondary);

    advanceClusterTime(donorPrimary, kUnrelatedDbName, collName);

    const donorRes =
        assert.commandWorked(donorPrimary.getDB(tenantDbName).runCommand({find: collName}));
    assert(donorRes.operationTime, tojson(donorRes));
    assert.eq(timestampCmp(donorRes.operationTime, getBlockTimestamp(donorPrimary, tenantId)),
              1,
              tojson(donorRes));

    fp.off();
    TenantMigrationTest.assertCommitted(tmt.waitForMigrationToComplete(migrationOpts));

    //
    // Verify reading on the recipient with an afterClusterTime > the block timestamp
    // triggers a noop write on the recipient primary. Unrelated writes on the primary may
    // prevent the noop from taking effect, so we can't check the oplog. appendOplogNote isn't
    // profiled so we use a fail point to detect it.
    //

    const hangInNoopFp = configureFailPoint(recipientPrimary, "hangInAppendOplogNote");
    const awaitReadOnRecipient = startParallelShell(funWithArgs(runAfterClusterTimeRead,
                                                                tenantDbName,
                                                                collName,
                                                                donorRes.operationTime,
                                                                donorRes.$clusterTime),
                                                    laggedRecipientSecondary.port);

    hangInNoopFp.wait();
    hangInNoopFp.off();

    restartServerReplication(laggedRecipientSecondary);
    awaitReadOnRecipient();
}

{
    jsTestLog("Testing noops on the donor");

    const [tenantId, , migrationOpts, tenantDbName] = makeTestParams();
    const laggedDonorSecondary = tmt.getDonorRst().getSecondary();
    const fp = configureFailPoint(donorPrimary, "pauseTenantMigrationBeforeLeavingBlockingState");

    //
    // Commit a normal migration, but disable replication on a donor secondary before the commit so
    // it will not learn the outcome.
    //

    tmt.insertDonorDB(tenantDbName, collName);
    assert.commandWorked(tmt.startMigration(migrationOpts));

    fp.wait();

    stopServerReplication(laggedDonorSecondary);

    fp.off();
    TenantMigrationTest.assertCommitted(tmt.waitForMigrationToComplete(migrationOpts));

    //
    // Advance cluster time on the recipient beyond the block timestamp.
    //

    advanceClusterTime(recipientPrimary, kUnrelatedDbName, collName);

    const recipientRes =
        assert.commandWorked(recipientPrimary.getDB(tenantDbName).runCommand({find: collName}));
    assert(recipientRes.operationTime, tojson(recipientRes));
    assert.eq(timestampCmp(recipientRes.operationTime, getBlockTimestamp(donorPrimary, tenantId)),
              1,
              tojson(recipientRes));

    //
    // Verify reading from a lagged donor secondary with an afterClusterTime > the block timestamp
    // triggers a noop write on the donor primary. Even though reads later than the block timestamp
    // are rejected and the donor is guaranteed to eventually replicate the migration decision,
    // waiting for read concern is not interrupted upon learning the decision, so the noop is
    // necessary to unblock tenant operations waiting for a cluster time > the block timestamp.
    //

    const hangInNoopFp = configureFailPoint(donorPrimary, "hangInAppendOplogNote");
    const awaitReadOnDonor = startParallelShell(funWithArgs(runAfterClusterTimeRead,
                                                            tenantDbName,
                                                            collName,
                                                            recipientRes.operationTime,
                                                            recipientRes.$clusterTime,
                                                            ErrorCodes.TenantMigrationCommitted),
                                                laggedDonorSecondary.port);

    hangInNoopFp.wait();
    hangInNoopFp.off();

    restartServerReplication(laggedDonorSecondary);
    awaitReadOnDonor();
}

tearDown();
})();