summaryrefslogtreecommitdiff
path: root/jstests/replsets
diff options
context:
space:
mode:
authorJack Mulrow <jack.mulrow@mongodb.com>2021-06-21 14:34:30 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-06-22 16:18:28 +0000
commit536090be65b70cc0f73ddbbffe5c501c12143a0d (patch)
treeb2fc9340606d704b1f4c9a0ba107d81a78b46215 /jstests/replsets
parent3befdc7d70fa56085bbdc9606da0db84b5b48ccd (diff)
downloadmongo-536090be65b70cc0f73ddbbffe5c501c12143a0d.tar.gz
SERVER-57769 Allow tenant migration donors to advance cluster time with noops
Diffstat (limited to 'jstests/replsets')
-rw-r--r--jstests/replsets/tenant_migrations_noop_writes.js218
1 files changed, 218 insertions, 0 deletions
diff --git a/jstests/replsets/tenant_migrations_noop_writes.js b/jstests/replsets/tenant_migrations_noop_writes.js
new file mode 100644
index 00000000000..7746ca12cd7
--- /dev/null
+++ b/jstests/replsets/tenant_migrations_noop_writes.js
@@ -0,0 +1,218 @@
+/**
+ * Verifies that nodes can trigger noop writes to satisfy cluster time reads after a tenant
+ * migration.
+ *
+ * @tags: [requires_fcv_47, requires_majority_read_concern, incompatible_with_windows_tls,
+ * incompatible_with_eft, incompatible_with_macos, requires_persistence]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/libs/write_concern_util.js");
+load("jstests/replsets/libs/tenant_migration_test.js");
+load('jstests/libs/parallel_shell_helpers.js');
+
+const kTenantIdPrefix = "testTenantId";  // Prefix of every tenant id built by makeTenantId().
+const kUnrelatedDbName = "unrelatedDB";  // Database written to only to advance cluster time.
+const collName = "foo";  // Collection name used for every insert and find in this test.
+const migrationX509Options = TenantMigrationUtil.makeX509OptionsForTest();  // .donor/.recipient
+
+// Running suffix so that every scenario in this file gets its own tenant id.
+let counter = 0;
+
+/**
+ * Returns a new tenant id of the form "<kTenantIdPrefix>_<n>". The suffix n starts at 0 and grows
+ * by one on every call.
+ */
+const makeTenantId = () => {
+    const suffix = counter;
+    counter += 1;
+    return `${kTenantIdPrefix}_${suffix}`;
+};
+
+/**
+ * Builds the parameters for one migration scenario.
+ *
+ * Returns the array [tenantId, migrationId, migrationOpts, dbName], where migrationOpts holds the
+ * migration id in string form together with the tenant id, and dbName is "<tenantId>_db".
+ */
+function makeTestParams() {
+    const tenantId = makeTenantId();
+    const migrationId = UUID();
+    const migrationIdString = extractUUIDFromObject(migrationId);
+
+    return [tenantId, migrationId, {migrationIdString, tenantId}, `${tenantId}_db`];
+}
+
+/**
+ * Inserts the 200 documents {x: 0} .. {x: 199} into dbName.collName through 'conn' using a single
+ * unordered bulk operation and asserts that the bulk write succeeded. The scenarios below call
+ * this against an unrelated database to push the node's cluster time forward.
+ */
+function advanceClusterTime(conn, dbName, collName) {
+    const kNumDocs = 200;
+    const bulkOp = conn.getDB(dbName)[collName].initializeUnorderedBulkOp();
+    for (const x of Array(kNumDocs).keys()) {
+        bulkOp.insert({x});
+    }
+    assert.commandWorked(bulkOp.execute());
+}
+
+/**
+ * Returns the blockTimestamp found in the 'donor' section of the tenant migration access blocker
+ * reported for 'tenantId' on 'conn'. Asserts (printing that section) if no block timestamp is set.
+ */
+function getBlockTimestamp(conn, tenantId) {
+    const {donor: donorMtab} =
+        TenantMigrationUtil.getTenantMigrationAccessBlocker(conn, tenantId);
+    const {blockTimestamp} = donorMtab;
+    assert(blockTimestamp, tojson(donorMtab));
+    return blockTimestamp;
+}
+
+/**
+ * Runs a find on dbName.collName over the shell's global 'db' connection with read concern
+ * {afterClusterTime: operationTime}, attaching 'clusterTime' as $clusterTime. Secondary reads are
+ * enabled on the connection first. When 'expectedCode' is truthy the command must fail with that
+ * code; otherwise it must succeed.
+ *
+ * This function is handed to startParallelShell through funWithArgs, so it relies only on its
+ * arguments and on shell globals.
+ */
+function runAfterClusterTimeRead(dbName, collName, operationTime, clusterTime, expectedCode) {
+    db.getMongo().setSecondaryOk();
+
+    const findCmd = {
+        find: collName,
+        readConcern: {afterClusterTime: operationTime},
+        $clusterTime: clusterTime,
+    };
+    const res = db.getSiblingDB(dbName).runCommand(findCmd);
+
+    if (!expectedCode) {
+        assert.commandWorked(res);
+        return;
+    }
+    assert.commandFailedWithCode(res, expectedCode);
+}
+
+// Donor replica set: three nodes, chaining disabled, started with the donor X509 options plus one
+// fail point server parameter.
+const donorNodeOptions = Object.assign(migrationX509Options.donor, {
+    setParameter: {
+        // To allow after test hooks to run without errors.
+        "failpoint.tenantMigrationDonorAllowsNonTimestampedReads": tojson({mode: "alwaysOn"}),
+    },
+});
+const donorRst = new ReplSetTest({
+    nodes: 3,
+    name: "donor",
+    settings: {chainingAllowed: false},
+    nodeOptions: donorNodeOptions,
+});
+donorRst.startSet();
+donorRst.initiate();
+
+// Recipient replica set: three nodes, chaining disabled, started with the recipient X509 options.
+const recipientRstOptions = {
+    nodes: 3,
+    name: "recipient",
+    settings: {chainingAllowed: false},
+    nodeOptions: migrationX509Options.recipient,
+};
+const recipientRst = new ReplSetTest(recipientRstOptions);
+recipientRst.startSet();
+recipientRst.initiate();
+
+// Hand the two replica sets started above to the tenant migration test fixture.
+const tmt = new TenantMigrationTest({name: jsTestName(), donorRst, recipientRst});
+if (!tmt.isFeatureFlagEnabled()) {
+    jsTestLog("Skipping test because the tenant migrations feature flag is disabled");
+    // Both replica sets are already running at this point. Tear them down the same way the end
+    // of the test does, so a skipped run does not leave mongod processes behind.
+    donorRst.stopSet();
+    recipientRst.stopSet();
+    tmt.stop();
+    return;
+}
+
+// Primaries of each set; the fail points and direct reads/writes below are issued against them.
+const donorPrimary = tmt.getDonorPrimary();
+const recipientPrimary = tmt.getRecipientPrimary();
+
+{  // Scenario 1: an afterClusterTime read is sent to a lagged recipient secondary.
+ jsTestLog("Testing noops on the recipient");
+ // Note: migrationId is destructured below but not referenced again in this scenario.
+ const [tenantId, migrationId, migrationOpts, tenantDbName] = makeTestParams();
+ const laggedRecipientSecondary = tmt.getRecipientRst().getSecondary();
+ const fp = configureFailPoint(donorPrimary, "pauseTenantMigrationBeforeLeavingBlockingState");
+
+ // Step 1 (donor side):
+ // Run a migration, pausing after selecting a block timestamp to advance cluster time beyond it
+ // on the donor.
+ //
+
+ tmt.insertDonorDB(tenantDbName, collName);
+ assert.commandWorked(tmt.startMigration(migrationOpts));
+
+ fp.wait();  // Wait for the donor primary to reach the fail point configured above.
+
+ // Disable replication on a recipient secondary so it cannot advance its last applied opTime
+ // beyond the latest time on the donor from unrelated writes. The block timestamp will have
+ // already been replicated by this point.
+ stopServerReplication(laggedRecipientSecondary);
+
+ advanceClusterTime(donorPrimary, kUnrelatedDbName, collName);
+
+ const donorRes =
+ assert.commandWorked(donorPrimary.getDB(tenantDbName).runCommand({find: collName}));
+ assert(donorRes.operationTime, tojson(donorRes));
+ assert.eq(timestampCmp(donorRes.operationTime, getBlockTimestamp(donorPrimary, tenantId)),
+ 1,  // Expect the donor's operationTime to be later than the block timestamp.
+ tojson(donorRes));
+
+ fp.off();  // Release the paused migration, then require that it commits.
+ TenantMigrationTest.assertCommitted(tmt.waitForMigrationToComplete(migrationOpts));
+
+ // Step 2 (recipient side):
+ // Verify reading on the recipient with an afterClusterTime > the block timestamp
+ // triggers a noop write on the recipient primary. Unrelated writes on the primary may
+ // prevent the noop from taking effect, so we can't check the oplog. appendOplogNote isn't
+ // profiled so we use a fail point to detect it.
+ //
+
+ const hangInNoopFp = configureFailPoint(recipientPrimary, "hangInAppendOplogNote");
+ const awaitReadOnRecipient = startParallelShell(funWithArgs(runAfterClusterTimeRead,
+ tenantDbName,
+ collName,
+ donorRes.operationTime,
+ donorRes.$clusterTime),  // No expectedCode argument: the read must succeed.
+ laggedRecipientSecondary.port);  // The read is sent to the lagged secondary.
+
+ hangInNoopFp.wait();  // Reaching this fail point is the evidence that a noop was attempted.
+ hangInNoopFp.off();
+
+ restartServerReplication(laggedRecipientSecondary);  // Undo stopServerReplication() above.
+ awaitReadOnRecipient();  // Join the parallel shell that ran the read.
+}
+
+{  // Scenario 2: an afterClusterTime read is sent to a lagged donor secondary.
+ jsTestLog("Testing noops on the donor");
+ // Note: migrationId is destructured below but not referenced again in this scenario.
+ const [tenantId, migrationId, migrationOpts, tenantDbName] = makeTestParams();
+ const laggedDonorSecondary = tmt.getDonorRst().getSecondary();
+ const fp = configureFailPoint(donorPrimary, "pauseTenantMigrationBeforeLeavingBlockingState");
+
+ // Step 1:
+ // Commit a normal migration, but disable replication on a donor secondary before the commit so
+ // it will not learn the outcome.
+ //
+
+ tmt.insertDonorDB(tenantDbName, collName);
+ assert.commandWorked(tmt.startMigration(migrationOpts));
+
+ fp.wait();  // Wait for the donor primary to reach the fail point configured above.
+
+ stopServerReplication(laggedDonorSecondary);  // Done while the migration is still paused.
+
+ fp.off();  // Release the paused migration, then require that it commits.
+ TenantMigrationTest.assertCommitted(tmt.waitForMigrationToComplete(migrationOpts));
+
+ // Step 2:
+ // Advance cluster time on the recipient beyond the block timestamp.
+ //
+
+ advanceClusterTime(recipientPrimary, kUnrelatedDbName, collName);
+
+ const recipientRes =
+ assert.commandWorked(recipientPrimary.getDB(tenantDbName).runCommand({find: collName}));
+ assert(recipientRes.operationTime, tojson(recipientRes));
+ assert.eq(timestampCmp(recipientRes.operationTime, getBlockTimestamp(donorPrimary, tenantId)),
+ 1,  // Expect the recipient's operationTime to be later than the donor's block timestamp.
+ tojson(recipientRes));
+
+ // Step 3:
+ // Verify reading from a lagged donor secondary with an afterClusterTime > the block timestamp
+ // triggers a noop write on the donor primary. Even though reads later than the block timestamp
+ // are rejected and the donor is guaranteed to eventually replicate the migration decision,
+ // waiting for read concern is not interrupted upon learning the decision, so the noop is
+ // necessary to unblock tenant operations waiting for a cluster time > the block timestamp.
+ //
+
+ const hangInNoopFp = configureFailPoint(donorPrimary, "hangInAppendOplogNote");
+ const awaitReadOnDonor = startParallelShell(funWithArgs(runAfterClusterTimeRead,
+ tenantDbName,
+ collName,
+ recipientRes.operationTime,
+ recipientRes.$clusterTime,
+ ErrorCodes.TenantMigrationCommitted),  // expectedCode: the read must fail with this code.
+ laggedDonorSecondary.port);  // The read is sent to the lagged donor secondary.
+
+ hangInNoopFp.wait();  // Reaching this fail point is the evidence that a noop was attempted.
+ hangInNoopFp.off();
+
+ restartServerReplication(laggedDonorSecondary);  // Undo stopServerReplication() above.
+ awaitReadOnDonor();  // Join the parallel shell that ran the read.
+}
+
+donorRst.stopSet();  // Shut down the donor replica set started near the top of the test.
+recipientRst.stopSet();  // Shut down the recipient replica set.
+tmt.stop();  // NOTE(review): runs after both sets are already stopped; confirm stop() tolerates it.
+})();