author     Wenbin Zhu <wenbin.zhu@mongodb.com>               2021-06-08 21:00:45 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2021-06-08 21:48:33 +0000
commit     18f91c4304086b0334d90c1d94a7d3c7225439bf (patch)
tree       b5d3f20e1b60791bb4be91008d3bc8e464053477 /jstests
parent     2c1191cc8ff9ead1c600a85f7a1adbac8e09b1e1 (diff)
download   mongo-18f91c4304086b0334d90c1d94a7d3c7225439bf.tar.gz
SERVER-56631 Make sure retryable write pre-fetch phase can see the config.transactions record when committed snapshot is not at batch boundary.
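For context, a minimal sketch of the kind of majority read the recipient's retryable write pre-fetch phase issues against the donor sync source (hypothetical illustration only; the host string, filter, and variable names below are placeholders, not code from this patch). If the sync source's majority committed snapshot sits in the middle of an applied oplog batch, the coalesced 'config.transactions' update for that batch is not yet visible at that snapshot, so a read like this can come back without the expected record:

// Hypothetical illustration; identifiers are placeholders, not code from this patch.
const donorSyncSourceHost = "donorSecondary.example.net:27017";
const donorConn = new Mongo(donorSyncSourceHost);
const res = assert.commandWorked(donorConn.getDB("config").runCommand({
    find: "transactions",
    filter: {lastWriteOpTime: {$exists: true}},
    readConcern: {level: "majority"},
}));
// If the committed snapshot is mid-batch, res.cursor.firstBatch can be missing the entry for a
// retryable write whose statements have all been applied on the sync source.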
Diffstat (limited to 'jstests')
-rw-r--r--  jstests/replsets/tenant_migration_recipient_fetches_retryable_writes_entry_after_committed_snapshot.js  244
-rw-r--r--  jstests/replsets/tenant_migration_retryable_write_retry.js  22
2 files changed, 255 insertions, 11 deletions
diff --git a/jstests/replsets/tenant_migration_recipient_fetches_retryable_writes_entry_after_committed_snapshot.js b/jstests/replsets/tenant_migration_recipient_fetches_retryable_writes_entry_after_committed_snapshot.js
new file mode 100644
index 00000000000..fdeba25b524
--- /dev/null
+++ b/jstests/replsets/tenant_migration_recipient_fetches_retryable_writes_entry_after_committed_snapshot.js
@@ -0,0 +1,244 @@
+/**
+ * Tests that the tenant migration recipient correctly reads 'config.transactions' entries from a
+ * donor secondary. During secondary oplog application, updates to the same 'config.transactions'
+ * entry are coalesced into a single update for the most recent retryable write statement. If the
+ * majority committed snapshot of a secondary exists in the middle of a completed batch, then a
+ * recipient's majority read on 'config.transactions' can miss committed retryable writes at that
+ * majority commit point.
+ *
+ * @tags: [
+ * requires_fcv_50,
+ * requires_majority_read_concern,
+ * incompatible_with_eft,
+ * incompatible_with_windows_tls,
+ * incompatible_with_macos, requires_persistence
+ * ]
+ */
+
+(function() {
+load("jstests/replsets/libs/tenant_migration_test.js");
+load("jstests/replsets/libs/tenant_migration_util.js");
+load("jstests/libs/fail_point_util.js"); // For configureFailPoint().
+load("jstests/libs/uuid_util.js"); // For extractUUIDFromObject().
+load("jstests/libs/write_concern_util.js");
+
+const getRecipientCurrOp = function(conn, migrationId) {
+ const res = conn.adminCommand({currentOp: true, desc: "tenant recipient migration"});
+ assert.eq(res.inprog.length, 1);
+ const currOp = res.inprog[0];
+ assert.eq(bsonWoCompare(currOp.instanceID, migrationId), 0);
+
+ return currOp;
+};
+
+const getDonorSyncSource = function(conn, migrationId) {
+ const currOp = getRecipientCurrOp(conn, migrationId);
+ return currOp.donorSyncSource;
+};
+
+const getStartFetchingDonorOpTime = function(conn, migrationId) {
+ const currOp = getRecipientCurrOp(conn, migrationId);
+ return currOp.startFetchingDonorOpTime;
+};
+
+const oplogApplierBatchSize = 50;
+
+const donorRst = new ReplSetTest({
+ nodes: {
+ n0: {},
+ // Set the 'syncdelay' to 1s to speed up checkpointing. Also explicitly set the batch
+ // size for oplog application to ensure the number of retryable write statements being
+ // made majority committed isn't a multiple of it.
+ n1: {syncdelay: 1, setParameter: {replBatchLimitOperations: oplogApplierBatchSize}},
+ // Set the bgSyncOplogFetcherBatchSize to 1 oplog entry to guarantee replication
+ // progress with the stopReplProducerOnDocument failpoint.
+ n2: {rsConfig: {priority: 0, hidden: true}, setParameter: {bgSyncOplogFetcherBatchSize: 1}},
+ n3: {rsConfig: {priority: 0, hidden: true}, setParameter: {bgSyncOplogFetcherBatchSize: 1}},
+ n4: {rsConfig: {priority: 0, hidden: true}, setParameter: {bgSyncOplogFetcherBatchSize: 1}},
+ },
+ // Force secondaries to sync from the primary to guarantee replication progress with the
+ // stopReplProducerOnDocument failpoint. Also disable primary catchup because some replicated
+ // retryable write statements are intentionally not being made majority committed.
+ settings: {chainingAllowed: false, catchUpTimeoutMillis: 0},
+ nodeOptions: Object.assign(TenantMigrationUtil.makeX509OptionsForTest().donor, {
+ setParameter: {
+ tenantMigrationExcludeDonorHostTimeoutMS: 30 * 1000,
+ // Allow non-timestamped reads on donor after migration completes for testing.
+ 'failpoint.tenantMigrationDonorAllowsNonTimestampedReads': tojson({mode: 'alwaysOn'}),
+ }
+ }),
+});
+donorRst.startSet();
+donorRst.initiateWithHighElectionTimeout();
+const donorPrimary = donorRst.getPrimary();
+
+if (!TenantMigrationUtil.isFeatureFlagEnabled(donorPrimary)) {
+ jsTestLog("Skipping test because the tenant migrations feature flag is disabled");
+ donorRst.stopSet();
+ return;
+}
+
+const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), donorRst: donorRst});
+
+const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
+const kTenantId = "testTenantId";
+const migrationId = UUID();
+const kDbName = tenantMigrationTest.tenantDB(kTenantId, "testDB");
+const kCollName = "retryable_write_secondary_oplog_application";
+const kNs = `${kDbName}.${kCollName}`;
+
+const migrationOpts = {
+ migrationIdString: extractUUIDFromObject(migrationId),
+ tenantId: kTenantId,
+ // The recipient needs to choose a donor secondary as its sync source.
+ readPreference: {mode: 'secondary'},
+};
+
+const fpAfterConnectingTenantMigrationRecipientInstance = configureFailPoint(
+ recipientPrimary, "fpAfterConnectingTenantMigrationRecipientInstance", {action: "hang"});
+
+const fpBeforeWaitingForRetryableWritePreFetchMajorityCommitted = configureFailPoint(
+ recipientPrimary, "fpBeforeWaitingForRetryableWritePreFetchMajorityCommitted");
+
+// Start tenant migration and hang after recipient connects to donor sync source.
+jsTestLog(`Starting tenant migration: ${tojson(migrationOpts)}`);
+assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
+fpAfterConnectingTenantMigrationRecipientInstance.wait();
+
+// The recipient should connect to secondary1 since the other secondaries are hidden.
+const [secondary1, secondary2, secondary3, secondary4] = donorRst.getSecondaries();
+const syncSourceSecondaryHost = getDonorSyncSource(recipientPrimary, migrationId);
+assert.eq(syncSourceSecondaryHost, secondary1.host);
+
+assert.commandWorked(
+ donorPrimary.getCollection(kNs).insert({_id: 0, counter: 0}, {writeConcern: {w: 5}}));
+
+// The default WC is majority and the donor replica set can't satisfy majority writes after we
+// stop replication on the secondaries.
+assert.commandWorked(donorPrimary.adminCommand(
+ {setDefaultRWConcern: 1, defaultWriteConcern: {w: 1}, writeConcern: {w: "majority"}}));
+donorRst.awaitReplication();
+
+// Disable replication on all of the secondaries to manually control the replication progress.
+const stopReplProducerFailpoints = [secondary1, secondary2, secondary3, secondary4].map(
+ conn => configureFailPoint(conn, 'stopReplProducer'));
+
+// While replication is still entirely disabled, additionally disable replication partway
+// into the retryable write on other secondaries. The idea is that while secondary1 will
+// apply all of the oplog entries in a single batch, other secondaries will only apply up
+// to counterMajorityCommitted oplog entries.
+const counterTotal = oplogApplierBatchSize;
+const counterMajorityCommitted = counterTotal - 2;
+jsTestLog(`counterTotal: ${counterTotal}, counterMajorityCommitted: ${counterMajorityCommitted}`);
+const stopReplProducerOnDocumentFailpoints = [secondary2, secondary3, secondary4].map(
+ conn => configureFailPoint(conn,
+ 'stopReplProducerOnDocument',
+ {document: {"diff.u.counter": counterMajorityCommitted + 1}}));
+
+// Perform all the retryable write statements on the donor primary.
+const lsid = ({id: UUID()});
+assert.commandWorked(donorPrimary.getCollection(kNs).runCommand("update", {
+ updates: Array.from({length: counterTotal}, () => ({q: {_id: 0}, u: {$inc: {counter: 1}}})),
+ lsid,
+ txnNumber: NumberLong(1),
+}));
+
+// Get the oplog entries for the last retryable write statement and the majority-committed one.
+const stmtTotal =
+ donorPrimary.getCollection("local.oplog.rs").findOne({"o.diff.u.counter": counterTotal});
+const stmtMajorityCommitted = donorPrimary.getCollection("local.oplog.rs").findOne({
+ "o.diff.u.counter": counterMajorityCommitted
+});
+
+assert.neq(null, stmtTotal);
+assert.neq(null, stmtMajorityCommitted);
+jsTestLog(`stmtTotal timestamp: ${tojson(stmtTotal.ts)}`);
+jsTestLog(`stmtMajorityCommitted timestamp: ${tojson(stmtMajorityCommitted.ts)}`);
+
+for (const fp of stopReplProducerFailpoints) {
+ fp.off();
+ // Wait for secondary1 to have applied through the counterTotal retryable write statement
+ // and for the other secondaries to have applied through the counterMajorityCommitted
+ // statement, so that secondary1 advances its stable timestamp once it learns that the other
+ // secondaries have also applied through counterMajorityCommitted.
+ assert.soon(() => {
+ const {optimes: {appliedOpTime, durableOpTime}} =
+ assert.commandWorked(fp.conn.adminCommand({replSetGetStatus: 1}));
+
+ print(`${fp.conn.host}: ${tojsononeline({
+ appliedOpTime,
+ durableOpTime,
+ stmtMajorityCommittedTimestamp: stmtMajorityCommitted.ts
+ })}`);
+
+ const stmtTarget = (fp.conn.host === secondary1.host) ? stmtTotal : stmtMajorityCommitted;
+
+ return bsonWoCompare(appliedOpTime.ts, stmtTarget.ts) >= 0 &&
+ bsonWoCompare(durableOpTime.ts, stmtTarget.ts) >= 0;
+ });
+}
+
+// Wait for secondary1 to have advanced its stable timestamp, and therefore updated the
+// committed snapshot.
+assert.soon(() => {
+ const {lastStableRecoveryTimestamp} =
+ assert.commandWorked(secondary1.adminCommand({replSetGetStatus: 1}));
+
+ print(`${secondary1.host}: ${tojsononeline({
+ lastStableRecoveryTimestamp,
+ stmtMajorityCommittedTimestamp: stmtMajorityCommitted.ts
+ })}`);
+
+ return bsonWoCompare(lastStableRecoveryTimestamp, stmtMajorityCommitted.ts) >= 0;
+});
+
+// Let the migration proceed and hang the recipient just before it waits for the retryable write
+// pre-fetch result to become majority committed.
+fpAfterConnectingTenantMigrationRecipientInstance.off();
+fpBeforeWaitingForRetryableWritePreFetchMajorityCommitted.wait();
+
+const startFetchingDonorOpTime = getStartFetchingDonorOpTime(recipientPrimary, migrationId);
+assert.eq(startFetchingDonorOpTime.ts, stmtMajorityCommitted.ts);
+
+// At this point, the recipient should have fetched retryable writes and put them into the
+// oplog buffer.
+const kOplogBufferNS = "config.repl.migration.oplog_" + migrationOpts.migrationIdString;
+const recipientOplogBuffer = recipientPrimary.getCollection(kOplogBufferNS);
+jsTestLog(`oplog buffer ns: ${kOplogBufferNS}`);
+
+// The number of entries fetched into the oplog buffer is counterMajorityCommitted - 1, since we
+// only fetch entries with timestamps strictly before startFetchingDonorOpTime (the commit point).
+const cursor = recipientOplogBuffer.find();
+const expectedCount = counterMajorityCommitted - 1;
+assert.eq(
+ cursor.itcount(), expectedCount, `Incorrect number of oplog entries: ${cursor.toArray()}`);
+
+// Resume replication on the lagging secondaries and wait for the migration to complete.
+for (const fp of stopReplProducerOnDocumentFailpoints) {
+ fp.off();
+}
+
+fpBeforeWaitingForRetryableWritePreFetchMajorityCommitted.off();
+
+TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
+assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));
+
+// After the migration, verify that re-running the same retryable write statements on the
+// recipient does not execute them again.
+let docAfterMigration = recipientPrimary.getCollection(kNs).findOne({_id: 0});
+assert.eq(docAfterMigration.counter, counterTotal);
+
+assert.commandWorked(recipientPrimary.getCollection(kNs).runCommand("update", {
+ updates: Array.from({length: counterTotal}, () => ({q: {_id: 0}, u: {$inc: {counter: 1}}})),
+ lsid,
+ txnNumber: NumberLong(1),
+}));
+
+// The second query should return the same result as the first one, since the recipient should
+// have fetched the already-executed retryable writes from the donor.
+docAfterMigration = recipientPrimary.getCollection(kNs).findOne({_id: 0});
+assert.eq(docAfterMigration.counter, counterTotal);
+
+donorRst.stopSet();
+tenantMigrationTest.stop();
+})();
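As an aside, one way to observe the window this test creates is to compare local and majority reads of the sync source's 'config.transactions' entry. This is a hypothetical debugging snippet, not part of the patch; it assumes it is pasted into the test above after the startFetchingDonorOpTime assertion, while the stopReplProducerOnDocument failpoints are still active, so that secondary1 and lsid are in scope:

// Hypothetical debugging aid, not part of this commit.
secondary1.setSecondaryOk();
const txns = secondary1.getCollection("config.transactions");
const localEntry = txns.find({"_id.id": lsid.id}).readConcern("local").toArray();
const majorityEntry = txns.find({"_id.id": lsid.id}).readConcern("majority").toArray();
// The local read typically shows the coalesced entry for the whole applied batch, while the
// majority read reflects only the committed snapshot and may not contain the entry yet.
jsTestLog(`local: ${tojson(localEntry)}, majority: ${tojson(majorityEntry)}`);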
diff --git a/jstests/replsets/tenant_migration_retryable_write_retry.js b/jstests/replsets/tenant_migration_retryable_write_retry.js
index 3ad8e8e78b7..52a392dc7ae 100644
--- a/jstests/replsets/tenant_migration_retryable_write_retry.js
+++ b/jstests/replsets/tenant_migration_retryable_write_retry.js
@@ -322,17 +322,6 @@ const aggRes = donorPrimary.getDB("config").runCommand({
as: "history",
depthField: "depthForTenantMigration"
}},
- // Now that we have the whole chain, filter out entries that occurred after
- // `startFetchingTimestamp`, since these entries will be fetched during the oplog fetching
- // phase.
- {$set: {
- history: {
- $filter: {
- input: "$history",
- cond: {$lt: ["$$this.ts", startFetchingTimestamp]}
- }
- }
- }},
// Sort the oplog entries in each oplog chain.
{$set: {
history: {$reverseArray: {$reduce: {
@@ -351,6 +340,17 @@ const aggRes = donorPrimary.getDB("config").runCommand({
]},
}}},
}},
+ // Now that we have the whole sorted chain, filter out entries that occurred after
+ // `startFetchingTimestamp`, since these entries will be fetched during the oplog fetching
+ // phase.
+ {$set: {
+ history: {
+ $filter: {
+ input: "$history",
+ cond: {$lt: ["$$this.ts", startFetchingTimestamp]}
+ }
+ }
+ }},
// Combine the oplog entries.
{$set: {history: {$concatArrays: ["$preImageOps", "$history", "$postImageOps"]}}},
// Fetch the complete oplog entries and unwind oplog entries in each chain to the top-level