summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLingzhi Deng <lingzhi.deng@mongodb.com>2021-04-28 01:45:33 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-04-28 16:40:23 +0000
commit7bc3b87610f5abc2ee3e9900c955d1d19b3b46fd (patch)
treebc9b3ea66f29348e3db0db7f46500f0f739d212b
parent522e0db207fe2a0d3b2bc75437b6af5b70b8827c (diff)
downloadmongo-7bc3b87610f5abc2ee3e9900c955d1d19b3b46fd.tar.gz
SERVER-55353: Ignore pre/post image when tenant oplog batcher can't find one
(cherry picked from commit 0464f0b7bcb2eee8dafbf108447013b4d586a16a)
-rw-r--r--buildscripts/resmokelib/testing/hooks/tenant_migration.py10
-rw-r--r--jstests/replsets/tenant_migration_find_and_modify_retry.js103
-rw-r--r--src/mongo/db/repl/tenant_migration_recipient_service.cpp7
-rw-r--r--src/mongo/db/repl/tenant_oplog_applier.cpp24
-rw-r--r--src/mongo/db/repl/tenant_oplog_batcher.cpp18
5 files changed, 133 insertions, 29 deletions
diff --git a/buildscripts/resmokelib/testing/hooks/tenant_migration.py b/buildscripts/resmokelib/testing/hooks/tenant_migration.py
index f175c468934..b9f6189f9b1 100644
--- a/buildscripts/resmokelib/testing/hooks/tenant_migration.py
+++ b/buildscripts/resmokelib/testing/hooks/tenant_migration.py
@@ -340,20 +340,12 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst
return abort_reason["code"] == self.FAIL_TO_PARSE_ERR_CODE and re.search(
err_msg_regex, abort_reason["errmsg"])
- def _is_no_such_key_in_oplog_buffer_err(self, abort_reason):
- # TODO (SERVER-55353): Tenant migration recipient can fail to find pre-image or post-image
- # for oplog entry in its tenant migration oplog buffer.
- err_msg_regex = r"No document found with _id.*in namespace config.repl.migration.oplog"
- return abort_reason["code"] == self.NO_SUCH_KEY_ERR_CODE and re.search(
- err_msg_regex, abort_reason["errmsg"])
-
def _is_blacklisted_abort_reason(self, abort_reason):
is_recipient_err = abort_reason["errmsg"].startswith(
"Tenant migration recipient command failed")
if not is_recipient_err:
return False
- return self._is_missing_uuid_in_collection_info_err(
- abort_reason) or self._is_no_such_key_in_oplog_buffer_err(abort_reason)
+ return self._is_missing_uuid_in_collection_info_err(abort_reason)
def _create_migration_opts(self, donor_rs_index, recipient_rs_index):
donor_rs = self._tenant_migration_fixture.get_replset(donor_rs_index)
diff --git a/jstests/replsets/tenant_migration_find_and_modify_retry.js b/jstests/replsets/tenant_migration_find_and_modify_retry.js
new file mode 100644
index 00000000000..1a77e92fc2c
--- /dev/null
+++ b/jstests/replsets/tenant_migration_find_and_modify_retry.js
@@ -0,0 +1,103 @@
+/**
+ * Tests a case where the retryable write oplog pre-fetch stage does not fetch the pre/post image
+ * for a findAndModify operation because there is a newer txnNumber started in the same session when
+ * the pre-fetch stage runs. As a newer transaction has started in the same session, the tenant
+ * migration does not need to support retrying the findAndModify. We test that the tenant migration
+ * can still succeed in this case.
+ *
+ * @tags: [requires_fcv_49, requires_majority_read_concern, incompatible_with_eft,
+ * incompatible_with_windows_tls, incompatible_with_macos, requires_persistence]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/replsets/libs/tenant_migration_test.js");
+load("jstests/replsets/libs/tenant_migration_util.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/libs/fail_point_util.js"); // For configureFailPoint().
+
+const tenantMigrationTest = new TenantMigrationTest({name: jsTestName()});
+
+if (!tenantMigrationTest.isFeatureFlagEnabled()) {
+ jsTestLog("Skipping test because the tenant migrations feature flag is disabled");
+ return;
+}
+const kTenantId = "testTenantId";
+const kDbName = tenantMigrationTest.tenantDB(kTenantId, "testDB");
+const kCollName = "testColl";
+
+tenantMigrationTest.insertDonorDB(kDbName, kCollName, [{x: 1}]);
+
+const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
+const hangBeforeRetrievingStartOpTime =
+ configureFailPoint(recipientPrimary, "fpAfterComparingRecipientAndDonorFCV", {action: "hang"});
+const hangAfterRetrievingStartOpTime = configureFailPoint(
+ recipientPrimary, "fpAfterRetrievingStartOpTimesMigrationRecipientInstance", {action: "hang"});
+
+const migrationId = UUID();
+const migrationOpts = {
+ migrationIdString: extractUUIDFromObject(migrationId),
+ tenantId: kTenantId,
+};
+assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
+
+hangBeforeRetrievingStartOpTime.wait();
+
+const donorDB = tenantMigrationTest.getDonorPrimary().getDB(kDbName);
+
+// Run a retryable findAndModify before the recipient gets the start fetching optime so that the
+// recipient would start fetching oplog from this findAndModify's oplog entry and the post-image
+// should be less than the startFetchingOpTime.
+const lsid = {
+ id: UUID()
+};
+let res = assert.commandWorked(donorDB.runCommand({
+ findAndModify: kCollName,
+ query: {x: 1},
+ update: {$set: {y: 1}},
+ new: true,
+ txnNumber: NumberLong(0),
+ lsid: lsid,
+ writeConcern: {w: "majority"}
+}));
+jsTestLog("First findAndModify: " + tojson(res));
+
+hangBeforeRetrievingStartOpTime.off();
+hangAfterRetrievingStartOpTime.wait();
+
+// Run another retryable findAndModify with a newer txnNumber using the same session before the
+// recipient's retryable write pre-fetch stage. After this, the pre-fetch stage would not fetch the
+// post-image of the last findAndModify because a newer txnNumber has started.
+res = assert.commandWorked(donorDB.runCommand({
+ findAndModify: kCollName,
+ query: {x: 1},
+ update: {$set: {y: 2}},
+ new: true,
+ txnNumber: NumberLong(1),
+ lsid: lsid,
+ writeConcern: {w: "majority"}
+}));
+jsTestLog("Second findAndModify: " + tojson(res));
+
+// Resume the tenant migration.
+hangAfterRetrievingStartOpTime.off();
+
+const stateRes =
+ assert.commandWorked(tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
+assert.eq(stateRes.state, TenantMigrationTest.State.kCommitted);
+
+res = assert.commandWorked(recipientPrimary.getDB(kDbName).runCommand({
+ findAndModify: kCollName,
+ query: {x: 1},
+ update: {$set: {y: 2}},
+ new: true,
+ txnNumber: NumberLong(1),
+ lsid: lsid,
+ writeConcern: {w: "majority"}
+}));
+jsTestLog("Retry findAndModify on recipient: " + tojson(res));
+assert.eq(2, res.value.y);
+
+tenantMigrationTest.stop();
+})();
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
index 26587e78907..7e910a2a30d 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
@@ -1120,6 +1120,13 @@ TenantMigrationRecipientService::Instance::_fetchRetryableWritesOplogBeforeStart
startFetchingTimestamp = _stateDoc.getStartFetchingDonorOpTime().get().getTimestamp();
}
+ LOGV2_DEBUG(5535300,
+ 1,
+ "Pre-fetching retryable oplog entries before startFetchingTimstamp",
+ "startFetchingTimestamp"_attr = startFetchingTimestamp,
+ "tenantId"_attr = getTenantId(),
+ "migrationId"_attr = getMigrationUUID());
+
// Fetch the oplog chains of all retryable writes that occurred before startFetchingTimestamp
// on this tenant.
auto serializedPipeline =
diff --git a/src/mongo/db/repl/tenant_oplog_applier.cpp b/src/mongo/db/repl/tenant_oplog_applier.cpp
index 3849180874e..16e5fd1db0a 100644
--- a/src/mongo/db/repl/tenant_oplog_applier.cpp
+++ b/src/mongo/db/repl/tenant_oplog_applier.cpp
@@ -660,15 +660,13 @@ void TenantOplogApplier::_writeSessionNoOpsForRange(
}
stmtIds.insert(stmtIds.end(), entryStmtIds.begin(), entryStmtIds.end());
- if (entry.getPreImageOpTime()) {
- uassert(
- 5351005,
- str::stream()
- << "Tenant oplog application cannot apply retryable write with txnNumber "
- << txnNumber << " statementNumber " << stmtIds.front() << " on session "
- << sessionId << " because the preImage is missing",
- prePostImageEntry);
-
+ if (!prePostImageEntry && (entry.getPreImageOpTime() || entry.getPostImageOpTime())) {
+ LOGV2(5535302,
+ "Tenant Oplog Applier omitting pre- or post- image for findAndModify",
+ "entry"_attr = redact(entry.toBSONForLogging()),
+ "tenant"_attr = _tenantId,
+ "migrationUuid"_attr = _migrationUuid);
+ } else if (entry.getPreImageOpTime()) {
uassert(
5351002,
str::stream()
@@ -682,14 +680,6 @@ void TenantOplogApplier::_writeSessionNoOpsForRange(
noopEntry.setPreImageOpTime(prePostImageEntry->getOpTime());
} else if (entry.getPostImageOpTime()) {
uassert(
- 5351006,
- str::stream()
- << "Tenant oplog application cannot apply retryable write with txnNumber "
- << txnNumber << " statementNumber " << stmtIds.front() << " on session "
- << sessionId << " because the postImage is missing",
- prePostImageEntry);
-
- uassert(
5351007,
str::stream()
<< "Tenant oplog application cannot apply retryable write with txnNumber "
diff --git a/src/mongo/db/repl/tenant_oplog_batcher.cpp b/src/mongo/db/repl/tenant_oplog_batcher.cpp
index f8d116a9db7..5635f0ce339 100644
--- a/src/mongo/db/repl/tenant_oplog_batcher.cpp
+++ b/src/mongo/db/repl/tenant_oplog_batcher.cpp
@@ -111,9 +111,21 @@ void TenantOplogBatcher::_pushEntry(OperationContext* opCtx,
"Tenant Oplog Batcher reordering pre- or post- image for oplog entry",
"opTime"_attr = op.getOpTime(),
"imageOpTime"_attr = imageOpTime);
- auto imageOp = OplogEntry(
- uassertStatusOK(_oplogBuffer->findByTimestamp(opCtx, imageOpTime.getTimestamp())));
- batch->ops.emplace_back(TenantOplogEntry(std::move(imageOp)));
+ auto swImageOp = _oplogBuffer->findByTimestamp(opCtx, imageOpTime.getTimestamp());
+ if (swImageOp.getStatus() == ErrorCodes::NoSuchKey) {
+ // If we don't find the pre/post image in the buffer, it means that the pre/post
+ // image has an optime less than the startFetchingDonorOpTime and was never
+ // fetched. This implies that there was a newer transaction number started in
+ // the same session on the donor when the retryable write pre-fetch stage tried
+ // to fetch oplog entries with optime less than the startFetchingDonorOpTime. In
+ // that case, we don't need to support retrying the current findAndModify.
+ // Therefore, use a dummy pre/post image.
+ LOGV2(5535301,
+ "Tenant Oplog Batcher cannot find pre- or post- image",
+ "imageOpTime"_attr = imageOpTime);
+ } else {
+ batch->ops.emplace_back(TenantOplogEntry(uassertStatusOK(swImageOp)));
+ }
}
batch->ops.emplace_back(TenantOplogEntry(std::move(op)));
} else {