diff options
author | Lingzhi Deng <lingzhi.deng@mongodb.com> | 2021-04-28 01:45:33 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-04-28 16:40:23 +0000 |
commit | 7bc3b87610f5abc2ee3e9900c955d1d19b3b46fd (patch) | |
tree | bc9b3ea66f29348e3db0db7f46500f0f739d212b | |
parent | 522e0db207fe2a0d3b2bc75437b6af5b70b8827c (diff) | |
download | mongo-7bc3b87610f5abc2ee3e9900c955d1d19b3b46fd.tar.gz |
SERVER-55353: Ignore pre/post image when tenant oplog batcher can't find one
(cherry picked from commit 0464f0b7bcb2eee8dafbf108447013b4d586a16a)
-rw-r--r-- | buildscripts/resmokelib/testing/hooks/tenant_migration.py | 10 | ||||
-rw-r--r-- | jstests/replsets/tenant_migration_find_and_modify_retry.js | 103 | ||||
-rw-r--r-- | src/mongo/db/repl/tenant_migration_recipient_service.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/repl/tenant_oplog_applier.cpp | 24 | ||||
-rw-r--r-- | src/mongo/db/repl/tenant_oplog_batcher.cpp | 18 |
5 files changed, 133 insertions, 29 deletions
diff --git a/buildscripts/resmokelib/testing/hooks/tenant_migration.py b/buildscripts/resmokelib/testing/hooks/tenant_migration.py index f175c468934..b9f6189f9b1 100644 --- a/buildscripts/resmokelib/testing/hooks/tenant_migration.py +++ b/buildscripts/resmokelib/testing/hooks/tenant_migration.py @@ -340,20 +340,12 @@ class _TenantMigrationThread(threading.Thread): # pylint: disable=too-many-inst return abort_reason["code"] == self.FAIL_TO_PARSE_ERR_CODE and re.search( err_msg_regex, abort_reason["errmsg"]) - def _is_no_such_key_in_oplog_buffer_err(self, abort_reason): - # TODO (SERVER-55353): Tenant migration recipient can fail to find pre-image or post-image - # for oplog entry in its tenant migration oplog buffer. - err_msg_regex = r"No document found with _id.*in namespace config.repl.migration.oplog" - return abort_reason["code"] == self.NO_SUCH_KEY_ERR_CODE and re.search( - err_msg_regex, abort_reason["errmsg"]) - def _is_blacklisted_abort_reason(self, abort_reason): is_recipient_err = abort_reason["errmsg"].startswith( "Tenant migration recipient command failed") if not is_recipient_err: return False - return self._is_missing_uuid_in_collection_info_err( - abort_reason) or self._is_no_such_key_in_oplog_buffer_err(abort_reason) + return self._is_missing_uuid_in_collection_info_err(abort_reason) def _create_migration_opts(self, donor_rs_index, recipient_rs_index): donor_rs = self._tenant_migration_fixture.get_replset(donor_rs_index) diff --git a/jstests/replsets/tenant_migration_find_and_modify_retry.js b/jstests/replsets/tenant_migration_find_and_modify_retry.js new file mode 100644 index 00000000000..1a77e92fc2c --- /dev/null +++ b/jstests/replsets/tenant_migration_find_and_modify_retry.js @@ -0,0 +1,103 @@ +/** + * Tests a case where the retryable write oplog pre-fetch stage does not fetch the pre/post image + * for a findAndModify operation because there is a newer txnNumber started in the same session when + * the pre-fetch stage runs. As a newer transaction has started in the same session, the tenant + * migration does not need to support retrying the findAndModify. We test that the tenant migration + * can still succeed in this case. + * + * @tags: [requires_fcv_49, requires_majority_read_concern, incompatible_with_eft, + * incompatible_with_windows_tls, incompatible_with_macos, requires_persistence] + */ + +(function() { +"use strict"; + +load("jstests/replsets/libs/tenant_migration_test.js"); +load("jstests/replsets/libs/tenant_migration_util.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/libs/fail_point_util.js"); // For configureFailPoint(). + +const tenantMigrationTest = new TenantMigrationTest({name: jsTestName()}); + +if (!tenantMigrationTest.isFeatureFlagEnabled()) { + jsTestLog("Skipping test because the tenant migrations feature flag is disabled"); + return; +} +const kTenantId = "testTenantId"; +const kDbName = tenantMigrationTest.tenantDB(kTenantId, "testDB"); +const kCollName = "testColl"; + +tenantMigrationTest.insertDonorDB(kDbName, kCollName, [{x: 1}]); + +const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); +const hangBeforeRetrievingStartOpTime = + configureFailPoint(recipientPrimary, "fpAfterComparingRecipientAndDonorFCV", {action: "hang"}); +const hangAfterRetrievingStartOpTime = configureFailPoint( + recipientPrimary, "fpAfterRetrievingStartOpTimesMigrationRecipientInstance", {action: "hang"}); + +const migrationId = UUID(); +const migrationOpts = { + migrationIdString: extractUUIDFromObject(migrationId), + tenantId: kTenantId, +}; +assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts)); + +hangBeforeRetrievingStartOpTime.wait(); + +const donorDB = tenantMigrationTest.getDonorPrimary().getDB(kDbName); + +// Run a retryable findAndModify before the recipient gets the start fetching optime so that the +// recipient would start fetching oplog from this findAndModify's oplog entry and the post-image +// should be less than the startFetchingOpTime. +const lsid = { + id: UUID() +}; +let res = assert.commandWorked(donorDB.runCommand({ + findAndModify: kCollName, + query: {x: 1}, + update: {$set: {y: 1}}, + new: true, + txnNumber: NumberLong(0), + lsid: lsid, + writeConcern: {w: "majority"} +})); +jsTestLog("First findAndModify: " + tojson(res)); + +hangBeforeRetrievingStartOpTime.off(); +hangAfterRetrievingStartOpTime.wait(); + +// Run another retryable findAndModify with a newer txnNumber using the same session before the +// recipient's retryable write pre-fetch stage. After this, the pre-fetch stage would not fetch the +// post-image of the last findAndModify because a newer txnNumber has started. +res = assert.commandWorked(donorDB.runCommand({ + findAndModify: kCollName, + query: {x: 1}, + update: {$set: {y: 2}}, + new: true, + txnNumber: NumberLong(1), + lsid: lsid, + writeConcern: {w: "majority"} +})); +jsTestLog("Second findAndModify: " + tojson(res)); + +// Resume the tenant migration. +hangAfterRetrievingStartOpTime.off(); + +const stateRes = + assert.commandWorked(tenantMigrationTest.waitForMigrationToComplete(migrationOpts)); +assert.eq(stateRes.state, TenantMigrationTest.State.kCommitted); + +res = assert.commandWorked(recipientPrimary.getDB(kDbName).runCommand({ + findAndModify: kCollName, + query: {x: 1}, + update: {$set: {y: 2}}, + new: true, + txnNumber: NumberLong(1), + lsid: lsid, + writeConcern: {w: "majority"} +})); +jsTestLog("Retry findAndModify on recipient: " + tojson(res)); +assert.eq(2, res.value.y); + +tenantMigrationTest.stop(); +})(); diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp index 26587e78907..7e910a2a30d 100644 --- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp +++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp @@ -1120,6 +1120,13 @@ TenantMigrationRecipientService::Instance::_fetchRetryableWritesOplogBeforeStart startFetchingTimestamp = _stateDoc.getStartFetchingDonorOpTime().get().getTimestamp(); } + LOGV2_DEBUG(5535300, + 1, + "Pre-fetching retryable oplog entries before startFetchingTimstamp", + "startFetchingTimestamp"_attr = startFetchingTimestamp, + "tenantId"_attr = getTenantId(), + "migrationId"_attr = getMigrationUUID()); + // Fetch the oplog chains of all retryable writes that occurred before startFetchingTimestamp // on this tenant. auto serializedPipeline = diff --git a/src/mongo/db/repl/tenant_oplog_applier.cpp b/src/mongo/db/repl/tenant_oplog_applier.cpp index 3849180874e..16e5fd1db0a 100644 --- a/src/mongo/db/repl/tenant_oplog_applier.cpp +++ b/src/mongo/db/repl/tenant_oplog_applier.cpp @@ -660,15 +660,13 @@ void TenantOplogApplier::_writeSessionNoOpsForRange( } stmtIds.insert(stmtIds.end(), entryStmtIds.begin(), entryStmtIds.end()); - if (entry.getPreImageOpTime()) { - uassert( - 5351005, - str::stream() - << "Tenant oplog application cannot apply retryable write with txnNumber " - << txnNumber << " statementNumber " << stmtIds.front() << " on session " - << sessionId << " because the preImage is missing", - prePostImageEntry); - + if (!prePostImageEntry && (entry.getPreImageOpTime() || entry.getPostImageOpTime())) { + LOGV2(5535302, + "Tenant Oplog Applier omitting pre- or post- image for findAndModify", + "entry"_attr = redact(entry.toBSONForLogging()), + "tenant"_attr = _tenantId, + "migrationUuid"_attr = _migrationUuid); + } else if (entry.getPreImageOpTime()) { uassert( 5351002, str::stream() @@ -682,14 +680,6 @@ void TenantOplogApplier::_writeSessionNoOpsForRange( noopEntry.setPreImageOpTime(prePostImageEntry->getOpTime()); } else if (entry.getPostImageOpTime()) { uassert( - 5351006, - str::stream() - << "Tenant oplog application cannot apply retryable write with txnNumber " - << txnNumber << " statementNumber " << stmtIds.front() << " on session " - << sessionId << " because the postImage is missing", - prePostImageEntry); - - uassert( 5351007, str::stream() << "Tenant oplog application cannot apply retryable write with txnNumber " diff --git a/src/mongo/db/repl/tenant_oplog_batcher.cpp b/src/mongo/db/repl/tenant_oplog_batcher.cpp index f8d116a9db7..5635f0ce339 100644 --- a/src/mongo/db/repl/tenant_oplog_batcher.cpp +++ b/src/mongo/db/repl/tenant_oplog_batcher.cpp @@ -111,9 +111,21 @@ void TenantOplogBatcher::_pushEntry(OperationContext* opCtx, "Tenant Oplog Batcher reordering pre- or post- image for oplog entry", "opTime"_attr = op.getOpTime(), "imageOpTime"_attr = imageOpTime); - auto imageOp = OplogEntry( - uassertStatusOK(_oplogBuffer->findByTimestamp(opCtx, imageOpTime.getTimestamp()))); - batch->ops.emplace_back(TenantOplogEntry(std::move(imageOp))); + auto swImageOp = _oplogBuffer->findByTimestamp(opCtx, imageOpTime.getTimestamp()); + if (swImageOp.getStatus() == ErrorCodes::NoSuchKey) { + // If we don't find the pre/post image in the buffer, it means that the pre/post + // image has an optime less than the startFetchingDonorOpTime and was never + // fetched. This implies that there was a newer transaction number started in + // the same session on the donor when the retryable write pre-fetch stage tried + // to fetch oplog entries with optime less than the startFetchingDonorOpTime. In + // that case, we don't need to support retrying the current findAndModify. + // Therefore, use a dummy pre/post image. + LOGV2(5535301, + "Tenant Oplog Batcher cannot find pre- or post- image", + "imageOpTime"_attr = imageOpTime); + } else { + batch->ops.emplace_back(TenantOplogEntry(uassertStatusOK(swImageOp))); + } } batch->ops.emplace_back(TenantOplogEntry(std::move(op))); } else { |