summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author    Lingzhi Deng <lingzhi.deng@mongodb.com>  2021-03-17 23:03:23 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com>  2021-03-18 00:09:38 +0000
commit    3afa5f4fde94a47d8d3ca3743954216c8342f32d (patch)
tree      cb5c4f8c562e008b346a1f42e9efd3e54a07b98d
parent    65faf38974f63900b1dcdc6fd1994ce3255911d4 (diff)
download  mongo-3afa5f4fde94a47d8d3ca3743954216c8342f32d.tar.gz
SERVER-54920: Move the tenant migration recipient MTAB lifetime earlier
-rw-r--r--  jstests/replsets/tenant_migration_collection_ttl.js                                     95
-rw-r--r--  jstests/replsets/tenant_migration_concurrent_writes_on_recipient.js                      4
-rw-r--r--  jstests/replsets/tenant_migration_recipient_resumes_on_donor_failover.js                13
-rw-r--r--  jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js  13
-rw-r--r--  jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js              13
-rw-r--r--  src/mongo/db/repl/SConscript                                                            12
-rw-r--r--  src/mongo/db/repl/tenant_collection_cloner.cpp                                           7
-rw-r--r--  src/mongo/db/repl/tenant_database_cloner.cpp                                             3
-rw-r--r--  src/mongo/db/repl/tenant_migration_recipient_access_blocker.cpp                         11
-rw-r--r--  src/mongo/db/repl/tenant_migration_recipient_access_blocker.h                            3
-rw-r--r--  src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp                            11
11 files changed, 143 insertions, 42 deletions
diff --git a/jstests/replsets/tenant_migration_collection_ttl.js b/jstests/replsets/tenant_migration_collection_ttl.js
index e1fd6d1ec0e..3f5a56961df 100644
--- a/jstests/replsets/tenant_migration_collection_ttl.js
+++ b/jstests/replsets/tenant_migration_collection_ttl.js
@@ -20,7 +20,9 @@ const garbageCollectionOpts = {
// up the test.
tenantMigrationGarbageCollectionDelayMS: 5 * 1000,
// Set the TTL interval large enough to decrease the probability of races.
- ttlMonitorSleepSecs: 5
+ ttlMonitorSleepSecs: 5,
+ // Allow reads on recipient before migration completes for testing.
+ 'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}),
};
const tenantMigrationTest = new TenantMigrationTest(
@@ -30,8 +32,6 @@ if (!tenantMigrationTest.isFeatureFlagEnabled()) {
return;
}
-const tenantId = "testTenantId";
-const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
const collName = "testColl";
const donorRst = tenantMigrationTest.getDonorRst();
@@ -50,7 +50,7 @@ function prepareData() {
});
}
-function prepareDb(ttlTimeoutSeconds = 0) {
+function prepareDb(dbName, ttlTimeoutSeconds = 0) {
let db = donorPrimary.getDB(dbName);
tenantMigrationTest.insertDonorDB(dbName, collName, prepareData());
// Create TTL index.
@@ -72,29 +72,86 @@ function waitForOneTTLPassAtNode(node) {
}, "TTLMonitor never did any passes.");
}
-function getDocumentCount(node) {
+function getDocumentCount(dbName, node) {
return node.getDB(dbName)[collName].count();
}
-function assertTTLNotDeleteExpiredDocs(node) {
- assert.eq(numDocs, getDocumentCount(node));
+function assertTTLNotDeleteExpiredDocs(dbName, node) {
+ assert.eq(numDocs, getDocumentCount(dbName, node));
}
-function assertTTLDeleteExpiredDocs(node) {
+function assertTTLDeleteExpiredDocs(dbName, node) {
waitForOneTTLPassAtNode(node);
assert.soon(() => {
- let found = getDocumentCount(node);
+ let found = getDocumentCount(dbName, node);
jsTest.log(`${found} documents in the ${node} collection`);
return found == 0;
}, `TTL doesn't clean the database at ${node}`);
}
// Tests that:
-// 1. At the recipient, the TTL deletions are suspended until migration is forgotten.
+// 1. At the recipient, the TTL deletions are suspended during the cloning phase.
+// 2. At the donor, TTL deletions are not suspended before blocking state.
+(() => {
+ jsTest.log("Test that the TTL does not delete documents on recipient during cloning");
+
+ const tenantId = "testTenantId_duringCloning";
+ const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
+
+ const migrationId = UUID();
+ const migrationOpts = {
+ migrationIdString: extractUUIDFromObject(migrationId),
+ tenantId: tenantId,
+ recipientConnString: tenantMigrationTest.getRecipientConnString(),
+ };
+
+ // We start the test right after the donor TTL cycle.
+ waitForOneTTLPassAtNode(donorPrimary);
+ // The TTL timeout is intentionally shorter than TTL interval to let the documents to be subject
+ // of TTL in the first round.
+ prepareDb(dbName, 3);
+
+ const recipientDb = recipientPrimary.getDB(dbName);
+ let recipientColl = recipientDb.getCollection(collName);
+ const hangDuringCollectionClone = configureFailPoint(
+ recipientDb,
+ "hangAfterClonerStage",
+ {cloner: "TenantCollectionCloner", stage: "query", nss: recipientColl.getFullName()});
+
+ assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
+
+ hangDuringCollectionClone.wait();
+
+ waitForOneTTLPassAtNode(donorPrimary);
+ waitForOneTTLPassAtNode(recipientPrimary);
+
+ // All documents should expire on the donor but not on the recipient.
+ assertTTLDeleteExpiredDocs(dbName, donorPrimary);
+ assertTTLNotDeleteExpiredDocs(dbName, recipientPrimary);
+
+ hangDuringCollectionClone.off();
+
+ const stateRes =
+ assert.commandWorked(tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
+ assert.eq(stateRes.state, TenantMigrationTest.State.kCommitted);
+
+ // Data should be consistent after the migration commits.
+ assertTTLDeleteExpiredDocs(dbName, recipientPrimary);
+ assertTTLDeleteExpiredDocs(dbName, donorPrimary);
+
+ assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));
+})();
+
+// Tests that:
+// 1. At the recipient, the TTL deletions are suspended after the cloning phase until migration is
+// forgotten.
// 2. At the donor, TTL deletions are suspended during blocking state. This verifies that
// the TTL mechanism respects the same MTAB mechanism as normal updates.
(() => {
- jsTest.log("Test that the TTL does not delete documents during tenant migration");
+ jsTest.log("Test that the TTL does not delete documents on recipient after cloning");
+
+ const tenantId = "testTenantId_afterCloning";
+ const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
const migrationId = UUID();
const migrationOpts = {
@@ -108,7 +165,7 @@ function assertTTLDeleteExpiredDocs(node) {
// The TTL timeout is intentionally shorter than TTL interval to let the documents to be subject
// of TTL in the first round. It also should be long enough to let the startMigration() finish
// before the timeout expires.
- prepareDb(3);
+ prepareDb(dbName, 3);
let blockFp =
configureFailPoint(donorPrimary, "pauseTenantMigrationBeforeLeavingBlockingState");
@@ -118,7 +175,7 @@ function assertTTLDeleteExpiredDocs(node) {
// At a very slow machine, there is a chance that a TTL cycle happened at the donor
// before it entered the blocking phase. This flag is set when there was a race.
- const donorHadNoTTLCyclesBeforeBlocking = numDocs == getDocumentCount(donorPrimary);
+ const donorHadNoTTLCyclesBeforeBlocking = numDocs == getDocumentCount(dbName, donorPrimary);
if (!donorHadNoTTLCyclesBeforeBlocking) {
jsTestLog('A rare race when TTL cycle happened before donor entered its blocking phase');
return;
@@ -128,8 +185,8 @@ function assertTTLDeleteExpiredDocs(node) {
// 1. TTL is suspended at the recipient
// 2. As there was no race with TTL cycle at the donor, TTL is suspended as well.
waitForOneTTLPassAtNode(recipientPrimary);
- assertTTLNotDeleteExpiredDocs(recipientPrimary);
- assertTTLNotDeleteExpiredDocs(donorPrimary);
+ assertTTLNotDeleteExpiredDocs(dbName, recipientPrimary);
+ assertTTLNotDeleteExpiredDocs(dbName, donorPrimary);
blockFp.off();
@@ -140,14 +197,14 @@ function assertTTLDeleteExpiredDocs(node) {
// Tests that the TTL cleanup was suspended during the tenant migration.
waitForOneTTLPassAtNode(donorPrimary);
waitForOneTTLPassAtNode(recipientPrimary);
- assertTTLNotDeleteExpiredDocs(recipientPrimary);
- assertTTLNotDeleteExpiredDocs(donorPrimary);
+ assertTTLNotDeleteExpiredDocs(dbName, recipientPrimary);
+ assertTTLNotDeleteExpiredDocs(dbName, donorPrimary);
assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));
// After the tenant migration is aborted, the TTL cleanup is restored.
- assertTTLDeleteExpiredDocs(recipientPrimary);
- assertTTLDeleteExpiredDocs(donorPrimary);
+ assertTTLDeleteExpiredDocs(dbName, recipientPrimary);
+ assertTTLDeleteExpiredDocs(dbName, donorPrimary);
})();
tenantMigrationTest.stop();
diff --git a/jstests/replsets/tenant_migration_concurrent_writes_on_recipient.js b/jstests/replsets/tenant_migration_concurrent_writes_on_recipient.js
index a398bb52d96..f2a1bc6824c 100644
--- a/jstests/replsets/tenant_migration_concurrent_writes_on_recipient.js
+++ b/jstests/replsets/tenant_migration_concurrent_writes_on_recipient.js
@@ -58,7 +58,7 @@ const kTenantId = "testTenantId";
startOplogFetcherFp.wait();
// Write before cloning is done.
- assert.commandWorked(tenantCollOnRecipient.remove({_id: 1}));
+ assert.commandFailedWithCode(tenantCollOnRecipient.remove({_id: 1}), ErrorCodes.SnapshotTooOld);
startOplogFetcherFp.off();
clonerDoneFp.wait();
@@ -152,4 +152,4 @@ const kTenantId = "testTenantId";
})();
tenantMigrationTest.stop();
-})(); \ No newline at end of file
+})();
diff --git a/jstests/replsets/tenant_migration_recipient_resumes_on_donor_failover.js b/jstests/replsets/tenant_migration_recipient_resumes_on_donor_failover.js
index b1fc37d8ecd..3a885ba2376 100644
--- a/jstests/replsets/tenant_migration_recipient_resumes_on_donor_failover.js
+++ b/jstests/replsets/tenant_migration_recipient_resumes_on_donor_failover.js
@@ -23,10 +23,15 @@ function runTest(failPoint) {
const recipientRst = new ReplSetTest({
nodes: 2,
name: jsTestName() + "_recipient",
- // Use a batch size of 2 so that collection cloner requires more than a single batch to
- // complete.
- nodeOptions: Object.assign(TenantMigrationUtil.makeX509OptionsForTest().recipient,
- {setParameter: {collectionClonerBatchSize: 2}})
+ nodeOptions: Object.assign(TenantMigrationUtil.makeX509OptionsForTest().recipient, {
+ setParameter: {
+ // Use a batch size of 2 so that collection cloner requires more than a single batch
+ // to complete.
+ collectionClonerBatchSize: 2,
+ // Allow reads on recipient before migration completes for testing.
+ 'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}),
+ }
+ })
});
recipientRst.startSet();
recipientRst.initiateWithHighElectionTimeout();
diff --git a/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js b/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js
index 3a2b0439ea9..71510fb02cd 100644
--- a/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js
+++ b/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js
@@ -16,10 +16,15 @@ load("jstests/replsets/libs/tenant_migration_util.js");
const recipientRst = new ReplSetTest({
nodes: 2,
name: jsTestName() + "_recipient",
- // Use a batch size of 2 so that collection cloner requires more than a single batch to
- // complete.
- nodeOptions: Object.assign(TenantMigrationUtil.makeX509OptionsForTest().recipient,
- {setParameter: {collectionClonerBatchSize: 2}})
+ nodeOptions: Object.assign(TenantMigrationUtil.makeX509OptionsForTest().recipient, {
+ setParameter: {
+ // Use a batch size of 2 so that collection cloner requires more than a single batch to
+ // complete.
+ collectionClonerBatchSize: 2,
+ // Allow reads on recipient before migration completes for testing.
+ 'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}),
+ }
+ })
});
recipientRst.startSet();
diff --git a/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js b/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js
index 9cde0e2cbf0..a949e25e08f 100644
--- a/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js
+++ b/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js
@@ -16,10 +16,15 @@ load("jstests/replsets/libs/tenant_migration_util.js");
const recipientRst = new ReplSetTest({
nodes: 2,
name: jsTestName() + "_recipient",
- // Use a batch size of 2 so that collection cloner requires more than a single batch to
- // complete.
- nodeOptions: Object.assign(TenantMigrationUtil.makeX509OptionsForTest().recipient,
- {setParameter: {collectionClonerBatchSize: 2}})
+ nodeOptions: Object.assign(TenantMigrationUtil.makeX509OptionsForTest().recipient, {
+ setParameter: {
+ // Use a batch size of 2 so that collection cloner requires more than a single batch to
+ // complete.
+ collectionClonerBatchSize: 2,
+ // Allow reads on recipient before migration completes for testing.
+ 'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}),
+ }
+ })
});
recipientRst.startSet();
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index cbcc6c938de..c2bf8e36c53 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -33,15 +33,24 @@ env.Library(
)
env.Library(
+ target='tenant_migration_decoration',
+ source=[
+ 'tenant_migration_decoration.cpp',
+ ],
+)
+
+env.Library(
target='oplog',
source=[
'apply_ops.cpp',
'oplog.cpp',
'oplog_entry_or_grouped_inserts.cpp',
- 'tenant_migration_decoration.cpp',
'transaction_oplog_application.cpp',
'apply_ops.idl',
],
+ LIBDEPS=[
+ 'tenant_migration_decoration',
+ ],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/base',
'$BUILD_DIR/mongo/db/catalog/catalog_helpers',
@@ -1371,6 +1380,7 @@ env.Library(
'local_oplog_info',
'optime',
'repl_coordinator_interface',
+ 'tenant_migration_decoration',
'tenant_migration_errors',
'tenant_migration_state_machine_idl'
],
diff --git a/src/mongo/db/repl/tenant_collection_cloner.cpp b/src/mongo/db/repl/tenant_collection_cloner.cpp
index 5a16d7a4576..a1beb962892 100644
--- a/src/mongo/db/repl/tenant_collection_cloner.cpp
+++ b/src/mongo/db/repl/tenant_collection_cloner.cpp
@@ -323,6 +323,13 @@ BaseCloner::AfterStageBehavior TenantCollectionCloner::createCollectionStage() {
// We are resuming and the collection already exists.
DBDirectClient client(opCtx.get());
+ // Set the recipient info on the opCtx to bypass the access blocker for local reads.
+ tenantMigrationRecipientInfo(opCtx.get()) =
+ boost::make_optional<TenantMigrationRecipientInfo>(getSharedData()->getMigrationId());
+ // Reset the recipient info after local reads so oplog entries for future writes
+ // (createCollection/createIndex) don't get stamped with the fromTenantMigration field.
+ ON_BLOCK_EXIT([&opCtx] { tenantMigrationRecipientInfo(opCtx.get()) = boost::none; });
+
auto fieldsToReturn = BSON("_id" << 1);
_lastDocId =
client.findOne(_existingNss->ns(), Query().sort(BSON("_id" << -1)), &fieldsToReturn);
diff --git a/src/mongo/db/repl/tenant_database_cloner.cpp b/src/mongo/db/repl/tenant_database_cloner.cpp
index c4c21178f81..faeafb32516 100644
--- a/src/mongo/db/repl/tenant_database_cloner.cpp
+++ b/src/mongo/db/repl/tenant_database_cloner.cpp
@@ -38,6 +38,7 @@
#include "mongo/db/repl/database_cloner_gen.h"
#include "mongo/db/repl/tenant_collection_cloner.h"
#include "mongo/db/repl/tenant_database_cloner.h"
+#include "mongo/db/repl/tenant_migration_decoration.h"
#include "mongo/logv2/log.h"
#include "mongo/rpc/get_status_from_command_result.h"
#include "mongo/util/assert_util.h"
@@ -174,6 +175,8 @@ BaseCloner::AfterStageBehavior TenantDatabaseCloner::listCollectionsStage() {
BaseCloner::AfterStageBehavior TenantDatabaseCloner::listExistingCollectionsStage() {
auto opCtx = cc().makeOperationContext();
DBDirectClient client(opCtx.get());
+ tenantMigrationRecipientInfo(opCtx.get()) =
+ boost::make_optional<TenantMigrationRecipientInfo>(getSharedData()->getMigrationId());
std::vector<UUID> clonedCollectionUUIDs;
auto collectionInfos =
diff --git a/src/mongo/db/repl/tenant_migration_recipient_access_blocker.cpp b/src/mongo/db/repl/tenant_migration_recipient_access_blocker.cpp
index 3a22934b09d..4ddf2bb1260 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_access_blocker.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_access_blocker.cpp
@@ -36,6 +36,7 @@
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/repl/tenant_migration_access_blocker_executor.h"
+#include "mongo/db/repl/tenant_migration_decoration.h"
#include "mongo/db/repl/tenant_migration_recipient_access_blocker.h"
#include "mongo/logv2/log.h"
#include "mongo/util/cancelation.h"
@@ -82,6 +83,16 @@ SharedSemiFuture<void> TenantMigrationRecipientAccessBlocker::getCanReadFuture(
return SharedSemiFuture<void>();
}
+ // Exclude internal reads decorated with 'tenantMigrationRecipientInfo' from any logic.
+ if (repl::tenantMigrationRecipientInfo(opCtx).has_value()) {
+ LOGV2_DEBUG(5492000,
+ 1,
+ "Internal tenant read got excluded from the MTAB filtering",
+ "tenantId"_attr = _tenantId,
+ "opId"_attr = opCtx->getOpID());
+ return SharedSemiFuture<void>();
+ }
+
auto readConcernArgs = repl::ReadConcernArgs::get(opCtx);
auto atClusterTime = [opCtx, &readConcernArgs]() -> boost::optional<Timestamp> {
if (auto atClusterTime = readConcernArgs.getArgsAtClusterTime()) {
diff --git a/src/mongo/db/repl/tenant_migration_recipient_access_blocker.h b/src/mongo/db/repl/tenant_migration_recipient_access_blocker.h
index fa387afdca4..3f622d62039 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_access_blocker.h
+++ b/src/mongo/db/repl/tenant_migration_recipient_access_blocker.h
@@ -64,6 +64,9 @@ namespace mongo {
* To ensure atClusterTime and afterClusterTime reads are consistent, when the recipient receives a
* recipientSyncData command with a returnAfterReachingTimestamp after the consistent point, the
* `rejectBeforeTimestamp` will be advanced to the given returnAfterReachingTimestamp.
+ *
+ * Blocker excludes all operations with 'tenantMigrationRecipientInfo' decoration set, as they are
+ * internal.
*/
class TenantMigrationRecipientAccessBlocker
: public std::enable_shared_from_this<TenantMigrationRecipientAccessBlocker>,
diff --git a/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp b/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp
index e57ba2aa7e2..6c01ec62935 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp
@@ -49,11 +49,8 @@ const auto tenantIdToDeleteDecoration =
* Initializes the TenantMigrationRecipientAccessBlocker for the tenant migration denoted by the
* given state doc.
*/
-void onFinishCloning(OperationContext* opCtx,
- const TenantMigrationRecipientDocument& recipientStateDoc) {
- invariant(recipientStateDoc.getState() == TenantMigrationRecipientStateEnum::kStarted);
- invariant(recipientStateDoc.getDataConsistentStopDonorOpTime());
-
+void createAccessBlockerIfNeeded(OperationContext* opCtx,
+ const TenantMigrationRecipientDocument& recipientStateDoc) {
if (tenant_migration_access_blocker::getTenantMigrationRecipientAccessBlocker(
opCtx->getServiceContext(), recipientStateDoc.getTenantId())) {
// The migration failed part-way on the recipient with a retryable error, and got retried
@@ -117,9 +114,7 @@ void TenantMigrationRecipientOpObserver::onUpdate(OperationContext* opCtx,
case TenantMigrationRecipientStateEnum::kDone:
break;
case TenantMigrationRecipientStateEnum::kStarted:
- if (recipientStateDoc.getDataConsistentStopDonorOpTime()) {
- onFinishCloning(opCtx, recipientStateDoc);
- }
+ createAccessBlockerIfNeeded(opCtx, recipientStateDoc);
break;
case TenantMigrationRecipientStateEnum::kConsistent:
if (recipientStateDoc.getRejectReadsBeforeTimestamp()) {