diff options
author | Matthew Russotto <matthew.russotto@mongodb.com> | 2021-02-16 09:48:35 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-02-16 17:58:59 +0000 |
commit | 7648dc6152e8f24ca626b6cd120d9d701c37bf2d (patch) | |
tree | 07bd33f124b08a41fd7b77a27934cfaff8f6cce9 | |
parent | 9849b9f4a485c91b812ec1d068e610824cb1105e (diff) | |
download | mongo-7648dc6152e8f24ca626b6cd120d9d701c37bf2d.tar.gz |
SERVER-53552 Tenant migration donor should pass startMigrationDonorTimestamp info to recipient via recipientSyncData cmd
10 files changed, 135 insertions, 20 deletions
diff --git a/jstests/replsets/tenant_migration_conflicting_recipient_sync_data_cmds.js b/jstests/replsets/tenant_migration_conflicting_recipient_sync_data_cmds.js index 1449cd6bdb4..d22ce1bf551 100644 --- a/jstests/replsets/tenant_migration_conflicting_recipient_sync_data_cmds.js +++ b/jstests/replsets/tenant_migration_conflicting_recipient_sync_data_cmds.js @@ -60,6 +60,7 @@ function startRecipientSyncDataCmd(migrationUuid, tenantId, connectionString, re donorConnectionString: connectionString, tenantId: tenantId, readPreference: readPreference, + startMigrationDonorTimestamp: Timestamp(1, 1), recipientCertificateForDonor: TenantMigrationUtil.makeMigrationCertificatesForTest().recipientCertificateForDonor }), diff --git a/jstests/replsets/tenant_migration_donor_current_op.js b/jstests/replsets/tenant_migration_donor_current_op.js index d24a9e3176c..f8d6bbf8742 100644 --- a/jstests/replsets/tenant_migration_donor_current_op.js +++ b/jstests/replsets/tenant_migration_donor_current_op.js @@ -17,10 +17,11 @@ load("jstests/replsets/libs/tenant_migration_test.js"); // An object that mirrors the donor migration states. const migrationStates = { kUninitialized: 0, - kDataSync: 1, - kBlocking: 2, - kCommitted: 3, - kAborted: 4 + kAbortingIndexBuilds: 1, + kDataSync: 2, + kBlocking: 3, + kCommitted: 4, + kAborted: 5 }; const kTenantId = 'testTenantId'; @@ -44,6 +45,43 @@ const kReadPreference = { tenantId: kTenantId, readPreference: kReadPreference }; + let fp = configureFailPoint(donorPrimary, + "pauseTenantMigrationBeforeLeavingAbortingIndexBuildsState"); + assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts)); + fp.wait(); + + const res = assert.commandWorked( + donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"})); + assert.eq(res.inprog.length, 1); + assert.eq(bsonWoCompare(res.inprog[0].instanceID.uuid, migrationId), 0); + assert.eq(bsonWoCompare(res.inprog[0].tenantId, kTenantId), 0); + assert.eq(res.inprog[0].recipientConnectionString, + tenantMigrationTest.getRecipientRst().getURL()); + assert.eq(bsonWoCompare(res.inprog[0].readPreference, kReadPreference), 0); + assert.eq(res.inprog[0].lastDurableState, migrationStates.kAbortingIndexBuilds); + assert.eq(res.inprog[0].migrationCompleted, false); + + fp.off(); + assert.commandWorked(tenantMigrationTest.waitForMigrationToComplete(migrationOpts)); + tenantMigrationTest.stop(); +})(); + +(() => { + jsTestLog("Testing currentOp output for migration in data sync state"); + const tenantMigrationTest = new TenantMigrationTest({name: jsTestName()}); + if (!tenantMigrationTest.isFeatureFlagEnabled()) { + jsTestLog("Skipping test because the tenant migrations feature flag is disabled"); + return; + } + + const donorPrimary = tenantMigrationTest.getDonorPrimary(); + + const migrationId = UUID(); + const migrationOpts = { + migrationIdString: extractUUIDFromObject(migrationId), + tenantId: kTenantId, + readPreference: kReadPreference + }; let fp = configureFailPoint(donorPrimary, "pauseTenantMigrationBeforeLeavingDataSyncState"); assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts)); fp.wait(); @@ -57,6 +95,7 @@ const kReadPreference = { tenantMigrationTest.getRecipientRst().getURL()); assert.eq(bsonWoCompare(res.inprog[0].readPreference, kReadPreference), 0); assert.eq(res.inprog[0].lastDurableState, migrationStates.kDataSync); + assert(res.inprog[0].startMigrationDonorTimestamp); assert.eq(res.inprog[0].migrationCompleted, false); fp.off(); @@ -92,6 +131,7 @@ const kReadPreference = { tenantMigrationTest.getRecipientRst().getURL()); assert.eq(bsonWoCompare(res.inprog[0].readPreference, kReadPreference), 0); assert.eq(res.inprog[0].lastDurableState, migrationStates.kBlocking); + assert(res.inprog[0].startMigrationDonorTimestamp); assert(res.inprog[0].blockTimestamp); assert.eq(res.inprog[0].migrationCompleted, false); @@ -128,6 +168,7 @@ const kReadPreference = { tenantMigrationTest.getRecipientRst().getURL()); assert.eq(bsonWoCompare(res.inprog[0].readPreference, kReadPreference), 0); assert.eq(res.inprog[0].lastDurableState, migrationStates.kAborted); + assert(res.inprog[0].startMigrationDonorTimestamp); assert(res.inprog[0].blockTimestamp); assert(res.inprog[0].commitOrAbortOpTime); assert(res.inprog[0].abortReason); @@ -162,6 +203,7 @@ const kReadPreference = { tenantMigrationTest.getRecipientRst().getURL()); assert.eq(bsonWoCompare(res.inprog[0].readPreference, kReadPreference), 0); assert.eq(res.inprog[0].lastDurableState, migrationStates.kCommitted); + assert(res.inprog[0].startMigrationDonorTimestamp); assert(res.inprog[0].blockTimestamp); assert(res.inprog[0].commitOrAbortOpTime); assert.eq(res.inprog[0].migrationCompleted, false); @@ -178,6 +220,7 @@ const kReadPreference = { tenantMigrationTest.getRecipientRst().getURL()); assert.eq(bsonWoCompare(res.inprog[0].readPreference, kReadPreference), 0); assert.eq(res.inprog[0].lastDurableState, migrationStates.kCommitted); + assert(res.inprog[0].startMigrationDonorTimestamp); assert(res.inprog[0].blockTimestamp); assert(res.inprog[0].commitOrAbortOpTime); assert(res.inprog[0].expireAt); diff --git a/jstests/replsets/tenant_migration_donor_state_machine.js b/jstests/replsets/tenant_migration_donor_state_machine.js index 96172f6cb22..75edae7e784 100644 --- a/jstests/replsets/tenant_migration_donor_state_machine.js +++ b/jstests/replsets/tenant_migration_donor_state_machine.js @@ -110,8 +110,13 @@ let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDon assert(mtabs[kTenantId].blockTimestamp); let donorDoc = configDonorsColl.findOne({tenantId: kTenantId}); - let blockOplogEntry = donorPrimary.getDB("local").oplog.rs.findOne( - {ns: TenantMigrationTest.kConfigDonorsNS, op: "u", "o.tenantId": kTenantId}); + let blockOplogEntry = + donorPrimary.getDB("local") + .oplog.rs + .find({ns: TenantMigrationTest.kConfigDonorsNS, op: "u", "o.tenantId": kTenantId}) + .sort({"$natural": -1}) + .limit(1) + .next(); assert.eq(donorDoc.state, "blocking"); assert.eq(donorDoc.blockTimestamp, blockOplogEntry.ts); diff --git a/jstests/replsets/tenant_migration_invalid_inputs.js b/jstests/replsets/tenant_migration_invalid_inputs.js index ce7b158a70a..f85ec22d1a7 100644 --- a/jstests/replsets/tenant_migration_invalid_inputs.js +++ b/jstests/replsets/tenant_migration_invalid_inputs.js @@ -93,6 +93,7 @@ unsupportedtenantIds.forEach((invalidTenantId) => { migrationId: UUID(), donorConnectionString: tenantMigrationTest.getDonorRst().getURL(), tenantId: invalidTenantId, + startMigrationDonorTimestamp: Timestamp(1, 1), readPreference: readPreference, recipientCertificateForDonor: migrationCertificates.recipientCertificateForDonor, }), @@ -105,6 +106,7 @@ assert.commandFailedWithCode(recipientPrimary.adminCommand({ migrationId: UUID(), donorConnectionString: tenantMigrationTest.getRecipientRst().getURL(), tenantId: tenantId, + startMigrationDonorTimestamp: Timestamp(1, 1), readPreference: readPreference, recipientCertificateForDonor: migrationCertificates.recipientCertificateForDonor, }), @@ -116,6 +118,7 @@ assert.commandFailedWithCode(recipientPrimary.adminCommand({ migrationId: UUID(), donorConnectionString: tenantMigrationTest.getDonorRst().getURL() + "," + recipientPrimary.host, tenantId: tenantId, + startMigrationDonorTimestamp: Timestamp(1, 1), readPreference: readPreference, recipientCertificateForDonor: migrationCertificates.recipientCertificateForDonor, }), @@ -127,6 +130,7 @@ assert.commandFailedWithCode(recipientPrimary.adminCommand({ migrationId: UUID(), donorConnectionString: recipientPrimary.host, tenantId: tenantId, + startMigrationDonorTimestamp: Timestamp(1, 1), readPreference: readPreference, recipientCertificateForDonor: migrationCertificates.recipientCertificateForDonor, }), @@ -140,6 +144,7 @@ nullTimestamps.forEach((nullTs) => { migrationId: UUID(), donorConnectionString: tenantMigrationTest.getDonorRst().getURL(), tenantId: tenantId, + startMigrationDonorTimestamp: Timestamp(1, 1), readPreference: readPreference, returnAfterReachingDonorTimestamp: nullTs, recipientCertificateForDonor: migrationCertificates.recipientCertificateForDonor, diff --git a/jstests/replsets/tenant_migration_ssl_configuration.js b/jstests/replsets/tenant_migration_ssl_configuration.js index ebab8c01041..e59fbfd8daa 100644 --- a/jstests/replsets/tenant_migration_ssl_configuration.js +++ b/jstests/replsets/tenant_migration_ssl_configuration.js @@ -66,6 +66,7 @@ const kExpiredMigrationCertificates = { migrationId: UUID(), donorConnectionString: tenantMigrationTest.getDonorRst().getURL(), tenantId: kTenantId, + startMigrationDonorTimestamp: Timestamp(1, 1), readPreference: kReadPreference }), ErrorCodes.InvalidOptions); @@ -137,6 +138,7 @@ const kExpiredMigrationCertificates = { donorConnectionString: tenantMigrationTest.getDonorRst().getURL(), tenantId: kTenantId, readPreference: kReadPreference, + startMigrationDonorTimestamp: Timestamp(1, 1), recipientCertificateForDonor: kValidMigrationCertificates.recipientCertificateForDonor, }), ErrorCodes.IllegalOperation); @@ -213,6 +215,7 @@ const kExpiredMigrationCertificates = { migrationId: UUID(), donorConnectionString: tenantMigrationTest.getDonorRst().getURL(), tenantId: kTenantId, + startMigrationDonorTimestamp: Timestamp(1, 1), readPreference: kReadPreference })); diff --git a/src/mongo/db/commands/tenant_migration_recipient_cmds.idl b/src/mongo/db/commands/tenant_migration_recipient_cmds.idl index 71a5eb68f7b..6a4b44c9752 100644 --- a/src/mongo/db/commands/tenant_migration_recipient_cmds.idl +++ b/src/mongo/db/commands/tenant_migration_recipient_cmds.idl @@ -101,6 +101,13 @@ commands: optional: true validator: callback: "tenant_migration_util::validateTimestampNotNull" + startMigrationDonorTimestamp: + type: timestamp + description: >- + Recipient must not start cloning/fetching oplog entries from the donor until this + timestamp is majority committed. + validator: + callback: "tenant_migration_util::validateTimestampNotNull" recipientForgetMigration: description: "Parser for the 'recipientForgetMigration' command." diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp index 36573d870d4..24b79028653 100644 --- a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp +++ b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp @@ -92,6 +92,13 @@ TenantMigrationDonorDocument parseDonorStateDocument(const BSONObj& doc) { switch (donorStateDoc.getState()) { case TenantMigrationDonorStateEnum::kUninitialized: break; + case TenantMigrationDonorStateEnum::kAbortingIndexBuilds: + uassert(ErrorCodes::BadValue, + errmsg, + !donorStateDoc.getBlockTimestamp() && !donorStateDoc.getCommitOrAbortOpTime() && + !donorStateDoc.getAbortReason() && + !donorStateDoc.getStartMigrationDonorTimestamp()); + break; case TenantMigrationDonorStateEnum::kDataSync: uassert(ErrorCodes::BadValue, errmsg, @@ -232,6 +239,7 @@ void recoverTenantMigrationAccessBlockers(OperationContext* opCtx) { .add(doc.getTenantId(), mtab); switch (doc.getState()) { + case TenantMigrationDonorStateEnum::kAbortingIndexBuilds: case TenantMigrationDonorStateEnum::kDataSync: break; case TenantMigrationDonorStateEnum::kBlocking: @@ -252,7 +260,7 @@ void recoverTenantMigrationAccessBlockers(OperationContext* opCtx) { } mtab->setAbortOpTime(opCtx, doc.getCommitOrAbortOpTime().get()); break; - default: + case TenantMigrationDonorStateEnum::kUninitialized: MONGO_UNREACHABLE; } return true; diff --git a/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp b/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp index e3faa621b15..c789741a087 100644 --- a/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp +++ b/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp @@ -49,9 +49,9 @@ const auto tenantIdToDeleteDecoration = * Initializes the TenantMigrationDonorAccessBlocker for the tenant migration denoted by the given * state doc. */ -void onTransitionToDataSync(OperationContext* opCtx, - const TenantMigrationDonorDocument& donorStateDoc) { - invariant(donorStateDoc.getState() == TenantMigrationDonorStateEnum::kDataSync); +void onTransitionToAbortingIndexBuilds(OperationContext* opCtx, + const TenantMigrationDonorDocument& donorStateDoc) { + invariant(donorStateDoc.getState() == TenantMigrationDonorStateEnum::kAbortingIndexBuilds); auto mtab = std::make_shared<TenantMigrationDonorAccessBlocker>( opCtx->getServiceContext(), @@ -213,15 +213,16 @@ void TenantMigrationDonorOpObserver::onInserts(OperationContext* opCtx, for (auto it = first; it != last; it++) { auto donorStateDoc = tenant_migration_access_blocker::parseDonorStateDocument(it->doc); switch (donorStateDoc.getState()) { - case TenantMigrationDonorStateEnum::kDataSync: - onTransitionToDataSync(opCtx, donorStateDoc); + case TenantMigrationDonorStateEnum::kAbortingIndexBuilds: + onTransitionToAbortingIndexBuilds(opCtx, donorStateDoc); break; + case TenantMigrationDonorStateEnum::kDataSync: case TenantMigrationDonorStateEnum::kBlocking: case TenantMigrationDonorStateEnum::kCommitted: case TenantMigrationDonorStateEnum::kAborted: - uasserted( - ErrorCodes::IllegalOperation, - "cannot insert a donor's state doc with 'state' other than 'data sync'"); + uasserted(ErrorCodes::IllegalOperation, + "cannot insert a donor's state doc with 'state' other than 'aborting " + "index builds'"); break; default: MONGO_UNREACHABLE; @@ -240,6 +241,8 @@ void TenantMigrationDonorOpObserver::onUpdate(OperationContext* opCtx, auto donorStateDoc = tenant_migration_access_blocker::parseDonorStateDocument(args.updateArgs.updatedDoc); switch (donorStateDoc.getState()) { + case TenantMigrationDonorStateEnum::kDataSync: + break; case TenantMigrationDonorStateEnum::kBlocking: onTransitionToBlocking(opCtx, donorStateDoc); break; diff --git a/src/mongo/db/repl/tenant_migration_donor_service.cpp b/src/mongo/db/repl/tenant_migration_donor_service.cpp index 9f5492abe4b..8f1e7166ac4 100644 --- a/src/mongo/db/repl/tenant_migration_donor_service.cpp +++ b/src/mongo/db/repl/tenant_migration_donor_service.cpp @@ -60,6 +60,7 @@ namespace { MONGO_FAIL_POINT_DEFINE(abortTenantMigrationBeforeLeavingBlockingState); MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationAfterPersistingInitialDonorStateDoc); +MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationBeforeLeavingAbortingIndexBuildsState); MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationBeforeLeavingBlockingState); MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationBeforeLeavingDataSyncState); @@ -243,6 +244,10 @@ boost::optional<BSONObj> TenantMigrationDonorService::Instance::reportForCurrent if (_stateDoc.getExpireAt()) { bob.append("expireAt", _stateDoc.getExpireAt()->toString()); } + if (_stateDoc.getStartMigrationDonorTimestamp()) { + bob.append("startMigrationDonorTimestamp", + _stateDoc.getStartMigrationDonorTimestamp()->toBSON()); + } if (_stateDoc.getBlockTimestamp()) { bob.append("blockTimestamp", _stateDoc.getBlockTimestamp()->toBSON()); } @@ -407,7 +412,7 @@ TenantMigrationDonorService::Instance::_fetchAndStoreRecipientClusterTimeKeyDocs ExecutorFuture<repl::OpTime> TenantMigrationDonorService::Instance::_insertStateDoc( std::shared_ptr<executor::ScopedTaskExecutor> executor, const CancelationToken& token) { invariant(_stateDoc.getState() == TenantMigrationDonorStateEnum::kUninitialized); - _stateDoc.setState(TenantMigrationDonorStateEnum::kDataSync); + _stateDoc.setState(TenantMigrationDonorStateEnum::kAbortingIndexBuilds); return AsyncTry([this, self = shared_from_this()] { auto opCtxHolder = cc().makeOperationContext(); @@ -474,6 +479,10 @@ ExecutorFuture<repl::OpTime> TenantMigrationDonorService::Instance::_updateState // Update the state. _stateDoc.setState(nextState); switch (nextState) { + case TenantMigrationDonorStateEnum::kDataSync: { + _stateDoc.setStartMigrationDonorTimestamp(oplogSlot.getTimestamp()); + break; + } case TenantMigrationDonorStateEnum::kBlocking: { _stateDoc.setBlockTimestamp(oplogSlot.getTimestamp()); @@ -575,11 +584,12 @@ ExecutorFuture<void> TenantMigrationDonorService::Instance::_waitForMajorityWrit stdx::lock_guard<Latch> lg(_mutex); _durableState.state = _stateDoc.getState(); switch (_durableState.state) { - case TenantMigrationDonorStateEnum::kDataSync: + case TenantMigrationDonorStateEnum::kAbortingIndexBuilds: if (!_initialDonorStateDurablePromise.getFuture().isReady()) { _initialDonorStateDurablePromise.emplaceValue(); } break; + case TenantMigrationDonorStateEnum::kDataSync: case TenantMigrationDonorStateEnum::kBlocking: case TenantMigrationDonorStateEnum::kCommitted: break; @@ -652,7 +662,8 @@ ExecutorFuture<void> TenantMigrationDonorService::Instance::_sendRecipientSyncDa _stateDoc.getReadPreference()); commonData.setRecipientCertificateForDonor(_stateDoc.getRecipientCertificateForDonor()); request.setMigrationRecipientCommonData(commonData); - + invariant(_stateDoc.getStartMigrationDonorTimestamp()); + request.setStartMigrationDonorTimestamp(*_stateDoc.getStartMigrationDonorTimestamp()); request.setReturnAfterReachingDonorTimestamp(_stateDoc.getBlockTimestamp()); return request.toBSON(BSONObj()); }(); @@ -697,7 +708,7 @@ SemiFuture<void> TenantMigrationDonorService::Instance::run( return ExecutorFuture<void>(**executor, Status::OK()); } - // Enter "dataSync" state. + // Enter "abortingIndexBuilds" state. return _insertStateDoc(executor, _abortMigrationSource.token()) .then([this, self = shared_from_this(), executor](repl::OpTime opTime) { // TODO (SERVER-53389): TenantMigration{Donor, Recipient}Service should @@ -718,7 +729,7 @@ SemiFuture<void> TenantMigrationDonorService::Instance::run( executor, recipientTargeterRS, serviceToken, _abortMigrationSource.token()); }) .then([this, self = shared_from_this(), executor, recipientTargeterRS, serviceToken] { - if (_stateDoc.getState() > TenantMigrationDonorStateEnum::kDataSync) { + if (_stateDoc.getState() > TenantMigrationDonorStateEnum::kAbortingIndexBuilds) { return ExecutorFuture<void>(**executor, Status::OK()); } @@ -732,8 +743,27 @@ SemiFuture<void> TenantMigrationDonorService::Instance::run( auto* indexBuildsCoordinator = IndexBuildsCoordinator::get(opCtx); indexBuildsCoordinator->abortTenantIndexBuilds( opCtx, _stateDoc.getTenantId(), "tenant migration"); + pauseTenantMigrationBeforeLeavingAbortingIndexBuildsState.pauseWhileSet(opCtx); } + // Enter "dataSync" state. + return _updateStateDoc(executor, + TenantMigrationDonorStateEnum::kDataSync, + _abortMigrationSource.token()) + + .then([this, self = shared_from_this(), executor](repl::OpTime opTime) { + // TODO (SERVER-53389): TenantMigration{Donor, Recipient}Service should + // use its base PrimaryOnlyService's cancelation source to pass tokens + // in calls to WaitForMajorityService::waitUntilMajority. + return _waitForMajorityWriteConcern(executor, std::move(opTime)); + }); + }) + .then([this, self = shared_from_this(), executor, recipientTargeterRS, serviceToken] { + if (_stateDoc.getState() > TenantMigrationDonorStateEnum::kDataSync) { + return ExecutorFuture<void>(**executor, Status::OK()); + } + + checkIfReceivedDonorAbortMigration(serviceToken, _abortMigrationSource.token()); return _sendRecipientSyncDataCommand( executor, recipientTargeterRS, _abortMigrationSource.token()) .then([this, self = shared_from_this()] { diff --git a/src/mongo/db/repl/tenant_migration_state_machine.idl b/src/mongo/db/repl/tenant_migration_state_machine.idl index 31e4e4f60a4..e57961e674b 100644 --- a/src/mongo/db/repl/tenant_migration_state_machine.idl +++ b/src/mongo/db/repl/tenant_migration_state_machine.idl @@ -44,6 +44,7 @@ enums: type: string values: kUninitialized: "uninitialized" + kAbortingIndexBuilds: "aborting index builds" kDataSync: "data sync" kBlocking: "blocking" kCommitted: "committed" @@ -122,6 +123,15 @@ structs: type: TenantMigrationPEMPayload # TODO (SERVER-54085): Remove server parameter tenantMigrationDisableX509Auth. optional: true + startMigrationDonorTimestamp: + type: timestamp + optional: true + description: >- + Timestamp after all index builds for tenant are complete or aborted. Recipient + must not start cloning/fetching oplog entries from the donor until this + timestamp is majority committed. + validator: + callback: "tenant_migration_util::validateTimestampNotNull" tenantMigrationRecipientDocument: description: "Represents an in-progress tenant migration on the migration recipient." |