From 8e6ab9a259d921298940190161fadfd118c6dc15 Mon Sep 17 00:00:00 2001
From: Jordi Serra Torrens
Date: Thu, 30 Dec 2021 12:18:25 +0000
Subject: SERVER-62245 MigrationRecovery must not assume that only one
 migration needs to be recovered

---
 etc/backports_required_for_multiversion_tests.yml |  4 +
 .../recover_multiple_migrations_on_stepup.js      | 85 ++++++++++++++++++++++
 src/mongo/db/s/migration_coordinator.cpp          | 41 +++++++++--
 src/mongo/db/s/migration_coordinator.h            |  2 +
 src/mongo/db/s/migration_util.cpp                 | 25 ++-----
 5 files changed, 133 insertions(+), 24 deletions(-)
 create mode 100644 jstests/sharding/recover_multiple_migrations_on_stepup.js

diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index d8cd34bf4a1..582f4cb127e 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -132,6 +132,8 @@ last-continuous:
    test_file: jstests/auth/dbcheck.js
  - ticket: SERVER-61666
    test_file: jstests/replsets/tenant_migration_transaction_boundary.js
+  - ticket: SERVER-62245
+    test_file: jstests/sharding/recover_multiple_migrations_on_stepup.js
  - ticket: SERVER-62212
    test_file: jstests/replsets/dbcheck_write_concern.js
  - ticket: SERVER-62296
@@ -409,6 +411,8 @@ last-lts:
    test_file: jstests/auth/dbcheck.js
  - ticket: SERVER-61666
    test_file: jstests/replsets/tenant_migration_transaction_boundary.js
+  - ticket: SERVER-62245
+    test_file: jstests/sharding/recover_multiple_migrations_on_stepup.js
  - ticket: SERVER-62296
    test_file: jstests/sharding/migration_recovers_unfinished_migrations.js
diff --git a/jstests/sharding/recover_multiple_migrations_on_stepup.js b/jstests/sharding/recover_multiple_migrations_on_stepup.js
new file mode 100644
index 00000000000..b8cf4644df8
--- /dev/null
+++ b/jstests/sharding/recover_multiple_migrations_on_stepup.js
@@ -0,0 +1,85 @@
+/**
+ * Tests that if on stepup a shard finds more than one config.migrationCoordinators document
+ * (which could happen as a consequence of the bug described in SERVER-62245), the shard will be
+ * able to properly recover all migrations.
+ *
+ * TODO: SERVER-62316 This test should be deleted when 6.0 becomes the lastLTS version, since a
+ * situation where there is more than one migrationCoordinators document will no longer be
+ * possible.
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load('./jstests/libs/chunk_manipulation_util.js');
+
+// Disable checking for index consistency to ensure that the config server doesn't trigger a
+// StaleShardVersion exception on the shards and cause them to refresh their sharding metadata.
+// That would interfere with the precise migration recovery interleaving this test requires.
+const nodeOptions = {
+    setParameter: {enableShardedIndexConsistencyCheck: false}
+};
+
+var st = new ShardingTest({shards: 2, other: {configOptions: nodeOptions, enableBalancer: false}});
+let staticMongod = MongoRunner.runMongod({});
+
+const dbName = "test";
+const collNameA = "foo";
+const collNameB = "bar";
+const nsA = dbName + "." + collNameA;
+const nsB = dbName + "." + collNameB;
+const collA = st.s.getDB(dbName)[collNameA];
+const collB = st.s.getDB(dbName)[collNameB];
+
+assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
+st.ensurePrimaryShard(dbName, st.shard0.shardName);
+assert.commandWorked(st.s.adminCommand({shardCollection: nsA, key: {_id: 1}}));
+assert.commandWorked(st.s.adminCommand({shardCollection: nsB, key: {_id: 1}}));
+
+// Run a first migration just to save its associated config.migrationCoordinators document for the
+// purposes of the test.
+var moveChunkHangAtStep3Failpoint = configureFailPoint(st.rs0.getPrimary(), "moveChunkHangAtStep3");
+var joinMoveChunk1 = moveChunkParallel(
+    staticMongod, st.s0.host, {_id: 0}, null, nsA, st.shard1.shardName, true /* expectSuccess */);
+moveChunkHangAtStep3Failpoint.wait();
+
+let migrationCoordinatorDocuments =
+    st.rs0.getPrimary().getDB('config')['migrationCoordinators'].find().toArray();
+assert.eq(1, migrationCoordinatorDocuments.length);
+let firstMigrationCoordinatorDoc = migrationCoordinatorDocuments[0];
+
+// Let the first migration finish and delete its migrationCoordinator document. Otherwise, because
+// of the fix introduced in SERVER-62296, no new migration could start until the first one has
+// deleted its recovery document.
+moveChunkHangAtStep3Failpoint.off();
+joinMoveChunk1();
+
+// Start a second migration on a different collection, wait until it persists its recovery
+// document, and then step down the donor.
+var moveChunkHangAtStep3Failpoint = configureFailPoint(st.rs0.getPrimary(), "moveChunkHangAtStep3");
+
+var joinMoveChunk2 = moveChunkParallel(
+    staticMongod, st.s0.host, {_id: 0}, null, nsB, st.shard1.shardName, false /* expectSuccess */);
+
+moveChunkHangAtStep3Failpoint.wait();
+
+// Insert the recovery document from the first migration to simulate the bug described in
+// SERVER-62245.
+assert.commandWorked(st.rs0.getPrimary().getDB('config')['migrationCoordinators'].insert(
+    firstMigrationCoordinatorDoc));
+
+// Now we have two migrations pending to be recovered.
+assert.eq(2, st.rs0.getPrimary().getDB('config')['migrationCoordinators'].countDocuments({}));
+
+// Step down the donor shard.
+assert.commandWorked(st.rs0.getPrimary().adminCommand({replSetStepDown: 5, force: true}));
+moveChunkHangAtStep3Failpoint.off();
+joinMoveChunk2();
+
+// Check that the donor shard has been able to recover the shard version for both collections.
+assert.eq(0, collA.find().itcount());
+assert.eq(0, collB.find().itcount());
+
+MongoRunner.stopMongod(staticMongod);
+st.stop();
+})();
diff --git a/src/mongo/db/s/migration_coordinator.cpp b/src/mongo/db/s/migration_coordinator.cpp
index 5b983056cf3..dfab697554e 100644
--- a/src/mongo/db/s/migration_coordinator.cpp
+++ b/src/mongo/db/s/migration_coordinator.cpp
@@ -90,7 +90,7 @@ MigrationCoordinator::MigrationCoordinator(MigrationSessionId sessionId,
       _waitForDelete(waitForDelete) {}
 
 MigrationCoordinator::MigrationCoordinator(const MigrationCoordinatorDocument& doc)
-    : _migrationInfo(doc) {}
+    : _migrationInfo(doc), _recoveringMigration(true) {}
 
 MigrationCoordinator::~MigrationCoordinator() = default;
 
@@ -208,10 +208,26 @@ SemiFuture<void> MigrationCoordinator::_commitMigrationOnDonorAndRecipient(
                 "lsid"_attr = _migrationInfo.getLsid(),
                 "currentTxnNumber"_attr = _migrationInfo.getTxnNumber(),
                 "migrationId"_attr = _migrationInfo.getId());
-    migrationutil::advanceTransactionOnRecipient(opCtx,
-                                                 _migrationInfo.getRecipientShardId(),
-                                                 _migrationInfo.getLsid(),
-                                                 _migrationInfo.getTxnNumber());
+    try {
+        migrationutil::advanceTransactionOnRecipient(opCtx,
+                                                     _migrationInfo.getRecipientShardId(),
+                                                     _migrationInfo.getLsid(),
+                                                     _migrationInfo.getTxnNumber());
+    } catch (const ExceptionFor<ErrorCodes::TransactionTooOld>& ex) {
+        // TODO: SERVER-62316: No longer catch after 6.0 branches out
+        if (_recoveringMigration) {
+            LOGV2_WARNING(6224500,
+                          "Transaction number on recipient shard was already advanced by a later "
+                          "migration that started before this one finished recovery",
+                          "namespace"_attr = _migrationInfo.getNss(),
+                          "migrationId"_attr = _migrationInfo.getId(),
+                          "lsid"_attr = _migrationInfo.getLsid(),
+                          "currentTxnNumber"_attr = _migrationInfo.getTxnNumber(),
+                          "error"_attr = redact(ex));
+        } else {
+            throw;
+        }
+    }
 
     hangBeforeSendingCommitDecision.pauseWhileSet();
 
@@ -293,8 +309,21 @@ void MigrationCoordinator::_abortMigrationOnDonorAndRecipient(OperationContext*
                       "recipientShardId"_attr = _migrationInfo.getRecipientShardId(),
                       "currentTxnNumber"_attr = _migrationInfo.getTxnNumber(),
                       "error"_attr = exShardNotFound);
+    } catch (const ExceptionFor<ErrorCodes::TransactionTooOld>& ex) {
+        // TODO: SERVER-62316: No longer catch after 6.0 branches out
+        if (_recoveringMigration) {
+            LOGV2_WARNING(6224501,
+                          "Transaction number on recipient shard was already advanced by a later "
+                          "migration that started before this one finished recovery",
+                          "namespace"_attr = _migrationInfo.getNss(),
+                          "migrationId"_attr = _migrationInfo.getId(),
+                          "lsid"_attr = _migrationInfo.getLsid(),
+                          "currentTxnNumber"_attr = _migrationInfo.getTxnNumber(),
+                          "error"_attr = redact(ex));
+        } else {
+            throw;
+        }
     }
-
     LOGV2_DEBUG(23902,
                 2,
                 "Marking range deletion task on recipient as ready for processing",
diff --git a/src/mongo/db/s/migration_coordinator.h b/src/mongo/db/s/migration_coordinator.h
index 0c96d1eab15..88db1a68008 100644
--- a/src/mongo/db/s/migration_coordinator.h
+++ b/src/mongo/db/s/migration_coordinator.h
@@ -125,6 +125,8 @@ private:
     MigrationCoordinatorDocument _migrationInfo;
     bool _waitForDelete = false;
     boost::optional> _releaseRecipientCriticalSectionFuture;
+    const bool _recoveringMigration =
+        false;  // TODO: SERVER-62316: Can be removed after 6.0 branches out
 };
 
 }  // namespace migrationutil
diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp
index 61fe9f728eb..fa60f7990a8 100644
--- a/src/mongo/db/s/migration_util.cpp
+++ b/src/mongo/db/s/migration_util.cpp
@@ -816,7 +816,12 @@ void markAsReadyRangeDeletionTaskLocally(OperationContext* opCtx, const UUID& mi
     auto update = BSON("$unset" << BSON(RangeDeletionTask::kPendingFieldName << ""));
 
     hangInReadyRangeDeletionLocallyInterruptible.pauseWhileSet(opCtx);
-    store.update(opCtx, query, update);
+    try {
+        store.update(opCtx, query, update);
+    } catch (const ExceptionFor<ErrorCodes::NoMatchingDocument>&) {
+        // If we are recovering the migration, the range-deletion may have already finished. So its
+        // associated document may already have been removed.
+    }
 
     if (hangInReadyRangeDeletionLocallyThenSimulateErrorUninterruptible.shouldFail()) {
         hangInReadyRangeDeletionLocallyThenSimulateErrorUninterruptible.pauseWhileSet(opCtx);
@@ -884,16 +889,6 @@ void resumeMigrationCoordinationsOnStepUp(OperationContext* opCtx) {
     store.forEach(opCtx,
                   BSONObj{},
                   [&opCtx, &unfinishedMigrationsCount](const MigrationCoordinatorDocument& doc) {
-                      // MigrationCoordinators are only created under the MigrationBlockingGuard,
-                      // which means that only one can possibly exist on an instance at a time.
-                      // Furthermore, recovery of an incomplete MigrationCoordator also acquires the
-                      // MigrationBlockingGuard. Because of this it is not possible to have more
-                      // than one unfinished migration.
-                      invariant(unfinishedMigrationsCount == 0,
-                                str::stream()
-                                    << "Upon step-up a second migration coordinator was found"
-                                    << redact(doc.toBSON()));
-
                       unfinishedMigrationsCount++;
                       LOGV2_DEBUG(4798511,
                                   3,
@@ -908,14 +903,8 @@ void resumeMigrationCoordinationsOnStepUp(OperationContext* opCtx) {
                           CollectionShardingRuntime::get(opCtx, nss)->clearFilteringMetadata(opCtx);
                       }
 
-                      auto mbg = std::make_shared<MigrationBlockingGuard>(
-                          opCtx,
-                          str::stream() << "Recovery of migration session "
-                                        << doc.getMigrationSessionId().toString()
-                                        << " on collection " << nss);
-
                       ExecutorFuture<void>(getMigrationUtilExecutor(opCtx->getServiceContext()))
-                          .then([serviceContext = opCtx->getServiceContext(), nss, mbg] {
+                          .then([serviceContext = opCtx->getServiceContext(), nss] {
                               ThreadClient tc("TriggerMigrationRecovery", serviceContext);
                               {
                                   stdx::lock_guard<Client> lk(*tc.get());
-- 
cgit v1.2.1