diff options
author | A. Jesse Jiryu Davis <jesse@mongodb.com> | 2022-03-10 13:00:44 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-03-10 13:34:05 +0000 |
commit | 76a2683fd01c74100104b05563073e1e0dc59d96 (patch) | |
tree | 01ab80e7c4d7f99413a36415848f78e5096763b4 /src | |
parent | 9f8769859bee0824c0bfdbc78abfdc634c87f4b8 (diff) | |
download | mongo-76a2683fd01c74100104b05563073e1e0dc59d96.tar.gz |
SERVER-63397 Abort all index builds during shard merge
Diffstat (limited to 'src')
14 files changed, 271 insertions, 109 deletions
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 192b8371d84..91153f8e44f 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -778,6 +778,7 @@ void IndexBuildsCoordinator::abortDatabaseIndexBuilds(OperationContext* opCtx, } void IndexBuildsCoordinator::abortTenantIndexBuilds(OperationContext* opCtx, + MigrationProtocolEnum protocol, StringData tenantId, const std::string& reason) { LOGV2(4886203, @@ -787,7 +788,9 @@ void IndexBuildsCoordinator::abortTenantIndexBuilds(OperationContext* opCtx, auto builds = [&]() -> std::vector<std::shared_ptr<ReplIndexBuildState>> { auto indexBuildFilter = [=](const auto& replState) { - return repl::ClonerUtils::isDatabaseForTenant(replState.dbName, tenantId); + // Abort *all* index builds at the start of shard merge. + return protocol == MigrationProtocolEnum::kShardMerge || + repl::ClonerUtils::isDatabaseForTenant(replState.dbName, tenantId); }; return activeIndexBuilds.filterIndexBuilds(indexBuildFilter); }(); @@ -812,8 +815,7 @@ void IndexBuildsCoordinator::abortTenantIndexBuilds(OperationContext* opCtx, } for (const auto& replState : buildsWaitingToFinish) { LOGV2(6221600, - "Waiting on the index build to unregister before continuing the tenant " - " migration.", + "Waiting on the index build to unregister before continuing the tenant migration.", "tenantId"_attr = tenantId, "buildUUID"_attr = replState->buildUUID, "db"_attr = replState->dbName, diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h index 0ae137f047b..1f01a38b06b 100644 --- a/src/mongo/db/index_builds_coordinator.h +++ b/src/mongo/db/index_builds_coordinator.h @@ -49,6 +49,7 @@ #include "mongo/db/repl/oplog_entry.h" #include "mongo/db/repl_index_build_state.h" #include "mongo/db/resumable_index_builds_gen.h" +#include "mongo/db/serverless/serverless_types_gen.h" #include "mongo/executor/task_executor.h" #include "mongo/executor/thread_pool_task_executor.h" #include "mongo/platform/mutex.h" @@ -259,7 +260,10 @@ public: * * Does not stop new index builds from starting. Caller must make that guarantee. */ - void abortTenantIndexBuilds(OperationContext* opCtx, StringData db, const std::string& reason); + void abortTenantIndexBuilds(OperationContext* opCtx, + MigrationProtocolEnum protocol, + StringData db, + const std::string& reason); /** * Signals all of the index builds to abort and then waits until the index builds are no longer * running. The provided 'reason' will be used in the error message that the index builders diff --git a/src/mongo/db/index_builds_coordinator_mongod_test.cpp b/src/mongo/db/index_builds_coordinator_mongod_test.cpp index 0590d344b7b..28e09af1ef7 100644 --- a/src/mongo/db/index_builds_coordinator_mongod_test.cpp +++ b/src/mongo/db/index_builds_coordinator_mongod_test.cpp @@ -37,6 +37,7 @@ #include "mongo/db/catalog_raii.h" #include "mongo/db/namespace_string.h" #include "mongo/db/operation_context.h" +#include "mongo/db/serverless/serverless_types_gen.h" #include "mongo/util/uuid.h" namespace mongo { @@ -388,8 +389,10 @@ TEST_F(IndexBuildsCoordinatorMongodTest, AbortBuildIndexDueToTenantMigration) { // This call may see the index build active and wait for it to be unregistered, or the index // build may already have been unregistered. - _indexBuildsCoord->abortTenantIndexBuilds( - operationContext(), _tenantId.toString(), "tenant migration"); + _indexBuildsCoord->abortTenantIndexBuilds(operationContext(), + MigrationProtocolEnum::kMultitenantMigrations, + _tenantId.toString(), + "tenant migration"); ASSERT_EQ(0, _indexBuildsCoord->getActiveIndexBuildCount(operationContext())); diff --git a/src/mongo/db/op_observer_impl_test.cpp b/src/mongo/db/op_observer_impl_test.cpp index 3a3391dc26a..1d6f2a8e4d6 100644 --- a/src/mongo/db/op_observer_impl_test.cpp +++ b/src/mongo/db/op_observer_impl_test.cpp @@ -3594,6 +3594,7 @@ TEST_F(OpObserverTest, OnInsertChecksIfTenantMigrationIsBlockingWrites) { // Add a tenant migration access blocker on donor for blocking writes. auto donorMtab = std::make_shared<TenantMigrationDonorAccessBlocker>( getServiceContext(), + uuid, kTenantId, MigrationProtocolEnum::kMultitenantMigrations, "fakeConnString"); @@ -3618,17 +3619,18 @@ TEST_F(OpObserverTest, OnInsertChecksIfTenantMigrationIsBlockingWrites) { TEST_F(OpObserverTransactionTest, OnUnpreparedTransactionCommitChecksIfTenantMigrationIsBlockingWrites) { const std::string kTenantId = "tenantId"; + const auto uuid = UUID::gen(); // Add a tenant migration access blocker on donor for blocking writes. auto donorMtab = std::make_shared<TenantMigrationDonorAccessBlocker>( getServiceContext(), + uuid, kTenantId, MigrationProtocolEnum::kMultitenantMigrations, "fakeConnString"); TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, donorMtab); const NamespaceString nss("tenantId_db", "testColl"); - const auto uuid = UUID::gen(); auto txnParticipant = TransactionParticipant::get(opCtx()); txnParticipant.unstashTransactionResources(opCtx(), "insert"); diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_registry.cpp b/src/mongo/db/repl/tenant_migration_access_blocker_registry.cpp index 378fa9c5b12..5d5f26a0669 100644 --- a/src/mongo/db/repl/tenant_migration_access_blocker_registry.cpp +++ b/src/mongo/db/repl/tenant_migration_access_blocker_registry.cpp @@ -69,13 +69,24 @@ void TenantMigrationAccessBlockerRegistry::add(StringData tenantId, std::shared_ptr<TenantMigrationAccessBlocker> mtab) { stdx::lock_guard<Latch> lg(_mutex); auto mtabType = mtab->getType(); - // Assume that all tenant ids (i.e. 'tenantId') have equal length. + tassert(8423351, + "addDonorAccessBlocker called with new-style shard merge blocker", + mtabType != MtabType::kDonor || + mtab->getProtocol() != MigrationProtocolEnum::kShardMerge); + + tassert( + 8423350, + "Adding multitenant migration donor blocker when this node has a shard merge donor blocker", + mtabType != MtabType::kDonor || !_donorAccessBlocker); + auto it = _tenantMigrationAccessBlockers.find(tenantId); if (it != _tenantMigrationAccessBlockers.end()) { if (it->second.getAccessBlocker(mtabType)) { tasserted(ErrorCodes::ConflictingOperationInProgress, - str::stream() - << "Found active migration for tenantId \"" << tenantId << "\""); + str::stream() << "This node is already a " + << MigrationProtocol_serializer(mtab->getProtocol()) << " " + << (mtabType == MtabType::kDonor ? "donor" : "recipient") + << " for tenantId \"" << tenantId << "\""); } // The migration protocol guarantees that the original donor node must be garbage collected // before it can be chosen as a recipient under the same tenant. Therefore, we only expect @@ -90,6 +101,25 @@ void TenantMigrationAccessBlockerRegistry::add(StringData tenantId, _tenantMigrationAccessBlockers.emplace(tenantId, mtabPair); } +void TenantMigrationAccessBlockerRegistry::addDonorAccessBlocker( + std::shared_ptr<TenantMigrationDonorAccessBlocker> mtab) { + stdx::lock_guard<Latch> lg(_mutex); + tassert(8423342, + "addDonorAccessBlocker called with old-style multitenant migrations blocker", + mtab->getProtocol() == MigrationProtocolEnum::kShardMerge); + tassert(ErrorCodes::ConflictingOperationInProgress, + str::stream() << "This node is already a shard merge donor", + !_donorAccessBlocker); + tassert(8423349, + "Adding shard merge donor blocker when this node has other donor blockers", + std::find_if(_tenantMigrationAccessBlockers.begin(), + _tenantMigrationAccessBlockers.end(), + [](const auto& pair) { + return pair.second.getAccessBlocker(MtabType::kDonor).get(); + }) == _tenantMigrationAccessBlockers.end()); + _donorAccessBlocker = mtab; +} + void TenantMigrationAccessBlockerRegistry::_remove(WithLock, StringData tenantId, MtabType type) { auto it = _tenantMigrationAccessBlockers.find(tenantId); @@ -107,9 +137,22 @@ void TenantMigrationAccessBlockerRegistry::_remove(WithLock, StringData tenantId void TenantMigrationAccessBlockerRegistry::remove(StringData tenantId, MtabType type) { stdx::lock_guard<Latch> lg(_mutex); + if (type == MtabType::kDonor && _donorAccessBlocker) { + tasserted(8423348, "Using remove() for new-style donor access blocker"); + } + _remove(lg, tenantId, type); } +void TenantMigrationAccessBlockerRegistry::removeDonorAccessBlocker(const UUID& migrationId) { + stdx::lock_guard<Latch> lg(_mutex); + if (_donorAccessBlocker && _donorAccessBlocker->getMigrationId() == migrationId) { + // Shard merge has one donor blocker. If it exists it must be the one we're removing. + _donorAccessBlocker->interrupt(); + _donorAccessBlocker.reset(); + } +} + void TenantMigrationAccessBlockerRegistry::removeAll(MtabType type) { stdx::lock_guard<Latch> lg(_mutex); @@ -117,6 +160,11 @@ void TenantMigrationAccessBlockerRegistry::removeAll(MtabType type) { it != _tenantMigrationAccessBlockers.end();) { _remove(lg, (it++)->first, type); } + + if (_donorAccessBlocker) { + _donorAccessBlocker->interrupt(); + _donorAccessBlocker.reset(); + } } boost::optional<MtabPair> @@ -138,46 +186,43 @@ TenantMigrationAccessBlockerRegistry::getTenantMigrationAccessBlockerForDbName(S boost::optional<MtabPair> TenantMigrationAccessBlockerRegistry::_getTenantMigrationAccessBlockersForDbName(StringData dbName, - WithLock) { - auto it = _tenantMigrationAccessBlockers.find(""); - if (it != _tenantMigrationAccessBlockers.end()) { - auto donor = it->second.getAccessBlocker(TenantMigrationAccessBlocker::BlockerType::kDonor); - tassert(5979300, - "Expected blocker to be a donor blocker with protocol Shard Merge", - donor && donor->getProtocol() == MigrationProtocolEnum::kShardMerge); - - auto donorCount = std::count_if(_tenantMigrationAccessBlockers.begin(), - _tenantMigrationAccessBlockers.end(), - [](const std::pair<std::string, MtabPair>& blocker) { - return blocker.second.getAccessBlocker( - TenantMigrationAccessBlocker::BlockerType::kDonor); - }); - tassert(5979301, - "Expected there to be a single donor tenant migration access blocker present " - "during a shard merge", - donorCount == 1); - - return it->second; - } - - it = std::find_if(_tenantMigrationAccessBlockers.begin(), - _tenantMigrationAccessBlockers.end(), - [dbName](const std::pair<std::string, MtabPair>& blocker) { - StringData tenantId = blocker.first; - return dbName.startsWith(tenantId + "_"); - }); + WithLock lk) { + // TODO (SERVER-61141): Refactor. + auto tenantId = [&]() -> StringData { + if (auto pos = dbName.find("_"); pos != std::string::npos) { + return dbName.substr(0, pos); + } + return StringData(); + }(); - if (it == _tenantMigrationAccessBlockers.end()) { - return boost::none; + auto it = _tenantMigrationAccessBlockers.find(tenantId); + if (it != _tenantMigrationAccessBlockers.end()) { + auto pair = it->second; + if (_hasDonorAccessBlocker(lk, dbName)) { + // I still have a recipient blocker from a recent migration, now I'm a donor. + pair.setAccessBlocker(_donorAccessBlocker); + } + return pair; + } else if (_hasDonorAccessBlocker(lk, dbName)) { + return MtabPair(_donorAccessBlocker, nullptr); } else { - return it->second; + return boost::none; } } +bool TenantMigrationAccessBlockerRegistry::_hasDonorAccessBlocker(WithLock, StringData dbName) { + // No-op oplog entries, e.g. for linearizable reads, use namespace "". + bool isInternal = (dbName == "" || NamespaceString(dbName).isOnInternalDb()); + return _donorAccessBlocker && !isInternal; +} + std::shared_ptr<TenantMigrationAccessBlocker> TenantMigrationAccessBlockerRegistry::getTenantMigrationAccessBlockerForTenantId( StringData tenantId, MtabType type) { stdx::lock_guard<Latch> lg(_mutex); + if (type == MtabType::kDonor && _donorAccessBlocker) { + return _donorAccessBlocker; + } auto it = _tenantMigrationAccessBlockers.find(tenantId); if (it != _tenantMigrationAccessBlockers.end()) { @@ -197,20 +242,19 @@ void TenantMigrationAccessBlockerRegistry::appendInfoForServerStatus( BSONObjBuilder* builder) const { stdx::lock_guard<Latch> lg(_mutex); + if (_donorAccessBlocker) { + BSONObjBuilder donorMtabInfoBuilder; + _donorAccessBlocker->appendInfoForServerStatus(&donorMtabInfoBuilder); + builder->append("donor", donorMtabInfoBuilder.obj()); + } + for (auto& [tenantId, mtabPair] : _tenantMigrationAccessBlockers) { BSONObjBuilder mtabInfoBuilder; if (auto donorMtab = mtabPair.getAccessBlocker(MtabType::kDonor)) { BSONObjBuilder donorMtabInfoBuilder; donorMtab->appendInfoForServerStatus(&donorMtabInfoBuilder); - switch (donorMtab->getProtocol()) { - case MigrationProtocolEnum::kShardMerge: - builder->append("donor", donorMtabInfoBuilder.obj()); - break; - case MigrationProtocolEnum::kMultitenantMigrations: - mtabInfoBuilder.append("donor", donorMtabInfoBuilder.obj()); - break; - } + mtabInfoBuilder.append("donor", donorMtabInfoBuilder.obj()); } if (auto recipientMtab = mtabPair.getAccessBlocker(MtabType::kRecipient)) { @@ -236,6 +280,10 @@ void TenantMigrationAccessBlockerRegistry::onMajorityCommitPointUpdate(repl::OpT donorMtab->onMajorityCommitPointUpdate(opTime); } } + + if (_donorAccessBlocker) { + _donorAccessBlocker->onMajorityCommitPointUpdate(opTime); + } } std::shared_ptr<executor::TaskExecutor> diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_registry.h b/src/mongo/db/repl/tenant_migration_access_blocker_registry.h index 819d9eb46f3..c9a556f674b 100644 --- a/src/mongo/db/repl/tenant_migration_access_blocker_registry.h +++ b/src/mongo/db/repl/tenant_migration_access_blocker_registry.h @@ -36,6 +36,9 @@ namespace mongo { +// TODO (SERVER-63517): There's only one donor access blocker for "shard merge", so remove +// DonorRecipientAccessBlockerPair. Keep the donor blocker in _donorAccessBlocker, and recipient +// blockers in a map tenantId |-> TenantMigrationRecipientAccessBlocker. class TenantMigrationAccessBlockerRegistry { TenantMigrationAccessBlockerRegistry(const TenantMigrationAccessBlockerRegistry&) = delete; TenantMigrationAccessBlockerRegistry& operator=(const TenantMigrationAccessBlockerRegistry&) = @@ -94,14 +97,26 @@ public: /** * Adds an entry for (tenantId, mtab). Throws ConflictingOperationInProgress if an entry for * tenantId already exists. + * + * TODO (SERVER-63517): Rename to addRecipientAccessBlocker, take a + * std::shared_ptr<TenantMigrationRecipientAccessBlocker>. */ void add(StringData tenantId, std::shared_ptr<TenantMigrationAccessBlocker> mtab); /** + * Adds donor access blocker, throws ConflictingOperationInProgress if one exists. + */ + void addDonorAccessBlocker(std::shared_ptr<TenantMigrationDonorAccessBlocker> mtab); + + /** * Invariants that an entry for tenantId exists, and then removes the entry for (tenantId, mtab) */ void remove(StringData tenantId, TenantMigrationAccessBlocker::BlockerType type); - void _remove(WithLock, StringData tenantId, TenantMigrationAccessBlocker::BlockerType type); + + /** + * Removes the donor access blocker, if any. + */ + void removeDonorAccessBlocker(const UUID& migrationId); /** * Removes all mtabs of the given type. @@ -156,11 +171,17 @@ public: std::shared_ptr<executor::TaskExecutor> getAsyncBlockingOperationsExecutor(); private: + std::shared_ptr<TenantMigrationDonorAccessBlocker> _donorAccessBlocker; + using TenantMigrationAccessBlockersMap = StringMap<DonorRecipientAccessBlockerPair>; boost::optional<DonorRecipientAccessBlockerPair> _getTenantMigrationAccessBlockersForDbName( StringData dbName, WithLock); + bool _hasDonorAccessBlocker(WithLock, StringData dbName); + + void _remove(WithLock, StringData tenantId, TenantMigrationAccessBlocker::BlockerType type); + std::shared_ptr<executor::TaskExecutor> _asyncBlockingOperationsExecutor; mutable Mutex _mutex = MONGO_MAKE_LATCH("TenantMigrationAccessBlockerRegistry::_mutex"); diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp index c13a7e5fb2c..a6efa1797c7 100644 --- a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp +++ b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp @@ -310,6 +310,7 @@ void recoverTenantMigrationAccessBlockers(OperationContext* opCtx) { auto mtab = std::make_shared<TenantMigrationDonorAccessBlocker>( opCtx->getServiceContext(), + doc.getId(), doc.getTenantId().toString(), doc.getProtocol().value_or(MigrationProtocolEnum::kMultitenantMigrations), doc.getRecipientConnectionString().toString()); diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_util_test.cpp b/src/mongo/db/repl/tenant_migration_access_blocker_util_test.cpp index 6a8e91a2885..4c7026fe44e 100644 --- a/src/mongo/db/repl/tenant_migration_access_blocker_util_test.cpp +++ b/src/mongo/db/repl/tenant_migration_access_blocker_util_test.cpp @@ -65,7 +65,11 @@ TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationInitiallyFa TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationTrueWithDonor) { auto donorMtab = std::make_shared<TenantMigrationDonorAccessBlocker>( - getServiceContext(), kTenantId, MigrationProtocolEnum::kMultitenantMigrations, kConnString); + getServiceContext(), + UUID::gen(), + kTenantId, + MigrationProtocolEnum::kMultitenantMigrations, + kConnString); TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, donorMtab); ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB)); @@ -73,9 +77,8 @@ TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationTrueWithDon TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveShardMergeTrueWithDonor) { auto donorMtab = std::make_shared<TenantMigrationDonorAccessBlocker>( - getServiceContext(), "", MigrationProtocolEnum::kShardMerge, kConnString); - TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add("", donorMtab); - + getServiceContext(), UUID::gen(), "", MigrationProtocolEnum::kShardMerge, kConnString); + TenantMigrationAccessBlockerRegistry::get(getServiceContext()).addDonorAccessBlocker(donorMtab); ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), "anyDb"_sd)); } @@ -109,27 +112,35 @@ TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationTrueWithBot TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, recipientMtab); auto donorMtab = std::make_shared<TenantMigrationDonorAccessBlocker>( - getServiceContext(), kTenantId, MigrationProtocolEnum::kMultitenantMigrations, kConnString); + getServiceContext(), + UUID::gen(), + kTenantId, + MigrationProtocolEnum::kMultitenantMigrations, + kConnString); TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, donorMtab); ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB)); } TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveShardMergeTrueWithBoth) { + auto uuid = UUID::gen(); auto recipientMtab = std::make_shared<TenantMigrationRecipientAccessBlocker>( - getServiceContext(), UUID::gen(), "", MigrationProtocolEnum::kShardMerge, kConnString); + getServiceContext(), uuid, "", MigrationProtocolEnum::kShardMerge, kConnString); TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add("", recipientMtab); auto donorMtab = std::make_shared<TenantMigrationDonorAccessBlocker>( - getServiceContext(), "", MigrationProtocolEnum::kShardMerge, kConnString); - TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add("", donorMtab); - + getServiceContext(), uuid, "", MigrationProtocolEnum::kShardMerge, kConnString); + TenantMigrationAccessBlockerRegistry::get(getServiceContext()).addDonorAccessBlocker(donorMtab); ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), "anyDb"_sd)); } TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationFalseForNoDbName) { auto donorMtab = std::make_shared<TenantMigrationDonorAccessBlocker>( - getServiceContext(), kTenantId, MigrationProtocolEnum::kMultitenantMigrations, kConnString); + getServiceContext(), + UUID::gen(), + kTenantId, + MigrationProtocolEnum::kMultitenantMigrations, + kConnString); TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, donorMtab); ASSERT_FALSE(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), StringData())); @@ -137,9 +148,8 @@ TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationFalseForNoD TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveShardMergeFalseForNoDbName) { auto donorMtab = std::make_shared<TenantMigrationDonorAccessBlocker>( - getServiceContext(), "", MigrationProtocolEnum::kShardMerge, kConnString); - TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add("", donorMtab); - + getServiceContext(), UUID::gen(), "", MigrationProtocolEnum::kShardMerge, kConnString); + TenantMigrationAccessBlockerRegistry::get(getServiceContext()).addDonorAccessBlocker(donorMtab); ASSERT_FALSE(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), StringData())); } @@ -153,7 +163,11 @@ TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationFalseForUnr TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, recipientMtab); auto donorMtab = std::make_shared<TenantMigrationDonorAccessBlocker>( - getServiceContext(), kTenantId, MigrationProtocolEnum::kMultitenantMigrations, kConnString); + getServiceContext(), + UUID::gen(), + kTenantId, + MigrationProtocolEnum::kMultitenantMigrations, + kConnString); TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, donorMtab); ASSERT_FALSE(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), "otherDb"_sd)); @@ -169,7 +183,11 @@ TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationFalseAfterR TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, recipientMtab); auto donorMtab = std::make_shared<TenantMigrationDonorAccessBlocker>( - getServiceContext(), kTenantId, MigrationProtocolEnum::kMultitenantMigrations, kConnString); + getServiceContext(), + UUID::gen(), + kTenantId, + MigrationProtocolEnum::kMultitenantMigrations, + kConnString); TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, donorMtab); ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB)); @@ -195,7 +213,11 @@ TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveShardMergeFalseAfterRemove TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, recipientMtab); auto donorMtab = std::make_shared<TenantMigrationDonorAccessBlocker>( - getServiceContext(), kTenantId, MigrationProtocolEnum::kMultitenantMigrations, kConnString); + getServiceContext(), + UUID::gen(), + kTenantId, + MigrationProtocolEnum::kMultitenantMigrations, + kConnString); TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, donorMtab); ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB)); diff --git a/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp b/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp index ea7d4e1f677..93e3a499b76 100644 --- a/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp +++ b/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp @@ -73,11 +73,13 @@ const StringMap<int> commandDenyListAfterMigration = { TenantMigrationDonorAccessBlocker::TenantMigrationDonorAccessBlocker( ServiceContext* serviceContext, + UUID migrationId, std::string tenantId, MigrationProtocolEnum protocol, std::string recipientConnString) : TenantMigrationAccessBlocker(BlockerType::kDonor, protocol), _serviceContext(serviceContext), + _migrationId(migrationId), _tenantId(std::move(tenantId)), _protocol(protocol), _recipientConnString(std::move(recipientConnString)) { diff --git a/src/mongo/db/repl/tenant_migration_donor_access_blocker.h b/src/mongo/db/repl/tenant_migration_donor_access_blocker.h index fe1311de68f..d04ca51cd14 100644 --- a/src/mongo/db/repl/tenant_migration_donor_access_blocker.h +++ b/src/mongo/db/repl/tenant_migration_donor_access_blocker.h @@ -181,10 +181,15 @@ class TenantMigrationDonorAccessBlocker public TenantMigrationAccessBlocker { public: TenantMigrationDonorAccessBlocker(ServiceContext* serviceContext, + UUID migrationId, std::string tenantId, MigrationProtocolEnum protocol, std::string recipientConnString); + const UUID& getMigrationId() const { + return _migrationId; + } + // // Called by all writes and reads against the database. // @@ -321,6 +326,7 @@ private: void _onMajorityCommitAbortOpTime(stdx::unique_lock<Latch>& lk); ServiceContext* _serviceContext; + const UUID _migrationId; const std::string _tenantId; const MigrationProtocolEnum _protocol; const std::string _recipientConnString; diff --git a/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp b/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp index c89f4a2dd64..7bb9f9822f1 100644 --- a/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp +++ b/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp @@ -45,6 +45,9 @@ MONGO_FAIL_POINT_DEFINE(donorOpObserverFailAfterOnUpdate); const auto tenantIdToDeleteDecoration = OperationContext::declareDecoration<boost::optional<std::string>>(); +const auto migrationIdToDeleteDecoration = + OperationContext::declareDecoration<boost::optional<UUID>>(); + /** * Initializes the TenantMigrationDonorAccessBlocker for the tenant migration denoted by the given * state doc. @@ -53,23 +56,47 @@ void onTransitionToAbortingIndexBuilds(OperationContext* opCtx, const TenantMigrationDonorDocument& donorStateDoc) { invariant(donorStateDoc.getState() == TenantMigrationDonorStateEnum::kAbortingIndexBuilds); - auto mtab = std::make_shared<TenantMigrationDonorAccessBlocker>( - opCtx->getServiceContext(), - donorStateDoc.getTenantId().toString(), - donorStateDoc.getProtocol().value_or(MigrationProtocolEnum::kMultitenantMigrations), - donorStateDoc.getRecipientConnectionString().toString()); - - TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext()) - .add(donorStateDoc.getTenantId(), mtab); - - if (opCtx->writesAreReplicated()) { - // onRollback is not registered on secondaries since secondaries should not fail to apply - // the write. - opCtx->recoveryUnit()->onRollback([opCtx, donorStateDoc] { - TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext()) - .remove(donorStateDoc.getTenantId(), - TenantMigrationAccessBlocker::BlockerType::kDonor); - }); + if (donorStateDoc.getProtocol().value_or(MigrationProtocolEnum::kMultitenantMigrations) == + MigrationProtocolEnum::kMultitenantMigrations) { + auto mtab = std::make_shared<TenantMigrationDonorAccessBlocker>( + opCtx->getServiceContext(), + donorStateDoc.getId(), + donorStateDoc.getTenantId().toString(), + MigrationProtocolEnum::kMultitenantMigrations, + donorStateDoc.getRecipientConnectionString().toString()); + + TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext()) + .add(donorStateDoc.getTenantId(), mtab); + + if (opCtx->writesAreReplicated()) { + // onRollback is not registered on secondaries since secondaries should not fail to + // apply the write. + opCtx->recoveryUnit()->onRollback([opCtx, donorStateDoc] { + TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext()) + .remove(donorStateDoc.getTenantId(), + TenantMigrationAccessBlocker::BlockerType::kDonor); + }); + } + } else { + // The protocol is kShardMerge. + auto mtab = std::make_shared<TenantMigrationDonorAccessBlocker>( + opCtx->getServiceContext(), + donorStateDoc.getId(), + donorStateDoc.getTenantId().toString(), + MigrationProtocolEnum::kShardMerge, + donorStateDoc.getRecipientConnectionString().toString()); + + TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext()) + .addDonorAccessBlocker(mtab); + + if (opCtx->writesAreReplicated()) { + // onRollback is not registered on secondaries since secondaries should not fail to + // apply the write. + opCtx->recoveryUnit()->onRollback([opCtx, donorStateDoc] { + TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext()) + .removeDonorAccessBlocker(donorStateDoc.getId()); + }); + } } } @@ -152,25 +179,30 @@ public: if (!_opCtx->writesAreReplicated()) { // Setting expireAt implies that the TenantMigrationDonorAccessBlocker for this // migration will be removed shortly after this. However, a lagged secondary - // might not manage to advance its majority commit point past the migration commit - // or abort opTime and consequently transition out of the blocking state before the - // TenantMigrationDonorAccessBlocker is removed. When this occurs, blocked reads or - // writes will be left waiting for the migration decision indefinitely. To avoid - // that, notify the TenantMigrationDonorAccessBlocker here that the commit or - // abort opTime has been majority committed (guaranteed to be true since by design - // the donor never marks its state doc as garbage collectable before the migration - // decision is majority committed). + // might not manage to advance its majority commit point past the migration + // commit or abort opTime and consequently transition out of the blocking state + // before the TenantMigrationDonorAccessBlocker is removed. When this occurs, + // blocked reads or writes will be left waiting for the migration decision + // indefinitely. To avoid that, notify the TenantMigrationDonorAccessBlocker + // here that the commit or abort opTime has been majority committed (guaranteed + // to be true since by design the donor never marks its state doc as garbage + // collectable before the migration decision is majority committed). mtab->onMajorityCommitPointUpdate(_donorStateDoc.getCommitOrAbortOpTime().get()); } if (_donorStateDoc.getState() == TenantMigrationDonorStateEnum::kAborted) { invariant(mtab->inStateAborted()); - // The migration durably aborted and is now marked as garbage collectable, remove - // its TenantMigrationDonorAccessBlocker right away to allow back-to-back migration - // retries. - TenantMigrationAccessBlockerRegistry::get(_opCtx->getServiceContext()) - .remove(_donorStateDoc.getTenantId(), - TenantMigrationAccessBlocker::BlockerType::kDonor); + // The migration durably aborted and is now marked as garbage collectable, + // remove its TenantMigrationDonorAccessBlocker right away to allow back-to-back + // migration retries. + if (_donorStateDoc.getProtocol() == MigrationProtocolEnum::kMultitenantMigrations) { + TenantMigrationAccessBlockerRegistry::get(_opCtx->getServiceContext()) + .remove(_donorStateDoc.getTenantId(), + TenantMigrationAccessBlocker::BlockerType::kDonor); + } else { + TenantMigrationAccessBlockerRegistry::get(_opCtx->getServiceContext()) + .removeDonorAccessBlocker(_donorStateDoc.getId()); + } } return; } @@ -273,10 +305,17 @@ void TenantMigrationDonorOpObserver::aboutToDelete(OperationContext* opCtx, // TenantMigrationDonorAccessBlocker as soon as its donor state doc is marked as garbage // collectable. So onDelete should skip removing the TenantMigrationDonorAccessBlocker for // aborted migrations. - tenantIdToDeleteDecoration(opCtx) = - donorStateDoc.getState() == TenantMigrationDonorStateEnum::kAborted - ? boost::none - : boost::make_optional(donorStateDoc.getTenantId().toString()); + if (donorStateDoc.getProtocol() == MigrationProtocolEnum::kMultitenantMigrations) { + tenantIdToDeleteDecoration(opCtx) = + donorStateDoc.getState() == TenantMigrationDonorStateEnum::kAborted + ? boost::none + : boost::make_optional(donorStateDoc.getTenantId().toString()); + } else { + migrationIdToDeleteDecoration(opCtx) = + donorStateDoc.getState() == TenantMigrationDonorStateEnum::kAborted + ? boost::none + : boost::make_optional(donorStateDoc.getId()); + } } } @@ -286,13 +325,21 @@ void TenantMigrationDonorOpObserver::onDelete(OperationContext* opCtx, StmtId stmtId, const OplogDeleteEntryArgs& args) { if (nss == NamespaceString::kTenantMigrationDonorsNamespace && - tenantIdToDeleteDecoration(opCtx) && !tenant_migration_access_blocker::inRecoveryMode(opCtx)) { - opCtx->recoveryUnit()->onCommit([opCtx](boost::optional<Timestamp>) { - TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext()) - .remove(tenantIdToDeleteDecoration(opCtx).get(), - TenantMigrationAccessBlocker::BlockerType::kDonor); - }); + if (tenantIdToDeleteDecoration(opCtx)) { + opCtx->recoveryUnit()->onCommit([opCtx](boost::optional<Timestamp>) { + TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext()) + .remove(tenantIdToDeleteDecoration(opCtx).get(), + TenantMigrationAccessBlocker::BlockerType::kDonor); + }); + } + + if (migrationIdToDeleteDecoration(opCtx)) { + opCtx->recoveryUnit()->onCommit([opCtx](boost::optional<Timestamp>) { + TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext()) + .removeDonorAccessBlocker(migrationIdToDeleteDecoration(opCtx).get()); + }); + } } } diff --git a/src/mongo/db/repl/tenant_migration_donor_service.cpp b/src/mongo/db/repl/tenant_migration_donor_service.cpp index a20182a7fc8..7c3e7c51d0b 100644 --- a/src/mongo/db/repl/tenant_migration_donor_service.cpp +++ b/src/mongo/db/repl/tenant_migration_donor_service.cpp @@ -935,7 +935,7 @@ SemiFuture<void> TenantMigrationDonorService::Instance::run( }) .then([this, self = shared_from_this(), executor, recipientTargeterRS, abortToken] { LOGV2(6104905, - "Waiting for receipient to reach the block timestamp.", + "Waiting for recipient to reach the block timestamp.", "migrationId"_attr = _migrationUuid, "tenantId"_attr = _tenantId); return _waitForRecipientToReachBlockTimestampAndEnterCommittedState( @@ -1028,7 +1028,8 @@ void TenantMigrationDonorService::Instance::_abortIndexBuilds(const Cancellation auto opCtxHolder = cc().makeOperationContext(); auto* opCtx = opCtxHolder.get(); auto* indexBuildsCoordinator = IndexBuildsCoordinator::get(opCtx); - indexBuildsCoordinator->abortTenantIndexBuilds(opCtx, _tenantId, "tenant migration"); + indexBuildsCoordinator->abortTenantIndexBuilds( + opCtx, _protocol, _tenantId, "tenant migration"); } } diff --git a/src/mongo/db/serverless/shard_split_donor_op_observer.cpp b/src/mongo/db/serverless/shard_split_donor_op_observer.cpp index e6c635451e5..a7e15963bf7 100644 --- a/src/mongo/db/serverless/shard_split_donor_op_observer.cpp +++ b/src/mongo/db/serverless/shard_split_donor_op_observer.cpp @@ -140,6 +140,7 @@ void onBlockerInitialization(OperationContext* opCtx, for (const auto& tenantId : optionalTenants.get()) { auto mtab = std::make_shared<TenantMigrationDonorAccessBlocker>( opCtx->getServiceContext(), + donorStateDoc.getId(), tenantId.toString(), MigrationProtocolEnum::kMultitenantMigrations, recipientConnectionString.toString()); diff --git a/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp b/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp index ffb246a8de2..d040690e0cf 100644 --- a/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp +++ b/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp @@ -144,10 +144,12 @@ protected: const std::vector<std::string>& tenants, OperationContext* opCtx, const std::string& connectionStr) { + auto uuid = UUID::gen(); std::vector<std::shared_ptr<TenantMigrationDonorAccessBlocker>> blockers; for (auto& tenant : tenants) { auto mtab = std::make_shared<TenantMigrationDonorAccessBlocker>( _opCtx->getServiceContext(), + uuid, tenant, MigrationProtocolEnum::kMultitenantMigrations, _connectionStr); |