author | Antonio Fuschetto <antonio.fuschetto@mongodb.com> | 2021-10-18 20:54:54 +0000
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-10-19 12:53:53 +0000
commit | 9747e790b50e147997994e45599eba4e19d96d01 (patch)
tree | 8b12fc26e7544394a4a31257ae7e474b0840dbfd /src
parent | b9bd6cbda4b93de67cc20fc2df170e815a59ba22 (diff)
download | mongo-9747e790b50e147997994e45599eba4e19d96d01.tar.gz
SERVER-60489 Handle potential exceptions in balancer recovery phase
Diffstat (limited to 'src')
25 files changed, 163 insertions, 127 deletions
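The common thread of this patch: MigrateInfo used to carry only the collection UUID and resolve the namespace lazily through a config.collections read (MigrateInfo::getNss), a lookup that can fail or throw — in particular during balancer recovery, when the collection behind a persisted migration may no longer exist. The change captures the NamespaceString eagerly at every construction site instead. A minimal standalone sketch of the before/after shape (the types below are simplified stand-ins, not the real MongoDB classes):

```cpp
#include <iostream>
#include <optional>
#include <string>

// Simplified stand-ins for NamespaceString/UUID; not the real MongoDB types.
using NamespaceString = std::string;
using UUID = std::string;

// Before: only the UUID is stored; the namespace is looked up lazily, and
// the lookup can fail (e.g. the collection was dropped in the meantime).
struct MigrateInfoBefore {
    UUID uuid;
    std::optional<NamespaceString> getNss() const {
        // Stand-in for a config.collections read that may find nothing;
        // a dropped collection surfaces as an error (or exception) here.
        return std::nullopt;
    }
};

// After: the namespace is captured at construction time, so no fallible
// catalog round-trip is needed later.
struct MigrateInfoAfter {
    NamespaceString nss;
    UUID uuid;
};

int main() {
    MigrateInfoAfter info{"TestDB.TestColl", "6a1f..."};
    std::cout << "migrating a chunk of " << info.nss << '\n';
}
```

Everything below is the mechanical fallout of threading that nss value through the balancer, the migration manager, and the request types.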
diff --git a/src/mongo/db/s/balancer/balancer.cpp b/src/mongo/db/s/balancer/balancer.cpp
index 711fcea4397..25401a08f21 100644
--- a/src/mongo/db/s/balancer/balancer.cpp
+++ b/src/mongo/db/s/balancer/balancer.cpp
@@ -260,8 +260,10 @@ Balancer::ScopedPauseBalancerRequest Balancer::requestPause() {
     return ScopedPauseBalancerRequest(this);
 }
 
-Status Balancer::rebalanceSingleChunk(OperationContext* opCtx, const ChunkType& chunk) {
-    auto migrateStatus = _chunkSelectionPolicy->selectSpecificChunkToMove(opCtx, chunk);
+Status Balancer::rebalanceSingleChunk(OperationContext* opCtx,
+                                      const NamespaceString& nss,
+                                      const ChunkType& chunk) {
+    auto migrateStatus = _chunkSelectionPolicy->selectSpecificChunkToMove(opCtx, nss, chunk);
     if (!migrateStatus.isOK()) {
         return migrateStatus.getStatus();
     }
@@ -290,6 +292,7 @@ Status Balancer::rebalanceSingleChunk(OperationContext* opCtx, const ChunkType&
 }
 
 Status Balancer::moveSingleChunk(OperationContext* opCtx,
+                                 const NamespaceString& nss,
                                  const ChunkType& chunk,
                                  const ShardId& newShardId,
                                  uint64_t maxChunkSizeBytes,
@@ -304,6 +307,7 @@ Status Balancer::moveSingleChunk(OperationContext* opCtx,
     return _migrationManager.executeManualMigration(
         opCtx,
         MigrateInfo(newShardId,
+                    nss,
                     chunk,
                     forceJumbo ? MoveChunkRequest::ForceJumbo::kForceManual
                                : MoveChunkRequest::ForceJumbo::kDoNotForce,
diff --git a/src/mongo/db/s/balancer/balancer.h b/src/mongo/db/s/balancer/balancer.h
index c62afc0a409..c51e87e4795 100644
--- a/src/mongo/db/s/balancer/balancer.h
+++ b/src/mongo/db/s/balancer/balancer.h
@@ -146,7 +146,9 @@ public:
      * will actually move because it may already be at the best shard. An error will be returned if
      * the attempt to find a better shard or the actual migration fail for any reason.
      */
-    Status rebalanceSingleChunk(OperationContext* opCtx, const ChunkType& chunk);
+    Status rebalanceSingleChunk(OperationContext* opCtx,
+                                const NamespaceString& nss,
+                                const ChunkType& chunk);
 
     /**
      * Blocking call, which requests the balancer to move a single chunk to the specified location
@@ -157,6 +159,7 @@ public:
      * move regardless. If should be used only for user-initiated moves.
      */
     Status moveSingleChunk(OperationContext* opCtx,
+                           const NamespaceString& nss,
                            const ChunkType& chunk,
                            const ShardId& newShardId,
                            uint64_t maxChunkSizeBytes,
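Because these entry points fan out into a pure-virtual selection-policy interface (next files), the new parameter has to be added everywhere in lockstep. A simplified model of why the compiler enforces that — not the real interface:

```cpp
#include <string>

struct ChunkType {};
using NamespaceString = std::string;

// Simplified model of the selection-policy interface change: once the pure
// virtual signature gains a parameter, every override must change with it.
struct ChunkSelectionPolicy {
    virtual ~ChunkSelectionPolicy() = default;
    virtual bool selectSpecificChunkToMove(const NamespaceString& nss,
                                           const ChunkType& chunk) = 0;
};

struct ChunkSelectionPolicyImpl : ChunkSelectionPolicy {
    // 'override' makes the compiler reject a stale single-argument version,
    // which is how every implementation gets updated together.
    bool selectSpecificChunkToMove(const NamespaceString& nss,
                                   const ChunkType& chunk) override {
        return !nss.empty();
    }
};
```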
diff --git a/src/mongo/db/s/balancer/balancer_chunk_selection_policy.h b/src/mongo/db/s/balancer/balancer_chunk_selection_policy.h
index 4851a323118..6b25eb421ad 100644
--- a/src/mongo/db/s/balancer/balancer_chunk_selection_policy.h
+++ b/src/mongo/db/s/balancer/balancer_chunk_selection_policy.h
@@ -116,7 +116,7 @@ public:
      * Otherwise returns migration information for where the chunk should be moved.
      */
     virtual StatusWith<boost::optional<MigrateInfo>> selectSpecificChunkToMove(
-        OperationContext* opCtx, const ChunkType& chunk) = 0;
+        OperationContext* opCtx, const NamespaceString& nss, const ChunkType& chunk) = 0;
 
     /**
      * Asks the chunk selection policy to validate that the specified chunk migration is allowed
diff --git a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp
index 67ce067c200..404ca70e17b 100644
--- a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp
+++ b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp
@@ -424,6 +424,7 @@ StatusWith<MigrateInfoVector> BalancerChunkSelectionPolicyImpl::selectChunksToMo
 
 StatusWith<boost::optional<MigrateInfo>>
 BalancerChunkSelectionPolicyImpl::selectSpecificChunkToMove(OperationContext* opCtx,
+                                                            const NamespaceString& nss,
                                                             const ChunkType& chunk) {
     auto shardStatsStatus = _clusterStats->getStats(opCtx);
     if (!shardStatsStatus.isOK()) {
@@ -432,11 +433,6 @@ BalancerChunkSelectionPolicyImpl::selectSpecificChunkToMove(OperationContext* op
 
     const auto& shardStats = shardStatsStatus.getValue();
 
-    const CollectionType collection = Grid::get(opCtx)->catalogClient()->getCollection(
-        opCtx, chunk.getCollectionUUID(), repl::ReadConcernLevel::kLocalReadConcern);
-
-    const auto& nss = collection.getNss();
-
     auto routingInfoStatus =
         Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, nss);
     if (!routingInfoStatus.isOK()) {
diff --git a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.h b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.h
index 4262ff63dd0..07e24321639 100644
--- a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.h
+++ b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.h
@@ -52,7 +52,7 @@ public:
                                                       const NamespaceString& ns) override;
 
     StatusWith<boost::optional<MigrateInfo>> selectSpecificChunkToMove(
-        OperationContext* opCtx, const ChunkType& chunk) override;
+        OperationContext* opCtx, const NamespaceString& nss, const ChunkType& chunk) override;
 
     Status checkMoveAllowed(OperationContext* opCtx,
                             const ChunkType& chunk,
diff --git a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_test.cpp b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_test.cpp
index 4e1f232fc51..6996a495003 100644
--- a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_test.cpp
+++ b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_test.cpp
@@ -150,8 +150,8 @@ TEST_F(BalancerChunkSelectionTest, TagRangesOverlap) {
         shardTargeterMock(opCtx.get(), kShardId0)->setFindHostReturnValue(kShardHost0);
         shardTargeterMock(opCtx.get(), kShardId1)->setFindHostReturnValue(kShardHost1);
 
-        auto migrateInfoStatus =
-            _chunkSelectionPolicy.get()->selectSpecificChunkToMove(opCtx.get(), chunk);
+        auto migrateInfoStatus = _chunkSelectionPolicy.get()->selectSpecificChunkToMove(
+            opCtx.get(), kNamespace, chunk);
         ASSERT_EQUALS(ErrorCodes::RangeOverlapConflict, migrateInfoStatus.getStatus().code());
     });
diff --git a/src/mongo/db/s/balancer/balancer_policy.cpp b/src/mongo/db/s/balancer/balancer_policy.cpp
index 73f503233b2..6d2a83d7681 100644
--- a/src/mongo/db/s/balancer/balancer_policy.cpp
+++ b/src/mongo/db/s/balancer/balancer_policy.cpp
@@ -362,6 +362,7 @@ MigrateInfo chooseRandomMigration(const ShardStatisticsVector& shardStats,
     const auto& chunks = distribution.getChunks(sourceShardId);
 
     return {destShardId,
+            distribution.nss(),
             chunks[getRandomIndex(chunks.size())],
             MoveChunkRequest::ForceJumbo::kDoNotForce,
             MigrateInfo::chunksImbalance};
@@ -427,8 +428,11 @@ vector<MigrateInfo> BalancerPolicy::balance(const ShardStatisticsVector& shardSt
             }
 
             invariant(to != stat.shardId);
-            migrations.emplace_back(
-                to, chunk, MoveChunkRequest::ForceJumbo::kForceBalancer, MigrateInfo::drain);
+            migrations.emplace_back(to,
+                                    distribution.nss(),
+                                    chunk,
+                                    MoveChunkRequest::ForceJumbo::kForceBalancer,
+                                    MigrateInfo::drain);
             invariant(usedShards->insert(stat.shardId).second);
             invariant(usedShards->insert(to).second);
             break;
@@ -488,6 +492,7 @@ vector<MigrateInfo> BalancerPolicy::balance(const ShardStatisticsVector& shardSt
             invariant(to != stat.shardId);
             migrations.emplace_back(to,
+                                    distribution.nss(),
                                     chunk,
                                     forceJumbo ? MoveChunkRequest::ForceJumbo::kForceBalancer
                                                : MoveChunkRequest::ForceJumbo::kDoNotForce,
@@ -564,8 +569,11 @@ boost::optional<MigrateInfo> BalancerPolicy::balanceSingleChunk(
         return boost::optional<MigrateInfo>();
     }
 
-    return MigrateInfo(
-        newShardId, chunk, MoveChunkRequest::ForceJumbo::kDoNotForce, MigrateInfo::chunksImbalance);
+    return MigrateInfo(newShardId,
+                       distribution.nss(),
+                       chunk,
+                       MoveChunkRequest::ForceJumbo::kDoNotForce,
+                       MigrateInfo::chunksImbalance);
 }
 
 bool BalancerPolicy::_singleZoneBalance(const ShardStatisticsVector& shardStats,
@@ -637,7 +645,8 @@ bool BalancerPolicy::_singleZoneBalance(const ShardStatisticsVector& shardStats,
             continue;
         }
 
-        migrations->emplace_back(to, chunk, forceJumbo, MigrateInfo::chunksImbalance);
+        migrations->emplace_back(
+            to, distribution.nss(), chunk, forceJumbo, MigrateInfo::chunksImbalance);
         invariant(usedShards->insert(chunk.getShard()).second);
         invariant(usedShards->insert(to).second);
         return true;
@@ -666,10 +675,11 @@ string ZoneRange::toString() const {
 }
 
 MigrateInfo::MigrateInfo(const ShardId& a_to,
+                         const NamespaceString& a_nss,
                          const ChunkType& a_chunk,
                          const MoveChunkRequest::ForceJumbo a_forceJumbo,
                          MigrationReason a_reason)
-    : uuid(a_chunk.getCollectionUUID()) {
+    : nss(a_nss), uuid(a_chunk.getCollectionUUID()) {
     invariant(a_chunk.validate());
     invariant(a_to.isValid());
 
@@ -698,21 +708,7 @@ std::string MigrateInfo::getName() const {
     return buf.str();
 }
 
-StatusWith<NamespaceString> MigrateInfo::getNss(OperationContext* opCtx) const {
-    auto grid = Grid::get(opCtx);
-    invariant(grid != nullptr);
-    auto catalogClient = grid->catalogClient();
-    invariant(catalogClient != nullptr);
-    try {
-        const CollectionType collection =
-            catalogClient->getCollection(opCtx, uuid, repl::ReadConcernLevel::kLocalReadConcern);
-        return collection.getNss();
-    } catch (DBException const& e) {
-        return StatusWith<NamespaceString>(e.code(), e.reason());
-    }
-}
-
-BSONObj MigrateInfo::getMigrationTypeQuery(NamespaceString const& nss) const {
+BSONObj MigrateInfo::getMigrationTypeQuery() const {
     // Generates a query object for a single MigrationType based on the namespace and the lower
     // bound of the chunk being moved.
     return BSON(MigrationType::ns(nss.ns()) << MigrationType::min(minKey));
diff --git a/src/mongo/db/s/balancer/balancer_policy.h b/src/mongo/db/s/balancer/balancer_policy.h
index b4c14ef223f..e716122ddd2 100644
--- a/src/mongo/db/s/balancer/balancer_policy.h
+++ b/src/mongo/db/s/balancer/balancer_policy.h
@@ -57,17 +57,18 @@ struct MigrateInfo {
     enum MigrationReason { drain, zoneViolation, chunksImbalance };
 
     MigrateInfo(const ShardId& a_to,
+                const NamespaceString& a_nss,
                 const ChunkType& a_chunk,
                 MoveChunkRequest::ForceJumbo a_forceJumbo,
                 MigrationReason a_reason);
 
     std::string getName() const;
 
-    StatusWith<NamespaceString> getNss(OperationContext* opCtx) const;
-
-    BSONObj getMigrationTypeQuery(NamespaceString const& nss) const;
+    BSONObj getMigrationTypeQuery() const;
 
     std::string toString() const;
 
+    NamespaceString nss;
     UUID uuid;
     ShardId to;
     ShardId from;
diff --git a/src/mongo/db/s/balancer/migration_manager.cpp b/src/mongo/db/s/balancer/migration_manager.cpp
index 6935621ef44..fa94ade6b60 100644
--- a/src/mongo/db/s/balancer/migration_manager.cpp
+++ b/src/mongo/db/s/balancer/migration_manager.cpp
@@ -168,10 +168,6 @@ Status MigrationManager::executeManualMigration(
     _waitForRecovery();
 
     ScopedMigrationRequestsMap scopedMigrationRequests;
-    const auto nssStatus = migrateInfo.getNss(opCtx);
-    if (!nssStatus.isOK()) {
-        return nssStatus.getStatus();
-    }
 
     RemoteCommandResponse remoteCommandResponse =
         _schedule(opCtx,
                   migrateInfo,
@@ -182,7 +178,7 @@ Status MigrationManager::executeManualMigration(
             ->get();
 
     auto swCM = Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(
-        opCtx, nssStatus.getValue());
+        opCtx, migrateInfo.nss);
     if (!swCM.isOK()) {
         return swCM.getStatus();
     }
@@ -343,12 +339,17 @@ void MigrationManager::finishRecovery(OperationContext* opCtx,
         }
 
         const auto& cm = swCM.getValue();
+        const auto uuid = cm.getUUID();
+        if (!uuid) {
+            // The collection has been dropped, so there is no need to recover the migration.
+            continue;
+        }
 
         int scheduledMigrations = 0;
 
         while (!migrateInfos.empty()) {
             auto migrationType = std::move(migrateInfos.front());
-            const auto migrationInfo = migrationType.toMigrateInfo(opCtx);
+            const auto migrationInfo = migrationType.toMigrateInfo(*uuid);
             auto waitForDelete = migrationType.getWaitForDelete();
             migrateInfos.pop_front();
 
@@ -444,10 +445,6 @@ std::shared_ptr<Notification<RemoteCommandResponse>> MigrationManager::_schedule
     bool waitForDelete,
     ScopedMigrationRequestsMap* scopedMigrationRequests) {
 
-    const CollectionType collection = Grid::get(opCtx)->catalogClient()->getCollection(
-        opCtx, migrateInfo.uuid, repl::ReadConcernLevel::kLocalReadConcern);
-    const NamespaceString& nss = collection.getNss();
-
     // Ensure we are not stopped in order to avoid doing the extra work
     {
         stdx::lock_guard<Latch> lock(_mutex);
@@ -475,7 +472,7 @@ std::shared_ptr<Notification<RemoteCommandResponse>> MigrationManager::_schedule
     BSONObjBuilder builder;
     MoveChunkRequest::appendAsCommand(&builder,
-                                      nss,
+                                      migrateInfo.nss,
                                       migrateInfo.version,
                                       migrateInfo.from,
                                       migrateInfo.to,
@@ -493,7 +490,7 @@ std::shared_ptr<Notification<RemoteCommandResponse>> MigrationManager::_schedule
                           "Migration cannot be executed because the balancer is not running"));
     }
 
-    Migration migration(nss, builder.obj());
+    Migration migration(migrateInfo.nss, builder.obj());
 
     auto retVal = migration.completionNotification;
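The finishRecovery() hunk above is the heart of SERVER-60489: the routing information is already in hand at that point, so a missing UUID means the collection was dropped and the persisted migration can simply be skipped, instead of attempting a catalog read that could throw mid-recovery. A standalone sketch of the guard, with stand-in types:

```cpp
#include <deque>
#include <iostream>
#include <optional>
#include <string>

using UUID = std::string;
struct MigrationDoc { std::string ns; };

// Stand-in for ChunkManager::getUUID(): an empty optional models a collection
// dropped between persisting config.migrations and recovering from it.
std::optional<UUID> lookupCollectionUUID(const std::string& ns) {
    if (ns == "dropped.coll")
        return std::nullopt;
    return UUID{"6a1f..."};
}

int main() {
    std::deque<MigrationDoc> pending{{"test.live"}, {"dropped.coll"}};
    for (const auto& doc : pending) {
        const auto uuid = lookupCollectionUUID(doc.ns);
        if (!uuid) {
            // Collection is gone; nothing to recover for this migration.
            std::cout << "skipping " << doc.ns << '\n';
            continue;
        }
        std::cout << "recovering migration for " << doc.ns << " (" << *uuid << ")\n";
    }
}
```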
diff --git a/src/mongo/db/s/balancer/migration_manager_test.cpp b/src/mongo/db/s/balancer/migration_manager_test.cpp
index 94f232b6df0..d310a2c6c75 100644
--- a/src/mongo/db/s/balancer/migration_manager_test.cpp
+++ b/src/mongo/db/s/balancer/migration_manager_test.cpp
@@ -131,10 +131,12 @@ TEST_F(MigrationManagerTest, OneCollectionTwoMigrations) {
     // Going to request that these two chunks get migrated.
     const std::vector<MigrateInfo> migrationRequests{{kShardId1,
+                                                      collName,
                                                       chunk1,
                                                       MoveChunkRequest::ForceJumbo::kDoNotForce,
                                                       MigrateInfo::chunksImbalance},
                                                      {kShardId3,
+                                                      collName,
                                                       chunk2,
                                                       MoveChunkRequest::ForceJumbo::kDoNotForce,
                                                       MigrateInfo::chunksImbalance}};
@@ -199,18 +201,22 @@ TEST_F(MigrationManagerTest, TwoCollectionsTwoMigrationsEach) {
     // Going to request that these four chunks get migrated.
     const std::vector<MigrateInfo> migrationRequests{{kShardId1,
+                                                      collName1,
                                                       chunk1coll1,
                                                       MoveChunkRequest::ForceJumbo::kDoNotForce,
                                                       MigrateInfo::chunksImbalance},
                                                      {kShardId3,
+                                                      collName1,
                                                       chunk2coll1,
                                                       MoveChunkRequest::ForceJumbo::kDoNotForce,
                                                       MigrateInfo::chunksImbalance},
                                                      {kShardId1,
+                                                      collName2,
                                                       chunk1coll2,
                                                       MoveChunkRequest::ForceJumbo::kDoNotForce,
                                                       MigrateInfo::chunksImbalance},
                                                      {kShardId3,
+                                                      collName2,
                                                       chunk2coll2,
                                                       MoveChunkRequest::ForceJumbo::kDoNotForce,
                                                       MigrateInfo::chunksImbalance}};
@@ -269,10 +275,12 @@ TEST_F(MigrationManagerTest, SourceShardNotFound) {
     // Going to request that these two chunks get migrated.
     const std::vector<MigrateInfo> migrationRequests{{kShardId1,
+                                                      collName,
                                                       chunk1,
                                                       MoveChunkRequest::ForceJumbo::kDoNotForce,
                                                       MigrateInfo::chunksImbalance},
                                                      {kShardId3,
+                                                      collName,
                                                       chunk2,
                                                       MoveChunkRequest::ForceJumbo::kDoNotForce,
                                                       MigrateInfo::chunksImbalance}};
@@ -324,6 +332,7 @@ TEST_F(MigrationManagerTest, JumboChunkResponseBackwardsCompatibility) {
     // Going to request that this chunk gets migrated.
     const std::vector<MigrateInfo> migrationRequests{{kShardId1,
+                                                      collName,
                                                       chunk1,
                                                       MoveChunkRequest::ForceJumbo::kDoNotForce,
                                                       MigrateInfo::chunksImbalance}};
@@ -380,6 +389,7 @@ TEST_F(MigrationManagerTest, InterruptMigration) {
                   ErrorCodes::BalancerInterrupted,
                   _migrationManager->executeManualMigration(opCtx.get(),
                                                             {kShardId1,
+                                                             collName,
                                                              chunk,
                                                              MoveChunkRequest::ForceJumbo::kDoNotForce,
                                                              MigrateInfo::chunksImbalance},
@@ -410,6 +420,7 @@ TEST_F(MigrationManagerTest, InterruptMigration) {
     ASSERT_EQ(ErrorCodes::BalancerInterrupted,
               _migrationManager->executeManualMigration(operationContext(),
                                                         {kShardId1,
+                                                         collName,
                                                          chunk,
                                                          MoveChunkRequest::ForceJumbo::kDoNotForce,
                                                          MigrateInfo::chunksImbalance},
@@ -482,6 +493,7 @@ TEST_F(MigrationManagerTest, RestartMigrationManager) {
     ASSERT_OK(
         _migrationManager->executeManualMigration(opCtx.get(),
                                                   {kShardId1,
+                                                   collName,
                                                    chunk1,
                                                    MoveChunkRequest::ForceJumbo::kDoNotForce,
                                                    MigrateInfo::chunksImbalance},
@@ -636,10 +648,12 @@ TEST_F(MigrationManagerTest, RemoteCallErrorConversionToOperationFailed) {
     // Going to request that these two chunks get migrated.
    const std::vector<MigrateInfo> migrationRequests{{kShardId1,
+                                                      collName,
                                                       chunk1,
                                                       MoveChunkRequest::ForceJumbo::kDoNotForce,
                                                       MigrateInfo::chunksImbalance},
                                                      {kShardId3,
+                                                      collName,
                                                       chunk2,
                                                       MoveChunkRequest::ForceJumbo::kDoNotForce,
                                                       MigrateInfo::chunksImbalance}};
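For context on the next file: writeMigration() treats an insert into config.migrations as a claim on the chunk, retrying on DuplicateKey and reading back the existing document to decide whether this migration is already owned. A toy model of that claim pattern (an assumed simplification; the real code goes through the catalog client and a retry loop):

```cpp
#include <iostream>
#include <map>
#include <string>

// Toy stand-in for config.migrations: key = "<ns>|<minKey>", value = dest shard.
std::map<std::string, std::string> configMigrations;

// Models the idea behind ScopedMigrationRequest::writeMigration(): the unique
// insert acts as a lock; a duplicate key means someone already claimed this
// chunk's migration, so compare destinations before giving up.
bool claimMigration(const std::string& ns, const std::string& minKey,
                    const std::string& toShard) {
    const std::string key = ns + "|" + minKey;
    auto [it, inserted] = configMigrations.emplace(key, toShard);
    if (inserted)
        return true;                // we own the migration document
    return it->second == toShard;  // same destination => treat as already ours
}

int main() {
    std::cout << claimMigration("test.coll", "{a: -100}", "shard1") << '\n';  // 1
    std::cout << claimMigration("test.coll", "{a: -100}", "shard3") << '\n';  // 0
}
```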
diff --git a/src/mongo/db/s/balancer/scoped_migration_request.cpp b/src/mongo/db/s/balancer/scoped_migration_request.cpp
index 5cc3fed4581..4e44b2d7e7e 100644
--- a/src/mongo/db/s/balancer/scoped_migration_request.cpp
+++ b/src/mongo/db/s/balancer/scoped_migration_request.cpp
@@ -98,14 +98,8 @@ StatusWith<ScopedMigrationRequest> ScopedMigrationRequest::writeMigration(
     OperationContext* opCtx, const MigrateInfo& migrateInfo, bool waitForDelete) {
     auto const grid = Grid::get(opCtx);
 
-    const auto nssStatus = migrateInfo.getNss(opCtx);
-    if (!nssStatus.isOK()) {
-        return nssStatus.getStatus();
-    }
-    const auto nss = nssStatus.getValue();
-
     // Try to write a unique migration document to config.migrations.
-    const MigrationType migrationType(nss, migrateInfo, waitForDelete);
+    const MigrationType migrationType(migrateInfo, waitForDelete);
 
     for (int retry = 0; retry < kDuplicateKeyErrorMaxRetries; ++retry) {
         Status result = grid->catalogClient()->insertConfigDocument(
@@ -121,7 +115,7 @@ StatusWith<ScopedMigrationRequest> ScopedMigrationRequest::writeMigration(
                 ReadPreferenceSetting{ReadPreference::PrimaryOnly},
                 repl::ReadConcernLevel::kLocalReadConcern,
                 MigrationType::ConfigNS,
-                migrateInfo.getMigrationTypeQuery(nss),
+                migrateInfo.getMigrationTypeQuery(),
                 BSONObj(),
                 boost::none);
             if (!statusWithMigrationQueryResult.isOK()) {
@@ -148,7 +142,7 @@ StatusWith<ScopedMigrationRequest> ScopedMigrationRequest::writeMigration(
             }
 
             MigrateInfo activeMigrateInfo =
-                statusWithActiveMigration.getValue().toMigrateInfo(opCtx);
+                statusWithActiveMigration.getValue().toMigrateInfo(migrateInfo.uuid);
             if (activeMigrateInfo.to != migrateInfo.to ||
                 activeMigrateInfo.from != migrateInfo.from) {
                 LOGV2(
@@ -170,7 +164,7 @@ StatusWith<ScopedMigrationRequest> ScopedMigrationRequest::writeMigration(
         // As long as there isn't a DuplicateKey error, the document may have been written, and it's
         // safe (won't delete another migration's document) and necessary to try to clean up the
         // document via the destructor.
-        ScopedMigrationRequest scopedMigrationRequest(opCtx, nss, migrateInfo.minKey);
+        ScopedMigrationRequest scopedMigrationRequest(opCtx, migrateInfo.nss, migrateInfo.minKey);
 
         // If there was a write error, let the object go out of scope and clean up in the
         // destructor.
@@ -185,7 +179,7 @@ StatusWith<ScopedMigrationRequest> ScopedMigrationRequest::writeMigration(
                   str::stream() << "Failed to insert the config.migrations document after max "
                                 << "number of retries. Chunk '"
                                 << ChunkRange(migrateInfo.minKey, migrateInfo.maxKey).toString()
-                                << "' in collection '" << nss
+                                << "' in collection '" << migrateInfo.nss
                                 << "' was being moved (somewhere) by another operation.");
 }
diff --git a/src/mongo/db/s/balancer/scoped_migration_request_test.cpp b/src/mongo/db/s/balancer/scoped_migration_request_test.cpp
index 44ad31f3820..b6f7510bfd0 100644
--- a/src/mongo/db/s/balancer/scoped_migration_request_test.cpp
+++ b/src/mongo/db/s/balancer/scoped_migration_request_test.cpp
@@ -101,6 +101,7 @@ ScopedMigrationRequest ScopedMigrationRequestTest::makeScopedMigrationRequest(
 }
 
 MigrateInfo ScopedMigrationRequestTest::makeMigrateInfo() {
+    const NamespaceString collNss{kNs};
     const auto collUuid = UUID::gen();
     const ChunkVersion kChunkVersion{1, 2, OID::gen(), Timestamp()};
 
@@ -115,9 +116,10 @@ MigrateInfo ScopedMigrationRequestTest::makeMigrateInfo() {
     ASSERT_OK(chunkType.validate());
 
     // Initialize the sharded TO collection
-    setupCollection(NamespaceString(kNs), KeyPattern(BSON("_id" << 1)), {chunkType});
+    setupCollection(collNss, KeyPattern(BSON("_id" << 1)), {chunkType});
 
     return MigrateInfo(kToShard,
+                       collNss,
                        chunkType,
                        MoveChunkRequest::ForceJumbo::kDoNotForce,
                        MigrateInfo::chunksImbalance);
diff --git a/src/mongo/db/s/balancer/type_migration.cpp b/src/mongo/db/s/balancer/type_migration.cpp
index 1367926056c..2bd55d93469 100644
--- a/src/mongo/db/s/balancer/type_migration.cpp
+++ b/src/mongo/db/s/balancer/type_migration.cpp
@@ -55,10 +55,8 @@ const BSONField<std::string> MigrationType::forceJumbo("forceJumbo");
 
 MigrationType::MigrationType() = default;
 
-MigrationType::MigrationType(const NamespaceString& nss,
-                             const MigrateInfo& info,
-                             bool waitForDelete)
-    : _nss(nss),
+MigrationType::MigrationType(const MigrateInfo& info, bool waitForDelete)
+    : _nss(info.nss),
       _min(info.minKey),
       _max(info.maxKey),
       _fromShard(info.from),
@@ -156,18 +154,16 @@ BSONObj MigrationType::toBSON() const {
     return builder.obj();
 }
 
-MigrateInfo MigrationType::toMigrateInfo(OperationContext* opCtx) const {
-    const CollectionType collection = Grid::get(opCtx)->catalogClient()->getCollection(
-        opCtx, _nss, repl::ReadConcernLevel::kLocalReadConcern);
-
+MigrateInfo MigrationType::toMigrateInfo(const UUID& uuid) const {
     ChunkType chunk;
     chunk.setShard(_fromShard);
-    chunk.setCollectionUUID(collection.getUuid());
+    chunk.setCollectionUUID(uuid);
     chunk.setMin(_min);
     chunk.setMax(_max);
     chunk.setVersion(_chunkVersion);
 
     return MigrateInfo(_toShard,
+                       _nss,
                        chunk,
                        MoveChunkRequest::parseForceJumbo(_forceJumbo),
                        MigrateInfo::chunksImbalance);
diff --git a/src/mongo/db/s/balancer/type_migration.h b/src/mongo/db/s/balancer/type_migration.h
index 24ada030c3a..c082d4a9a32 100644
--- a/src/mongo/db/s/balancer/type_migration.h
+++ b/src/mongo/db/s/balancer/type_migration.h
@@ -60,7 +60,7 @@ public:
      * The Balancer encapsulates migration information in MigrateInfo objects, so this facilitates
      * conversion to a config.migrations entry format.
      */
-    MigrationType(const NamespaceString& nss, const MigrateInfo& info, bool waitForDelete);
+    MigrationType(const MigrateInfo& info, bool waitForDelete);
 
     /**
      * Constructs a new MigrationType object from BSON. Expects all fields to be present, and errors
@@ -76,7 +76,7 @@ public:
     /**
      * Helper function for the Balancer that uses MigrateInfo objects to schedule migrations.
     */
-    MigrateInfo toMigrateInfo(OperationContext* opCtx) const;
+    MigrateInfo toMigrateInfo(const UUID& uuid) const;
 
     const NamespaceString& getNss() const {
         return _nss;
diff --git a/src/mongo/db/s/balancer/type_migration_test.cpp b/src/mongo/db/s/balancer/type_migration_test.cpp
index 0b013a750b5..fcb09569936 100644
--- a/src/mongo/db/s/balancer/type_migration_test.cpp
+++ b/src/mongo/db/s/balancer/type_migration_test.cpp
@@ -66,10 +66,11 @@ TEST(MigrationTypeTest, ConvertFromMigrationInfo) {
     ASSERT_OK(chunkType.validate());
 
     MigrateInfo migrateInfo(kToShard,
+                            NamespaceString(kNs),
                             chunkType,
                             MoveChunkRequest::ForceJumbo::kDoNotForce,
                             MigrateInfo::chunksImbalance);
-    MigrationType migrationType(NamespaceString(kNs), migrateInfo, kWaitForDelete);
+    MigrationType migrationType(migrateInfo, kWaitForDelete);
 
     BSONObjBuilder builder;
     builder.append(MigrationType::ns(), kNs);
diff --git a/src/mongo/db/s/chunk_splitter.cpp b/src/mongo/db/s/chunk_splitter.cpp
index 3f653a714ec..34b419367ae 100644
--- a/src/mongo/db/s/chunk_splitter.cpp
+++ b/src/mongo/db/s/chunk_splitter.cpp
@@ -127,7 +127,7 @@ void moveChunk(OperationContext* opCtx, const NamespaceString& nss, const BSONOb
     chunkToMove.setMax(suggestedChunk.getMax());
     chunkToMove.setVersion(suggestedChunk.getLastmod());
 
-    uassertStatusOK(configsvr_client::rebalanceChunk(opCtx, chunkToMove));
+    uassertStatusOK(configsvr_client::rebalanceChunk(opCtx, nss, chunkToMove));
 }
 
 /**
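With the namespace stored in the config.migrations document and the UUID now passed in by the caller, MigrationType::toMigrateInfo() no longer needs an OperationContext at all: the conversion becomes a pure function. Roughly, with stand-in types:

```cpp
#include <string>

using UUID = std::string;
using NamespaceString = std::string;

struct ChunkType { UUID collectionUUID; std::string min, max, shard; };
struct MigrateInfo { NamespaceString nss; ChunkType chunk; std::string to; };

// Stand-in for MigrationType: one persisted config.migrations entry.
struct MigrationType {
    NamespaceString nss;
    std::string min, max, fromShard, toShard;

    // After the patch the UUID is an argument, so conversion involves no
    // catalog read that could throw mid-recovery.
    MigrateInfo toMigrateInfo(const UUID& uuid) const {
        return MigrateInfo{nss, ChunkType{uuid, min, max, fromShard}, toShard};
    }
};

int main() {
    MigrationType doc{"test.coll", "{a: 1}", "{a: 10}", "shard0", "shard1"};
    MigrateInfo info = doc.toMigrateInfo("6a1f...");
    return info.nss.empty() ? 1 : 0;
}
```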
diff --git a/src/mongo/db/s/config/configsvr_move_chunk_command.cpp b/src/mongo/db/s/config/configsvr_move_chunk_command.cpp
index b642d34e73e..ce63539e9f0 100644
--- a/src/mongo/db/s/config/configsvr_move_chunk_command.cpp
+++ b/src/mongo/db/s/config/configsvr_move_chunk_command.cpp
@@ -94,17 +94,23 @@ public:
         repl::ReadConcernArgs::get(opCtx) =
             repl::ReadConcernArgs(repl::ReadConcernLevel::kLocalReadConcern);
 
-        auto request = uassertStatusOK(BalanceChunkRequest::parseFromConfigCommand(cmdObj));
-
-        // pre v5.1 compatibility
-        if (request.getNss()) {
-            const auto collection = Grid::get(opCtx)->catalogClient()->getCollection(
-                opCtx, *request.getNss(), repl::ReadConcernLevel::kLocalReadConcern);
-            request.setCollectionUUID(collection.getUuid());
+        auto request = uassertStatusOK(
+            BalanceChunkRequest::parseFromConfigCommand(cmdObj, false /* requireUUID */));
+
+        const auto& nss = request.getNss();
+
+        // In case of mixed binaries including v5.0, the collection UUID field may not be attached
+        // to the chunk.
+        if (!request.getChunk().hasCollectionUUID_UNSAFE()) {
+            // TODO (SERVER-60792): Remove the following logic after v6.0 branches out.
+            const auto& collection = Grid::get(opCtx)->catalogClient()->getCollection(
+                opCtx, nss, repl::ReadConcernLevel::kLocalReadConcern);
+            request.setCollectionUUID(collection.getUuid());  // Set collection UUID on chunk member
         }
 
         if (request.hasToShardId()) {
             uassertStatusOK(Balancer::get(opCtx)->moveSingleChunk(opCtx,
+                                                                  nss,
                                                                   request.getChunk(),
                                                                   request.getToShardId(),
                                                                   request.getMaxChunkSizeBytes(),
@@ -112,7 +118,8 @@ public:
                                                                   request.getWaitForDelete(),
                                                                   request.getForceJumbo()));
         } else {
-            uassertStatusOK(Balancer::get(opCtx)->rebalanceSingleChunk(opCtx, request.getChunk()));
+            uassertStatusOK(
+                Balancer::get(opCtx)->rebalanceSingleChunk(opCtx, nss, request.getChunk()));
         }
 
         return true;
diff --git a/src/mongo/s/catalog/type_chunk.cpp b/src/mongo/s/catalog/type_chunk.cpp
index 85615320d18..68bf50e2dc2 100644
--- a/src/mongo/s/catalog/type_chunk.cpp
+++ b/src/mongo/s/catalog/type_chunk.cpp
@@ -216,7 +216,8 @@ ChunkType::ChunkType(CollectionUUID collectionUUID,
       _version(std::move(version)),
       _shard(std::move(shardId)) {}
 
-StatusWith<ChunkType> ChunkType::parseFromConfigBSONCommand(const BSONObj& source) {
+StatusWith<ChunkType> ChunkType::parseFromConfigBSONCommand(const BSONObj& source,
+                                                            bool requireUUID) {
     ChunkType chunk;
 
     {
@@ -248,9 +249,8 @@ StatusWith<ChunkType> ChunkType::parseFromConfigBSONCommand(const BSONObj& sourc
         }
     }
 
-    // There must be at least uuid
-    if (!chunk._collectionUUID) {
-        return {ErrorCodes::FailedToParse, str::stream() << "There must be a uuid present"};
+    if (requireUUID && !chunk._collectionUUID) {
+        return {ErrorCodes::FailedToParse, str::stream() << "There must be a UUID present"};
     }
 
     {
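The requireUUID flag exists because, in a mixed 5.0/5.1 cluster, an older mongos may send _configsvrMoveChunk without the chunk's collection UUID; the config server therefore parses leniently and backfills the UUID itself (the command change above). A toy model of that lenient parse — a stand-in parser, not the real BSON machinery:

```cpp
#include <iostream>
#include <map>
#include <optional>
#include <string>

// Toy "BSON": field name -> string value.
using BSONObj = std::map<std::string, std::string>;

struct ChunkType { std::optional<std::string> collectionUUID; };

// Models ChunkType::parseFromConfigBSONCommand(obj, requireUUID): a missing
// UUID is a parse error only when the caller insists on it.
std::optional<ChunkType> parseChunk(const BSONObj& obj, bool requireUUID = true) {
    ChunkType chunk;
    if (auto it = obj.find("uuid"); it != obj.end())
        chunk.collectionUUID = it->second;
    if (requireUUID && !chunk.collectionUUID)
        return std::nullopt;  // "There must be a UUID present"
    return chunk;
}

int main() {
    BSONObj fromOldMongos{{"ns", "TestDB.TestColl"}};  // no uuid field
    std::cout << bool(parseChunk(fromOldMongos)) << '\n';         // 0: rejected
    std::cout << bool(parseChunk(fromOldMongos, false)) << '\n';  // 1: tolerated
}
```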
diff --git a/src/mongo/s/catalog/type_chunk.h b/src/mongo/s/catalog/type_chunk.h
index 925bb7075fe..a99983da6fb 100644
--- a/src/mongo/s/catalog/type_chunk.h
+++ b/src/mongo/s/catalog/type_chunk.h
@@ -224,7 +224,10 @@ public:
      * Also does validation of the contents. Note that 'parseFromConfigBSONCommand' does not return
      * ErrorCodes::NoSuchKey if the '_id' field is missing while 'fromConfigBSON' does.
      */
-    static StatusWith<ChunkType> parseFromConfigBSONCommand(const BSONObj& source);
+    // TODO (SERVER-60792): Get rid of "requireUUID" once v6.0 branches out. Starting from v5.1, the
+    // collection UUID will always be present in the chunk.
+    static StatusWith<ChunkType> parseFromConfigBSONCommand(const BSONObj& source,
+                                                            bool requireUUID = true);
 
     static StatusWith<ChunkType> fromConfigBSON(const BSONObj& source,
                                                 const OID& epoch,
                                                 const Timestamp& timestamp);
@@ -258,6 +261,14 @@ public:
      * Getters and setters.
      */
 
+    // TODO (SERVER-60792): Get rid of this function once v6.0 branches out. Due to a missing
+    // addition of the UUID field in v5.0 BalanceChunkRequest, it can happen that the field is not
+    // set. Mark as "UNSAFE" to make it clear that this method is just intended to be used for this
+    // specific purpose.
+    bool hasCollectionUUID_UNSAFE() const {
+        return (bool)_collectionUUID;
+    }
+
     const CollectionUUID& getCollectionUUID() const {
         invariant(_collectionUUID);
         return *_collectionUUID;
diff --git a/src/mongo/s/commands/cluster_move_chunk_cmd.cpp b/src/mongo/s/commands/cluster_move_chunk_cmd.cpp
index 7c2c5d919e9..fa0fc7b9068 100644
--- a/src/mongo/s/commands/cluster_move_chunk_cmd.cpp
+++ b/src/mongo/s/commands/cluster_move_chunk_cmd.cpp
@@ -190,6 +190,7 @@ public:
             chunkType.setVersion(cm.getVersion());
 
             uassertStatusOK(configsvr_client::moveChunk(opCtx,
+                                                        nss,
                                                         chunkType,
                                                         to->getId(),
                                                         maxChunkSizeBytes,
diff --git a/src/mongo/s/config_server_client.cpp b/src/mongo/s/config_server_client.cpp
index e08273ec916..d311fc62c6f 100644
--- a/src/mongo/s/config_server_client.cpp
+++ b/src/mongo/s/config_server_client.cpp
@@ -45,6 +45,7 @@ const ReadPreferenceSetting kPrimaryOnlyReadPreference{ReadPreference::PrimaryOn
 }  // namespace
 
 Status moveChunk(OperationContext* opCtx,
+                 const NamespaceString& nss,
                  const ChunkType& chunk,
                  const ShardId& newShardId,
                  int64_t maxChunkSizeBytes,
@@ -53,13 +54,18 @@ Status moveChunk(OperationContext* opCtx,
                  bool forceJumbo) {
     auto shardRegistry = Grid::get(opCtx)->shardRegistry();
     auto shard = shardRegistry->getConfigShard();
-    auto cmdResponseStatus = shard->runCommand(
-        opCtx,
-        kPrimaryOnlyReadPreference,
-        "admin",
-        BalanceChunkRequest::serializeToMoveCommandForConfig(
-            chunk, newShardId, maxChunkSizeBytes, secondaryThrottle, waitForDelete, forceJumbo),
-        Shard::RetryPolicy::kIdempotent);
+    auto cmdResponseStatus =
+        shard->runCommand(opCtx,
+                          kPrimaryOnlyReadPreference,
+                          "admin",
+                          BalanceChunkRequest::serializeToMoveCommandForConfig(nss,
+                                                                               chunk,
+                                                                               newShardId,
+                                                                               maxChunkSizeBytes,
+                                                                               secondaryThrottle,
+                                                                               waitForDelete,
+                                                                               forceJumbo),
+                          Shard::RetryPolicy::kIdempotent);
     if (!cmdResponseStatus.isOK()) {
         return cmdResponseStatus.getStatus();
     }
@@ -67,14 +73,14 @@ Status moveChunk(OperationContext* opCtx,
     return cmdResponseStatus.getValue().commandStatus;
 }
 
-Status rebalanceChunk(OperationContext* opCtx, const ChunkType& chunk) {
+Status rebalanceChunk(OperationContext* opCtx, const NamespaceString& nss, const ChunkType& chunk) {
     auto shardRegistry = Grid::get(opCtx)->shardRegistry();
     auto shard = shardRegistry->getConfigShard();
     auto cmdResponseStatus = shard->runCommandWithFixedRetryAttempts(
         opCtx,
         kPrimaryOnlyReadPreference,
         "admin",
-        BalanceChunkRequest::serializeToRebalanceCommandForConfig(chunk),
+        BalanceChunkRequest::serializeToRebalanceCommandForConfig(nss, chunk),
         Shard::RetryPolicy::kNotIdempotent);
     if (!cmdResponseStatus.isOK()) {
         return cmdResponseStatus.getStatus();
diff --git a/src/mongo/s/config_server_client.h b/src/mongo/s/config_server_client.h
index e7115fb7cd2..4a059e9815f 100644
--- a/src/mongo/s/config_server_client.h
+++ b/src/mongo/s/config_server_client.h
@@ -48,6 +48,7 @@ namespace configsvr_client {
 * Requests the balancer to move the specified chunk off of its current shard to the new shard.
 */
Status moveChunk(OperationContext* opCtx,
+                 const NamespaceString& nss,
                 const ChunkType& chunk,
                 const ShardId& newShardId,
                 int64_t maxChunkSizeBytes,
@@ -59,7 +60,7 @@ Status moveChunk(OperationContext* opCtx,
 * Requests the balancer to move the specified chunk off of its current shard to a shard, considered
 * more appropriate under the balancing policy which is currently in effect.
 */
-Status rebalanceChunk(OperationContext* opCtx, const ChunkType& chunk);
+Status rebalanceChunk(OperationContext* opCtx, const NamespaceString& nss, const ChunkType& chunk);
 
}  // namespace configsvr_client
}  // namespace mongo
diff --git a/src/mongo/s/request_types/balance_chunk_request_test.cpp b/src/mongo/s/request_types/balance_chunk_request_test.cpp
index 236b7fbc1d0..4de14acf428 100644
--- a/src/mongo/s/request_types/balance_chunk_request_test.cpp
+++ b/src/mongo/s/request_types/balance_chunk_request_test.cpp
@@ -51,9 +51,10 @@ TEST(BalanceChunkRequest, ParseFromConfigCommandNoSecondaryThrottle) {
                  << "min" << BSON("a" << -100LL) << "max" << BSON("a" << 100LL) << "shard"
                  << "TestShard0000"
                  << "lastmod" << Date_t::fromMillisSinceEpoch(version.toLong()) << "lastmodEpoch"
-                 << version.epoch() << "lastmodTimestamp" << version.getTimestamp())));
+                 << version.epoch() << "lastmodTimestamp" << version.getTimestamp()),
+        false /* requireUUID */));
     const auto& chunk = request.getChunk();
-    ASSERT_EQ("TestDB.TestColl", request.getNss()->ns());
+    ASSERT_EQ("TestDB.TestColl", request.getNss().ns());
     ASSERT_BSONOBJ_EQ(BSON("a" << -100LL), chunk.getMin());
     ASSERT_BSONOBJ_EQ(BSON("a" << 100LL), chunk.getMax());
     ASSERT_EQ(ShardId("TestShard0000"), chunk.getShard());
@@ -64,16 +65,21 @@
                       secondaryThrottle.getSecondaryThrottle());
 }
 
+// TODO (SERVER-60792): Get rid of the collection namespace from BSON once v6.0 branches out, as it
+// will become a no longer mandatory argument. Ideally both variants should be tested.
 TEST(BalanceChunkRequest, ParseFromConfigCommandWithUUID) {
     const auto uuid = UUID::gen();
     const ChunkVersion version(1, 0, OID::gen(), Timestamp());
     auto request = assertGet(BalanceChunkRequest::parseFromConfigCommand(
-        BSON("_configsvrMoveChunk" << 1 << "uuid" << uuid << "min" << BSON("a" << -100LL) << "max"
+        BSON("_configsvrMoveChunk" << 1 << "ns"
+                                   << "TestDB.TestColl"
+                                   << "uuid" << uuid << "min" << BSON("a" << -100LL) << "max"
                                    << BSON("a" << 100LL) << "shard"
                                    << "TestShard0000"
                                    << "lastmod" << Date_t::fromMillisSinceEpoch(version.toLong())
                                    << "lastmodEpoch" << version.epoch() << "lastmodTimestamp"
-                                   << version.getTimestamp())));
+                                   << version.getTimestamp()),
+        true /* requireUUID */));
     const auto& chunk = request.getChunk();
     ASSERT_EQ(uuid, chunk.getCollectionUUID());
     ASSERT_BSONOBJ_EQ(BSON("a" << -100LL), chunk.getMin());
@@ -97,9 +103,10 @@
             << "lastmod" << Date_t::fromMillisSinceEpoch(version.toLong()) << "lastmodEpoch"
             << version.epoch() << "lastmodTimestamp" << version.getTimestamp()
             << "secondaryThrottle"
-            << BSON("_secondaryThrottle" << true << "writeConcern" << BSON("w" << 2)))));
+            << BSON("_secondaryThrottle" << true << "writeConcern" << BSON("w" << 2))),
+        false /* requireUUID */));
     const auto& chunk = request.getChunk();
-    ASSERT_EQ("TestDB.TestColl", request.getNss()->ns());
+    ASSERT_EQ("TestDB.TestColl", request.getNss().ns());
     ASSERT_BSONOBJ_EQ(BSON("a" << -100LL), chunk.getMin());
     ASSERT_BSONOBJ_EQ(BSON("a" << 100LL), chunk.getMax());
     ASSERT_EQ(ShardId("TestShard0000"), chunk.getShard());
diff --git a/src/mongo/s/request_types/balance_chunk_request_type.cpp b/src/mongo/s/request_types/balance_chunk_request_type.cpp
index 36ef5b08dac..0223a94f782 100644
--- a/src/mongo/s/request_types/balance_chunk_request_type.cpp
+++ b/src/mongo/s/request_types/balance_chunk_request_type.cpp
@@ -58,31 +58,20 @@ BalanceChunkRequest::BalanceChunkRequest(ChunkType chunk,
                                          MigrationSecondaryThrottleOptions secondaryThrottle)
     : _chunk(std::move(chunk)), _secondaryThrottle(std::move(secondaryThrottle)) {}
 
-StatusWith<BalanceChunkRequest> BalanceChunkRequest::parseFromConfigCommand(const BSONObj& obj) {
-    boost::optional<NamespaceString> nss;
+StatusWith<BalanceChunkRequest> BalanceChunkRequest::parseFromConfigCommand(const BSONObj& obj,
+                                                                            bool requireUUID) {
+    NamespaceString nss;
     {
-        std::string chunkNS;
-        Status status = bsonExtractStringField(obj, kNS, &chunkNS);
-        if (status.isOK()) {
-            nss = NamespaceString(chunkNS);
-        } else if (status != ErrorCodes::NoSuchKey) {
+        std::string ns;
+        Status status = bsonExtractStringField(obj, kNS, &ns);
+        if (!status.isOK()) {
             return status;
         }
+        nss = NamespaceString(ns);
     }
 
-    const auto chunkStatus = [&]() {
-        if (nss) {
-            // Append an empty UUID because the ChunkType code expects it
-            BSONObjBuilder copy;
-            copy.appendElements(obj);
-            std::array<unsigned char, UUID::kNumBytes> empty;
-            UUID::fromCDR(empty).appendToBuilder(&copy, ChunkType::collectionUUID.name());
-            return ChunkType::parseFromConfigBSONCommand(copy.obj());
-        }
-        return ChunkType::parseFromConfigBSONCommand(obj);
-    }();
-
+    const auto chunkStatus = ChunkType::parseFromConfigBSONCommand(obj, requireUUID);
     if (!chunkStatus.isOK()) {
         return chunkStatus.getStatus();
     }
@@ -167,6 +156,7 @@ StatusWith<BalanceChunkRequest> BalanceChunkRequest::parseFromConfigCommand(cons
 }
 
 BSONObj BalanceChunkRequest::serializeToMoveCommandForConfig(
+    const NamespaceString& nss,
     const ChunkType& chunk,
     const ShardId& newShardId,
     int64_t maxChunkSizeBytes,
@@ -177,6 +167,7 @@ BSONObj BalanceChunkRequest::serializeToMoveCommandForConfig(
 
     BSONObjBuilder cmdBuilder;
     cmdBuilder.append(kConfigSvrMoveChunk, 1);
+    cmdBuilder.append(kNS, nss.ns());
     cmdBuilder.appendElements(chunk.toConfigBSON());
     // ChunkType::toConfigBSON() no longer adds the epoch
     cmdBuilder.append(ChunkType::lastmod() + "Epoch", chunk.getVersion().epoch());
@@ -196,11 +187,13 @@ BSONObj BalanceChunkRequest::serializeToMoveCommandForConfig(
     return cmdBuilder.obj();
 }
 
-BSONObj BalanceChunkRequest::serializeToRebalanceCommandForConfig(const ChunkType& chunk) {
+BSONObj BalanceChunkRequest::serializeToRebalanceCommandForConfig(const NamespaceString& nss,
+                                                                  const ChunkType& chunk) {
     invariant(chunk.validate());
 
     BSONObjBuilder cmdBuilder;
     cmdBuilder.append(kConfigSvrMoveChunk, 1);
+    cmdBuilder.append(kNS, nss.ns());
     cmdBuilder.appendElements(chunk.toConfigBSON());
     // ChunkType::toConfigBSON() no longer returns the epoch
     cmdBuilder.append(ChunkType::lastmod() + "Epoch", chunk.getVersion().epoch());
diff --git a/src/mongo/s/request_types/balance_chunk_request_type.h b/src/mongo/s/request_types/balance_chunk_request_type.h
index dda78045b1d..77d4e99afce 100644
--- a/src/mongo/s/request_types/balance_chunk_request_type.h
+++ b/src/mongo/s/request_types/balance_chunk_request_type.h
@@ -52,13 +52,17 @@ public:
     * Parses the provided BSON content and if it is correct construct a request object with the
     * request parameters. If the '_id' field is missing in obj, ignore it.
     */
-    static StatusWith<BalanceChunkRequest> parseFromConfigCommand(const BSONObj& obj);
+    // TODO (SERVER-60792): Get rid of "requireUUID" once v6.0 branches out. Starting from v5.1, the
+    // collection UUID will always be present in the chunk.
+    static StatusWith<BalanceChunkRequest> parseFromConfigCommand(const BSONObj& obj,
+                                                                  bool requireUUID = true);
 
     /**
     * Produces a BSON object for the variant of the command, which requests the balancer to move a
     * chunk to a user-specified shard.
     */
     static BSONObj serializeToMoveCommandForConfig(
+        const NamespaceString& nss,
         const ChunkType& chunk,
         const ShardId& newShardId,
         int64_t maxChunkSizeBytes,
@@ -70,13 +74,16 @@ public:
     /**
     * Produces a BSON object for the variant of the command, which requests the balancer to pick a
     * better location for a chunk.
     */
-    static BSONObj serializeToRebalanceCommandForConfig(const ChunkType& chunk);
+    static BSONObj serializeToRebalanceCommandForConfig(const NamespaceString& nss,
+                                                        const ChunkType& chunk);
 
-    // an pre 5.1 mongos might send the namespace instead of the UUID
-    const boost::optional<NamespaceString>& getNss() const {
+
+    const NamespaceString& getNss() const {
         return _nss;
     }
 
+    // TODO (SERVER-60792): Get rid of setCollectionUUID() once v6.0 branches out. Starting from
+    // v5.1, the collection UUID will always be present in the chunk.
     void setCollectionUUID(UUID const& uuid) {
         _chunk.setCollectionUUID(uuid);
     }
@@ -112,8 +119,7 @@ public:
 private:
     BalanceChunkRequest(ChunkType chunk, MigrationSecondaryThrottleOptions secondaryThrottle);
 
-    // legacy code might send the namespace instead of UUID
-    boost::optional<NamespaceString> _nss;
+    NamespaceString _nss;
 
     // Complete description of the chunk to be manipulated
     ChunkType _chunk;
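End to end, the move/rebalance command document now always carries "ns", so the config server no longer has to derive the namespace from the UUID. A toy serializer mirroring the shape that serializeToMoveCommandForConfig() now produces (field set abridged, values illustrative; not the real BSON builder):

```cpp
#include <iostream>
#include <map>
#include <string>

// After the patch the "ns" field is appended unconditionally alongside the
// chunk's UUID, instead of being an optional legacy extra.
std::map<std::string, std::string> serializeMoveCommand(
    const std::string& nss, const std::string& uuid, const std::string& toShard) {
    return {
        {"_configsvrMoveChunk", "1"},
        {"ns", nss},         // now always present
        {"uuid", uuid},      // always attached by v5.1+ senders
        {"toShard", toShard},
    };
}

int main() {
    for (const auto& [k, v] : serializeMoveCommand("TestDB.TestColl", "6a1f...", "shard1"))
        std::cout << k << ": " << v << '\n';
}
```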