diff options
author | Pierlauro Sciarelli <pierlauro.sciarelli@mongodb.com> | 2021-10-07 15:59:24 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-10-11 15:56:01 +0000 |
commit | 40cb50a010db2715fd9745ede02ce0cc6f8bbc34 (patch) | |
tree | 4cba39e3cee0bdf4b07bf2fbd552f3a6b141fbfa | |
parent | 76252bfa922432c698fad3b4330a0c7edd3e1fee (diff) | |
download | mongo-40cb50a010db2715fd9745ede02ce0cc6f8bbc34.tar.gz |
SERVER-54231 Resharding must not leave stale collection catalog entries
-rw-r--r-- | src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/s/resharding/resharding_coordinator_service.cpp | 44 |
2 files changed, 41 insertions, 7 deletions
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp
index d1857fe231a..b89a6311fbf 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp
@@ -61,10 +61,6 @@ ShardId selectShardForNewDatabase(OperationContext* opCtx, ShardRegistry* shardR
     shardRegistry->reload(opCtx);
     auto allShardIds = shardRegistry->getAllShardIds(opCtx);
     uassert(ErrorCodes::ShardNotFound, "No shards found", !allShardIds.empty());
-    // TODO SERVER-54231 stop sorting this vector.
-    // Ideally it should be shuffled so that the we choose a random candidate based only
-    // on shard size and not on their lexical order.
-    std::sort(allShardIds.begin(), allShardIds.end());
 
     ShardId candidateShardId = allShardIds[0];
 
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
index 66be95e28b5..e87e5d2a705 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
@@ -52,6 +52,7 @@
 #include "mongo/db/s/resharding/resharding_metrics.h"
 #include "mongo/db/s/resharding/resharding_server_parameters_gen.h"
 #include "mongo/db/s/resharding_util.h"
+#include "mongo/db/s/sharding_ddl_util.h"
 #include "mongo/db/s/sharding_logging.h"
 #include "mongo/db/s/sharding_util.h"
 #include "mongo/db/storage/duplicate_key_error_info.h"
@@ -64,6 +65,7 @@
 #include "mongo/s/grid.h"
 #include "mongo/s/request_types/abort_reshard_collection_gen.h"
 #include "mongo/s/request_types/commit_reshard_collection_gen.h"
+#include "mongo/s/request_types/drop_collection_if_uuid_not_matching_gen.h"
 #include "mongo/s/request_types/flush_resharding_state_change_gen.h"
 #include "mongo/s/request_types/flush_routing_table_cache_updates_gen.h"
 #include "mongo/s/shard_id.h"
@@ -1703,14 +1705,50 @@ ReshardingCoordinatorService::ReshardingCoordinator::_awaitAllParticipantShardsD
             auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
             auto& coordinatorDoc = coordinatorDocsChangedOnDisk[1];
 
-            reshardingPauseCoordinatorBeforeRemovingStateDoc.pauseWhileSetAndNotCanceled(
-                opCtx.get(), _ctHolder->getStepdownToken());
-
             boost::optional<Status> abortReason;
             if (coordinatorDoc.getAbortReason()) {
                 abortReason = getStatusFromAbortReason(coordinatorDoc);
             }
 
+            if (!abortReason) {
+                // (SERVER-54231) Ensure every catalog entry referring the source uuid is
+                // cleared out on every shard.
+                const auto allShardIds =
+                    Grid::get(opCtx.get())->shardRegistry()->getAllShardIds(opCtx.get());
+                const auto& nss = coordinatorDoc.getSourceNss();
+                const auto& notMatchingThisUUID = coordinatorDoc.getReshardingUUID();
+                const auto cmdObj =
+                    ShardsvrDropCollectionIfUUIDNotMatchingRequest(nss, notMatchingThisUUID)
+                        .toBSON({});
+
+                try {
+                    sharding_ddl_util::sendAuthenticatedCommandToShards(
+                        opCtx.get(), nss.db(), cmdObj, allShardIds, **executor);
+                } catch (const DBException& ex) {
+                    if (ex.code() == ErrorCodes::CommandNotFound) {
+                        // TODO SERVER-60531 get rid of the catch logic
+                        // Cleanup failed because at least one shard could is using a binary
+                        // not supporting the ShardsvrDropCollectionIfUUIDNotMatching command.
+                        LOGV2_INFO(5423100,
+                                   "Resharding coordinator couldn't guarantee older incarnations "
+                                   "of the collection were dropped. A chunk migration to a shard "
+                                   "with an older incarnation of the collection will fail",
+                                   "namespace"_attr = nss.ns());
+                    } else if (opCtx->checkForInterruptNoAssert().isOK()) {
+                        LOGV2_INFO(
+                            5423101,
+                            "Resharding coordinator failed while trying to drop possible older "
+                            "incarnations of the collection. A chunk migration to a shard with "
+                            "an older incarnation of the collection will fail",
+                            "namespace"_attr = nss.ns(),
+                            "error"_attr = redact(ex.toStatus()));
+                    }
+                }
+            }
+
+            reshardingPauseCoordinatorBeforeRemovingStateDoc.pauseWhileSetAndNotCanceled(
+                opCtx.get(), _ctHolder->getStepdownToken());
+
             // Notify `ReshardingMetrics` as the operation is now complete for external observers.
             markCompleted(abortReason ? *abortReason : Status::OK());
 