summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pierlauro Sciarelli <pierlauro.sciarelli@mongodb.com> 2021-10-07 15:59:24 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-10-11 15:56:01 +0000
commit: 40cb50a010db2715fd9745ede02ce0cc6f8bbc34 (patch)
tree: 4cba39e3cee0bdf4b07bf2fbd552f3a6b141fbfa
parent: 76252bfa922432c698fad3b4330a0c7edd3e1fee (diff)
download: mongo-40cb50a010db2715fd9745ede02ce0cc6f8bbc34.tar.gz
SERVER-54231 Resharding must not leave stale collection catalog entries
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp | 4
-rw-r--r-- src/mongo/db/s/resharding/resharding_coordinator_service.cpp | 44
2 files changed, 41 insertions, 7 deletions
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp
index d1857fe231a..b89a6311fbf 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp
@@ -61,10 +61,6 @@ ShardId selectShardForNewDatabase(OperationContext* opCtx, ShardRegistry* shardR
shardRegistry->reload(opCtx);
auto allShardIds = shardRegistry->getAllShardIds(opCtx);
uassert(ErrorCodes::ShardNotFound, "No shards found", !allShardIds.empty());
- // TODO SERVER-54231 stop sorting this vector.
- // Ideally it should be shuffled so that the we choose a random candidate based only
- // on shard size and not on their lexical order.
- std::sort(allShardIds.begin(), allShardIds.end());
ShardId candidateShardId = allShardIds[0];
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
index 66be95e28b5..e87e5d2a705 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
@@ -52,6 +52,7 @@
#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_server_parameters_gen.h"
#include "mongo/db/s/resharding_util.h"
+#include "mongo/db/s/sharding_ddl_util.h"
#include "mongo/db/s/sharding_logging.h"
#include "mongo/db/s/sharding_util.h"
#include "mongo/db/storage/duplicate_key_error_info.h"
@@ -64,6 +65,7 @@
#include "mongo/s/grid.h"
#include "mongo/s/request_types/abort_reshard_collection_gen.h"
#include "mongo/s/request_types/commit_reshard_collection_gen.h"
+#include "mongo/s/request_types/drop_collection_if_uuid_not_matching_gen.h"
#include "mongo/s/request_types/flush_resharding_state_change_gen.h"
#include "mongo/s/request_types/flush_routing_table_cache_updates_gen.h"
#include "mongo/s/shard_id.h"
@@ -1703,14 +1705,50 @@ ReshardingCoordinatorService::ReshardingCoordinator::_awaitAllParticipantShardsD
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
auto& coordinatorDoc = coordinatorDocsChangedOnDisk[1];
- reshardingPauseCoordinatorBeforeRemovingStateDoc.pauseWhileSetAndNotCanceled(
- opCtx.get(), _ctHolder->getStepdownToken());
-
boost::optional<Status> abortReason;
if (coordinatorDoc.getAbortReason()) {
abortReason = getStatusFromAbortReason(coordinatorDoc);
}
+ if (!abortReason) {
+ // (SERVER-54231) Ensure every catalog entry referring to the source UUID is
+ // cleared out on every shard.
+ const auto allShardIds =
+ Grid::get(opCtx.get())->shardRegistry()->getAllShardIds(opCtx.get());
+ const auto& nss = coordinatorDoc.getSourceNss();
+ const auto& notMatchingThisUUID = coordinatorDoc.getReshardingUUID();
+ const auto cmdObj =
+ ShardsvrDropCollectionIfUUIDNotMatchingRequest(nss, notMatchingThisUUID)
+ .toBSON({});
+
+ try {
+ sharding_ddl_util::sendAuthenticatedCommandToShards(
+ opCtx.get(), nss.db(), cmdObj, allShardIds, **executor);
+ } catch (const DBException& ex) {
+ if (ex.code() == ErrorCodes::CommandNotFound) {
+ // TODO SERVER-60531 get rid of the catch logic
+ // Cleanup failed because at least one shard is using a binary
+ // not supporting the ShardsvrDropCollectionIfUUIDNotMatching command.
+ LOGV2_INFO(5423100,
+ "Resharding coordinator couldn't guarantee older incarnations "
+ "of the collection were dropped. A chunk migration to a shard "
+ "with an older incarnation of the collection will fail",
+ "namespace"_attr = nss.ns());
+ } else if (opCtx->checkForInterruptNoAssert().isOK()) {
+ LOGV2_INFO(
+ 5423101,
+ "Resharding coordinator failed while trying to drop possible older "
+ "incarnations of the collection. A chunk migration to a shard with "
+ "an older incarnation of the collection will fail",
+ "namespace"_attr = nss.ns(),
+ "error"_attr = redact(ex.toStatus()));
+ }
+ }
+ }
+
+ reshardingPauseCoordinatorBeforeRemovingStateDoc.pauseWhileSetAndNotCanceled(
+ opCtx.get(), _ctHolder->getStepdownToken());
+
// Notify `ReshardingMetrics` as the operation is now complete for external observers.
markCompleted(abortReason ? *abortReason : Status::OK());