From fd8260fc72cbe43f4addeab7bada2a87b05fb28c Mon Sep 17 00:00:00 2001 From: toto <58224719+toto-dev@users.noreply.github.com> Date: Thu, 5 Jan 2023 18:07:22 +0000 Subject: SERVER-71328 Ensure correct filtering metadata on donor shard after multiple failures (cherry picked from commit f782ca31ce08c7b0ec04a29eac3338884338a337) --- src/mongo/db/s/migration_coordinator.cpp | 6 ++++++ src/mongo/db/s/migration_util.cpp | 14 +++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/mongo/db/s/migration_coordinator.cpp b/src/mongo/db/s/migration_coordinator.cpp index b6ff4b7abd1..d59de326210 100644 --- a/src/mongo/db/s/migration_coordinator.cpp +++ b/src/mongo/db/s/migration_coordinator.cpp @@ -37,6 +37,7 @@ #include "mongo/db/s/migration_util.h" #include "mongo/db/s/range_deletion_task_gen.h" #include "mongo/db/vector_clock.h" +#include "mongo/db/vector_clock_mutable.h" #include "mongo/logv2/log.h" #include "mongo/platform/atomic_word.h" #include "mongo/s/pm2423_feature_flags_gen.h" @@ -172,6 +173,11 @@ boost::optional> MigrationCoordinator::completeMigration( boost::optional> cleanupCompleteFuture = boost::none; + // Persist the config time before the migration decision to ensure that in case of stepdown + // the next filtering metadata refresh on the new primary will always include the effect of this + // migration. 
+ VectorClockMutable::get(opCtx)->waitForDurableConfigTime().get(opCtx); + switch (*decision) { case DecisionEnum::kAborted: _abortMigrationOnDonorAndRecipient(opCtx, acquireCSOnRecipient); diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp index 656054d4514..7419956c871 100644 --- a/src/mongo/db/s/migration_util.cpp +++ b/src/mongo/db/s/migration_util.cpp @@ -999,11 +999,6 @@ void markAsReadyRangeDeletionTaskLocally(OperationContext* opCtx, const UUID& mi } void deleteMigrationCoordinatorDocumentLocally(OperationContext* opCtx, const UUID& migrationId) { - // Before deleting the migration coordinator document, ensure that in the case of a crash, the - // node will start-up from at least the configTime, which it obtained as part of recovery of the - // shardVersion, which will ensure that it will see at least the same shardVersion. - VectorClockMutable::get(opCtx)->waitForDurableConfigTime().get(opCtx); - PersistentTaskStore store( NamespaceString::kMigrationCoordinatorsNamespace); store.remove(opCtx, @@ -1179,6 +1174,15 @@ void recoverMigrationCoordinations(OperationContext* opCtx, "coordinatorDocumentUUID"_attr = doc.getCollectionUuid()); } + // TODO SERVER-71918 once the drop collection coordinator starts persisting the + // config time, we can remove this. Since the collection has been dropped, + // persist the config time inclusive of the drop collection event before deleting + // leftover migration metadata. + // This will ensure that in case of stepdown the new + // primary won't read stale data from the config server and think that the sharded + // collection still exists. + VectorClockMutable::get(opCtx)->waitForDurableConfigTime().get(opCtx); + deleteRangeDeletionTaskOnRecipient(opCtx, doc.getRecipientShardId(), doc.getId()); deleteRangeDeletionTaskLocally(opCtx, doc.getId()); coordinator.forgetMigration(opCtx); -- cgit v1.2.1