From fd8260fc72cbe43f4addeab7bada2a87b05fb28c Mon Sep 17 00:00:00 2001
From: toto <58224719+toto-dev@users.noreply.github.com>
Date: Thu, 5 Jan 2023 18:07:22 +0000
Subject: SERVER-71328 Ensure correct filtering metadata on donor shard after
 multiple failures

(cherry picked from commit f782ca31ce08c7b0ec04a29eac3338884338a337)
---
 src/mongo/db/s/migration_coordinator.cpp |  6 ++++++
 src/mongo/db/s/migration_util.cpp        | 14 +++++++++-----
 2 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'src')
diff --git a/src/mongo/db/s/migration_coordinator.cpp b/src/mongo/db/s/migration_coordinator.cpp
index b6ff4b7abd1..d59de326210 100644
--- a/src/mongo/db/s/migration_coordinator.cpp
+++ b/src/mongo/db/s/migration_coordinator.cpp
@@ -37,6 +37,7 @@
 #include "mongo/db/s/migration_util.h"
 #include "mongo/db/s/range_deletion_task_gen.h"
 #include "mongo/db/vector_clock.h"
+#include "mongo/db/vector_clock_mutable.h"
 #include "mongo/logv2/log.h"
 #include "mongo/platform/atomic_word.h"
 #include "mongo/s/pm2423_feature_flags_gen.h"
@@ -172,6 +173,11 @@ boost::optional<SemiFuture<void>> MigrationCoordinator::completeMigration(
 
     boost::optional<SemiFuture<void>> cleanupCompleteFuture = boost::none;
 
+    // Persist the config time before the migration decision to ensure that, in case of a
+    // stepdown, the next filtering metadata refresh on the new primary will always include the
+    // effect of this migration.
+    VectorClockMutable::get(opCtx)->waitForDurableConfigTime().get(opCtx);
+
     switch (*decision) {
         case DecisionEnum::kAborted:
             _abortMigrationOnDonorAndRecipient(opCtx, acquireCSOnRecipient);
diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp
index 656054d4514..7419956c871 100644
--- a/src/mongo/db/s/migration_util.cpp
+++ b/src/mongo/db/s/migration_util.cpp
@@ -999,11 +999,6 @@ void markAsReadyRangeDeletionTaskLocally(OperationContext* opCtx, const UUID& mi
 }
 
 void deleteMigrationCoordinatorDocumentLocally(OperationContext* opCtx, const UUID& migrationId) {
-    // Before deleting the migration coordinator document, ensure that in the case of a crash, the
-    // node will start-up from at least the configTime, which it obtained as part of recovery of the
-    // shardVersion, which will ensure that it will see at least the same shardVersion.
-    VectorClockMutable::get(opCtx)->waitForDurableConfigTime().get(opCtx);
-
     PersistentTaskStore<MigrationCoordinatorDocument> store(
         NamespaceString::kMigrationCoordinatorsNamespace);
     store.remove(opCtx,
@@ -1179,6 +1174,15 @@ void recoverMigrationCoordinations(OperationContext* opCtx,
                           "coordinatorDocumentUUID"_attr = doc.getCollectionUuid());
                 }
 
+                // TODO SERVER-71918 once the drop collection coordinator starts persisting the
+                // config time, we can remove this. Since the collection has been dropped,
+                // persist the config time inclusive of the drop collection event before deleting
+                // the leftover migration metadata.
+                // This ensures that, in case of a stepdown, the new primary won't read stale
+                // data from the config server and think that the sharded collection still
+                // exists.
+                VectorClockMutable::get(opCtx)->waitForDurableConfigTime().get(opCtx);
+
                 deleteRangeDeletionTaskOnRecipient(opCtx, doc.getRecipientShardId(), doc.getId());
                 deleteRangeDeletionTaskLocally(opCtx, doc.getId());
                 coordinator.forgetMigration(opCtx);
-- 
cgit v1.2.1