SERVER-59719 Ensure resharding commit/abort completes before verifying

ShardsvrCommitReshardCollectionCommand's commit() call and ShardsvrAbortReshardCollectionCommand's abort() call each perform a write that triggers deletion of the resharding state documents on the donor and recipient. To verify that the deletion has happened, those commands then perform a read to check whether the state documents still exist. Now that the RSTL (Replication State Transition Lock) is no longer acquired during reads, it is not guaranteed that the command's opCtx will be interrupted while performing a read, despite calling setAlwaysInterruptAtStepDownOrUp(). As a consequence, it is possible for the command's write to have been interrupted by a step down on the donor/recipient, causing the state documents to still exist during the verification read and triggering a uassert. To resolve this issue, the commands now do a no-op write before the verification read, ensuring that the first write has indeed completed without being interrupted.

(cherry picked from commit cca75006b85690faa641a15dfc9940d2a2add52d)
author: Brett Nawrocki <brett.nawrocki@mongodb.com> 2021-11-09 21:49:22 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-11-11 22:10:28 +0000
commit: b89a97340366b308491344bddd84deca0cb2fa5f (patch)
tree: c1688cb9184fd76719427777ec7dcbe87b155c3e
parent: ad6c9823b2903f68898f08d4160ab2f72cf8a82a (diff)
download: mongo-b89a97340366b308491344bddd84deca0cb2fa5f.tar.gz
5 files changed, 28 insertions, 31 deletions
diff --git a/src/mongo/db/s/flush_resharding_state_change_command.cpp b/src/mongo/db/s/flush_resharding_state_change_command.cpp
index c4615016823..f1800874151 100644
--- a/src/mongo/db/s/flush_resharding_state_change_command.cpp
+++ b/src/mongo/db/s/flush_resharding_state_change_command.cpp
@@ -42,6 +42,7 @@
 #include "mongo/db/op_observer.h"
 #include "mongo/db/operation_context.h"
 #include "mongo/db/repl/repl_client_info.h"
+#include "mongo/db/s/resharding_util.h"
 #include "mongo/db/s/shard_filtering_metadata_refresh.h"
 #include "mongo/db/s/sharding_state.h"
 #include "mongo/logv2/log.h"
@@ -51,29 +52,6 @@
 
 namespace mongo {
 namespace {
-
-void doNoopWrite(OperationContext* opCtx, const NamespaceString& nss) {
-    writeConflictRetry(
-        opCtx, "_flushReshardingStateChange no-op", NamespaceString::kRsOplogNamespace.ns(), [&] {
-            AutoGetOplog oplogWrite(opCtx, OplogAccessMode::kWrite);
-
-            const std::string msg = str::stream()
-                << "no-op for _flushReshardingStateChange on " << nss;
-            WriteUnitOfWork wuow(opCtx);
-            opCtx->getClient()->getServiceContext()->getOpObserver()->onInternalOpMessage(
-                opCtx,
-                {},
-                boost::none,
-                BSON("msg" << msg),
-                boost::none,
-                boost::none,
-                boost::none,
-                boost::none,
-                boost::none);
-            wuow.commit();
-        });
-}
-
 class FlushReshardingStateChangeCmd final : public TypedCommand<FlushReshardingStateChangeCmd> {
 public:
     using Request = _flushReshardingStateChange;
@@ -152,7 +130,7 @@ public:
                 .getAsync([](auto) {});
 
             // Ensure the command isn't run on a stale primary.
-            doNoopWrite(opCtx, ns());
+            doNoopWrite(opCtx, "_flushReshardingStateChange no-op", ns());
         }
     };
 } _flushReshardingStateChange;
diff --git a/src/mongo/db/s/resharding_util.cpp b/src/mongo/db/s/resharding_util.cpp
index 247fd986682..d032b5fbc72 100644
--- a/src/mongo/db/s/resharding_util.cpp
+++ b/src/mongo/db/s/resharding_util.cpp
@@ -41,6 +41,7 @@
 #include "mongo/db/concurrency/write_conflict_exception.h"
 #include "mongo/db/exec/document_value/document.h"
 #include "mongo/db/namespace_string.h"
+#include "mongo/db/op_observer.h"
 #include "mongo/db/operation_context.h"
 #include "mongo/db/pipeline/document_source_add_fields.h"
 #include "mongo/db/pipeline/document_source_find_and_modify_image_lookup.h"
@@ -342,4 +343,25 @@ NamespaceString getLocalConflictStashNamespace(UUID existingUUID, ShardId donorS
                            "localReshardingConflictStash.{}.{}"_format(existingUUID.toString(),
                                                                        donorShardId.toString())};
 }
+
+void doNoopWrite(OperationContext* opCtx, StringData opStr, const NamespaceString& nss) {
+    writeConflictRetry(opCtx, opStr, NamespaceString::kRsOplogNamespace.ns(), [&] {
+        AutoGetOplog oplogWrite(opCtx, OplogAccessMode::kWrite);
+
+        const std::string msg = str::stream() << opStr << " on " << nss;
+        WriteUnitOfWork wuow(opCtx);
+        opCtx->getClient()->getServiceContext()->getOpObserver()->onInternalOpMessage(
+            opCtx,
+            {},
+            boost::none,
+            BSON("msg" << msg),
+            boost::none,
+            boost::none,
+            boost::none,
+            boost::none,
+            boost::none);
+        wuow.commit();
+    });
+}
+
 }  // namespace mongo
diff --git a/src/mongo/db/s/resharding_util.h b/src/mongo/db/s/resharding_util.h
index d38b2c8742c..ed50940a3be 100644
--- a/src/mongo/db/s/resharding_util.h
+++ b/src/mongo/db/s/resharding_util.h
@@ -299,4 +299,6 @@ NamespaceString getLocalOplogBufferNamespace(UUID existingUUID, ShardId donorSha
 
 NamespaceString getLocalConflictStashNamespace(UUID existingUUID, ShardId donorShardId);
 
+void doNoopWrite(OperationContext* opCtx, StringData opStr, const NamespaceString& nss);
+
 }  // namespace mongo
diff --git a/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp b/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp
index f28143f1d3a..1f80139b1c8 100644
--- a/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp
+++ b/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp
@@ -98,6 +98,7 @@ public:
             // If abort actually went through, the resharding documents should be cleaned up.
             // If they still exists, it could be because that it was interrupted or it is no
             // longer primary.
+            doNoopWrite(opCtx, "_shardsvrAbortReshardCollection no-op", ns());
             PersistentTaskStore<CommonReshardingMetadata> donorReshardingOpStore(
                 NamespaceString::kDonorReshardingOperationsNamespace);
             uassert(5563802,
@@ -114,11 +115,6 @@ public:
                 recipientReshardingOpStore.count(
                     opCtx, BSON(ReshardingRecipientDocument::kReshardingUUIDFieldName << uuid())) ==
                     0);
-
-            // Most of the work for this command is done on the donor/recipient executor thread, so
-            // set the last OpTime so that waitForWriteConcern can wait for the correct event
-            // to get majority committed.
-            repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
         }
 
     private:
diff --git a/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp b/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp
index 9506f46523a..67831f0c629 100644
--- a/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp
+++ b/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp
@@ -106,6 +106,7 @@ public:
             // If commit actually went through, the resharding documents will be cleaned up. If
             // documents still exist, it could be because that commit was interrupted or that the
             // underlying replica set node is no longer primary.
+            doNoopWrite(opCtx, "_shardsvrCommitReshardCollection no-op", ns());
             PersistentTaskStore<CommonReshardingMetadata> donorReshardingOpStore(
                 NamespaceString::kDonorReshardingOperationsNamespace);
             uassert(5795302,
@@ -122,8 +123,6 @@ public:
                 recipientReshardingOpStore.count(
                     opCtx, BSON(ReshardingRecipientDocument::kReshardingUUIDFieldName << uuid())) ==
                     0);
-
-            repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
         }
 
     private:
author	Brett Nawrocki <brett.nawrocki@mongodb.com>	2021-11-09 21:49:22 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2021-11-11 22:10:28 +0000
commit	b89a97340366b308491344bddd84deca0cb2fa5f (patch)
tree	c1688cb9184fd76719427777ec7dcbe87b155c3e
parent	ad6c9823b2903f68898f08d4160ab2f72cf8a82a (diff)
download	mongo-b89a97340366b308491344bddd84deca0cb2fa5f.tar.gz