diff options
author | Brett Nawrocki <brett.nawrocki@mongodb.com> | 2021-11-09 21:49:22 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-11-11 22:10:28 +0000 |
commit | b89a97340366b308491344bddd84deca0cb2fa5f (patch) | |
tree | c1688cb9184fd76719427777ec7dcbe87b155c3e | |
parent | ad6c9823b2903f68898f08d4160ab2f72cf8a82a (diff) | |
download | mongo-b89a97340366b308491344bddd84deca0cb2fa5f.tar.gz |
SERVER-59719 Ensure resharding commit/abort completes before verifying
ShardsvrCommitReshardCollectionCommand's commit() call and
ShardsvrAbortReshardCollectionCommand's abort() call each perform a
write which will trigger the state document to be deleted on the donor
and recipient. To verify this is done, those commands perform a read to
check if the state documents still exist. Now that the RSTL (replication
state transition lock) is not acquired during reads, it is not guaranteed
that the command's opCtx
will be interrupted when performing a read despite calling
setAlwaysInterruptAtStepDownOrUp(). As a consequence, it is possible for
the command's write to have been interrupted due to a step down on the
donor/recipient, causing the document to still exist during the
verification read and triggering a uassert. To resolve this issue, the
commands now do a no-op write before the verification read, ensuring
that the first write has indeed completed without being interrupted.
(cherry picked from commit cca75006b85690faa641a15dfc9940d2a2add52d)
-rw-r--r-- | src/mongo/db/s/flush_resharding_state_change_command.cpp | 26 | ||||
-rw-r--r-- | src/mongo/db/s/resharding_util.cpp | 22 | ||||
-rw-r--r-- | src/mongo/db/s/resharding_util.h | 2 | ||||
-rw-r--r-- | src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp | 3 |
5 files changed, 28 insertions, 31 deletions
diff --git a/src/mongo/db/s/flush_resharding_state_change_command.cpp b/src/mongo/db/s/flush_resharding_state_change_command.cpp index c4615016823..f1800874151 100644 --- a/src/mongo/db/s/flush_resharding_state_change_command.cpp +++ b/src/mongo/db/s/flush_resharding_state_change_command.cpp @@ -42,6 +42,7 @@ #include "mongo/db/op_observer.h" #include "mongo/db/operation_context.h" #include "mongo/db/repl/repl_client_info.h" +#include "mongo/db/s/resharding_util.h" #include "mongo/db/s/shard_filtering_metadata_refresh.h" #include "mongo/db/s/sharding_state.h" #include "mongo/logv2/log.h" @@ -51,29 +52,6 @@ namespace mongo { namespace { - -void doNoopWrite(OperationContext* opCtx, const NamespaceString& nss) { - writeConflictRetry( - opCtx, "_flushReshardingStateChange no-op", NamespaceString::kRsOplogNamespace.ns(), [&] { - AutoGetOplog oplogWrite(opCtx, OplogAccessMode::kWrite); - - const std::string msg = str::stream() - << "no-op for _flushReshardingStateChange on " << nss; - WriteUnitOfWork wuow(opCtx); - opCtx->getClient()->getServiceContext()->getOpObserver()->onInternalOpMessage( - opCtx, - {}, - boost::none, - BSON("msg" << msg), - boost::none, - boost::none, - boost::none, - boost::none, - boost::none); - wuow.commit(); - }); -} - class FlushReshardingStateChangeCmd final : public TypedCommand<FlushReshardingStateChangeCmd> { public: using Request = _flushReshardingStateChange; @@ -152,7 +130,7 @@ public: .getAsync([](auto) {}); // Ensure the command isn't run on a stale primary. 
- doNoopWrite(opCtx, ns()); + doNoopWrite(opCtx, "_flushReshardingStateChange no-op", ns()); } }; } _flushReshardingStateChange; diff --git a/src/mongo/db/s/resharding_util.cpp b/src/mongo/db/s/resharding_util.cpp index 247fd986682..d032b5fbc72 100644 --- a/src/mongo/db/s/resharding_util.cpp +++ b/src/mongo/db/s/resharding_util.cpp @@ -41,6 +41,7 @@ #include "mongo/db/concurrency/write_conflict_exception.h" #include "mongo/db/exec/document_value/document.h" #include "mongo/db/namespace_string.h" +#include "mongo/db/op_observer.h" #include "mongo/db/operation_context.h" #include "mongo/db/pipeline/document_source_add_fields.h" #include "mongo/db/pipeline/document_source_find_and_modify_image_lookup.h" @@ -342,4 +343,25 @@ NamespaceString getLocalConflictStashNamespace(UUID existingUUID, ShardId donorS "localReshardingConflictStash.{}.{}"_format(existingUUID.toString(), donorShardId.toString())}; } + +void doNoopWrite(OperationContext* opCtx, StringData opStr, const NamespaceString& nss) { + writeConflictRetry(opCtx, opStr, NamespaceString::kRsOplogNamespace.ns(), [&] { + AutoGetOplog oplogWrite(opCtx, OplogAccessMode::kWrite); + + const std::string msg = str::stream() << opStr << " on " << nss; + WriteUnitOfWork wuow(opCtx); + opCtx->getClient()->getServiceContext()->getOpObserver()->onInternalOpMessage( + opCtx, + {}, + boost::none, + BSON("msg" << msg), + boost::none, + boost::none, + boost::none, + boost::none, + boost::none); + wuow.commit(); + }); +} + } // namespace mongo diff --git a/src/mongo/db/s/resharding_util.h b/src/mongo/db/s/resharding_util.h index d38b2c8742c..ed50940a3be 100644 --- a/src/mongo/db/s/resharding_util.h +++ b/src/mongo/db/s/resharding_util.h @@ -299,4 +299,6 @@ NamespaceString getLocalOplogBufferNamespace(UUID existingUUID, ShardId donorSha NamespaceString getLocalConflictStashNamespace(UUID existingUUID, ShardId donorShardId); +void doNoopWrite(OperationContext* opCtx, StringData opStr, const NamespaceString& nss); + } // namespace 
mongo diff --git a/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp b/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp index f28143f1d3a..1f80139b1c8 100644 --- a/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp +++ b/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp @@ -98,6 +98,7 @@ public: // If abort actually went through, the resharding documents should be cleaned up. // If they still exists, it could be because that it was interrupted or it is no // longer primary. + doNoopWrite(opCtx, "_shardsvrAbortReshardCollection no-op", ns()); PersistentTaskStore<CommonReshardingMetadata> donorReshardingOpStore( NamespaceString::kDonorReshardingOperationsNamespace); uassert(5563802, @@ -114,11 +115,6 @@ public: recipientReshardingOpStore.count( opCtx, BSON(ReshardingRecipientDocument::kReshardingUUIDFieldName << uuid())) == 0); - - // Most of the work for this command is done on the donor/recipient executor thread, so - // set the last OpTime so that waitForWriteConcern can wait for the correct event - // to get majority committed. - repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx); } private: diff --git a/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp b/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp index 9506f46523a..67831f0c629 100644 --- a/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp +++ b/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp @@ -106,6 +106,7 @@ public: // If commit actually went through, the resharding documents will be cleaned up. If // documents still exist, it could be because that commit was interrupted or that the // underlying replica set node is no longer primary. 
+ doNoopWrite(opCtx, "_shardsvrCommitReshardCollection no-op", ns()); PersistentTaskStore<CommonReshardingMetadata> donorReshardingOpStore( NamespaceString::kDonorReshardingOperationsNamespace); uassert(5795302, @@ -122,8 +123,6 @@ public: recipientReshardingOpStore.count( opCtx, BSON(ReshardingRecipientDocument::kReshardingUUIDFieldName << uuid())) == 0); - - repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx); } private: |