author     Jordi Serra Torrens <jordi.serra-torrens@mongodb.com>    2022-06-09 07:21:14 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>         2022-06-09 08:20:01 +0000
commit     824b9b7e608687ba0db7af2d5ccc5b6811a46720 (patch)
tree       024f48d5525e7f49d6d256282bc0c6647e2fbf94 /src/mongo/db/query
parent     0c2e70f14d87b17d43fd3af322b604af7e8ea5a6 (diff)
download   mongo-824b9b7e608687ba0db7af2d5ccc5b6811a46720.tar.gz
SERVER-61127 Retry multi-writes that hit StaleConfig due to critical section on the shard
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r--  src/mongo/db/query/plan_executor.cpp       |  4
-rw-r--r--  src/mongo/db/query/plan_executor.h         |  9
-rw-r--r--  src/mongo/db/query/plan_executor_impl.cpp  | 20
3 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp
index ee41d15d84c..99b2fd8fefa 100644
--- a/src/mongo/db/query/plan_executor.cpp
+++ b/src/mongo/db/query/plan_executor.cpp
@@ -38,6 +38,10 @@ namespace {
 MONGO_FAIL_POINT_DEFINE(planExecutorAlwaysFails);
 }  // namespace

+const OperationContext::Decoration<boost::optional<SharedSemiFuture<void>>>
+    planExecutorShardingCriticalSectionFuture =
+        OperationContext::declareDecoration<boost::optional<SharedSemiFuture<void>>>();
+
 std::string PlanExecutor::stateToStr(ExecState execState) {
     switch (execState) {
         case PlanExecutor::ADVANCED:
diff --git a/src/mongo/db/query/plan_executor.h b/src/mongo/db/query/plan_executor.h
index bf7799dd3b3..a94e87648dd 100644
--- a/src/mongo/db/query/plan_executor.h
+++ b/src/mongo/db/query/plan_executor.h
@@ -56,6 +56,15 @@ class RecordId;
 extern const OperationContext::Decoration<repl::OpTime> clientsLastKnownCommittedOpTime;

 /**
+ * If a plan yielded because it encountered a sharding critical section,
+ * 'planExecutorShardingCriticalSectionFuture' will be set to a future that becomes ready when the
+ * critical section ends. This future can be waited on to hold off resuming the plan execution while
+ * the critical section is still active.
+ */
+extern const OperationContext::Decoration<boost::optional<SharedSemiFuture<void>>>
+    planExecutorShardingCriticalSectionFuture;
+
+/**
  * A PlanExecutor is the abstraction that knows how to crank a tree of stages into execution.
  * The executor is usually part of a larger abstraction that is interacting with the cache
  * and/or the query optimizer.
diff --git a/src/mongo/db/query/plan_executor_impl.cpp b/src/mongo/db/query/plan_executor_impl.cpp
index 76559f3d003..808b0800d23 100644
--- a/src/mongo/db/query/plan_executor_impl.cpp
+++ b/src/mongo/db/query/plan_executor_impl.cpp
@@ -60,6 +60,7 @@
 #include "mongo/db/query/plan_yield_policy_impl.h"
 #include "mongo/db/query/yield_policy_callbacks_impl.h"
 #include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/s/operation_sharding_state.h"
 #include "mongo/db/service_context.h"
 #include "mongo/logv2/log.h"
 #include "mongo/util/fail_point.h"
@@ -361,8 +362,25 @@ PlanExecutor::ExecState PlanExecutorImpl::_getNextImpl(Snapshotted<Document>* ob
         // 2) some stage requested a yield, or
         // 3) we need to yield and retry due to a WriteConflictException.
         // In all cases, the actual yielding happens here.
+
+        const auto whileYieldingFn = [&]() {
+            // If we yielded because we encountered a sharding critical section, wait for the
+            // critical section to end before continuing. By waiting for the critical section to be
+            // exited we avoid busy spinning immediately and encountering the same critical section
+            // again. It is important that this wait happens after having released the lock
+            // hierarchy -- otherwise deadlocks could happen, or the very least, locks would be
+            // unnecessarily held while waiting.
+            const auto& shardingCriticalSection = planExecutorShardingCriticalSectionFuture(_opCtx);
+            if (shardingCriticalSection) {
+                OperationShardingState::waitForCriticalSectionToComplete(_opCtx,
+                                                                         *shardingCriticalSection)
+                    .ignore();
+                planExecutorShardingCriticalSectionFuture(_opCtx).reset();
+            }
+        };
+
         if (_yieldPolicy->shouldYieldOrInterrupt(_opCtx)) {
-            uassertStatusOK(_yieldPolicy->yieldOrInterrupt(_opCtx));
+            uassertStatusOK(_yieldPolicy->yieldOrInterrupt(_opCtx, whileYieldingFn));
         }

         WorkingSetID id = WorkingSet::INVALID_ID;
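For readers unfamiliar with the mechanism, the self-contained sketch below models the pattern this patch introduces: a per-operation slot that the write path fills with a future when it runs into an active sharding critical section, and a while-yielding callback that waits on that future, with locks already released, before plan execution resumes. It is a simplified illustration, not MongoDB code: std::optional<std::shared_future<void>> stands in for the OperationContext decoration holding a SharedSemiFuture, and all names here (FakeOperationContext, whileYielding, criticalSectionDone) are hypothetical stand-ins.

#include <chrono>
#include <future>
#include <iostream>
#include <optional>
#include <thread>

// Stand-in for an OperationContext carrying the critical-section decoration.
struct FakeOperationContext {
    std::optional<std::shared_future<void>> shardingCriticalSectionFuture;
};

// Stand-in for the whileYieldingFn the patch installs: it runs while the plan is
// yielded (locks released). If a critical-section future was stashed, wait for it
// to become ready and then clear the slot so later yields do not wait again.
void whileYielding(FakeOperationContext& opCtx) {
    if (opCtx.shardingCriticalSectionFuture) {
        opCtx.shardingCriticalSectionFuture->wait();
        opCtx.shardingCriticalSectionFuture.reset();
    }
}

int main() {
    FakeOperationContext opCtx;

    // A StaleConfig-style check notices an active critical section and stashes a
    // future that becomes ready once the section is exited.
    std::promise<void> criticalSectionDone;
    opCtx.shardingCriticalSectionFuture = criticalSectionDone.get_future().share();

    // Another thread exits the critical section a little later.
    std::thread releaser([&criticalSectionDone] {
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
        criticalSectionDone.set_value();
    });

    // The plan executor yields: instead of immediately retrying and hitting the
    // same critical section again, it blocks here with no locks held.
    whileYielding(opCtx);
    std::cout << "critical section over, plan execution resumes" << std::endl;

    releaser.join();
    return 0;
}

The design point mirrored from the patch's own comment is that the wait happens inside the yield callback, after the lock hierarchy has been released, so the operation neither busy-spins back into the same critical section nor blocks while still holding locks.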