author     Jordi Serra Torrens <jordi.serra-torrens@mongodb.com>    2022-06-09 07:21:14 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>         2022-06-09 08:20:01 +0000
commit     824b9b7e608687ba0db7af2d5ccc5b6811a46720 (patch)
tree       024f48d5525e7f49d6d256282bc0c6647e2fbf94 /src/mongo/db/query
parent     0c2e70f14d87b17d43fd3af322b604af7e8ea5a6 (diff)
download   mongo-824b9b7e608687ba0db7af2d5ccc5b6811a46720.tar.gz
SERVER-61127 Retry multi-writes that hit StaleConfig due to critical section on the shard
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r--  src/mongo/db/query/plan_executor.cpp       |  4
-rw-r--r--  src/mongo/db/query/plan_executor.h         |  9
-rw-r--r--  src/mongo/db/query/plan_executor_impl.cpp  | 20
3 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp
index ee41d15d84c..99b2fd8fefa 100644
--- a/src/mongo/db/query/plan_executor.cpp
+++ b/src/mongo/db/query/plan_executor.cpp
@@ -38,6 +38,10 @@ namespace {
 MONGO_FAIL_POINT_DEFINE(planExecutorAlwaysFails);
 }  // namespace

+const OperationContext::Decoration<boost::optional<SharedSemiFuture<void>>>
+    planExecutorShardingCriticalSectionFuture =
+        OperationContext::declareDecoration<boost::optional<SharedSemiFuture<void>>>();
+
 std::string PlanExecutor::stateToStr(ExecState execState) {
     switch (execState) {
         case PlanExecutor::ADVANCED:
diff --git a/src/mongo/db/query/plan_executor.h b/src/mongo/db/query/plan_executor.h
index bf7799dd3b3..a94e87648dd 100644
--- a/src/mongo/db/query/plan_executor.h
+++ b/src/mongo/db/query/plan_executor.h
@@ -56,6 +56,15 @@ class RecordId;
 extern const OperationContext::Decoration<repl::OpTime> clientsLastKnownCommittedOpTime;

 /**
+ * If a plan yielded because it encountered a sharding critical section,
+ * 'planExecutorShardingCriticalSectionFuture' will be set to a future that becomes ready when the
+ * critical section ends. This future can be waited on to hold off resuming the plan execution while
+ * the critical section is still active.
+ */
+extern const OperationContext::Decoration<boost::optional<SharedSemiFuture<void>>>
+    planExecutorShardingCriticalSectionFuture;
+
+/**
  * A PlanExecutor is the abstraction that knows how to crank a tree of stages into execution.
  * The executor is usually part of a larger abstraction that is interacting with the cache
  * and/or the query optimizer.
diff --git a/src/mongo/db/query/plan_executor_impl.cpp b/src/mongo/db/query/plan_executor_impl.cpp
index 76559f3d003..808b0800d23 100644
--- a/src/mongo/db/query/plan_executor_impl.cpp
+++ b/src/mongo/db/query/plan_executor_impl.cpp
@@ -60,6 +60,7 @@
 #include "mongo/db/query/plan_yield_policy_impl.h"
 #include "mongo/db/query/yield_policy_callbacks_impl.h"
 #include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/s/operation_sharding_state.h"
 #include "mongo/db/service_context.h"
 #include "mongo/logv2/log.h"
 #include "mongo/util/fail_point.h"
@@ -361,8 +362,25 @@ PlanExecutor::ExecState PlanExecutorImpl::_getNextImpl(Snapshotted<Document>* ob
         // 2) some stage requested a yield, or
         // 3) we need to yield and retry due to a WriteConflictException.
         // In all cases, the actual yielding happens here.
+
+        const auto whileYieldingFn = [&]() {
+            // If we yielded because we encountered a sharding critical section, wait for the
+            // critical section to end before continuing. By waiting for the critical section to be
+            // exited we avoid busy spinning immediately and encountering the same critical section
+            // again. It is important that this wait happens after having released the lock
+            // hierarchy -- otherwise deadlocks could happen, or the very least, locks would be
+            // unnecessarily held while waiting.
+            const auto& shardingCriticalSection = planExecutorShardingCriticalSectionFuture(_opCtx);
+            if (shardingCriticalSection) {
+                OperationShardingState::waitForCriticalSectionToComplete(_opCtx,
+                                                                         *shardingCriticalSection)
+                    .ignore();
+                planExecutorShardingCriticalSectionFuture(_opCtx).reset();
+            }
+        };
+
         if (_yieldPolicy->shouldYieldOrInterrupt(_opCtx)) {
-            uassertStatusOK(_yieldPolicy->yieldOrInterrupt(_opCtx));
+            uassertStatusOK(_yieldPolicy->yieldOrInterrupt(_opCtx, whileYieldingFn));
         }

         WorkingSetID id = WorkingSet::INVALID_ID;
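For readers unfamiliar with the mechanism, the self-contained sketch below models the pattern this patch introduces: a per-operation slot that the write path fills with a future when it runs into an active sharding critical section, and a while-yielding callback that waits on that future, with locks already released, before plan execution resumes. It is a simplified illustration, not MongoDB code: std::optional<std::shared_future<void>> stands in for the OperationContext decoration holding a SharedSemiFuture, and all names here (FakeOperationContext, whileYielding, criticalSectionDone) are hypothetical stand-ins.

#include <chrono>
#include <future>
#include <iostream>
#include <optional>
#include <thread>

// Stand-in for an OperationContext carrying the critical-section decoration.
struct FakeOperationContext {
    std::optional<std::shared_future<void>> shardingCriticalSectionFuture;
};

// Stand-in for the whileYieldingFn the patch installs: it runs while the plan is
// yielded (locks released). If a critical-section future was stashed, wait for it
// to become ready and then clear the slot so later yields do not wait again.
void whileYielding(FakeOperationContext& opCtx) {
    if (opCtx.shardingCriticalSectionFuture) {
        opCtx.shardingCriticalSectionFuture->wait();
        opCtx.shardingCriticalSectionFuture.reset();
    }
}

int main() {
    FakeOperationContext opCtx;

    // A StaleConfig-style check notices an active critical section and stashes a
    // future that becomes ready once the section is exited.
    std::promise<void> criticalSectionDone;
    opCtx.shardingCriticalSectionFuture = criticalSectionDone.get_future().share();

    // Another thread exits the critical section a little later.
    std::thread releaser([&criticalSectionDone] {
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
        criticalSectionDone.set_value();
    });

    // The plan executor yields: instead of immediately retrying and hitting the
    // same critical section again, it blocks here with no locks held.
    whileYielding(opCtx);
    std::cout << "critical section over, plan execution resumes" << std::endl;

    releaser.join();
    return 0;
}

The design point mirrored from the patch's own comment is that the wait happens inside the yield callback, after the lock hierarchy has been released, so the operation neither busy-spins back into the same critical section nor blocks while still holding locks.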