diff options
author | Jordi Serra Torrens <jordi.serra-torrens@mongodb.com> | 2021-10-25 07:13:47 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-10-25 07:37:13 +0000 |
commit | 02add56a2100bef135281938a0cadaf374279f03 (patch) | |
tree | fa437dc0d8a1ef5da579917387c76592fcb16e57 /src/mongo | |
parent | 90a82a5938e5655e283518feb29c92bdb490bb9d (diff) | |
download | mongo-02add56a2100bef135281938a0cadaf374279f03.tar.gz |
SERVER-59965 Limit max wait time behind critical section during filtering metadata refresh in txn
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/s/shard_filtering_metadata_refresh.cpp | 26 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_runtime_d_params.idl | 11 |
2 files changed, 33 insertions, 4 deletions
diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp index 413cf72430c..f16739f660d 100644 --- a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp +++ b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp @@ -89,7 +89,8 @@ bool joinShardVersionOperation(OperationContext* opCtx, CollectionShardingRuntime* csr, boost::optional<Lock::DBLock>* dbLock, boost::optional<Lock::CollectionLock>* collLock, - boost::optional<CollectionShardingRuntime::CSRLock>* csrLock) { + boost::optional<CollectionShardingRuntime::CSRLock>* csrLock, + Milliseconds criticalSectionMaxWait) { invariant(collLock->has_value()); invariant(csrLock->has_value()); @@ -106,7 +107,11 @@ bool joinShardVersionOperation(OperationContext* opCtx, dbLock->reset(); if (critSecSignal) { - critSecSignal->get(opCtx); + const auto deadline = criticalSectionMaxWait == Milliseconds::max() + ? Date_t::max() + : opCtx->getServiceContext()->getFastClockSource()->now() + criticalSectionMaxWait; + opCtx->runWithDeadline( + deadline, ErrorCodes::ExceededTimeLimit, [&] { critSecSignal->get(opCtx); }); } else { inRecoverOrRefresh->get(opCtx); } @@ -207,6 +212,17 @@ void onShardVersionMismatch(OperationContext* opCtx, "namespace"_attr = nss, "shardVersionReceived"_attr = shardVersionReceived); + // If we are in a transaction, limit the time we can wait behind the critical section. This is + // needed in order to prevent distributed deadlocks in situations where a DDL operation needs to + // acquire the critical section on several shards. In that case, a shard running a transaction + // could be waiting for the critical section to be exited, while on another shard the + // transaction has already executed some statement and stashed locks which prevent the critical + // section from being acquired in that node. Limiting the wait behind the critical section will + // ensure that the transaction will eventually get aborted. 
+ const auto criticalSectionMaxWait = opCtx->inMultiDocumentTransaction() + ? Milliseconds(metadataRefreshInTransactionMaxWaitBehindCritSecMS.load()) + : Milliseconds::max(); + boost::optional<SharedSemiFuture<void>> inRecoverOrRefresh; while (true) { boost::optional<Lock::DBLock> dbLock; @@ -218,7 +234,8 @@ void onShardVersionMismatch(OperationContext* opCtx, boost::optional<CollectionShardingRuntime::CSRLock> csrLock = CollectionShardingRuntime::CSRLock::lockShared(opCtx, csr); - if (joinShardVersionOperation(opCtx, csr, &dbLock, &collLock, &csrLock)) { + if (joinShardVersionOperation( + opCtx, csr, &dbLock, &collLock, &csrLock, criticalSectionMaxWait)) { continue; } @@ -240,7 +257,8 @@ void onShardVersionMismatch(OperationContext* opCtx, // If there is no ongoing shard version operation, initialize the RecoverRefreshThread // thread and associate it to the CSR. - if (!joinShardVersionOperation(opCtx, csr, &dbLock, &collLock, &csrLock)) { + if (!joinShardVersionOperation( + opCtx, csr, &dbLock, &collLock, &csrLock, criticalSectionMaxWait)) { // If the shard doesn't yet know its filtering metadata, recovery needs to be run const bool runRecover = metadata ? false : true; csr->setShardVersionRecoverRefreshFuture( diff --git a/src/mongo/db/s/sharding_runtime_d_params.idl b/src/mongo/db/s/sharding_runtime_d_params.idl index af7d732265c..cd5885bbe3f 100644 --- a/src/mongo/db/s/sharding_runtime_d_params.idl +++ b/src/mongo/db/s/sharding_runtime_d_params.idl @@ -141,3 +141,14 @@ server_parameters: cpp_vartype: int cpp_varname: shardedIndexConsistencyCheckIntervalMS default: 600000 + + metadataRefreshInTransactionMaxWaitBehindCritSecMS: + description: >- + Maximum time in milliseconds to wait behind the critical section when refreshing the + filtering metadata within a transaction. + set_at: [startup, runtime] + cpp_vartype: AtomicWord<int> + cpp_varname: metadataRefreshInTransactionMaxWaitBehindCritSecMS + validator: + gte: 0 + default: 500 |