diff options
author | Yu Jin Kang Park <yujin.kang@mongodb.com> | 2022-11-21 17:09:50 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-12-02 14:46:29 +0000 |
commit | 9ca8158531bf786e374275598cde7e972910827e (patch) | |
tree | b3b634b4a6d785cfd7c366c3b18724878d2bb902 /src | |
parent | f90b35cf6c25a223c2f96649ee74a8ee943c13c7 (diff) | |
download | mongo-9ca8158531bf786e374275598cde7e972910827e.tar.gz |
SERVER-61909 Abort operation if it cannot ever fit in WT cache
(cherry picked from commit 9ddbd3512044ca6508cb72d9a321391d45c18140)
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/base/error_codes.yml | 2 | ||||
-rw-r--r-- | src/mongo/db/catalog/README.md | 24 | ||||
-rw-r--r-- | src/mongo/db/concurrency/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/concurrency/exception_util.cpp | 36 | ||||
-rw-r--r-- | src/mongo/db/concurrency/exception_util.h | 31 | ||||
-rw-r--r-- | src/mongo/db/concurrency/exception_util.idl | 15 | ||||
-rw-r--r-- | src/mongo/db/query/plan_executor_impl.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp | 79 | ||||
-rw-r--r-- | src/mongo/util/assert_util.h | 18 |
9 files changed, 189 insertions, 19 deletions
diff --git a/src/mongo/base/error_codes.yml b/src/mongo/base/error_codes.yml index 324b43f0d40..dbed4098a0d 100644 --- a/src/mongo/base/error_codes.yml +++ b/src/mongo/base/error_codes.yml @@ -506,6 +506,8 @@ error_codes: - {code: 387, name: EncounteredFLEPayloadWhileRedacting} + - {code: 388, name: TransactionTooLargeForCache} + # Error codes 4000-8999 are reserved. # Non-sequential error codes for compatibility only) diff --git a/src/mongo/db/catalog/README.md b/src/mongo/db/catalog/README.md index 09b1e2f4ca8..005345a996e 100644 --- a/src/mongo/db/catalog/README.md +++ b/src/mongo/db/catalog/README.md @@ -444,6 +444,30 @@ See [wtRcToStatus](https://github.com/mongodb/mongo/blob/c799851554dc01493d35b43701416e9c78b3665c/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp#L178-L183) where we throw the exception in WiredTiger. See [TemporarilyUnavailableException](https://github.com/mongodb/mongo/blob/c799851554dc01493d35b43701416e9c78b3665c/src/mongo/db/concurrency/temporarily_unavailable_exception.h#L39-L45). + +## TransactionTooLargeForCacheException + +A TransactionTooLargeForCacheException may be thrown inside the server to indicate that an operation +was rolled back and is unlikely to ever complete because the storage engine cache is insufficient, +even in the absence of concurrent operations. This is determined by a simple heuristic wherein, +after a rollback, a threshold on the proportion of total dirty cache bytes the running transaction +can represent and still be considered fulfillable is checked. The threshold can be tuned with the +`transactionTooLargeForCacheThreshold` parameter. Setting this threshold to its maximum value (1.0) +causes the check to be skipped and TransactionTooLargeForCacheException to be disabled. + +On replica sets, if an operation succeeds on a primary, it should also succeed on a secondary. 
It +would be possible to convert to both TemporarilyUnavailableException and WriteConflictException, +as if TransactionTooLargeForCacheException was disabled. But on secondaries the only +difference between the two is the rate at which the operation is retried. Hence, +TransactionTooLargeForCacheException is always converted to a WriteConflictException, which retries +faster, to avoid stalling replication longer than necessary. + +Prior to 6.3, or when TransactionTooLargeForCacheException is disabled, multi-document +transactions always return a WriteConflictException, which may result in drivers retrying an +operation indefinitely. For non-multi-document operations, there is a limited number of retries on +TemporarilyUnavailableException, but it might still be beneficial to not retry operations which are +unlikely to complete and are disruptive for concurrent operations. + ## Collection and Index Writes Collection write operations (inserts, updates, and deletes) perform storage engine writes to both diff --git a/src/mongo/db/concurrency/SConscript b/src/mongo/db/concurrency/SConscript index 37880a0c061..90b0e21ba07 100644 --- a/src/mongo/db/concurrency/SConscript +++ b/src/mongo/db/concurrency/SConscript @@ -27,6 +27,7 @@ env.Library( ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/commands/server_status_core', + '$BUILD_DIR/mongo/db/curop', '$BUILD_DIR/mongo/db/server_base', '$BUILD_DIR/mongo/db/server_options_servers', '$BUILD_DIR/mongo/db/storage/recovery_unit_base', diff --git a/src/mongo/db/concurrency/exception_util.cpp b/src/mongo/db/concurrency/exception_util.cpp index ebece0e57e7..278c51cde80 100644 --- a/src/mongo/db/concurrency/exception_util.cpp +++ b/src/mongo/db/concurrency/exception_util.cpp @@ -33,6 +33,7 @@ #include "mongo/db/concurrency/exception_util_gen.h" #include "mongo/db/namespace_string.h" #include "mongo/logv2/log.h" +#include "mongo/util/assert_util.h" #include "mongo/util/duration.h" #include "mongo/util/log_and_backoff.h" @@ -52,6 +53,16 @@ 
void logWriteConflictAndBackoff(int attempt, StringData operation, StringData ns logAttrs(NamespaceString(ns))); } +void handleWriteConflictException(OperationContext* opCtx, + int* writeConflictAttempts, + StringData opStr, + StringData ns) { + CurOp::get(opCtx)->debug().additiveMetrics.incrementWriteConflicts(1); + logWriteConflictAndBackoff(*writeConflictAttempts, opStr, ns); + ++writeConflictAttempts; + opCtx->recoveryUnit()->abandonSnapshot(); +} + namespace { CounterMetric temporarilyUnavailableErrors{"operation.temporarilyUnavailableErrors"}; @@ -59,6 +70,11 @@ CounterMetric temporarilyUnavailableErrorsEscaped{"operation.temporarilyUnavaila CounterMetric temporarilyUnavailableErrorsConvertedToWriteConflict{ "operation.temporarilyUnavailableErrorsConvertedToWriteConflict"}; +CounterMetric transactionTooLargeForCacheErrors{"operation.transactionTooLargeForCacheErrors"}; +CounterMetric transactionTooLargeForCacheErrorsConvertedToWriteConflict{ + "operation.transactionTooLargeForCacheErrorsConvertedToWriteConflict"}; + + } // namespace void handleTemporarilyUnavailableException(OperationContext* opCtx, @@ -66,6 +82,8 @@ void handleTemporarilyUnavailableException(OperationContext* opCtx, StringData opStr, StringData ns, const TemporarilyUnavailableException& e) { + CurOp::get(opCtx)->debug().additiveMetrics.incrementTemporarilyUnavailableErrors(1); + opCtx->recoveryUnit()->abandonSnapshot(); temporarilyUnavailableErrors.increment(1); if (opCtx->getClient()->isFromUserConnection() && @@ -107,4 +125,22 @@ void handleTemporarilyUnavailableExceptionInTransaction(OperationContext* opCtx, throwWriteConflictException(e.reason()); } +void handleTransactionTooLargeForCacheException(OperationContext* opCtx, + int* writeConflictAttempts, + StringData opStr, + StringData ns, + const TransactionTooLargeForCacheException& e) { + transactionTooLargeForCacheErrors.increment(1); + if (opCtx->writesAreReplicated()) { + // Surface error on primaries. 
+ throw e; + } + // If an operation succeeds on primary, it should always be retried on secondaries. Secondaries + // always retry TemporarilyUnavailableExceptions and WriteConflictExceptions indefinitely, the + // only difference being the rate of retry. We prefer retrying faster, by converting to + // WriteConflictException, to avoid stalling replication longer than necessary. + transactionTooLargeForCacheErrorsConvertedToWriteConflict.increment(1); + handleWriteConflictException(opCtx, writeConflictAttempts, opStr, ns); +} + } // namespace mongo diff --git a/src/mongo/db/concurrency/exception_util.h b/src/mongo/db/concurrency/exception_util.h index 21346bdd533..6e634cc3ac6 100644 --- a/src/mongo/db/concurrency/exception_util.h +++ b/src/mongo/db/concurrency/exception_util.h @@ -47,6 +47,11 @@ extern FailPoint skipWriteConflictRetries; */ void logWriteConflictAndBackoff(int attempt, StringData operation, StringData ns); +void handleWriteConflictException(OperationContext* opCtx, + int* writeConflictAttempts, + StringData opStr, + StringData ns); + void handleTemporarilyUnavailableException(OperationContext* opCtx, int attempts, StringData opStr, @@ -61,6 +66,12 @@ void handleTemporarilyUnavailableExceptionInTransaction(OperationContext* opCtx, StringData ns, const TemporarilyUnavailableException& e); +void handleTransactionTooLargeForCacheException(OperationContext* opCtx, + int* writeConflictAttempts, + StringData opStr, + StringData ns, + const TransactionTooLargeForCacheException& e); + /** * A `WriteConflictException` is thrown if during a write, two or more operations conflict with each * other. 
For example if two operations get the same version of a document, and then both try to @@ -85,6 +96,16 @@ void handleTemporarilyUnavailableExceptionInTransaction(OperationContext* opCtx, } /** + * A `TransactionTooLargeForCache` is thrown if it has been determined that it is unlikely to + * ever complete the operation because the configured cache is insufficient to hold all the + * transaction state. This helps to avoid retrying, maybe indefinitely, a transaction which would + * never be able to complete. + */ +[[noreturn]] inline void throwTransactionTooLargeForCache(StringData context) { + iasserted({ErrorCodes::TransactionTooLargeForCache, context}); +} + +/** * Runs the argument function f as many times as needed for f to complete or throw an exception * other than WriteConflictException or TemporarilyUnavailableException. For each time f throws * one of these exceptions, logs the error, waits a spell, cleans up, and then tries f again. @@ -119,19 +140,17 @@ auto writeConflictRetry(OperationContext* opCtx, StringData opStr, StringData ns } } - int attempts = 0; + int writeConflictAttempts = 0; int attemptsTempUnavailable = 0; while (true) { try { return f(); } catch (WriteConflictException const&) { - CurOp::get(opCtx)->debug().additiveMetrics.incrementWriteConflicts(1); - logWriteConflictAndBackoff(attempts, opStr, ns); - ++attempts; - opCtx->recoveryUnit()->abandonSnapshot(); + handleWriteConflictException(opCtx, &writeConflictAttempts, opStr, ns); } catch (TemporarilyUnavailableException const& e) { - CurOp::get(opCtx)->debug().additiveMetrics.incrementTemporarilyUnavailableErrors(1); handleTemporarilyUnavailableException(opCtx, ++attemptsTempUnavailable, opStr, ns, e); + } catch (TransactionTooLargeForCacheException const& e) { + handleTransactionTooLargeForCacheException(opCtx, &writeConflictAttempts, opStr, ns, e); } } } diff --git a/src/mongo/db/concurrency/exception_util.idl b/src/mongo/db/concurrency/exception_util.idl index afa03ca5185..3fdcef5235b 
100644 --- a/src/mongo/db/concurrency/exception_util.idl +++ b/src/mongo/db/concurrency/exception_util.idl @@ -57,3 +57,18 @@ server_parameters: default: 1000 validator: gte: 0 + + transactionTooLargeForCacheThreshold: + description: "Threshold on the proportion of total dirty cache bytes that the running + transaction's dirty cache bytes can represent and still be considered + fulfillable on retry. If this threshold is exceeded, a + TransactionTooLargeForCache exception is thrown. Setting this parameter to 1.0 + causes this check to be disabled, and TransactionTooLargeForCache exceptions + will not be thrown." + set_at: [ startup, runtime ] + cpp_varname: 'gTransactionTooLargeForCacheThreshold' + cpp_vartype: AtomicWord<double> + default: 0.75 + validator: + gte: 0.0 + lte: 1.0 diff --git a/src/mongo/db/query/plan_executor_impl.cpp b/src/mongo/db/query/plan_executor_impl.cpp index 693be64c962..5045f54c302 100644 --- a/src/mongo/db/query/plan_executor_impl.cpp +++ b/src/mongo/db/query/plan_executor_impl.cpp @@ -453,8 +453,6 @@ PlanExecutor::ExecState PlanExecutorImpl::_getNextImpl(Snapshotted<Document>* ob "got TemporarilyUnavailable exception on a plan that cannot auto-yield"); } - CurOp::get(_opCtx)->debug().additiveMetrics.incrementTemporarilyUnavailableErrors( - 1); tempUnavailErrorsInARow++; handleTemporarilyUnavailableException( _opCtx, diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp index fe9d80ad02b..5079cbceab8 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp @@ -52,6 +52,9 @@ // From src/third_party/wiredtiger/src/include/txn.h #define WT_TXN_ROLLBACK_REASON_OLDEST_FOR_EVICTION \ "oldest pinned transaction ID rolled back for eviction" + +#define WT_TXN_ROLLBACK_REASON_TOO_LARGE_FOR_CACHE \ + "transaction is too large and will not fit in the storage engine cache" namespace mongo { namespace { @@ -126,18 +129,75 
@@ bool wasRollbackReasonCachePressure(WT_SESSION* session) { return false; } +/** + * Configured WT cache is deemed insufficient for a transaction when the transaction's dirty bytes + * exceed a certain threshold on the proportion of the total dirty bytes in the WT cache. + * + * For instance, if the transaction accounts for 80% of the dirty cache bytes and the threshold is + * set to 75%, the transaction is considered too large. + */ +bool isCacheInsufficientForTransaction(WT_SESSION* session, double threshold) { + StatusWith<int64_t> txnDirtyBytes = WiredTigerUtil::getStatisticsValue( + session, "statistics:session", "", WT_STAT_SESSION_TXN_BYTES_DIRTY); + if (!txnDirtyBytes.isOK()) { + tasserted(6190900, + str::stream() << "unable to gather the WT session's txn dirty bytes: " + << txnDirtyBytes.getStatus()); + } + + StatusWith<int64_t> cacheDirtyBytes = WiredTigerUtil::getStatisticsValue( + session, "statistics:", "", WT_STAT_CONN_CACHE_BYTES_DIRTY); + if (!cacheDirtyBytes.isOK()) { + tasserted(6190901, + str::stream() << "unable to gather the WT connection's cache dirty bytes: " + << cacheDirtyBytes.getStatus()); + } + + + double txnBytesDirtyOverCacheBytesDirty = + static_cast<double>(txnDirtyBytes.getValue()) / cacheDirtyBytes.getValue(); + + LOGV2_DEBUG(6190902, + 2, + "Checking if transaction can eventually succeed", + "txnDirtyBytes"_attr = txnDirtyBytes.getValue(), + "cacheDirtyBytes"_attr = cacheDirtyBytes.getValue(), + "txnBytesDirtyOverCacheBytesDirty"_attr = txnBytesDirtyOverCacheBytesDirty, + "threshold"_attr = threshold); + + return txnBytesDirtyOverCacheBytesDirty > threshold; +} + Status wtRCToStatus_slow(int retCode, WT_SESSION* session, StringData prefix) { if (retCode == 0) return Status::OK(); + const auto generateContextStrStream = [&](StringData reason) { + str::stream contextStrStream; + if (!prefix.empty()) + contextStrStream << prefix << " "; + contextStrStream << retCode << ": " << reason; + + return contextStrStream; + }; + if (retCode == WT_ROLLBACK) { - if 
(gEnableTemporarilyUnavailableExceptions.load() && - wasRollbackReasonCachePressure(session)) { - str::stream s; - if (!prefix.empty()) - s << prefix << " "; - s << retCode << ": " << WT_TXN_ROLLBACK_REASON_OLDEST_FOR_EVICTION; - throwTemporarilyUnavailableException(s); + double cacheThreshold = gTransactionTooLargeForCacheThreshold.load(); + bool txnTooLargeEnabled = cacheThreshold < 1.0; + bool temporarilyUnavailableEnabled = gEnableTemporarilyUnavailableExceptions.load(); + bool reasonWasCachePressure = (txnTooLargeEnabled || temporarilyUnavailableEnabled) && + wasRollbackReasonCachePressure(session); + + if (reasonWasCachePressure) { + if (txnTooLargeEnabled && isCacheInsufficientForTransaction(session, cacheThreshold)) { + auto s = generateContextStrStream(WT_TXN_ROLLBACK_REASON_TOO_LARGE_FOR_CACHE); + throwTransactionTooLargeForCache(s); + } + + if (temporarilyUnavailableEnabled) { + auto s = generateContextStrStream(WT_TXN_ROLLBACK_REASON_OLDEST_FOR_EVICTION); + throwTemporarilyUnavailableException(s); + } } throwWriteConflictException(prefix); @@ -146,10 +206,7 @@ Status wtRCToStatus_slow(int retCode, WT_SESSION* session, StringData prefix) { // Don't abort on WT_PANIC when repairing, as the error will be handled at a higher layer. fassert(28559, retCode != WT_PANIC || storageGlobalParams.repair); - str::stream s; - if (!prefix.empty()) - s << prefix << " "; - s << retCode << ": " << wiredtiger_strerror(retCode); + auto s = generateContextStrStream(wiredtiger_strerror(retCode)); if (retCode == EINVAL) { return Status(ErrorCodes::BadValue, s); diff --git a/src/mongo/util/assert_util.h b/src/mongo/util/assert_util.h index 4b664f72d0b..f864738b700 100644 --- a/src/mongo/util/assert_util.h +++ b/src/mongo/util/assert_util.h @@ -180,6 +180,19 @@ private: }; /** + * Use `throwTransactionTooLargeForCache()` instead of throwing + * `TransactionTooLargeForCache` directly. 
+ */ +class TransactionTooLargeForCacheException final : public DBException { +public: + TransactionTooLargeForCacheException(const Status& status) : DBException(status) {} + +private: + void defineOnlyInFinalSubclassToPreventSlicing() final {} +}; + + +/** * The base class of all DBExceptions for codes of the given ErrorCategory to allow catching by * category. */ @@ -242,6 +255,11 @@ struct ExceptionForDispatcher<ErrorCodes::TemporarilyUnavailable> { using type = TemporarilyUnavailableException; }; +template <> +struct ExceptionForDispatcher<ErrorCodes::TransactionTooLargeForCache> { + using type = TransactionTooLargeForCacheException; +}; + } // namespace error_details |