summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorYu Jin Kang Park <yujin.kang@mongodb.com>2022-11-21 17:09:50 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-12-02 14:46:29 +0000
commit9ca8158531bf786e374275598cde7e972910827e (patch)
treeb3b634b4a6d785cfd7c366c3b18724878d2bb902 /src
parentf90b35cf6c25a223c2f96649ee74a8ee943c13c7 (diff)
downloadmongo-9ca8158531bf786e374275598cde7e972910827e.tar.gz
SERVER-61909 Abort operation if it cannot ever fit in WT cache
(cherry picked from commit 9ddbd3512044ca6508cb72d9a321391d45c18140)
Diffstat (limited to 'src')
-rw-r--r--src/mongo/base/error_codes.yml2
-rw-r--r--src/mongo/db/catalog/README.md24
-rw-r--r--src/mongo/db/concurrency/SConscript1
-rw-r--r--src/mongo/db/concurrency/exception_util.cpp36
-rw-r--r--src/mongo/db/concurrency/exception_util.h31
-rw-r--r--src/mongo/db/concurrency/exception_util.idl15
-rw-r--r--src/mongo/db/query/plan_executor_impl.cpp2
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp79
-rw-r--r--src/mongo/util/assert_util.h18
9 files changed, 189 insertions, 19 deletions
diff --git a/src/mongo/base/error_codes.yml b/src/mongo/base/error_codes.yml
index 324b43f0d40..dbed4098a0d 100644
--- a/src/mongo/base/error_codes.yml
+++ b/src/mongo/base/error_codes.yml
@@ -506,6 +506,8 @@ error_codes:
- {code: 387, name: EncounteredFLEPayloadWhileRedacting}
+ - {code: 388, name: TransactionTooLargeForCache}
+
# Error codes 4000-8999 are reserved.
# Non-sequential error codes for compatibility only)
diff --git a/src/mongo/db/catalog/README.md b/src/mongo/db/catalog/README.md
index 09b1e2f4ca8..005345a996e 100644
--- a/src/mongo/db/catalog/README.md
+++ b/src/mongo/db/catalog/README.md
@@ -444,6 +444,30 @@ See
[wtRcToStatus](https://github.com/mongodb/mongo/blob/c799851554dc01493d35b43701416e9c78b3665c/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp#L178-L183)
where we throw the exception in WiredTiger.
See [TemporarilyUnavailableException](https://github.com/mongodb/mongo/blob/c799851554dc01493d35b43701416e9c78b3665c/src/mongo/db/concurrency/temporarily_unavailable_exception.h#L39-L45).
+
+## TransactionTooLargeForCacheException
+
+A TransactionTooLargeForCacheException may be thrown inside the server to indicate that an operation
+was rolled-back and is unlikely to ever complete because the storage engine cache is insufficient,
+even in the absence of concurrent operations. This is determined by a simple heuristic wherein,
+after a rollback, a threshold on the proportion of total dirty cache bytes the running transaction
+can represent and still be considered fullfillable is checked. The threshold can be tuned with the
+`transactionTooLargeForCacheThreshold` parameter. Setting this threshold to its maximum value (1.0)
+causes the check to be skipped and TransactionTooLargeForCacheException to be disabled.
+
+On replica sets, if an operation succeeds on a primary, it should also succeed on a secondary. It
+would be possible to convert to both TemporarilyUnavailableException and WriteConflictException,
+as if TransactionTooLargeForCacheException was disabled. But on secondaries the only
+difference between the two is the rate at which the operation is retried. Hence,
+TransactionTooLargeForCacheException is always converted to a WriteConflictException, which retries
+faster, to avoid stalling replication longer than necessary.
+
+Prior to 6.3, or when TransactionTooLargeForCacheException is disabled, multi-document
+transactions always return a WriteConflictException, which may result in drivers retrying an
+operation indefinitely. For non-multi-document operations, there is a limited number of retries on
+TemporarilyUnavailableException, but it might still be beneficial to not retry operations which are
+unlikely to complete and are disruptive for concurrent operations.
+
## Collection and Index Writes
Collection write operations (inserts, updates, and deletes) perform storage engine writes to both
diff --git a/src/mongo/db/concurrency/SConscript b/src/mongo/db/concurrency/SConscript
index 37880a0c061..90b0e21ba07 100644
--- a/src/mongo/db/concurrency/SConscript
+++ b/src/mongo/db/concurrency/SConscript
@@ -27,6 +27,7 @@ env.Library(
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/commands/server_status_core',
+ '$BUILD_DIR/mongo/db/curop',
'$BUILD_DIR/mongo/db/server_base',
'$BUILD_DIR/mongo/db/server_options_servers',
'$BUILD_DIR/mongo/db/storage/recovery_unit_base',
diff --git a/src/mongo/db/concurrency/exception_util.cpp b/src/mongo/db/concurrency/exception_util.cpp
index ebece0e57e7..278c51cde80 100644
--- a/src/mongo/db/concurrency/exception_util.cpp
+++ b/src/mongo/db/concurrency/exception_util.cpp
@@ -33,6 +33,7 @@
#include "mongo/db/concurrency/exception_util_gen.h"
#include "mongo/db/namespace_string.h"
#include "mongo/logv2/log.h"
+#include "mongo/util/assert_util.h"
#include "mongo/util/duration.h"
#include "mongo/util/log_and_backoff.h"
@@ -52,6 +53,16 @@ void logWriteConflictAndBackoff(int attempt, StringData operation, StringData ns
logAttrs(NamespaceString(ns)));
}
+void handleWriteConflictException(OperationContext* opCtx,
+ int* writeConflictAttempts,
+ StringData opStr,
+ StringData ns) {
+ CurOp::get(opCtx)->debug().additiveMetrics.incrementWriteConflicts(1);
+ logWriteConflictAndBackoff(*writeConflictAttempts, opStr, ns);
+ ++writeConflictAttempts;
+ opCtx->recoveryUnit()->abandonSnapshot();
+}
+
namespace {
CounterMetric temporarilyUnavailableErrors{"operation.temporarilyUnavailableErrors"};
@@ -59,6 +70,11 @@ CounterMetric temporarilyUnavailableErrorsEscaped{"operation.temporarilyUnavaila
CounterMetric temporarilyUnavailableErrorsConvertedToWriteConflict{
"operation.temporarilyUnavailableErrorsConvertedToWriteConflict"};
+CounterMetric transactionTooLargeForCacheErrors{"operation.transactionTooLargeForCacheErrors"};
+CounterMetric transactionTooLargeForCacheErrorsConvertedToWriteConflict{
+ "operation.transactionTooLargeForCacheErrorsConvertedToWriteConflict"};
+
+
} // namespace
void handleTemporarilyUnavailableException(OperationContext* opCtx,
@@ -66,6 +82,8 @@ void handleTemporarilyUnavailableException(OperationContext* opCtx,
StringData opStr,
StringData ns,
const TemporarilyUnavailableException& e) {
+ CurOp::get(opCtx)->debug().additiveMetrics.incrementTemporarilyUnavailableErrors(1);
+
opCtx->recoveryUnit()->abandonSnapshot();
temporarilyUnavailableErrors.increment(1);
if (opCtx->getClient()->isFromUserConnection() &&
@@ -107,4 +125,22 @@ void handleTemporarilyUnavailableExceptionInTransaction(OperationContext* opCtx,
throwWriteConflictException(e.reason());
}
+void handleTransactionTooLargeForCacheException(OperationContext* opCtx,
+ int* writeConflictAttempts,
+ StringData opStr,
+ StringData ns,
+ const TransactionTooLargeForCacheException& e) {
+ transactionTooLargeForCacheErrors.increment(1);
+ if (opCtx->writesAreReplicated()) {
+ // Surface error on primaries.
+ throw e;
+ }
+ // If an operation succeeds on primary, it should always be retried on secondaries. Secondaries
+ // always retry TemporarilyUnavailableExceptions and WriteConflictExceptions indefinitely, the
+ // only difference being the rate of retry. We prefer retrying faster, by converting to
+ // WriteConflictException, to avoid stalling replication longer than necessary.
+ transactionTooLargeForCacheErrorsConvertedToWriteConflict.increment(1);
+ handleWriteConflictException(opCtx, writeConflictAttempts, opStr, ns);
+}
+
} // namespace mongo
diff --git a/src/mongo/db/concurrency/exception_util.h b/src/mongo/db/concurrency/exception_util.h
index 21346bdd533..6e634cc3ac6 100644
--- a/src/mongo/db/concurrency/exception_util.h
+++ b/src/mongo/db/concurrency/exception_util.h
@@ -47,6 +47,11 @@ extern FailPoint skipWriteConflictRetries;
*/
void logWriteConflictAndBackoff(int attempt, StringData operation, StringData ns);
+void handleWriteConflictException(OperationContext* opCtx,
+ int* writeConflictAttempts,
+ StringData opStr,
+ StringData ns);
+
void handleTemporarilyUnavailableException(OperationContext* opCtx,
int attempts,
StringData opStr,
@@ -61,6 +66,12 @@ void handleTemporarilyUnavailableExceptionInTransaction(OperationContext* opCtx,
StringData ns,
const TemporarilyUnavailableException& e);
+void handleTransactionTooLargeForCacheException(OperationContext* opCtx,
+ int* writeConflictAttempts,
+ StringData opStr,
+ StringData ns,
+ const TransactionTooLargeForCacheException& e);
+
/**
* A `WriteConflictException` is thrown if during a write, two or more operations conflict with each
* other. For example if two operations get the same version of a document, and then both try to
@@ -85,6 +96,16 @@ void handleTemporarilyUnavailableExceptionInTransaction(OperationContext* opCtx,
}
/**
+ * A `TransactionTooLargeForCache` is thrown if it has been determined that it is unlikely to
+ * ever complete the operation because the configured cache is insufficient to hold all the
+ * transaction state. This helps to avoid retrying, maybe indefinitely, a transaction which would
+ * never be able to complete.
+ */
+[[noreturn]] inline void throwTransactionTooLargeForCache(StringData context) {
+ iasserted({ErrorCodes::TransactionTooLargeForCache, context});
+}
+
+/**
* Runs the argument function f as many times as needed for f to complete or throw an exception
* other than WriteConflictException or TemporarilyUnavailableException. For each time f throws
* one of these exceptions, logs the error, waits a spell, cleans up, and then tries f again.
@@ -119,19 +140,17 @@ auto writeConflictRetry(OperationContext* opCtx, StringData opStr, StringData ns
}
}
- int attempts = 0;
+ int writeConflictAttempts = 0;
int attemptsTempUnavailable = 0;
while (true) {
try {
return f();
} catch (WriteConflictException const&) {
- CurOp::get(opCtx)->debug().additiveMetrics.incrementWriteConflicts(1);
- logWriteConflictAndBackoff(attempts, opStr, ns);
- ++attempts;
- opCtx->recoveryUnit()->abandonSnapshot();
+ handleWriteConflictException(opCtx, &writeConflictAttempts, opStr, ns);
} catch (TemporarilyUnavailableException const& e) {
- CurOp::get(opCtx)->debug().additiveMetrics.incrementTemporarilyUnavailableErrors(1);
handleTemporarilyUnavailableException(opCtx, ++attemptsTempUnavailable, opStr, ns, e);
+ } catch (TransactionTooLargeForCacheException const& e) {
+ handleTransactionTooLargeForCacheException(opCtx, &writeConflictAttempts, opStr, ns, e);
}
}
}
diff --git a/src/mongo/db/concurrency/exception_util.idl b/src/mongo/db/concurrency/exception_util.idl
index afa03ca5185..3fdcef5235b 100644
--- a/src/mongo/db/concurrency/exception_util.idl
+++ b/src/mongo/db/concurrency/exception_util.idl
@@ -57,3 +57,18 @@ server_parameters:
default: 1000
validator:
gte: 0
+
+ transactionTooLargeForCacheThreshold:
+ description: "Threshold on the proportion of total dirty cache bytes that the running
+ transaction's dirty cache bytes can represent and still be considered
+ fullfillable on retry. If this threshold is exceeded, a
+ TransactionTooLargeForCache exception is thrown. Setting this parameter to 1.0
+ causes this check to be disabled, and TransactionTooLargeForCache exceptions
+ will not be thrown."
+ set_at: [ startup, runtime ]
+ cpp_varname: 'gTransactionTooLargeForCacheThreshold'
+ cpp_vartype: AtomicWord<double>
+ default: 0.75
+ validator:
+ gte: 0.0
+ lte: 1.0
diff --git a/src/mongo/db/query/plan_executor_impl.cpp b/src/mongo/db/query/plan_executor_impl.cpp
index 693be64c962..5045f54c302 100644
--- a/src/mongo/db/query/plan_executor_impl.cpp
+++ b/src/mongo/db/query/plan_executor_impl.cpp
@@ -453,8 +453,6 @@ PlanExecutor::ExecState PlanExecutorImpl::_getNextImpl(Snapshotted<Document>* ob
"got TemporarilyUnavailable exception on a plan that cannot auto-yield");
}
- CurOp::get(_opCtx)->debug().additiveMetrics.incrementTemporarilyUnavailableErrors(
- 1);
tempUnavailErrorsInARow++;
handleTemporarilyUnavailableException(
_opCtx,
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp
index fe9d80ad02b..5079cbceab8 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp
@@ -52,6 +52,9 @@
// From src/third_party/wiredtiger/src/include/txn.h
#define WT_TXN_ROLLBACK_REASON_OLDEST_FOR_EVICTION \
"oldest pinned transaction ID rolled back for eviction"
+
+#define WT_TXN_ROLLBACK_REASON_TOO_LARGE_FOR_CACHE \
+ "transaction is too large and will not fit in the storage engine cache"
namespace mongo {
namespace {
@@ -126,18 +129,75 @@ bool wasRollbackReasonCachePressure(WT_SESSION* session) {
return false;
}
+/**
+ * Configured WT cache is deemed insufficient for a transaction when its dirty bytes in cache
+ * exceed a certain threshold on the proportion of total cache which is used by transaction.
+ *
+ * For instance, if the transaction uses 80% of WT cache and the threshold is set to 75%, the
+ * transaction is considered too large.
+ */
+bool isCacheInsufficientForTransaction(WT_SESSION* session, double threshold) {
+ StatusWith<int64_t> txnDirtyBytes = WiredTigerUtil::getStatisticsValue(
+ session, "statistics:session", "", WT_STAT_SESSION_TXN_BYTES_DIRTY);
+ if (!txnDirtyBytes.isOK()) {
+ tasserted(6190900,
+ str::stream() << "unable to gather the WT session's txn dirty bytes: "
+ << txnDirtyBytes.getStatus());
+ }
+
+ StatusWith<int64_t> cacheDirtyBytes = WiredTigerUtil::getStatisticsValue(
+ session, "statistics:", "", WT_STAT_CONN_CACHE_BYTES_DIRTY);
+ if (!cacheDirtyBytes.isOK()) {
+ tasserted(6190901,
+ str::stream() << "unable to gather the WT connection's cache dirty bytes: "
+ << txnDirtyBytes.getStatus());
+ }
+
+
+ double txnBytesDirtyOverCacheBytesDirty =
+ static_cast<double>(txnDirtyBytes.getValue()) / cacheDirtyBytes.getValue();
+
+ LOGV2_DEBUG(6190902,
+ 2,
+ "Checking if transaction can eventually succeed",
+ "txnDirtyBytes"_attr = txnDirtyBytes.getValue(),
+ "cacheDirtyBytes"_attr = cacheDirtyBytes.getValue(),
+ "txnBytesDirtyOverCacheBytesDirty"_attr = txnBytesDirtyOverCacheBytesDirty,
+ "threshold"_attr = threshold);
+
+ return txnBytesDirtyOverCacheBytesDirty > threshold;
+}
+
Status wtRCToStatus_slow(int retCode, WT_SESSION* session, StringData prefix) {
if (retCode == 0)
return Status::OK();
+ const auto generateContextStrStream = [&](StringData reason) {
+ str::stream contextStrStream;
+ if (!prefix.empty())
+ contextStrStream << prefix << " ";
+ contextStrStream << retCode << ": " << reason;
+
+ return contextStrStream;
+ };
+
if (retCode == WT_ROLLBACK) {
- if (gEnableTemporarilyUnavailableExceptions.load() &&
- wasRollbackReasonCachePressure(session)) {
- str::stream s;
- if (!prefix.empty())
- s << prefix << " ";
- s << retCode << ": " << WT_TXN_ROLLBACK_REASON_OLDEST_FOR_EVICTION;
- throwTemporarilyUnavailableException(s);
+ double cacheThreshold = gTransactionTooLargeForCacheThreshold.load();
+ bool txnTooLargeEnabled = cacheThreshold < 1.0;
+ bool temporarilyUnavailableEnabled = gEnableTemporarilyUnavailableExceptions.load();
+ bool reasonWasCachePressure = (txnTooLargeEnabled || temporarilyUnavailableEnabled) &&
+ wasRollbackReasonCachePressure(session);
+
+ if (reasonWasCachePressure) {
+ if (txnTooLargeEnabled && isCacheInsufficientForTransaction(session, cacheThreshold)) {
+ auto s = generateContextStrStream(WT_TXN_ROLLBACK_REASON_TOO_LARGE_FOR_CACHE);
+ throwTransactionTooLargeForCache(s);
+ }
+
+ if (temporarilyUnavailableEnabled) {
+ auto s = generateContextStrStream(WT_TXN_ROLLBACK_REASON_OLDEST_FOR_EVICTION);
+ throwTemporarilyUnavailableException(s);
+ }
}
throwWriteConflictException(prefix);
@@ -146,10 +206,7 @@ Status wtRCToStatus_slow(int retCode, WT_SESSION* session, StringData prefix) {
// Don't abort on WT_PANIC when repairing, as the error will be handled at a higher layer.
fassert(28559, retCode != WT_PANIC || storageGlobalParams.repair);
- str::stream s;
- if (!prefix.empty())
- s << prefix << " ";
- s << retCode << ": " << wiredtiger_strerror(retCode);
+ auto s = generateContextStrStream(wiredtiger_strerror(retCode));
if (retCode == EINVAL) {
return Status(ErrorCodes::BadValue, s);
diff --git a/src/mongo/util/assert_util.h b/src/mongo/util/assert_util.h
index 4b664f72d0b..f864738b700 100644
--- a/src/mongo/util/assert_util.h
+++ b/src/mongo/util/assert_util.h
@@ -180,6 +180,19 @@ private:
};
/**
+ * Use `throwTransactionTooLargeForCache()` instead of throwing
+ * `TransactionTooLargeForCache` directly.
+ */
+class TransactionTooLargeForCacheException final : public DBException {
+public:
+ TransactionTooLargeForCacheException(const Status& status) : DBException(status) {}
+
+private:
+ void defineOnlyInFinalSubclassToPreventSlicing() final {}
+};
+
+
+/**
* The base class of all DBExceptions for codes of the given ErrorCategory to allow catching by
* category.
*/
@@ -242,6 +255,11 @@ struct ExceptionForDispatcher<ErrorCodes::TemporarilyUnavailable> {
using type = TemporarilyUnavailableException;
};
+template <>
+struct ExceptionForDispatcher<ErrorCodes::TransactionTooLargeForCache> {
+ using type = TransactionTooLargeForCacheException;
+};
+
} // namespace error_details