From b4cb11ee36993ea978eb2e07482ce4800ae6a335 Mon Sep 17 00:00:00 2001 From: Louis Williams Date: Mon, 14 Mar 2022 09:09:24 +0000 Subject: SERVER-63760 Report the number of TemporarilyUnavailable errors that escape the server This exposes serverStatus metrics for the number of TU errors that are caught, the number returned to the user, and the number converted to WriteConflicts for multi-doc transactions. --- .../noPassthrough/temporarily_unavailable_error.js | 16 ++++++++++++ .../temporarily_unavailable_exception.cpp | 29 ++++++++++++++++++++++ .../temporarily_unavailable_exception.h | 11 ++++++++ .../db/concurrency/write_conflict_exception.h | 6 +---- src/mongo/db/curop_metrics.cpp | 5 ---- 5 files changed, 57 insertions(+), 10 deletions(-) diff --git a/jstests/noPassthrough/temporarily_unavailable_error.js b/jstests/noPassthrough/temporarily_unavailable_error.js index d70841733c8..fca31e56971 100644 --- a/jstests/noPassthrough/temporarily_unavailable_error.js +++ b/jstests/noPassthrough/temporarily_unavailable_error.js @@ -38,6 +38,7 @@ for (let j = 0; j < 50000; j++) doc.x.push("" + Math.random() + Math.random()); (function temporarilyUnavailableNonTransaction() { + const serverStatusBefore = db.serverStatus(); let caughtTUerror = false; let attempts; for (attempts = 1; attempts <= 20; attempts++) { @@ -58,11 +59,17 @@ for (let j = 0; j < 50000; j++) assert(caughtTUerror, "did not return the expected TemporarilyUnavailable error after " + (attempts - 1) + " attempts"); + const serverStatusAfter = db.serverStatus(); + assert.gt(serverStatusAfter.metrics.operation.temporarilyUnavailableErrors, + serverStatusBefore.metrics.operation.temporarilyUnavailableErrors); + assert.gt(serverStatusAfter.metrics.operation.temporarilyUnavailableErrorsEscaped, + serverStatusBefore.metrics.operation.temporarilyUnavailableErrorsEscaped); })(); (function temporarilyUnavailableInTransactionIsConvertedToWriteConflict() { // Inside a transaction, TemporarilyUnavailable errors should 
be converted to // WriteConflicts and tagged as TransientTransactionErrors. + const serverStatusBefore = db.serverStatus(); let caughtWriteConflict = false; let attempts; let ret; @@ -93,6 +100,15 @@ for (let j = 0; j < 50000; j++) assert(caughtWriteConflict, "did not return the expected WriteConflict error after " + (attempts - 1) + " attempts. Result: " + tojson(ret)); + + const serverStatusAfter = db.serverStatus(); + assert.gt( + serverStatusAfter.metrics.operation.temporarilyUnavailableErrorsConvertedToWriteConflict, + serverStatusBefore.metrics.operation.temporarilyUnavailableErrorsConvertedToWriteConflict); + assert.eq(serverStatusAfter.metrics.operation.temporarilyUnavailableErrors, + serverStatusBefore.metrics.operation.temporarilyUnavailableErrors); + assert.eq(serverStatusAfter.metrics.operation.temporarilyUnavailableErrorsEscaped, + serverStatusBefore.metrics.operation.temporarilyUnavailableErrorsEscaped); })(); replSet.stopSet(); diff --git a/src/mongo/db/concurrency/temporarily_unavailable_exception.cpp b/src/mongo/db/concurrency/temporarily_unavailable_exception.cpp index 98131fc3140..fee20c4d268 100644 --- a/src/mongo/db/concurrency/temporarily_unavailable_exception.cpp +++ b/src/mongo/db/concurrency/temporarily_unavailable_exception.cpp @@ -31,6 +31,8 @@ #include "mongo/db/concurrency/temporarily_unavailable_exception.h" #include "mongo/base/string_data.h" +#include "mongo/db/commands/server_status_metric.h" +#include "mongo/db/concurrency/write_conflict_exception.h" #include "mongo/db/server_options_general_gen.h" #include "mongo/logv2/log.h" #include "mongo/util/duration.h" @@ -41,6 +43,18 @@ namespace mongo { AtomicWord TemporarilyUnavailableException::maxRetryAttempts; AtomicWord TemporarilyUnavailableException::retryBackoffBaseMs; +Counter64 temporarilyUnavailableErrors; +Counter64 temporarilyUnavailableErrorsEscaped; +Counter64 temporarilyUnavailableErrorsConvertedToWriteConflict; + +ServerStatusMetricField 
displayTemporarilyUnavailableErrors( + "operation.temporarilyUnavailableErrors", &temporarilyUnavailableErrors); +ServerStatusMetricField displayTemporarilyUnavailableErrorsEscaped( + "operation.temporarilyUnavailableErrorsEscaped", &temporarilyUnavailableErrorsEscaped); +ServerStatusMetricField displayTemporarilyUnavailableErrorsConverted( + "operation.temporarilyUnavailableErrorsConvertedToWriteConflict", + &temporarilyUnavailableErrorsConvertedToWriteConflict); + TemporarilyUnavailableException::TemporarilyUnavailableException(StringData context) : DBException(Status(ErrorCodes::TemporarilyUnavailable, context)) {} @@ -50,6 +64,7 @@ void TemporarilyUnavailableException::handle(OperationContext* opCtx, StringData ns, const TemporarilyUnavailableException& e) { opCtx->recoveryUnit()->abandonSnapshot(); + temporarilyUnavailableErrors.increment(1); if (opCtx->getClient()->isFromUserConnection() && attempts > TemporarilyUnavailableException::maxRetryAttempts.load()) { LOGV2_DEBUG(6083901, @@ -59,6 +74,7 @@ void TemporarilyUnavailableException::handle(OperationContext* opCtx, "attempts"_attr = attempts, "operation"_attr = opStr, logAttrs(NamespaceString(ns))); + temporarilyUnavailableErrorsEscaped.increment(1); throw e; } @@ -76,4 +92,17 @@ void TemporarilyUnavailableException::handle(OperationContext* opCtx, opCtx->sleepFor(sleepFor); } +void TemporarilyUnavailableException::handleInTransaction( + OperationContext* opCtx, + StringData opStr, + StringData ns, + const TemporarilyUnavailableException& e) { + // Since WriteConflicts are tagged as TransientTransactionErrors and TemporarilyUnavailable + // errors are not, we convert the error to a WriteConflict to allow users of multi-document + // transactions to retry without changing any behavior. Otherwise, we let the error escape as + // usual. 
+ temporarilyUnavailableErrorsConvertedToWriteConflict.increment(1); + throw WriteConflictException(e.reason()); +} + } // namespace mongo diff --git a/src/mongo/db/concurrency/temporarily_unavailable_exception.h b/src/mongo/db/concurrency/temporarily_unavailable_exception.h index ecd12c46943..cffd4f518e4 100644 --- a/src/mongo/db/concurrency/temporarily_unavailable_exception.h +++ b/src/mongo/db/concurrency/temporarily_unavailable_exception.h @@ -49,12 +49,23 @@ public: TemporarilyUnavailableException(StringData context); + /** + * Handle a TemporarilyUnavailableException. + */ static void handle(OperationContext* opCtx, int attempts, StringData opStr, StringData ns, const TemporarilyUnavailableException& e); + /** + * Handle a TemporarilyUnavailableException inside a multi-document transaction. + */ + static void handleInTransaction(OperationContext* opCtx, + StringData opStr, + StringData ns, + const TemporarilyUnavailableException& e); + private: void defineOnlyInFinalSubclassToPreventSlicing() final {} }; diff --git a/src/mongo/db/concurrency/write_conflict_exception.h b/src/mongo/db/concurrency/write_conflict_exception.h index 745f63e8e64..745c35cd5d3 100644 --- a/src/mongo/db/concurrency/write_conflict_exception.h +++ b/src/mongo/db/concurrency/write_conflict_exception.h @@ -98,11 +98,7 @@ auto writeConflictRetry(OperationContext* opCtx, StringData opStr, StringData ns return f(); } catch (TemporarilyUnavailableException const& e) { if (opCtx->inMultiDocumentTransaction()) { - // Since WriteConflicts are tagged as TransientTransactionErrors and - // TemporarilyUnavailable errors are not, we convert the error to a WriteConflict to - // allow users of multi-document transactions to retry without changing any - // behavior. Otherwise, we let the error escape as usual. 
- throw WriteConflictException(e.reason()); + TemporarilyUnavailableException::handleInTransaction(opCtx, opStr, ns, e); } throw; } diff --git a/src/mongo/db/curop_metrics.cpp b/src/mongo/db/curop_metrics.cpp index ae3e1020c29..63537de8980 100644 --- a/src/mongo/db/curop_metrics.cpp +++ b/src/mongo/db/curop_metrics.cpp @@ -54,14 +54,11 @@ ServerStatusMetricField displayScannedObjects("queryExecutor.scannedO Counter64 scanAndOrderCounter; Counter64 writeConflictsCounter; -Counter64 temporarilyUnavailableErrorsCounter; ServerStatusMetricField displayScanAndOrder("operation.scanAndOrder", &scanAndOrderCounter); ServerStatusMetricField displayWriteConflicts("operation.writeConflicts", &writeConflictsCounter); -ServerStatusMetricField displayTemporarilyUnavailableErrors( - "operation.temporarilyUnavailableErrors", &temporarilyUnavailableErrorsCounter); } // namespace @@ -84,8 +81,6 @@ void recordCurOpMetrics(OperationContext* opCtx) { scanAndOrderCounter.increment(); if (auto n = debug.additiveMetrics.writeConflicts.load(); n > 0) writeConflictsCounter.increment(n); - if (auto n = debug.additiveMetrics.temporarilyUnavailableErrors.load(); n > 0) - temporarilyUnavailableErrorsCounter.increment(n); queryEngineCounters.incrementQueryEngineCounters(CurOp::get(opCtx)); } -- cgit v1.2.1