summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRachita Dhawan <rachita.dhawan@gmail.com>2022-05-04 19:10:44 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-05-17 15:48:13 +0000
commitce47119ff1a1ab79f18235f1f42eeef39da6f20c (patch)
treedd34fbdfb6a65fe2378fa7166654c0893ad8db49
parent387db2d0f7b26b4ae6efdf46854e60f12f93d4d2 (diff)
downloadmongo-r6.0.0-rc6.tar.gz
SERVER-62175 Mongos fails to attach RetryableWrite Error Label For Command Interrupted In _parseCommandr6.0.0-rc6
(cherry picked from commit 38e4a5516d614a2bd1f3c3afde97c19068fd1441)
-rw-r--r--jstests/sharding/mongos_insert_fails_with_shutdown.js59
-rw-r--r--src/mongo/s/commands/strategy.cpp110
-rw-r--r--src/mongo/s/mongos_main.cpp6
3 files changed, 130 insertions, 45 deletions
diff --git a/jstests/sharding/mongos_insert_fails_with_shutdown.js b/jstests/sharding/mongos_insert_fails_with_shutdown.js
new file mode 100644
index 00000000000..df02f6f54e6
--- /dev/null
+++ b/jstests/sharding/mongos_insert_fails_with_shutdown.js
@@ -0,0 +1,59 @@
+/**
+@tags: [multiversion_incompatible]
+*/
+
+// Don't check for UUID index consistency,orphans and routine table across the cluster at the end,
+// since the test shuts down a mongos
+TestData.skipCheckingUUIDsConsistentAcrossCluster = true;
+TestData.skipCheckingIndexesConsistentAcrossCluster = true;
+TestData.skipCheckOrphans = true;
+TestData.skipCheckRoutingTableConsistency = true;
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load('jstests/libs/parallelTester.js');
+load("jstests/libs/retryable_writes_util.js");
+const st = new ShardingTest({
+ mongos: 1,
+ config: 1,
+ shards: 2,
+});
+const dbName = "test";
+const collName = "mycoll";
+const fpData = {
+ "cmdName": "insert",
+ "ns": dbName + '.' + collName
+};
+const hangBeforeCheckInterruptFailPoint =
+ configureFailPoint(st.s, "hangBeforeCheckingMongosShutdownInterrupt", fpData);
+
+const insertThread = new Thread(function insertDoc(host, dbName, collName) {
+ var lsid = UUID();
+ const conn = new Mongo(host);
+ const retrySession = conn.startSession({retryWrites: true});
+ const retrySessionDB = retrySession.getDatabase(dbName);
+
+ try {
+ var res = assert.commandWorked(retrySessionDB.runCommand({
+ insert: 'mycoll',
+ documents: [{x: 0}, {x: 1}],
+ ordered: true,
+ lsid: {id: lsid},
+ txnNumber: NumberLong(1)
+ }));
+
+ } catch (e) {
+ assert.eq(e.errorLabels, ["RetryableWriteError"], e);
+ }
+ retrySession.endSession();
+}, st.s.host, dbName, collName);
+
+insertThread.start();
+hangBeforeCheckInterruptFailPoint.wait();
+
+st.stopMongos(0);
+insertThread.join();
+
+st.stop();
+})();
diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp
index c3ade949332..777d24bf257 100644
--- a/src/mongo/s/commands/strategy.cpp
+++ b/src/mongo/s/commands/strategy.cpp
@@ -97,10 +97,9 @@
#include "mongo/util/scopeguard.h"
#include "mongo/util/str.h"
#include "mongo/util/timer.h"
-
namespace mongo {
namespace {
-
+MONGO_FAIL_POINT_DEFINE(hangBeforeCheckingMongosShutdownInterrupt);
const auto kOperationTime = "operationTime"_sd;
/**
@@ -402,6 +401,9 @@ private:
// invocation and extracting read/write concerns).
void _parseCommand();
+ // updates statistics and applies labels if an error occurs.
+ void _updateStatsAndApplyErrorLabels(const Status& status);
+
const std::shared_ptr<RequestExecutionContext> _rec;
const std::shared_ptr<BSONObjBuilder> _errorBuilder;
const NetworkOp _opType;
@@ -437,9 +439,6 @@ public:
private:
Status _setup();
- // Logs and updates statistics if an error occurs.
- void _tapOnError(const Status& status);
-
ParseAndRunCommand* const _parc;
boost::optional<RouterOperationContextSession> _routerSession;
@@ -482,7 +481,33 @@ private:
int _tries = 0;
};
+void ParseAndRunCommand::_updateStatsAndApplyErrorLabels(const Status& status) {
+ auto opCtx = _rec->getOpCtx();
+ const auto command = _rec->getCommand();
+
+ if (command)
+ command->incrementCommandsFailed();
+ NotPrimaryErrorTracker::get(opCtx->getClient()).recordError(status.code());
+ // WriteConcern error (wcCode) is set to boost::none because:
+ // 1. TransientTransaction error label handling for commitTransaction command in mongos is
+ // delegated to the shards. Mongos simply propagates the shard's response up to the client.
+ // 2. For other commands in a transaction, they shouldn't get a writeConcern error so this
+ // setting doesn't apply.
+ if (_osi.has_value()) {
+
+ auto errorLabels = getErrorLabels(opCtx,
+ *_osi,
+ command->getName(),
+ status.code(),
+ boost::none,
+ false /* isInternalClient */,
+ true /* isMongos */);
+
+
+ _errorBuilder->appendElements(errorLabels);
+ }
+}
void ParseAndRunCommand::_parseCommand() {
auto opCtx = _rec->getOpCtx();
const auto& m = _rec->getMessage();
@@ -526,12 +551,6 @@ void ParseAndRunCommand::_parseCommand() {
uassert(ErrorCodes::InvalidOptions,
"no such command option $maxTimeMs; use maxTimeMS instead",
request.body[query_request_helper::queryOptionMaxTimeMS].eoo());
- const int maxTimeMS =
- uassertStatusOK(parseMaxTimeMS(request.body[query_request_helper::cmdOptionMaxTimeMS]));
- if (maxTimeMS > 0 && command->getLogicalOp() != LogicalOp::opGetMore) {
- opCtx->setDeadlineAfterNowBy(Milliseconds{maxTimeMS}, ErrorCodes::MaxTimeMSExpired);
- }
- opCtx->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.
// If the command includes a 'comment' field, set it on the current OpCtx.
if (auto commentField = request.body["comment"]) {
@@ -597,6 +616,27 @@ Status ParseAndRunCommand::RunInvocation::_setup() {
const auto& request = _parc->_rec->getRequest();
auto replyBuilder = _parc->_rec->getReplyBuilder();
+ const int maxTimeMS =
+ uassertStatusOK(parseMaxTimeMS(request.body[query_request_helper::cmdOptionMaxTimeMS]));
+ if (maxTimeMS > 0 && command->getLogicalOp() != LogicalOp::opGetMore) {
+ opCtx->setDeadlineAfterNowBy(Milliseconds{maxTimeMS}, ErrorCodes::MaxTimeMSExpired);
+ }
+
+ if (MONGO_unlikely(
+ hangBeforeCheckingMongosShutdownInterrupt.shouldFail([&](const BSONObj& data) {
+ if (data.hasField("cmdName") && data.hasField("ns")) {
+ std::string cmdNS = _parc->_ns.get();
+ return ((data.getStringField("cmdName") == _parc->_commandName) &&
+ (data.getStringField("ns") == cmdNS));
+ }
+ return false;
+ }))) {
+
+ LOGV2(6217501, "Hanging before hangBeforeCheckingMongosShutdownInterrupt is cancelled");
+ hangBeforeCheckingMongosShutdownInterrupt.pauseWhileSet();
+ }
+ opCtx->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.
+
auto appendStatusToReplyAndSkipCommandExecution = [replyBuilder](Status status) -> Status {
auto responseBuilder = replyBuilder->getBodyBuilder();
CommandHelpers::appendCommandStatusNoThrow(responseBuilder, status);
@@ -871,8 +911,8 @@ void ParseAndRunCommand::RunAndRetry::_setup() {
auto& readConcernArgs = repl::ReadConcernArgs::get(opCtx);
if (_tries > 1) {
- // Re-parse before retrying in case the process of run()-ning the invocation could affect
- // the parsed result.
+ // Re-parse before retrying in case the process of run()-ning the invocation could
+ // affect the parsed result.
_parc->_invocation = command->parse(opCtx, request);
invariant(_parc->_invocation->ns().toString() == _parc->_ns,
"unexpected change of namespace when retrying");
@@ -911,15 +951,15 @@ Future<void> ParseAndRunCommand::RunAndRetry::_run() {
}
void ParseAndRunCommand::RunAndRetry::_checkRetryForTransaction(Status& status) {
- // Retry logic specific to transactions. Throws and aborts the transaction if the error cannot
- // be retried on.
+ // Retry logic specific to transactions. Throws and aborts the transaction if the error
+ // cannot be retried on.
auto opCtx = _parc->_rec->getOpCtx();
auto txnRouter = TransactionRouter::get(opCtx);
if (!txnRouter) {
if (opCtx->inMultiDocumentTransaction()) {
- // This command must have failed while its session was yielded. We cannot retry in this
- // case, whatever the session was yielded to is responsible for that, so rethrow the
- // error.
+ // This command must have failed while its session was yielded. We cannot retry in
+ // this case, whatever the session was yielded to is responsible for that, so
+ // rethrow the error.
iassert(status);
}
@@ -1059,30 +1099,10 @@ void ParseAndRunCommand::RunAndRetry::_onTenantMigrationAborted(Status& status)
iassert(status);
}
-void ParseAndRunCommand::RunInvocation::_tapOnError(const Status& status) {
- auto opCtx = _parc->_rec->getOpCtx();
- const auto command = _parc->_rec->getCommand();
-
- command->incrementCommandsFailed();
- NotPrimaryErrorTracker::get(opCtx->getClient()).recordError(status.code());
- // WriteConcern error (wcCode) is set to boost::none because:
- // 1. TransientTransaction error label handling for commitTransaction command in mongos is
- // delegated to the shards. Mongos simply propagates the shard's response up to the client.
- // 2. For other commands in a transaction, they shouldn't get a writeConcern error so this
- // setting doesn't apply.
- auto errorLabels = getErrorLabels(opCtx,
- *_parc->_osi,
- command->getName(),
- status.code(),
- boost::none,
- false /* isInternalClient */,
- true /* isMongos */);
- _parc->_errorBuilder->appendElements(errorLabels);
-}
-
Future<void> ParseAndRunCommand::RunAndRetry::run() {
return makeReadyFutureWith([&] {
- // Try kMaxNumStaleVersionRetries times. On the last try, exceptions are rethrown.
+ // Try kMaxNumStaleVersionRetries times. On the last try, exceptions are
+ // rethrown.
_tries++;
_setup();
@@ -1116,11 +1136,10 @@ Future<void> ParseAndRunCommand::RunAndRetry::run() {
Future<void> ParseAndRunCommand::RunInvocation::run() {
return makeReadyFutureWith([&] {
- iassert(_setup());
- return future_util::makeState<RunAndRetry>(_parc).thenWithState(
- [](auto* runner) { return runner->run(); });
- })
- .tapError([this](Status status) { _tapOnError(status); });
+ iassert(_setup());
+ return future_util::makeState<RunAndRetry>(_parc).thenWithState(
+ [](auto* runner) { return runner->run(); });
+ });
}
Future<void> ParseAndRunCommand::run() {
@@ -1129,6 +1148,7 @@ Future<void> ParseAndRunCommand::run() {
return future_util::makeState<RunInvocation>(this).thenWithState(
[](auto* runner) { return runner->run(); });
})
+ .tapError([this](Status status) { _updateStatsAndApplyErrorLabels(status); })
.onError<ErrorCodes::SkipCommandExecution>([this](Status status) {
// We've already skipped execution, so no other action is required.
return Status::OK();
diff --git a/src/mongo/s/mongos_main.cpp b/src/mongo/s/mongos_main.cpp
index bea3ea0ae4b..0504b714aec 100644
--- a/src/mongo/s/mongos_main.cpp
+++ b/src/mongo/s/mongos_main.cpp
@@ -321,6 +321,12 @@ void cleanupTask(const ShutdownTaskArgs& shutdownArgs) {
LOGV2(4701800, "pauseWhileKillingOperationsAtShutdown failpoint enabled");
sleepsecs(1);
}
+ FailPoint* hangBeforeInterruptfailPoint =
+ globalFailPointRegistry().find("hangBeforeCheckingMongosShutdownInterrupt");
+ if (hangBeforeInterruptfailPoint) {
+ hangBeforeInterruptfailPoint->setMode(FailPoint::Mode::off);
+ sleepsecs(3);
+ }
}
// Perform all shutdown operations after setKillAllOperations is called in order to ensure