diff options
-rw-r--r-- | src/mongo/s/client/shard.cpp | 27 | ||||
-rw-r--r-- | src/mongo/s/client/shard.h | 5 | ||||
-rw-r--r-- | src/mongo/s/config_server_client.cpp | 2 |
3 files changed, 30 insertions, 4 deletions
diff --git a/src/mongo/s/client/shard.cpp b/src/mongo/s/client/shard.cpp index 622c6ea7c50..bc1e791e3db 100644 --- a/src/mongo/s/client/shard.cpp +++ b/src/mongo/s/client/shard.cpp @@ -30,6 +30,7 @@ #include "mongo/platform/basic.h" +#include "mongo/db/operation_context.h" #include "mongo/s/client/shard.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/write_ops/batched_command_request.h" @@ -106,7 +107,7 @@ StatusWith<Shard::CommandResponse> Shard::runCommand(OperationContext* txn, const std::string& dbName, const BSONObj& cmdObj, RetryPolicy retryPolicy) { - MONGO_UNREACHABLE; + return runCommand(txn, readPref, dbName, cmdObj, Milliseconds::max(), retryPolicy); } StatusWith<Shard::CommandResponse> Shard::runCommand(OperationContext* txn, @@ -115,6 +116,25 @@ StatusWith<Shard::CommandResponse> Shard::runCommand(OperationContext* txn, const BSONObj& cmdObj, Milliseconds maxTimeMSOverride, RetryPolicy retryPolicy) { + while (true) { + auto interruptStatus = txn->checkForInterruptNoAssert(); + if (!interruptStatus.isOK()) { + return interruptStatus; + } + + auto hostWithResponse = _runCommand(txn, readPref, dbName, maxTimeMSOverride, cmdObj); + auto swCmdResponse = std::move(hostWithResponse.commandResponse); + auto commandStatus = _getEffectiveCommandStatus(swCmdResponse); + + if (isRetriableError(commandStatus.code(), retryPolicy)) { + LOG(2) << "Command " << redact(cmdObj) + << " failed with retriable error and will be retried" + << causedBy(redact(commandStatus)); + continue; + } + + return swCmdResponse; + } MONGO_UNREACHABLE; } @@ -136,6 +156,11 @@ StatusWith<Shard::CommandResponse> Shard::runCommandWithFixedRetryAttempts( Milliseconds maxTimeMSOverride, RetryPolicy retryPolicy) { for (int retry = 1; retry <= kOnErrorNumRetries; ++retry) { + auto interruptStatus = txn->checkForInterruptNoAssert(); + if (!interruptStatus.isOK()) { + return interruptStatus; + } + auto hostWithResponse = _runCommand(txn, readPref, dbName, maxTimeMSOverride, cmdObj); auto swCmdResponse = std::move(hostWithResponse.commandResponse); auto commandStatus = _getEffectiveCommandStatus(swCmdResponse); diff --git a/src/mongo/s/client/shard.h b/src/mongo/s/client/shard.h index 1f3866e84e3..723a66d8ca9 100644 --- a/src/mongo/s/client/shard.h +++ b/src/mongo/s/client/shard.h @@ -138,8 +138,9 @@ public: /** * Runs the specified command returns the BSON command response plus parsed out Status of this - * response and write concern error (if present). Waits for up to the deadline for the - * OperationContext. Retries failed operations according to the given "retryPolicy". + * response and write concern error (if present). Retries failed operations according to the + * given "retryPolicy". Retries indefinitely until/unless a non-retriable error is encountered, + * the maxTimeMs on the OperationContext expires, or the operation is interrupted. */ StatusWith<CommandResponse> runCommand(OperationContext* txn, const ReadPreferenceSetting& readPref, diff --git a/src/mongo/s/config_server_client.cpp b/src/mongo/s/config_server_client.cpp index ba3e09f3292..d3dd7f88dea 100644 --- a/src/mongo/s/config_server_client.cpp +++ b/src/mongo/s/config_server_client.cpp @@ -51,7 +51,7 @@ Status moveChunk(OperationContext* txn, bool waitForDelete) { auto shardRegistry = Grid::get(txn)->shardRegistry(); auto shard = shardRegistry->getConfigShard(); - auto cmdResponseStatus = shard->runCommandWithFixedRetryAttempts( + auto cmdResponseStatus = shard->runCommand( txn, kPrimaryOnlyReadPreference, "admin", |