 jstests/sharding/all_config_servers_blackholed_from_mongos.js         |   6
 jstests/sharding/lagged_config_secondary.js                           |  18
 jstests/sharding/primary_config_server_blackholed_from_mongos.js      |  12
 src/mongo/db/operation_context.cpp                                    |   9
 src/mongo/db/operation_context.h                                      |  14
 src/mongo/s/catalog/replset/dist_lock_catalog_impl.cpp                |  14
 src/mongo/s/catalog/replset/sharding_catalog_append_db_stats_test.cpp |   2
 src/mongo/s/catalog/replset/sharding_catalog_client_impl.cpp          |  34
 src/mongo/s/client/shard.cpp                                          |  25
 src/mongo/s/client/shard.h                                            |  30
 src/mongo/s/client/shard_local.cpp                                    |   1
 src/mongo/s/client/shard_local.h                                      |   1
 src/mongo/s/client/shard_local_test.cpp                               |   2
 src/mongo/s/client/shard_remote.cpp                                   | 107
 src/mongo/s/client/shard_remote.h                                     |   1
 src/mongo/s/commands/strategy.cpp                                     |  14
 src/mongo/s/config_server_test_fixture.cpp                            |   8
 src/mongo/shell/utils_sh.js                                           |   2
 18 files changed, 183 insertions(+), 117 deletions(-)
diff --git a/jstests/sharding/all_config_servers_blackholed_from_mongos.js b/jstests/sharding/all_config_servers_blackholed_from_mongos.js
index c3ed68e97de..f575ba26276 100644
--- a/jstests/sharding/all_config_servers_blackholed_from_mongos.js
+++ b/jstests/sharding/all_config_servers_blackholed_from_mongos.js
@@ -4,7 +4,6 @@
'use strict';
var st = new ShardingTest({
- name: 'all_config_servers_blackholed_from_mongos',
shards: 2,
mongos: 1,
useBridge: true,
@@ -28,13 +27,14 @@
// This shouldn't stall
jsTest.log('Doing read operation on the sharded collection');
assert.throws(function() {
- testDB.ShardedColl.find({}).itcount();
+ testDB.ShardedColl.find({}).maxTimeMS(15000).itcount();
});
// This should fail, because the primary is not available
jsTest.log('Doing write operation on a new database and collection');
assert.writeError(st.s.getDB('NonExistentDB')
- .TestColl.insert({_id: 0, value: 'This value will never be inserted'}));
+ .TestColl.insert({_id: 0, value: 'This value will never be inserted'},
+ {maxTimeMS: 15000}));
st.stop();
diff --git a/jstests/sharding/lagged_config_secondary.js b/jstests/sharding/lagged_config_secondary.js
index 92f0453b941..d9a93c56fad 100644
--- a/jstests/sharding/lagged_config_secondary.js
+++ b/jstests/sharding/lagged_config_secondary.js
@@ -4,6 +4,13 @@
*/
(function() {
var st = new ShardingTest({shards: 1});
+ var testDB = st.s.getDB('test');
+
+ assert.commandWorked(testDB.adminCommand({enableSharding: 'test'}));
+ assert.commandWorked(testDB.adminCommand({shardCollection: 'test.user', key: {_id: 1}}));
+
+ // Ensures that all metadata writes thus far have been replicated to all nodes
+ st.configRS.awaitReplication();
var configSecondaryList = st.configRS.getSecondaries();
var configSecondaryToKill = configSecondaryList[0];
@@ -12,11 +19,10 @@
delayedConfigSecondary.getDB('admin').adminCommand(
{configureFailPoint: 'rsSyncApplyStop', mode: 'alwaysOn'});
- var testDB = st.s.getDB('test');
- testDB.adminCommand({enableSharding: 'test'});
- testDB.adminCommand({shardCollection: 'test.user', key: {_id: 1}});
+ assert.writeOK(testDB.user.insert({_id: 1}));
- testDB.user.insert({_id: 1});
+ // Do one metadata write in order to bump the optime on mongos
+ assert.writeOK(st.getDB('config').TestConfigColl.insert({TestKey: 'Test value'}));
st.configRS.stopMaster();
MongoRunner.stopMongod(configSecondaryToKill.port);
@@ -24,8 +30,9 @@
// Clears all cached info so mongos will be forced to query from the config.
st.s.adminCommand({flushRouterConfig: 1});
+ print('Attempting read on a sharded collection...');
var exception = assert.throws(function() {
- testDB.user.findOne();
+ testDB.user.find({}).maxTimeMS(15000).itcount();
});
assert.eq(ErrorCodes.ExceededTimeLimit, exception.code);
@@ -43,5 +50,4 @@
}, 'Did not see any log entries containing the following message: ' + msg, 60000, 300);
st.stop();
-
}());
diff --git a/jstests/sharding/primary_config_server_blackholed_from_mongos.js b/jstests/sharding/primary_config_server_blackholed_from_mongos.js
index b4d0d26d5cb..4fb3886b40b 100644
--- a/jstests/sharding/primary_config_server_blackholed_from_mongos.js
+++ b/jstests/sharding/primary_config_server_blackholed_from_mongos.js
@@ -3,12 +3,7 @@
(function() {
'use strict';
- var st = new ShardingTest({
- name: 'primary_config_server_blackholed_from_mongos',
- shards: 2,
- mongos: 1,
- useBridge: true
- });
+ var st = new ShardingTest({shards: 2, mongos: 1, useBridge: true});
var testDB = st.s.getDB('BlackHoleDB');
var configDB = st.s.getDB('config');
@@ -33,7 +28,8 @@
// This should fail, because the primary is not available
jsTest.log('Doing write operation on a new database and collection');
assert.writeError(st.s.getDB('NonExistentDB')
- .TestColl.insert({_id: 0, value: 'This value will never be inserted'}));
+ .TestColl.insert({_id: 0, value: 'This value will never be inserted'},
+ {maxTimeMS: 15000}));
jsTest.log('Doing CRUD operations on the sharded collection');
assert.eq(1000, testDB.ShardedColl.find().itcount());
@@ -41,6 +37,7 @@
assert.eq(1001, testDB.ShardedColl.find().count());
jsTest.log('Doing read operations on a config server collection');
+
// Should fail due to primary read preference
assert.throws(function() {
configDB.chunks.find().itcount();
@@ -59,5 +56,4 @@
assert.lt(0, configDB.chunks.aggregate().itcount());
st.stop();
-
}());
diff --git a/src/mongo/db/operation_context.cpp b/src/mongo/db/operation_context.cpp
index 58529dffedf..2a5c8814bf3 100644
--- a/src/mongo/db/operation_context.cpp
+++ b/src/mongo/db/operation_context.cpp
@@ -139,6 +139,15 @@ bool OperationContext::hasDeadlineExpired() const {
return now >= getDeadline();
}
+Milliseconds OperationContext::getRemainingMaxTimeMillis() const {
+ if (!hasDeadline()) {
+ return Milliseconds::max();
+ }
+
+ return std::max(Milliseconds{0},
+ getDeadline() - getServiceContext()->getFastClockSource()->now());
+}
+
Microseconds OperationContext::getRemainingMaxTimeMicros() const {
if (!hasDeadline()) {
return Microseconds::max();
diff --git a/src/mongo/db/operation_context.h b/src/mongo/db/operation_context.h
index aadb6c9f981..eee6b98f48d 100644
--- a/src/mongo/db/operation_context.h
+++ b/src/mongo/db/operation_context.h
@@ -344,11 +344,17 @@ public:
}
/**
- * Returns the number of microseconds remaining for this operation's time limit, or the
- * special value Microseconds::max() if the operation has no time limit.
+ * Returns the number of milliseconds remaining for this operation's time limit or
+ * Milliseconds::max() if the operation has no time limit.
+ */
+ Milliseconds getRemainingMaxTimeMillis() const;
+
+ /**
+ * NOTE: This is a legacy "max time" method for controlling operation deadlines and it should
+ * not be used in new code. Use getRemainingMaxTimeMillis instead.
*
- * This is a legacy "max time" method for controlling operation deadlines. Prefer not to use it
- * in new code.
+ * Returns the number of microseconds remaining for this operation's time limit, or the special
+ * value Microseconds::max() if the operation has no time limit.
*/
Microseconds getRemainingMaxTimeMicros() const;
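The new getRemainingMaxTimeMillis() accessor returns Milliseconds::max() as a "no time limit" sentinel and clamps the remaining time at zero so it never goes negative. A minimal standalone sketch of those semantics, assuming a simplified clock and operation type (FakeOperation and its members are illustrative, not the real OperationContext API):

    #include <algorithm>
    #include <chrono>
    #include <iostream>

    using Milliseconds = std::chrono::milliseconds;

    // Simplified stand-in for an operation with an optional deadline.
    struct FakeOperation {
        bool hasDeadline = false;
        std::chrono::steady_clock::time_point deadline{};

        // Mirrors the semantics of OperationContext::getRemainingMaxTimeMillis():
        // Milliseconds::max() means "no time limit"; otherwise the remaining time
        // is clamped so it never goes negative.
        Milliseconds getRemainingMaxTimeMillis() const {
            if (!hasDeadline) {
                return Milliseconds::max();
            }
            auto remaining = std::chrono::duration_cast<Milliseconds>(
                deadline - std::chrono::steady_clock::now());
            return std::max(Milliseconds{0}, remaining);
        }
    };

    int main() {
        FakeOperation op;
        std::cout << (op.getRemainingMaxTimeMillis() == Milliseconds::max()) << "\n";  // 1 (no limit)

        op.hasDeadline = true;
        op.deadline = std::chrono::steady_clock::now() - Milliseconds{5};  // already expired
        std::cout << op.getRemainingMaxTimeMillis().count() << "\n";       // 0, never negative
    }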
diff --git a/src/mongo/s/catalog/replset/dist_lock_catalog_impl.cpp b/src/mongo/s/catalog/replset/dist_lock_catalog_impl.cpp
index 7105ae33de5..d573bcefdc8 100644
--- a/src/mongo/s/catalog/replset/dist_lock_catalog_impl.cpp
+++ b/src/mongo/s/catalog/replset/dist_lock_catalog_impl.cpp
@@ -183,6 +183,7 @@ Status DistLockCatalogImpl::ping(OperationContext* txn, StringData processID, Da
ReadPreferenceSetting{ReadPreference::PrimaryOnly},
_locksNS.db().toString(),
request.toBSON(),
+ Shard::kDefaultConfigCommandTimeout,
Shard::RetryPolicy::kNotIdempotent);
auto findAndModifyStatus = extractFindAndModifyNewObj(std::move(resultStatus));
@@ -218,6 +219,7 @@ StatusWith<LocksType> DistLockCatalogImpl::grabLock(OperationContext* txn,
ReadPreferenceSetting{ReadPreference::PrimaryOnly},
_locksNS.db().toString(),
request.toBSON(),
+ Shard::kDefaultConfigCommandTimeout,
Shard::RetryPolicy::kNoRetry); // Dist lock manager is handling own retries
auto findAndModifyStatus = extractFindAndModifyNewObj(std::move(resultStatus));
@@ -274,6 +276,7 @@ StatusWith<LocksType> DistLockCatalogImpl::overtakeLock(OperationContext* txn,
ReadPreferenceSetting{ReadPreference::PrimaryOnly},
_locksNS.db().toString(),
request.toBSON(),
+ Shard::kDefaultConfigCommandTimeout,
Shard::RetryPolicy::kNotIdempotent);
auto findAndModifyStatus = extractFindAndModifyNewObj(std::move(resultStatus));
@@ -304,6 +307,7 @@ Status DistLockCatalogImpl::unlock(OperationContext* txn, const OID& lockSession
ReadPreferenceSetting{ReadPreference::PrimaryOnly},
_locksNS.db().toString(),
request.toBSON(),
+ Shard::kDefaultConfigCommandTimeout,
Shard::RetryPolicy::kIdempotent);
auto findAndModifyStatus = extractFindAndModifyNewObj(std::move(resultStatus));
@@ -338,6 +342,7 @@ Status DistLockCatalogImpl::unlockAll(OperationContext* txn, const std::string&
ReadPreferenceSetting{ReadPreference::PrimaryOnly},
_locksNS.db().toString(),
cmdObj,
+ Shard::kDefaultConfigCommandTimeout,
Shard::RetryPolicy::kIdempotent);
if (!response.isOK()) {
@@ -363,8 +368,12 @@ Status DistLockCatalogImpl::unlockAll(OperationContext* txn, const std::string&
}
StatusWith<DistLockCatalog::ServerInfo> DistLockCatalogImpl::getServerInfo(OperationContext* txn) {
- auto resultStatus = _client->getConfigShard()->runCommand(
- txn, kReadPref, "admin", BSON("serverStatus" << 1), Shard::RetryPolicy::kIdempotent);
+ auto resultStatus = _client->getConfigShard()->runCommand(txn,
+ kReadPref,
+ "admin",
+ BSON("serverStatus" << 1),
+ Shard::kDefaultConfigCommandTimeout,
+ Shard::RetryPolicy::kIdempotent);
if (!resultStatus.isOK()) {
return resultStatus.getStatus();
@@ -455,6 +464,7 @@ Status DistLockCatalogImpl::stopPing(OperationContext* txn, StringData processId
ReadPreferenceSetting{ReadPreference::PrimaryOnly},
_locksNS.db().toString(),
request.toBSON(),
+ Shard::kDefaultConfigCommandTimeout,
Shard::RetryPolicy::kNotIdempotent);
auto findAndModifyStatus = extractFindAndModifyNewObj(std::move(resultStatus));
diff --git a/src/mongo/s/catalog/replset/sharding_catalog_append_db_stats_test.cpp b/src/mongo/s/catalog/replset/sharding_catalog_append_db_stats_test.cpp
index 55c2aba7157..ac50ab925cc 100644
--- a/src/mongo/s/catalog/replset/sharding_catalog_append_db_stats_test.cpp
+++ b/src/mongo/s/catalog/replset/sharding_catalog_append_db_stats_test.cpp
@@ -70,7 +70,7 @@ TEST_F(ShardingCatalogClientAppendDbStatsTest, BasicAppendDBStats) {
ASSERT_EQ(kReplSecondaryOkMetadata, request.metadata);
ASSERT_EQ("admin", request.dbname);
- ASSERT_EQ(BSON("listDatabases" << 1 << "maxTimeMS" << 30000), request.cmdObj);
+ ASSERT_EQ(BSON("listDatabases" << 1), request.cmdObj);
return fromjson(R"({
databases: [
diff --git a/src/mongo/s/catalog/replset/sharding_catalog_client_impl.cpp b/src/mongo/s/catalog/replset/sharding_catalog_client_impl.cpp
index c59178d6d22..e9832c68cac 100644
--- a/src/mongo/s/catalog/replset/sharding_catalog_client_impl.cpp
+++ b/src/mongo/s/catalog/replset/sharding_catalog_client_impl.cpp
@@ -1173,6 +1173,7 @@ bool ShardingCatalogClientImpl::runUserManagementWriteCommand(OperationContext*
ReadPreferenceSetting{ReadPreference::PrimaryOnly},
dbname,
cmdToRun,
+ Shard::kDefaultConfigCommandTimeout,
Shard::RetryPolicy::kNotIdempotent);
if (!response.isOK()) {
@@ -1212,7 +1213,12 @@ bool ShardingCatalogClientImpl::runUserManagementReadCommand(OperationContext* t
const BSONObj& cmdObj,
BSONObjBuilder* result) {
auto resultStatus = Grid::get(txn)->shardRegistry()->getConfigShard()->runCommand(
- txn, kConfigPrimaryPreferredSelector, dbname, cmdObj, Shard::RetryPolicy::kIdempotent);
+ txn,
+ kConfigPrimaryPreferredSelector,
+ dbname,
+ cmdObj,
+ Shard::kDefaultConfigCommandTimeout,
+ Shard::RetryPolicy::kIdempotent);
if (resultStatus.isOK()) {
result->appendElements(resultStatus.getValue().response);
return resultStatus.getValue().commandStatus.isOK();
@@ -1517,6 +1523,7 @@ Status ShardingCatalogClientImpl::_createCappedConfigCollection(OperationContext
ReadPreferenceSetting{ReadPreference::PrimaryOnly},
"config",
createCmd,
+ Shard::kDefaultConfigCommandTimeout,
Shard::RetryPolicy::kIdempotent);
if (!result.isOK()) {
@@ -1546,12 +1553,13 @@ StatusWith<long long> ShardingCatalogClientImpl::_runCountCommandOnConfig(Operat
countBuilder.append("query", query);
_appendReadConcern(&countBuilder);
- auto resultStatus = Grid::get(txn)->shardRegistry()->getConfigShard()->runCommand(
- txn,
- kConfigReadSelector,
- ns.db().toString(),
- countBuilder.done(),
- Shard::RetryPolicy::kIdempotent);
+ auto configShard = Grid::get(txn)->shardRegistry()->getConfigShard();
+ auto resultStatus = configShard->runCommand(txn,
+ kConfigReadSelector,
+ ns.db().toString(),
+ countBuilder.done(),
+ Shard::kDefaultConfigCommandTimeout,
+ Shard::RetryPolicy::kIdempotent);
if (!resultStatus.isOK()) {
return resultStatus.getStatus();
}
@@ -1595,12 +1603,12 @@ void ShardingCatalogClientImpl::_appendReadConcern(BSONObjBuilder* builder) {
Status ShardingCatalogClientImpl::appendInfoForConfigServerDatabases(OperationContext* txn,
BSONArrayBuilder* builder) {
- auto resultStatus = Grid::get(txn)->shardRegistry()->getConfigShard()->runCommand(
- txn,
- kConfigPrimaryPreferredSelector,
- "admin",
- BSON("listDatabases" << 1),
- Shard::RetryPolicy::kIdempotent);
+ auto configShard = Grid::get(txn)->shardRegistry()->getConfigShard();
+ auto resultStatus = configShard->runCommand(txn,
+ kConfigPrimaryPreferredSelector,
+ "admin",
+ BSON("listDatabases" << 1),
+ Shard::RetryPolicy::kIdempotent);
if (!resultStatus.isOK()) {
return resultStatus.getStatus();
diff --git a/src/mongo/s/client/shard.cpp b/src/mongo/s/client/shard.cpp
index 68721257b5b..445b63b3dbe 100644
--- a/src/mongo/s/client/shard.cpp
+++ b/src/mongo/s/client/shard.cpp
@@ -89,6 +89,8 @@ Status Shard::CommandResponse::processBatchWriteResponse(
return status;
}
+const Milliseconds Shard::kDefaultConfigCommandTimeout = Seconds{30};
+
Shard::Shard(const ShardId& id) : _id(id) {}
const ShardId Shard::getId() const {
@@ -104,8 +106,18 @@ StatusWith<Shard::CommandResponse> Shard::runCommand(OperationContext* txn,
const std::string& dbName,
const BSONObj& cmdObj,
RetryPolicy retryPolicy) {
+ return runCommand(txn, readPref, dbName, cmdObj, Milliseconds::max(), retryPolicy);
+}
+
+StatusWith<Shard::CommandResponse> Shard::runCommand(OperationContext* txn,
+ const ReadPreferenceSetting& readPref,
+ const std::string& dbName,
+ const BSONObj& cmdObj,
+ Milliseconds maxTimeMSOverride,
+ RetryPolicy retryPolicy) {
for (int retry = 1; retry <= kOnErrorNumRetries; ++retry) {
- auto swCmdResponse = _runCommand(txn, readPref, dbName, cmdObj).commandResponse;
+ auto hostWithResponse = _runCommand(txn, readPref, dbName, maxTimeMSOverride, cmdObj);
+ auto swCmdResponse = std::move(hostWithResponse.commandResponse);
auto commandStatus = _getEffectiveCommandStatus(swCmdResponse);
if (retry < kOnErrorNumRetries && isRetriableError(commandStatus.code(), retryPolicy)) {
@@ -130,8 +142,11 @@ BatchedCommandResponse Shard::runBatchWriteCommandOnConfig(
const BSONObj cmdObj = batchRequest.toBSON();
for (int retry = 1; retry <= kOnErrorNumRetries; ++retry) {
- auto response =
- _runCommand(txn, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, dbname, cmdObj);
+ auto response = _runCommand(txn,
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ dbname,
+ kDefaultConfigCommandTimeout,
+ cmdObj);
BatchedCommandResponse batchResponse;
Status writeStatus =
@@ -160,6 +175,9 @@ StatusWith<Shard::QueryResponse> Shard::exhaustiveFindOnConfig(
const BSONObj& query,
const BSONObj& sort,
const boost::optional<long long> limit) {
+ // Do not allow exhaustive finds to be run against regular shards.
+ invariant(isConfig());
+
for (int retry = 1; retry <= kOnErrorNumRetries; retry++) {
auto result =
_exhaustiveFindOnConfig(txn, readPref, readConcernLevel, nss, query, sort, limit);
@@ -171,7 +189,6 @@ StatusWith<Shard::QueryResponse> Shard::exhaustiveFindOnConfig(
return result;
}
-
MONGO_UNREACHABLE;
}
diff --git a/src/mongo/s/client/shard.h b/src/mongo/s/client/shard.h
index 191935611e2..96dfecd204c 100644
--- a/src/mongo/s/client/shard.h
+++ b/src/mongo/s/client/shard.h
@@ -137,9 +137,9 @@ public:
virtual bool isRetriableError(ErrorCodes::Error code, RetryPolicy options) = 0;
/**
- * Runs a command against this shard and returns the BSON command response, as well as the
- * already-parsed out Status of the command response and write concern error (if present).
- * Retries failed operations according to the given "retryPolicy".
+ * Runs the specified command and returns the BSON command response, plus the parsed-out Status
+ * of that response and any write concern error (if present). Waits for up to the deadline of
+ * the OperationContext. Retries failed operations according to the given "retryPolicy".
*/
StatusWith<CommandResponse> runCommand(OperationContext* txn,
const ReadPreferenceSetting& readPref,
@@ -148,6 +148,18 @@ public:
RetryPolicy retryPolicy);
/**
+ * Same as the other variant of runCommand, but allows the operation timeout to be overridden.
+ * Runs for the lesser of the remaining time on the operation context and the specified
+ * maxTimeMS override.
+ */
+ StatusWith<CommandResponse> runCommand(OperationContext* txn,
+ const ReadPreferenceSetting& readPref,
+ const std::string& dbName,
+ const BSONObj& cmdObj,
+ Milliseconds maxTimeMSOverride,
+ RetryPolicy retryPolicy);
+
+ /**
* Expects a single-entry batch write command and runs it on the config server's primary using
* the specified retry policy.
*/
@@ -181,6 +193,9 @@ public:
const BSONObj& keys,
bool unique) = 0;
+ // This timeout will be used by default in operations against the config server, unless
+ // explicitly overridden
+ static const Milliseconds kDefaultConfigCommandTimeout;
protected:
struct HostWithResponse {
@@ -196,12 +211,17 @@ protected:
private:
/**
- * Paired HostWithResponse output exposes RemoteShard's host for updateReplSetMonitor.
- * LocalShard will not return a host.
+ * Runs the specified command against the shard backed by this object, with a timeout set to
+ * the minimum of maxTimeMSOverride and the timeout of the OperationContext.
+ *
+ * The return value exposes RemoteShard's host for calls to updateReplSetMonitor.
+ *
+ * NOTE: The LocalShard implementation does not return a valid host, so it should be ignored.
*/
virtual HostWithResponse _runCommand(OperationContext* txn,
const ReadPreferenceSetting& readPref,
const std::string& dbname,
+ Milliseconds maxTimeMSOverride,
const BSONObj& cmdObj) = 0;
virtual StatusWith<QueryResponse> _exhaustiveFindOnConfig(
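The relationship between the two runCommand overloads is that the original one simply forwards Milliseconds::max(), i.e. "no extra cap beyond the operation's own deadline", while the new one caps the request at the lesser of the remaining operation time and the per-call override. A rough sketch of that composition, using hypothetical free functions in place of the Shard methods:

    #include <algorithm>
    #include <chrono>
    #include <iostream>
    #include <string>

    using Milliseconds = std::chrono::milliseconds;

    // Stand-in for the override-taking overload: the effective request timeout is
    // the lesser of the operation's remaining time and the per-call override.
    std::string runCommand(Milliseconds remainingOnOperation, Milliseconds maxTimeMSOverride) {
        const Milliseconds effective = std::min(remainingOnOperation, maxTimeMSOverride);
        return "timeout=" + std::to_string(effective.count()) + "ms";
    }

    // Stand-in for the original overload, which shard.cpp now implements by
    // forwarding Milliseconds::max().
    std::string runCommand(Milliseconds remainingOnOperation) {
        return runCommand(remainingOnOperation, Milliseconds::max());
    }

    int main() {
        const Milliseconds kDefaultConfigCommandTimeout{30000};  // value from shard.cpp
        std::cout << runCommand(Milliseconds{12000}, kDefaultConfigCommandTimeout) << "\n";  // 12000ms
        std::cout << runCommand(Milliseconds{12000}) << "\n";                                // 12000ms
        std::cout << runCommand(Milliseconds::max(), kDefaultConfigCommandTimeout) << "\n";  // 30000ms
    }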
diff --git a/src/mongo/s/client/shard_local.cpp b/src/mongo/s/client/shard_local.cpp
index 085dbce0e40..f20412e3b29 100644
--- a/src/mongo/s/client/shard_local.cpp
+++ b/src/mongo/s/client/shard_local.cpp
@@ -124,6 +124,7 @@ repl::OpTime ShardLocal::_getLastOpTime() {
Shard::HostWithResponse ShardLocal::_runCommand(OperationContext* txn,
const ReadPreferenceSetting& unused,
const std::string& dbName,
+ Milliseconds maxTimeMSOverrideUnused,
const BSONObj& cmdObj) {
repl::OpTime currentOpTimeFromClient =
repl::ReplClientInfo::forClient(txn->getClient()).getLastOp();
diff --git a/src/mongo/s/client/shard_local.h b/src/mongo/s/client/shard_local.h
index 56358b449dc..75e97ed0d29 100644
--- a/src/mongo/s/client/shard_local.h
+++ b/src/mongo/s/client/shard_local.h
@@ -67,6 +67,7 @@ private:
Shard::HostWithResponse _runCommand(OperationContext* txn,
const ReadPreferenceSetting& unused,
const std::string& dbName,
+ Milliseconds maxTimeMSOverrideUnused,
const BSONObj& cmdObj) final;
StatusWith<Shard::QueryResponse> _exhaustiveFindOnConfig(
diff --git a/src/mongo/s/client/shard_local_test.cpp b/src/mongo/s/client/shard_local_test.cpp
index 35a9359b372..409d8dba86d 100644
--- a/src/mongo/s/client/shard_local_test.cpp
+++ b/src/mongo/s/client/shard_local_test.cpp
@@ -83,7 +83,7 @@ void ShardLocalTest::setUp() {
Client::initThreadIfNotAlready();
_txn = getGlobalServiceContext()->makeOperationContext(&cc());
serverGlobalParams.clusterRole = ClusterRole::ConfigServer;
- _shardLocal = stdx::make_unique<ShardLocal>(ShardId("shardOrConfig"));
+ _shardLocal = stdx::make_unique<ShardLocal>(ShardId("config"));
const repl::ReplSettings replSettings = {};
repl::setGlobalReplicationCoordinator(new repl::ReplicationCoordinatorMock(replSettings));
}
diff --git a/src/mongo/s/client/shard_remote.cpp b/src/mongo/s/client/shard_remote.cpp
index 79b7d493863..098bbae7692 100644
--- a/src/mongo/s/client/shard_remote.cpp
+++ b/src/mongo/s/client/shard_remote.cpp
@@ -67,8 +67,6 @@ namespace {
const Status kInternalErrorStatus{ErrorCodes::InternalError,
"Invalid to check for write concern error if command failed"};
-const Milliseconds kConfigCommandTimeout = Seconds{30};
-
const BSONObj kNoMetadata(rpc::makeEmptyMetadata());
// Include kReplSetMetadataFieldName in a request to get the shard's ReplSetMetadata in the
@@ -88,50 +86,24 @@ const BSONObj kReplSecondaryOkMetadata{[] {
}()};
/**
- * Returns a new BSONObj describing the same command and arguments as 'cmdObj', but with a maxTimeMS
- * set on it that is the minimum of the maxTimeMS in 'cmdObj' (if present), 'maxTimeMicros', and
- * 30 seconds.
+ * Returns a new BSONObj describing the same command and arguments as 'cmdObj', but with maxTimeMS
+ * replaced by maxTimeMSOverride (or removed if maxTimeMSOverride is Milliseconds::max()).
*/
-BSONObj appendMaxTimeToCmdObj(OperationContext* txn, const BSONObj& cmdObj) {
- Milliseconds maxTime = kConfigCommandTimeout;
-
- bool hasTxnMaxTime = txn->hasDeadline();
- bool hasUserMaxTime = !cmdObj[QueryRequest::cmdOptionMaxTimeMS].eoo();
-
- if (hasTxnMaxTime) {
- maxTime = std::min(maxTime, duration_cast<Milliseconds>(txn->getRemainingMaxTimeMicros()));
- if (maxTime <= Milliseconds::zero()) {
- // If there is less than 1ms remaining before the maxTime timeout expires, set the max
- // time to 1ms, since setting maxTimeMs to 1ms in a command means "no max time".
+BSONObj appendMaxTimeToCmdObj(Milliseconds maxTimeMSOverride, const BSONObj& cmdObj) {
+ BSONObjBuilder updatedCmdBuilder;
- maxTime = Milliseconds{1};
+ // Remove the user-provided maxTimeMS so we can attach the one from the override
+ for (const auto& elem : cmdObj) {
+ if (!str::equals(elem.fieldName(), QueryRequest::cmdOptionMaxTimeMS)) {
+ updatedCmdBuilder.append(elem);
}
}
- if (hasUserMaxTime) {
- Milliseconds userMaxTime(cmdObj[QueryRequest::cmdOptionMaxTimeMS].numberLong());
- if (userMaxTime <= maxTime) {
- return cmdObj;
- }
+ if (maxTimeMSOverride < Milliseconds::max()) {
+ updatedCmdBuilder.append(QueryRequest::cmdOptionMaxTimeMS,
+ durationCount<Milliseconds>(maxTimeMSOverride));
}
- BSONObjBuilder updatedCmdBuilder;
- if (hasUserMaxTime) { // Need to remove user provided maxTimeMS.
- BSONObjIterator cmdObjIter(cmdObj);
- const char* maxTimeFieldName = QueryRequest::cmdOptionMaxTimeMS;
- while (cmdObjIter.more()) {
- BSONElement e = cmdObjIter.next();
- if (str::equals(e.fieldName(), maxTimeFieldName)) {
- continue;
- }
- updatedCmdBuilder.append(e);
- }
- } else {
- updatedCmdBuilder.appendElements(cmdObj);
- }
-
- updatedCmdBuilder.append(QueryRequest::cmdOptionMaxTimeMS,
- durationCount<Milliseconds>(maxTime));
return updatedCmdBuilder.obj();
}
@@ -199,8 +171,8 @@ const BSONObj& ShardRemote::_getMetadataForCommand(const ReadPreferenceSetting&
Shard::HostWithResponse ShardRemote::_runCommand(OperationContext* txn,
const ReadPreferenceSetting& readPref,
const string& dbName,
+ Milliseconds maxTimeMSOverride,
const BSONObj& cmdObj) {
- const BSONObj cmdWithMaxTimeMS = (isConfig() ? appendMaxTimeToCmdObj(txn, cmdObj) : cmdObj);
const auto host =
_targeter->findHost(readPref, RemoteCommandTargeter::selectFindHostMaxWaitTime(txn));
@@ -208,13 +180,17 @@ Shard::HostWithResponse ShardRemote::_runCommand(OperationContext* txn,
return Shard::HostWithResponse(boost::none, host.getStatus());
}
- RemoteCommandRequest request(host.getValue(),
- dbName,
- cmdWithMaxTimeMS,
- _getMetadataForCommand(readPref),
- txn,
- isConfig() ? kConfigCommandTimeout
- : executor::RemoteCommandRequest::kNoTimeout);
+ const Milliseconds requestTimeout =
+ std::min(txn->getRemainingMaxTimeMillis(), maxTimeMSOverride);
+
+ const RemoteCommandRequest request(
+ host.getValue(),
+ dbName,
+ appendMaxTimeToCmdObj(maxTimeMSOverride, cmdObj),
+ _getMetadataForCommand(readPref),
+ txn,
+ requestTimeout < Milliseconds::max() ? requestTimeout : RemoteCommandRequest::kNoTimeout);
+
StatusWith<RemoteCommandResponse> swResponse =
Status(ErrorCodes::InternalError, "Internal error running command");
@@ -262,9 +238,6 @@ StatusWith<Shard::QueryResponse> ShardRemote::_exhaustiveFindOnConfig(
const BSONObj& query,
const BSONObj& sort,
boost::optional<long long> limit) {
- // Do not allow exhaustive finds to be run against regular shards.
- invariant(getId() == "config");
-
const auto host =
_targeter->findHost(readPref, RemoteCommandTargeter::selectFindHostMaxWaitTime(txn));
if (!host.isOK()) {
@@ -329,24 +302,24 @@ StatusWith<Shard::QueryResponse> ShardRemote::_exhaustiveFindOnConfig(
bob.done().getObjectField(repl::ReadConcernArgs::kReadConcernFieldName).getOwned();
}
- auto qr = stdx::make_unique<QueryRequest>(nss);
- qr->setFilter(query);
- qr->setSort(sort);
- qr->setReadConcern(readConcernObj);
- qr->setLimit(limit);
+ const Milliseconds maxTimeMS =
+ std::min(txn->getRemainingMaxTimeMillis(), kDefaultConfigCommandTimeout);
BSONObjBuilder findCmdBuilder;
- qr->asFindCommand(&findCmdBuilder);
-
- Microseconds maxTime = std::min(duration_cast<Microseconds>(kConfigCommandTimeout),
- txn->getRemainingMaxTimeMicros());
- if (maxTime < Milliseconds{1}) {
- // If there is less than 1ms remaining before the maxTime timeout expires, set the max time
- // to 1ms, since setting maxTimeMs to 1ms in a find command means "no max time".
- maxTime = Milliseconds{1};
- }
- findCmdBuilder.append(QueryRequest::cmdOptionMaxTimeMS, durationCount<Milliseconds>(maxTime));
+ {
+ QueryRequest qr(nss);
+ qr.setFilter(query);
+ qr.setSort(sort);
+ qr.setReadConcern(readConcernObj);
+ qr.setLimit(limit);
+
+ if (maxTimeMS < Milliseconds::max()) {
+ qr.setMaxTimeMS(durationCount<Milliseconds>(maxTimeMS));
+ }
+
+ qr.asFindCommand(&findCmdBuilder);
+ }
Fetcher fetcher(Grid::get(txn)->getExecutorPool()->getFixedExecutor(),
host.getValue(),
@@ -354,7 +327,7 @@ StatusWith<Shard::QueryResponse> ShardRemote::_exhaustiveFindOnConfig(
findCmdBuilder.done(),
fetcherCallback,
_getMetadataForCommand(readPref),
- duration_cast<Milliseconds>(maxTime));
+ maxTimeMS);
Status scheduleStatus = fetcher.schedule();
if (!scheduleStatus.isOK()) {
return scheduleStatus;
@@ -366,7 +339,7 @@ StatusWith<Shard::QueryResponse> ShardRemote::_exhaustiveFindOnConfig(
if (!status.isOK()) {
if (status.compareCode(ErrorCodes::ExceededTimeLimit)) {
- LOG(0) << "Operation timed out with status " << redact(status);
+ LOG(0) << "Operation timed out " << causedBy(status);
}
return status;
}
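The rewritten appendMaxTimeToCmdObj is now a straight replace rather than a min-of-three computation: any caller-supplied maxTimeMS is dropped and the override is attached unless it is the Milliseconds::max() sentinel. A minimal model of that behavior, using a std::map as a hypothetical stand-in for BSON:

    #include <chrono>
    #include <iostream>
    #include <map>
    #include <string>

    using Milliseconds = std::chrono::milliseconds;
    using FakeBson = std::map<std::string, long long>;

    // Drop any caller-supplied maxTimeMS, then attach the override unless it is
    // the "no limit" sentinel -- mirroring the rewritten helper above.
    FakeBson appendMaxTimeToCmdObj(Milliseconds maxTimeMSOverride, FakeBson cmdObj) {
        cmdObj.erase("maxTimeMS");
        if (maxTimeMSOverride < Milliseconds::max()) {
            cmdObj["maxTimeMS"] = maxTimeMSOverride.count();
        }
        return cmdObj;
    }

    int main() {
        FakeBson cmd{{"count", 1}, {"maxTimeMS", 99999}};  // caller-supplied value is discarded
        auto capped = appendMaxTimeToCmdObj(Milliseconds{30000}, cmd);
        std::cout << capped["maxTimeMS"] << "\n";  // 30000

        auto uncapped = appendMaxTimeToCmdObj(Milliseconds::max(), cmd);
        std::cout << uncapped.count("maxTimeMS") << "\n";  // 0: no maxTimeMS attached
    }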
diff --git a/src/mongo/s/client/shard_remote.h b/src/mongo/s/client/shard_remote.h
index 25b610e534b..9615cd0e4fa 100644
--- a/src/mongo/s/client/shard_remote.h
+++ b/src/mongo/s/client/shard_remote.h
@@ -88,6 +88,7 @@ private:
Shard::HostWithResponse _runCommand(OperationContext* txn,
const ReadPreferenceSetting& readPref,
const std::string& dbname,
+ Milliseconds maxTimeMSOverride,
const BSONObj& cmdObj) final;
StatusWith<QueryResponse> _exhaustiveFindOnConfig(
diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp
index 06040d20f60..820f40164e0 100644
--- a/src/mongo/s/commands/strategy.cpp
+++ b/src/mongo/s/commands/strategy.cpp
@@ -303,6 +303,17 @@ void Strategy::clientCommandOp(OperationContext* txn, Request& request) {
}
}
+ // Handle command option maxTimeMS.
+ uassert(ErrorCodes::InvalidOptions,
+ "no such command option $maxTimeMs; use maxTimeMS instead",
+ cmdObj[QueryRequest::queryOptionMaxTimeMS].eoo());
+
+ const int maxTimeMS =
+ uassertStatusOK(QueryRequest::parseMaxTimeMS(cmdObj[QueryRequest::cmdOptionMaxTimeMS]));
+ if (maxTimeMS > 0) {
+ txn->setDeadlineAfterNowBy(Milliseconds{maxTimeMS});
+ }
+
int loops = 5;
while (true) {
@@ -319,7 +330,8 @@ void Strategy::clientCommandOp(OperationContext* txn, Request& request) {
throw e;
loops--;
- log() << "retrying command: " << redact(q.query);
+
+ log() << "Retrying command " << redact(q.query) << causedBy(e);
// For legacy reasons, ns may not actually be set in the exception :-(
string staleNS = e.getns();
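On the mongos side, the new code in Strategy::clientCommandOp turns a positive maxTimeMS into a deadline on the OperationContext and rejects the legacy $maxTimeMs spelling. A rough sketch of that flow, with a hypothetical FakeCommand in place of the parsed command document (the real QueryRequest::parseMaxTimeMS performs additional validation):

    #include <chrono>
    #include <iostream>
    #include <stdexcept>

    // Hypothetical stand-in for the parsed command document.
    struct FakeCommand {
        bool hasLegacyDollarSpelling = false;  // "$maxTimeMs"
        long long maxTimeMS = 0;               // 0 means "no limit"
    };

    std::chrono::milliseconds deadlineFromCommand(const FakeCommand& cmd) {
        if (cmd.hasLegacyDollarSpelling) {
            throw std::invalid_argument("no such command option $maxTimeMs; use maxTimeMS instead");
        }
        if (cmd.maxTimeMS < 0) {
            throw std::invalid_argument("maxTimeMS must be non-negative");
        }
        // The caller only sets a deadline when the value is positive, as in the diff.
        return std::chrono::milliseconds{cmd.maxTimeMS};
    }

    int main() {
        std::cout << deadlineFromCommand({false, 15000}).count() << "\n";  // 15000
        try {
            deadlineFromCommand({true, 0});
        } catch (const std::exception& e) {
            std::cout << e.what() << "\n";  // the $maxTimeMs rejection
        }
    }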
diff --git a/src/mongo/s/config_server_test_fixture.cpp b/src/mongo/s/config_server_test_fixture.cpp
index 4ea5df4ebb4..49573d43603 100644
--- a/src/mongo/s/config_server_test_fixture.cpp
+++ b/src/mongo/s/config_server_test_fixture.cpp
@@ -340,8 +340,12 @@ Status ConfigServerTestFixture::insertToConfigCollection(OperationContext* txn,
auto config = getConfigShard();
invariant(config);
- auto insertResponse = config->runCommand(
- txn, kReadPref, ns.db().toString(), request.toBSON(), Shard::RetryPolicy::kNoRetry);
+ auto insertResponse = config->runCommand(txn,
+ kReadPref,
+ ns.db().toString(),
+ request.toBSON(),
+ Shard::kDefaultConfigCommandTimeout,
+ Shard::RetryPolicy::kNoRetry);
BatchedCommandResponse batchResponse;
auto status = Shard::CommandResponse::processBatchWriteResponse(insertResponse, &batchResponse);
diff --git a/src/mongo/shell/utils_sh.js b/src/mongo/shell/utils_sh.js
index 0e0e92c1759..535de11de41 100644
--- a/src/mongo/shell/utils_sh.js
+++ b/src/mongo/shell/utils_sh.js
@@ -180,6 +180,8 @@ sh.stopBalancer = function(timeoutMs, interval) {
};
sh.startBalancer = function(timeoutMs, interval) {
+ timeoutMs = timeoutMs || 60000;
+
var result = db.adminCommand({balancerStart: 1, maxTimeMS: timeoutMs});
if (result.code === ErrorCodes.CommandNotFound) {
// For backwards compatibility, use the legacy balancer start method