-rw-r--r--  jstests/sharding/all_config_servers_blackholed_from_mongos.js          |   6
-rw-r--r--  jstests/sharding/lagged_config_secondary.js                            |  18
-rw-r--r--  jstests/sharding/primary_config_server_blackholed_from_mongos.js       |  12
-rw-r--r--  src/mongo/db/operation_context.cpp                                     |   9
-rw-r--r--  src/mongo/db/operation_context.h                                       |  14
-rw-r--r--  src/mongo/s/catalog/replset/dist_lock_catalog_impl.cpp                 |  14
-rw-r--r--  src/mongo/s/catalog/replset/sharding_catalog_append_db_stats_test.cpp  |   2
-rw-r--r--  src/mongo/s/catalog/replset/sharding_catalog_client_impl.cpp           |  34
-rw-r--r--  src/mongo/s/client/shard.cpp                                           |  25
-rw-r--r--  src/mongo/s/client/shard.h                                             |  30
-rw-r--r--  src/mongo/s/client/shard_local.cpp                                     |   1
-rw-r--r--  src/mongo/s/client/shard_local.h                                       |   1
-rw-r--r--  src/mongo/s/client/shard_local_test.cpp                                |   2
-rw-r--r--  src/mongo/s/client/shard_remote.cpp                                    | 107
-rw-r--r--  src/mongo/s/client/shard_remote.h                                      |   1
-rw-r--r--  src/mongo/s/commands/strategy.cpp                                      |  14
-rw-r--r--  src/mongo/s/config_server_test_fixture.cpp                             |   8
-rw-r--r--  src/mongo/shell/utils_sh.js                                            |   2
18 files changed, 183 insertions, 117 deletions
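The thrust of the change: Shard::runCommand gains an overload taking an explicit Milliseconds maxTimeMSOverride, and config-server callers pass the new Shard::kDefaultConfigCommandTimeout (30 seconds). A minimal caller sketch, mirroring DistLockCatalogImpl::getServerInfo in this diff and assuming a Grid and OperationContext are in scope (illustrative only, not part of the commit):

    // Run a command against the config server, bounded by the default config
    // timeout; the effective deadline is the lesser of this override and the
    // time remaining on the OperationContext (see ShardRemote::_runCommand).
    auto configShard = Grid::get(txn)->shardRegistry()->getConfigShard();
    auto swResponse = configShard->runCommand(txn,
                                              ReadPreferenceSetting{ReadPreference::PrimaryOnly},
                                              "admin",
                                              BSON("serverStatus" << 1),
                                              Shard::kDefaultConfigCommandTimeout,
                                              Shard::RetryPolicy::kIdempotent);
    if (!swResponse.isOK()) {
        return swResponse.getStatus();  // e.g. ExceededTimeLimit on timeout
    }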
diff --git a/jstests/sharding/all_config_servers_blackholed_from_mongos.js b/jstests/sharding/all_config_servers_blackholed_from_mongos.js index c3ed68e97de..f575ba26276 100644 --- a/jstests/sharding/all_config_servers_blackholed_from_mongos.js +++ b/jstests/sharding/all_config_servers_blackholed_from_mongos.js @@ -4,7 +4,6 @@ 'use strict'; var st = new ShardingTest({ - name: 'all_config_servers_blackholed_from_mongos', shards: 2, mongos: 1, useBridge: true, @@ -28,13 +27,14 @@ // This shouldn't stall jsTest.log('Doing read operation on the sharded collection'); assert.throws(function() { - testDB.ShardedColl.find({}).itcount(); + testDB.ShardedColl.find({}).maxTimeMS(15000).itcount(); }); // This should fail, because the primary is not available jsTest.log('Doing write operation on a new database and collection'); assert.writeError(st.s.getDB('NonExistentDB') - .TestColl.insert({_id: 0, value: 'This value will never be inserted'})); + .TestColl.insert({_id: 0, value: 'This value will never be inserted'}, + {maxTimeMS: 15000})); st.stop(); diff --git a/jstests/sharding/lagged_config_secondary.js b/jstests/sharding/lagged_config_secondary.js index 92f0453b941..d9a93c56fad 100644 --- a/jstests/sharding/lagged_config_secondary.js +++ b/jstests/sharding/lagged_config_secondary.js @@ -4,6 +4,13 @@ */ (function() { var st = new ShardingTest({shards: 1}); + var testDB = st.s.getDB('test'); + + assert.commandWorked(testDB.adminCommand({enableSharding: 'test'})); + assert.commandWorked(testDB.adminCommand({shardCollection: 'test.user', key: {_id: 1}})); + + // Ensures that all metadata writes thus far have been replicated to all nodes + st.configRS.awaitReplication(); var configSecondaryList = st.configRS.getSecondaries(); var configSecondaryToKill = configSecondaryList[0]; @@ -12,11 +19,10 @@ delayedConfigSecondary.getDB('admin').adminCommand( {configureFailPoint: 'rsSyncApplyStop', mode: 'alwaysOn'}); - var testDB = st.s.getDB('test'); - testDB.adminCommand({enableSharding: 'test'}); - testDB.adminCommand({shardCollection: 'test.user', key: {_id: 1}}); + assert.writeOK(testDB.user.insert({_id: 1})); - testDB.user.insert({_id: 1}); + // Do one metadata write in order to bump the optime on mongos + assert.writeOK(st.getDB('config').TestConfigColl.insert({TestKey: 'Test value'})); st.configRS.stopMaster(); MongoRunner.stopMongod(configSecondaryToKill.port); @@ -24,8 +30,9 @@ // Clears all cached info so mongos will be forced to query from the config. 
st.s.adminCommand({flushRouterConfig: 1}); + print('Attempting read on a sharded collection...'); var exception = assert.throws(function() { - testDB.user.findOne(); + testDB.user.find({}).maxTimeMS(15000).itcount(); }); assert.eq(ErrorCodes.ExceededTimeLimit, exception.code); @@ -43,5 +50,4 @@ }, 'Did not see any log entries containing the following message: ' + msg, 60000, 300); st.stop(); - }()); diff --git a/jstests/sharding/primary_config_server_blackholed_from_mongos.js b/jstests/sharding/primary_config_server_blackholed_from_mongos.js index b4d0d26d5cb..4fb3886b40b 100644 --- a/jstests/sharding/primary_config_server_blackholed_from_mongos.js +++ b/jstests/sharding/primary_config_server_blackholed_from_mongos.js @@ -3,12 +3,7 @@ (function() { 'use strict'; - var st = new ShardingTest({ - name: 'primary_config_server_blackholed_from_mongos', - shards: 2, - mongos: 1, - useBridge: true - }); + var st = new ShardingTest({shards: 2, mongos: 1, useBridge: true}); var testDB = st.s.getDB('BlackHoleDB'); var configDB = st.s.getDB('config'); @@ -33,7 +28,8 @@ // This should fail, because the primary is not available jsTest.log('Doing write operation on a new database and collection'); assert.writeError(st.s.getDB('NonExistentDB') - .TestColl.insert({_id: 0, value: 'This value will never be inserted'})); + .TestColl.insert({_id: 0, value: 'This value will never be inserted'}, + {maxTimeMS: 15000})); jsTest.log('Doing CRUD operations on the sharded collection'); assert.eq(1000, testDB.ShardedColl.find().itcount()); @@ -41,6 +37,7 @@ assert.eq(1001, testDB.ShardedColl.find().count()); jsTest.log('Doing read operations on a config server collection'); + // Should fail due to primary read preference assert.throws(function() { configDB.chunks.find().itcount(); @@ -59,5 +56,4 @@ assert.lt(0, configDB.chunks.aggregate().itcount()); st.stop(); - }()); diff --git a/src/mongo/db/operation_context.cpp b/src/mongo/db/operation_context.cpp index 58529dffedf..2a5c8814bf3 100644 --- a/src/mongo/db/operation_context.cpp +++ b/src/mongo/db/operation_context.cpp @@ -139,6 +139,15 @@ bool OperationContext::hasDeadlineExpired() const { return now >= getDeadline(); } +Milliseconds OperationContext::getRemainingMaxTimeMillis() const { + if (!hasDeadline()) { + return Milliseconds::max(); + } + + return std::max(Milliseconds{0}, + getDeadline() - getServiceContext()->getFastClockSource()->now()); +} + Microseconds OperationContext::getRemainingMaxTimeMicros() const { if (!hasDeadline()) { return Microseconds::max(); diff --git a/src/mongo/db/operation_context.h b/src/mongo/db/operation_context.h index aadb6c9f981..eee6b98f48d 100644 --- a/src/mongo/db/operation_context.h +++ b/src/mongo/db/operation_context.h @@ -344,11 +344,17 @@ public: } /** - * Returns the number of microseconds remaining for this operation's time limit, or the - * special value Microseconds::max() if the operation has no time limit. + * Returns the number of milliseconds remaining for this operation's time limit or + * Milliseconds::max() if the operation has no time limit. + */ + Milliseconds getRemainingMaxTimeMillis() const; + + /** + * NOTE: This is a legacy "max time" method for controlling operation deadlines and it should + * not be used in new code. Use getRemainingMaxTimeMillis instead. * - * This is a legacy "max time" method for controlling operation deadlines. Prefer not to use it - * in new code. 
+ * Returns the number of microseconds remaining for this operation's time limit, or the special + * value Microseconds::max() if the operation has no time limit. */ Microseconds getRemainingMaxTimeMicros() const; diff --git a/src/mongo/s/catalog/replset/dist_lock_catalog_impl.cpp b/src/mongo/s/catalog/replset/dist_lock_catalog_impl.cpp index 7105ae33de5..d573bcefdc8 100644 --- a/src/mongo/s/catalog/replset/dist_lock_catalog_impl.cpp +++ b/src/mongo/s/catalog/replset/dist_lock_catalog_impl.cpp @@ -183,6 +183,7 @@ Status DistLockCatalogImpl::ping(OperationContext* txn, StringData processID, Da ReadPreferenceSetting{ReadPreference::PrimaryOnly}, _locksNS.db().toString(), request.toBSON(), + Shard::kDefaultConfigCommandTimeout, Shard::RetryPolicy::kNotIdempotent); auto findAndModifyStatus = extractFindAndModifyNewObj(std::move(resultStatus)); @@ -218,6 +219,7 @@ StatusWith<LocksType> DistLockCatalogImpl::grabLock(OperationContext* txn, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, _locksNS.db().toString(), request.toBSON(), + Shard::kDefaultConfigCommandTimeout, Shard::RetryPolicy::kNoRetry); // Dist lock manager is handling own retries auto findAndModifyStatus = extractFindAndModifyNewObj(std::move(resultStatus)); @@ -274,6 +276,7 @@ StatusWith<LocksType> DistLockCatalogImpl::overtakeLock(OperationContext* txn, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, _locksNS.db().toString(), request.toBSON(), + Shard::kDefaultConfigCommandTimeout, Shard::RetryPolicy::kNotIdempotent); auto findAndModifyStatus = extractFindAndModifyNewObj(std::move(resultStatus)); @@ -304,6 +307,7 @@ Status DistLockCatalogImpl::unlock(OperationContext* txn, const OID& lockSession ReadPreferenceSetting{ReadPreference::PrimaryOnly}, _locksNS.db().toString(), request.toBSON(), + Shard::kDefaultConfigCommandTimeout, Shard::RetryPolicy::kIdempotent); auto findAndModifyStatus = extractFindAndModifyNewObj(std::move(resultStatus)); @@ -338,6 +342,7 @@ Status DistLockCatalogImpl::unlockAll(OperationContext* txn, const std::string& ReadPreferenceSetting{ReadPreference::PrimaryOnly}, _locksNS.db().toString(), cmdObj, + Shard::kDefaultConfigCommandTimeout, Shard::RetryPolicy::kIdempotent); if (!response.isOK()) { @@ -363,8 +368,12 @@ Status DistLockCatalogImpl::unlockAll(OperationContext* txn, const std::string& } StatusWith<DistLockCatalog::ServerInfo> DistLockCatalogImpl::getServerInfo(OperationContext* txn) { - auto resultStatus = _client->getConfigShard()->runCommand( - txn, kReadPref, "admin", BSON("serverStatus" << 1), Shard::RetryPolicy::kIdempotent); + auto resultStatus = _client->getConfigShard()->runCommand(txn, + kReadPref, + "admin", + BSON("serverStatus" << 1), + Shard::kDefaultConfigCommandTimeout, + Shard::RetryPolicy::kIdempotent); if (!resultStatus.isOK()) { return resultStatus.getStatus(); @@ -455,6 +464,7 @@ Status DistLockCatalogImpl::stopPing(OperationContext* txn, StringData processId ReadPreferenceSetting{ReadPreference::PrimaryOnly}, _locksNS.db().toString(), request.toBSON(), + Shard::kDefaultConfigCommandTimeout, Shard::RetryPolicy::kNotIdempotent); auto findAndModifyStatus = extractFindAndModifyNewObj(std::move(resultStatus)); diff --git a/src/mongo/s/catalog/replset/sharding_catalog_append_db_stats_test.cpp b/src/mongo/s/catalog/replset/sharding_catalog_append_db_stats_test.cpp index 55c2aba7157..ac50ab925cc 100644 --- a/src/mongo/s/catalog/replset/sharding_catalog_append_db_stats_test.cpp +++ b/src/mongo/s/catalog/replset/sharding_catalog_append_db_stats_test.cpp @@ -70,7 +70,7 @@ 
TEST_F(ShardingCatalogClientAppendDbStatsTest, BasicAppendDBStats) { ASSERT_EQ(kReplSecondaryOkMetadata, request.metadata); ASSERT_EQ("admin", request.dbname); - ASSERT_EQ(BSON("listDatabases" << 1 << "maxTimeMS" << 30000), request.cmdObj); + ASSERT_EQ(BSON("listDatabases" << 1), request.cmdObj); return fromjson(R"({ databases: [ diff --git a/src/mongo/s/catalog/replset/sharding_catalog_client_impl.cpp b/src/mongo/s/catalog/replset/sharding_catalog_client_impl.cpp index c59178d6d22..e9832c68cac 100644 --- a/src/mongo/s/catalog/replset/sharding_catalog_client_impl.cpp +++ b/src/mongo/s/catalog/replset/sharding_catalog_client_impl.cpp @@ -1173,6 +1173,7 @@ bool ShardingCatalogClientImpl::runUserManagementWriteCommand(OperationContext* ReadPreferenceSetting{ReadPreference::PrimaryOnly}, dbname, cmdToRun, + Shard::kDefaultConfigCommandTimeout, Shard::RetryPolicy::kNotIdempotent); if (!response.isOK()) { @@ -1212,7 +1213,12 @@ bool ShardingCatalogClientImpl::runUserManagementReadCommand(OperationContext* t const BSONObj& cmdObj, BSONObjBuilder* result) { auto resultStatus = Grid::get(txn)->shardRegistry()->getConfigShard()->runCommand( - txn, kConfigPrimaryPreferredSelector, dbname, cmdObj, Shard::RetryPolicy::kIdempotent); + txn, + kConfigPrimaryPreferredSelector, + dbname, + cmdObj, + Shard::kDefaultConfigCommandTimeout, + Shard::RetryPolicy::kIdempotent); if (resultStatus.isOK()) { result->appendElements(resultStatus.getValue().response); return resultStatus.getValue().commandStatus.isOK(); @@ -1517,6 +1523,7 @@ Status ShardingCatalogClientImpl::_createCappedConfigCollection(OperationContext ReadPreferenceSetting{ReadPreference::PrimaryOnly}, "config", createCmd, + Shard::kDefaultConfigCommandTimeout, Shard::RetryPolicy::kIdempotent); if (!result.isOK()) { @@ -1546,12 +1553,13 @@ StatusWith<long long> ShardingCatalogClientImpl::_runCountCommandOnConfig(Operat countBuilder.append("query", query); _appendReadConcern(&countBuilder); - auto resultStatus = Grid::get(txn)->shardRegistry()->getConfigShard()->runCommand( - txn, - kConfigReadSelector, - ns.db().toString(), - countBuilder.done(), - Shard::RetryPolicy::kIdempotent); + auto configShard = Grid::get(txn)->shardRegistry()->getConfigShard(); + auto resultStatus = configShard->runCommand(txn, + kConfigReadSelector, + ns.db().toString(), + countBuilder.done(), + Shard::kDefaultConfigCommandTimeout, + Shard::RetryPolicy::kIdempotent); if (!resultStatus.isOK()) { return resultStatus.getStatus(); } @@ -1595,12 +1603,12 @@ void ShardingCatalogClientImpl::_appendReadConcern(BSONObjBuilder* builder) { Status ShardingCatalogClientImpl::appendInfoForConfigServerDatabases(OperationContext* txn, BSONArrayBuilder* builder) { - auto resultStatus = Grid::get(txn)->shardRegistry()->getConfigShard()->runCommand( - txn, - kConfigPrimaryPreferredSelector, - "admin", - BSON("listDatabases" << 1), - Shard::RetryPolicy::kIdempotent); + auto configShard = Grid::get(txn)->shardRegistry()->getConfigShard(); + auto resultStatus = configShard->runCommand(txn, + kConfigPrimaryPreferredSelector, + "admin", + BSON("listDatabases" << 1), + Shard::RetryPolicy::kIdempotent); if (!resultStatus.isOK()) { return resultStatus.getStatus(); diff --git a/src/mongo/s/client/shard.cpp b/src/mongo/s/client/shard.cpp index 68721257b5b..445b63b3dbe 100644 --- a/src/mongo/s/client/shard.cpp +++ b/src/mongo/s/client/shard.cpp @@ -89,6 +89,8 @@ Status Shard::CommandResponse::processBatchWriteResponse( return status; } +const Milliseconds Shard::kDefaultConfigCommandTimeout = Seconds{30}; 
+ Shard::Shard(const ShardId& id) : _id(id) {} const ShardId Shard::getId() const { return _id; } @@ -104,8 +106,18 @@ StatusWith<Shard::CommandResponse> Shard::runCommand(OperationContext* txn, const ReadPreferenceSetting& readPref, const std::string& dbName, const BSONObj& cmdObj, RetryPolicy retryPolicy) { + return runCommand(txn, readPref, dbName, cmdObj, Milliseconds::max(), retryPolicy); +} + +StatusWith<Shard::CommandResponse> Shard::runCommand(OperationContext* txn, + const ReadPreferenceSetting& readPref, + const std::string& dbName, + const BSONObj& cmdObj, + Milliseconds maxTimeMSOverride, + RetryPolicy retryPolicy) { for (int retry = 1; retry <= kOnErrorNumRetries; ++retry) { - auto swCmdResponse = _runCommand(txn, readPref, dbName, cmdObj).commandResponse; + auto hostWithResponse = _runCommand(txn, readPref, dbName, maxTimeMSOverride, cmdObj); + auto swCmdResponse = std::move(hostWithResponse.commandResponse); auto commandStatus = _getEffectiveCommandStatus(swCmdResponse); if (retry < kOnErrorNumRetries && isRetriableError(commandStatus.code(), retryPolicy)) { @@ -130,8 +142,11 @@ BatchedCommandResponse Shard::runBatchWriteCommandOnConfig( const BSONObj cmdObj = batchRequest.toBSON(); for (int retry = 1; retry <= kOnErrorNumRetries; ++retry) { - auto response = - _runCommand(txn, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, dbname, cmdObj); + auto response = _runCommand(txn, + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + dbname, + kDefaultConfigCommandTimeout, + cmdObj); BatchedCommandResponse batchResponse; Status writeStatus = @@ -160,6 +175,9 @@ StatusWith<Shard::QueryResponse> Shard::exhaustiveFindOnConfig( const BSONObj& query, const BSONObj& sort, const boost::optional<long long> limit) { + // Do not allow exhaustive finds to be run against regular shards. + invariant(isConfig()); + for (int retry = 1; retry <= kOnErrorNumRetries; retry++) { auto result = _exhaustiveFindOnConfig(txn, readPref, readConcernLevel, nss, query, sort, limit); @@ -171,7 +189,6 @@ StatusWith<Shard::QueryResponse> Shard::exhaustiveFindOnConfig( return result; } - MONGO_UNREACHABLE; } diff --git a/src/mongo/s/client/shard.h b/src/mongo/s/client/shard.h index 191935611e2..96dfecd204c 100644 --- a/src/mongo/s/client/shard.h +++ b/src/mongo/s/client/shard.h @@ -137,9 +137,9 @@ public: virtual bool isRetriableError(ErrorCodes::Error code, RetryPolicy options) = 0; /** - * Runs a command against this shard and returns the BSON command response, as well as the - * already-parsed out Status of the command response and write concern error (if present). - * Retries failed operations according to the given "retryPolicy". + * Runs the specified command and returns the BSON command response plus the parsed-out Status + * of this response and any write concern error (if present). Waits for up to the deadline set + * on the OperationContext. Retries failed operations according to the given "retryPolicy". */ StatusWith<CommandResponse> runCommand(OperationContext* txn, const ReadPreferenceSetting& readPref, @@ -148,6 +148,18 @@ public: RetryPolicy retryPolicy); /** + * Same as the other variant of runCommand, but allows the operation timeout to be overridden. + * Runs for the lesser of the remaining time on the operation context and the specified + * maxTimeMS override. 
+ */ + StatusWith<CommandResponse> runCommand(OperationContext* txn, + const ReadPreferenceSetting& readPref, + const std::string& dbName, + const BSONObj& cmdObj, + Milliseconds maxTimeMSOverride, + RetryPolicy retryPolicy); + + /** * Expects a single-entry batch write command and runs it on the config server's primary using * the specified retry policy. */ @@ -181,6 +193,9 @@ public: const BSONObj& keys, bool unique) = 0; + // This timeout will be used by default in operations against the config server, unless + // explicitly overridden + static const Milliseconds kDefaultConfigCommandTimeout; protected: struct HostWithResponse { @@ -196,12 +211,17 @@ protected: private: /** - * Paired HostWithResponse output exposes RemoteShard's host for updateReplSetMonitor. - * LocalShard will not return a host. + * Runs the specified command against the shard backed by this object with a timeout set to the + * lesser of maxTimeMSOverride and the remaining time on the OperationContext. + * + * The return value exposes RemoteShard's host for calls to updateReplSetMonitor. + * + * NOTE: The LocalShard implementation will not return a valid host, so it should be ignored. */ virtual HostWithResponse _runCommand(OperationContext* txn, const ReadPreferenceSetting& readPref, const std::string& dbname, + Milliseconds maxTimeMSOverride, const BSONObj& cmdObj) = 0; virtual StatusWith<QueryResponse> _exhaustiveFindOnConfig( diff --git a/src/mongo/s/client/shard_local.cpp b/src/mongo/s/client/shard_local.cpp index 085dbce0e40..f20412e3b29 100644 --- a/src/mongo/s/client/shard_local.cpp +++ b/src/mongo/s/client/shard_local.cpp @@ -124,6 +124,7 @@ repl::OpTime ShardLocal::_getLastOpTime() { Shard::HostWithResponse ShardLocal::_runCommand(OperationContext* txn, const ReadPreferenceSetting& unused, const std::string& dbName, + Milliseconds maxTimeMSOverrideUnused, const BSONObj& cmdObj) { repl::OpTime currentOpTimeFromClient = repl::ReplClientInfo::forClient(txn->getClient()).getLastOp(); diff --git a/src/mongo/s/client/shard_local.h b/src/mongo/s/client/shard_local.h index 56358b449dc..75e97ed0d29 100644 --- a/src/mongo/s/client/shard_local.h +++ b/src/mongo/s/client/shard_local.h @@ -67,6 +67,7 @@ private: Shard::HostWithResponse _runCommand(OperationContext* txn, const ReadPreferenceSetting& unused, const std::string& dbName, + Milliseconds maxTimeMSOverrideUnused, const BSONObj& cmdObj) final; StatusWith<Shard::QueryResponse> _exhaustiveFindOnConfig( diff --git a/src/mongo/s/client/shard_local_test.cpp b/src/mongo/s/client/shard_local_test.cpp index 35a9359b372..409d8dba86d 100644 --- a/src/mongo/s/client/shard_local_test.cpp +++ b/src/mongo/s/client/shard_local_test.cpp @@ -83,7 +83,7 @@ void ShardLocalTest::setUp() { Client::initThreadIfNotAlready(); _txn = getGlobalServiceContext()->makeOperationContext(&cc()); serverGlobalParams.clusterRole = ClusterRole::ConfigServer; - _shardLocal = stdx::make_unique<ShardLocal>(ShardId("shardOrConfig")); + _shardLocal = stdx::make_unique<ShardLocal>(ShardId("config")); const repl::ReplSettings replSettings = {}; repl::setGlobalReplicationCoordinator(new repl::ReplicationCoordinatorMock(replSettings)); } diff --git a/src/mongo/s/client/shard_remote.cpp b/src/mongo/s/client/shard_remote.cpp index 79b7d493863..098bbae7692 100644 --- a/src/mongo/s/client/shard_remote.cpp +++ b/src/mongo/s/client/shard_remote.cpp @@ -67,8 +67,6 @@ namespace { const Status kInternalErrorStatus{ErrorCodes::InternalError, "Invalid to check for write concern error if command failed"}; -const Milliseconds 
kConfigCommandTimeout = Seconds{30}; - const BSONObj kNoMetadata(rpc::makeEmptyMetadata()); // Include kReplSetMetadataFieldName in a request to get the shard's ReplSetMetadata in the @@ -88,50 +86,24 @@ const BSONObj kReplSecondaryOkMetadata{[] { }()}; /** - * Returns a new BSONObj describing the same command and arguments as 'cmdObj', but with a maxTimeMS - * set on it that is the minimum of the maxTimeMS in 'cmdObj' (if present), 'maxTimeMicros', and - * 30 seconds. + * Returns a new BSONObj describing the same command and arguments as 'cmdObj', but with maxTimeMS + * replaced by maxTimeMSOverride (or removed if maxTimeMSOverride is Milliseconds::max()). */ -BSONObj appendMaxTimeToCmdObj(OperationContext* txn, const BSONObj& cmdObj) { - Milliseconds maxTime = kConfigCommandTimeout; - - bool hasTxnMaxTime = txn->hasDeadline(); - bool hasUserMaxTime = !cmdObj[QueryRequest::cmdOptionMaxTimeMS].eoo(); - - if (hasTxnMaxTime) { - maxTime = std::min(maxTime, duration_cast<Milliseconds>(txn->getRemainingMaxTimeMicros())); - if (maxTime <= Milliseconds::zero()) { - // If there is less than 1ms remaining before the maxTime timeout expires, set the max - // time to 1ms, since setting maxTimeMs to 1ms in a command means "no max time". +BSONObj appendMaxTimeToCmdObj(Milliseconds maxTimeMSOverride, const BSONObj& cmdObj) { + BSONObjBuilder updatedCmdBuilder; - maxTime = Milliseconds{1}; + // Remove the user provided maxTimeMS so we can attach the one from the override + for (const auto& elem : cmdObj) { + if (!str::equals(elem.fieldName(), QueryRequest::cmdOptionMaxTimeMS)) { + updatedCmdBuilder.append(elem); } } - if (hasUserMaxTime) { - Milliseconds userMaxTime(cmdObj[QueryRequest::cmdOptionMaxTimeMS].numberLong()); - if (userMaxTime <= maxTime) { - return cmdObj; - } + if (maxTimeMSOverride < Milliseconds::max()) { + updatedCmdBuilder.append(QueryRequest::cmdOptionMaxTimeMS, + durationCount<Milliseconds>(maxTimeMSOverride)); } - BSONObjBuilder updatedCmdBuilder; - if (hasUserMaxTime) { // Need to remove user provided maxTimeMS. - BSONObjIterator cmdObjIter(cmdObj); - const char* maxTimeFieldName = QueryRequest::cmdOptionMaxTimeMS; - while (cmdObjIter.more()) { - BSONElement e = cmdObjIter.next(); - if (str::equals(e.fieldName(), maxTimeFieldName)) { - continue; - } - updatedCmdBuilder.append(e); - } - } else { - updatedCmdBuilder.appendElements(cmdObj); - } - - updatedCmdBuilder.append(QueryRequest::cmdOptionMaxTimeMS, - durationCount<Milliseconds>(maxTime)); return updatedCmdBuilder.obj(); } @@ -199,8 +171,8 @@ const BSONObj& ShardRemote::_getMetadataForCommand(const ReadPreferenceSetting& Shard::HostWithResponse ShardRemote::_runCommand(OperationContext* txn, const ReadPreferenceSetting& readPref, const string& dbName, + Milliseconds maxTimeMSOverride, const BSONObj& cmdObj) { - const BSONObj cmdWithMaxTimeMS = (isConfig() ? appendMaxTimeToCmdObj(txn, cmdObj) : cmdObj); const auto host = _targeter->findHost(readPref, RemoteCommandTargeter::selectFindHostMaxWaitTime(txn)); @@ -208,13 +180,17 @@ Shard::HostWithResponse ShardRemote::_runCommand(OperationContext* txn, return Shard::HostWithResponse(boost::none, host.getStatus()); } - RemoteCommandRequest request(host.getValue(), - dbName, - cmdWithMaxTimeMS, - _getMetadataForCommand(readPref), - txn, - isConfig() ? 
kConfigCommandTimeout - : executor::RemoteCommandRequest::kNoTimeout); + const Milliseconds requestTimeout = + std::min(txn->getRemainingMaxTimeMillis(), maxTimeMSOverride); + + const RemoteCommandRequest request( + host.getValue(), + dbName, + appendMaxTimeToCmdObj(maxTimeMSOverride, cmdObj), + _getMetadataForCommand(readPref), + txn, + requestTimeout < Milliseconds::max() ? requestTimeout : RemoteCommandRequest::kNoTimeout); + StatusWith<RemoteCommandResponse> swResponse = Status(ErrorCodes::InternalError, "Internal error running command"); @@ -262,9 +238,6 @@ StatusWith<Shard::QueryResponse> ShardRemote::_exhaustiveFindOnConfig( const BSONObj& query, const BSONObj& sort, boost::optional<long long> limit) { - // Do not allow exhaustive finds to be run against regular shards. - invariant(getId() == "config"); - const auto host = _targeter->findHost(readPref, RemoteCommandTargeter::selectFindHostMaxWaitTime(txn)); if (!host.isOK()) { @@ -329,24 +302,24 @@ StatusWith<Shard::QueryResponse> ShardRemote::_exhaustiveFindOnConfig( bob.done().getObjectField(repl::ReadConcernArgs::kReadConcernFieldName).getOwned(); } - auto qr = stdx::make_unique<QueryRequest>(nss); - qr->setFilter(query); - qr->setSort(sort); - qr->setReadConcern(readConcernObj); - qr->setLimit(limit); + const Milliseconds maxTimeMS = + std::min(txn->getRemainingMaxTimeMillis(), kDefaultConfigCommandTimeout); BSONObjBuilder findCmdBuilder; - qr->asFindCommand(&findCmdBuilder); - - Microseconds maxTime = std::min(duration_cast<Microseconds>(kConfigCommandTimeout), - txn->getRemainingMaxTimeMicros()); - if (maxTime < Milliseconds{1}) { - // If there is less than 1ms remaining before the maxTime timeout expires, set the max time - // to 1ms, since setting maxTimeMs to 1ms in a find command means "no max time". 
- maxTime = Milliseconds{1}; - } - findCmdBuilder.append(QueryRequest::cmdOptionMaxTimeMS, durationCount<Milliseconds>(maxTime)); + { + QueryRequest qr(nss); + qr.setFilter(query); + qr.setSort(sort); + qr.setReadConcern(readConcernObj); + qr.setLimit(limit); + + if (maxTimeMS < Milliseconds::max()) { + qr.setMaxTimeMS(durationCount<Milliseconds>(maxTimeMS)); + } + + qr.asFindCommand(&findCmdBuilder); + } Fetcher fetcher(Grid::get(txn)->getExecutorPool()->getFixedExecutor(), host.getValue(), @@ -354,7 +327,7 @@ StatusWith<Shard::QueryResponse> ShardRemote::_exhaustiveFindOnConfig( findCmdBuilder.done(), fetcherCallback, _getMetadataForCommand(readPref), - duration_cast<Milliseconds>(maxTime)); + maxTimeMS); Status scheduleStatus = fetcher.schedule(); if (!scheduleStatus.isOK()) { return scheduleStatus; @@ -366,7 +339,7 @@ StatusWith<Shard::QueryResponse> ShardRemote::_exhaustiveFindOnConfig( if (!status.isOK()) { if (status.compareCode(ErrorCodes::ExceededTimeLimit)) { - LOG(0) << "Operation timed out with status " << redact(status); + LOG(0) << "Operation timed out " << causedBy(status); } return status; } diff --git a/src/mongo/s/client/shard_remote.h b/src/mongo/s/client/shard_remote.h index 25b610e534b..9615cd0e4fa 100644 --- a/src/mongo/s/client/shard_remote.h +++ b/src/mongo/s/client/shard_remote.h @@ -88,6 +88,7 @@ private: Shard::HostWithResponse _runCommand(OperationContext* txn, const ReadPreferenceSetting& readPref, const std::string& dbname, + Milliseconds maxTimeMSOverride, const BSONObj& cmdObj) final; StatusWith<QueryResponse> _exhaustiveFindOnConfig( diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp index 06040d20f60..820f40164e0 100644 --- a/src/mongo/s/commands/strategy.cpp +++ b/src/mongo/s/commands/strategy.cpp @@ -303,6 +303,17 @@ void Strategy::clientCommandOp(OperationContext* txn, Request& request) { } } + // Handle command option maxTimeMS. 
+ uassert(ErrorCodes::InvalidOptions, + "no such command option $maxTimeMs; use maxTimeMS instead", + cmdObj[QueryRequest::queryOptionMaxTimeMS].eoo()); + + const int maxTimeMS = + uassertStatusOK(QueryRequest::parseMaxTimeMS(cmdObj[QueryRequest::cmdOptionMaxTimeMS])); + if (maxTimeMS > 0) { + txn->setDeadlineAfterNowBy(Milliseconds{maxTimeMS}); + } + int loops = 5; while (true) { @@ -319,7 +330,8 @@ void Strategy::clientCommandOp(OperationContext* txn, Request& request) { throw e; loops--; - log() << "retrying command: " << redact(q.query); + + log() << "Retrying command " << redact(q.query) << causedBy(e); // For legacy reasons, ns may not actually be set in the exception :-( string staleNS = e.getns(); diff --git a/src/mongo/s/config_server_test_fixture.cpp b/src/mongo/s/config_server_test_fixture.cpp index 4ea5df4ebb4..49573d43603 100644 --- a/src/mongo/s/config_server_test_fixture.cpp +++ b/src/mongo/s/config_server_test_fixture.cpp @@ -340,8 +340,12 @@ Status ConfigServerTestFixture::insertToConfigCollection(OperationContext* txn, auto config = getConfigShard(); invariant(config); - auto insertResponse = config->runCommand( - txn, kReadPref, ns.db().toString(), request.toBSON(), Shard::RetryPolicy::kNoRetry); + auto insertResponse = config->runCommand(txn, + kReadPref, + ns.db().toString(), + request.toBSON(), + Shard::kDefaultConfigCommandTimeout, + Shard::RetryPolicy::kNoRetry); BatchedCommandResponse batchResponse; auto status = Shard::CommandResponse::processBatchWriteResponse(insertResponse, &batchResponse); diff --git a/src/mongo/shell/utils_sh.js b/src/mongo/shell/utils_sh.js index 0e0e92c1759..535de11de41 100644 --- a/src/mongo/shell/utils_sh.js +++ b/src/mongo/shell/utils_sh.js @@ -180,6 +180,8 @@ sh.stopBalancer = function(timeoutMs, interval) { }; sh.startBalancer = function(timeoutMs, interval) { + timeoutMs = timeoutMs || 60000; + var result = db.adminCommand({balancerStart: 1, maxTimeMS: timeoutMs}); if (result.code === ErrorCodes.CommandNotFound) { // For backwards compatibility, use the legacy balancer start method
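For reference, the timeout composition the new overload implements, sketched from ShardRemote::_runCommand above (txn, maxTimeMSOverride, and cmdObj are assumed in scope; names are from this diff, shown in isolation):

    // Effective network timeout: the lesser of the operation's remaining time
    // budget and the per-call override; Milliseconds::max() means "no timeout".
    const Milliseconds requestTimeout =
        std::min(txn->getRemainingMaxTimeMillis(), maxTimeMSOverride);

    // The command object carries an explicit maxTimeMS only when an override is
    // in effect, so the server-side deadline matches the network-level timeout.
    const BSONObj cmdWithMaxTimeMS = appendMaxTimeToCmdObj(maxTimeMSOverride, cmdObj);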