diff options
author | Andy Schwerin <schwerin@mongodb.com> | 2016-09-06 16:57:35 -0400 |
---|---|---|
committer | Andy Schwerin <schwerin@mongodb.com> | 2016-09-07 09:26:50 -0400 |
commit | 645a77b3fa5b28d29d245e30cc195fd5a8eda049 (patch) | |
tree | d91a9bc7ed7012e753ec8d92859f7f342a994f86 /src | |
parent | 1f389ce467330cda1171d2a04bd0e0b2890aaf8d (diff) | |
download | mongo-645a77b3fa5b28d29d245e30cc195fd5a8eda049.tar.gz |
SERVER-24600 Increase interruptibility of RemoteCommandTargeter::findHost.
By making more calls of RemoteCommandTargeter::findHost interruptible, this
change speeds up the shutdown of mongos when no config servers are discoverable.
Diffstat (limited to 'src')
20 files changed, 110 insertions, 108 deletions
diff --git a/src/mongo/client/SConscript b/src/mongo/client/SConscript index aad23a013fb..ec024722954 100644 --- a/src/mongo/client/SConscript +++ b/src/mongo/client/SConscript @@ -179,13 +179,13 @@ env.Library( env.Library( target='remote_command_targeter', source=[ - 'remote_command_targeter.cpp', 'remote_command_targeter_factory_impl.cpp', 'remote_command_targeter_rs.cpp', 'remote_command_targeter_standalone.cpp', ], LIBDEPS=[ 'clientdriver', + '$BUILD_DIR/mongo/db/service_context', ] ) diff --git a/src/mongo/client/remote_command_targeter.cpp b/src/mongo/client/remote_command_targeter.cpp deleted file mode 100644 index c1227af2f52..00000000000 --- a/src/mongo/client/remote_command_targeter.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/** - * Copyright (C) 2015 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/platform/basic.h" - -#include "mongo/client/remote_command_targeter.h" -#include "mongo/db/operation_context.h" - -namespace mongo { -namespace { - -// This value is used if the operation doesn't have a user-specified max wait time. It should be -// closer to (preferably higher than) the replication electionTimeoutMillis in order to ensure that -// lack of primary due to replication election does not cause findHost failures. -const Seconds kDefaultFindHostMaxWaitTime(20); - -// When calculating the findHost max wait time and the operation has a user-specified max wait time, -// pessimistially assume that the findHost would take this much time so that when it returns, there -// is still time left to complete the actual operation. -const Seconds kFindHostTimeoutPad(1); - -} // namespace - -Milliseconds RemoteCommandTargeter::selectFindHostMaxWaitTime(OperationContext* txn) { - // TODO: Get remaining max time from 'txn'. - Milliseconds remainingMaxTime(0); - if (remainingMaxTime > Milliseconds::zero()) { - return std::min(remainingMaxTime - kFindHostTimeoutPad, - Milliseconds(kDefaultFindHostMaxWaitTime)); - } - - return kDefaultFindHostMaxWaitTime; -} - -} // namespace mongo diff --git a/src/mongo/client/remote_command_targeter.h b/src/mongo/client/remote_command_targeter.h index 020cc68945c..ca751be0dec 100644 --- a/src/mongo/client/remote_command_targeter.h +++ b/src/mongo/client/remote_command_targeter.h @@ -29,6 +29,7 @@ #pragma once #include "mongo/base/disallow_copying.h" +#include "mongo/util/net/hostandport.h" #include "mongo/util/time_support.h" namespace mongo { @@ -58,19 +59,38 @@ public: virtual ConnectionString connectionString() = 0; /** - * Obtains a host, which matches the read preferences specified by readPref, blocking for the + * Finds a host matching readPref blocking up to 20 seconds or until the given operation is + * interrupted or its deadline expires. + * + * TODO(schwerin): Once operation max-time behavior is more uniformly integrated into sharding, + * remove the 20-second ceiling on wait time. + */ + virtual StatusWith<HostAndPort> findHost(OperationContext* txn, + const ReadPreferenceSetting& readPref) = 0; + + + /** + * Finds a host that matches the read preference specified by readPref, blocking for up to * specified maxWait milliseconds, if a match cannot be found immediately. * - * Specifying a maxWait of zero means non-blocking. I.e., the call will just check the in-memory - * cached view of the replica set's host state and won't wait for it to be refreshed if it is - * found to be stale. + * DEPRECATED. Prefer findHost(OperationContext*, const ReadPreferenceSetting&), whenever + * an OperationContext is available. + */ + virtual StatusWith<HostAndPort> findHostWithMaxWait(const ReadPreferenceSetting& readPref, + Milliseconds maxWait) = 0; + + /** + * Finds a host matching the given read preference, giving up if a match is not found promptly. + * + * This method may still engage in blocking networking calls, but will attempt contact every + * member of the replica set at most one time. * - * Returns OK and a host and port to use for the specified read preference or an ErrorCode. - * Known error codes are: - * All error codes which can be returned by ReplicaSetMonitor::getHostOrRefresh. + * TODO(schwerin): Change this implementation to not perform any networking, once existing + * callers have been shown to be safe with this behavior or changed to call findHost. */ - virtual StatusWith<HostAndPort> findHost(const ReadPreferenceSetting& readPref, - Milliseconds maxWait = Milliseconds(0)) = 0; + StatusWith<HostAndPort> findHostNoWait(const ReadPreferenceSetting& readPref) { + return findHostWithMaxWait(readPref, Milliseconds::zero()); + } /** * Reports to the targeter that a NotMaster response was received when communicating with @@ -86,14 +106,6 @@ public: */ virtual void markHostUnreachable(const HostAndPort& host) = 0; - /** - * Based on the remaining time of the operation and the default max wait time for findHost, - * selects an appropriate value to pass to the maxWait argument of the findHost method, so it - * has high likelyhood in returning on time and also leaving time for the rest of the call to - * complete. - */ - static Milliseconds selectFindHostMaxWaitTime(OperationContext* txn); - protected: RemoteCommandTargeter() = default; }; diff --git a/src/mongo/client/remote_command_targeter_factory_mock.cpp b/src/mongo/client/remote_command_targeter_factory_mock.cpp index 2972528d704..3d30edf7a32 100644 --- a/src/mongo/client/remote_command_targeter_factory_mock.cpp +++ b/src/mongo/client/remote_command_targeter_factory_mock.cpp @@ -47,9 +47,14 @@ public: return _mock->connectionString(); } - StatusWith<HostAndPort> findHost(const ReadPreferenceSetting& readPref, - Milliseconds maxWait) override { - return _mock->findHost(readPref, maxWait); + StatusWith<HostAndPort> findHost(OperationContext* txn, + const ReadPreferenceSetting& readPref) override { + return _mock->findHost(txn, readPref); + } + + StatusWith<HostAndPort> findHostWithMaxWait(const ReadPreferenceSetting& readPref, + Milliseconds maxWait) override { + return _mock->findHostWithMaxWait(readPref, maxWait); } void markHostNotMaster(const HostAndPort& host) override { diff --git a/src/mongo/client/remote_command_targeter_mock.cpp b/src/mongo/client/remote_command_targeter_mock.cpp index 73ee8de2e0e..617efb699fc 100644 --- a/src/mongo/client/remote_command_targeter_mock.cpp +++ b/src/mongo/client/remote_command_targeter_mock.cpp @@ -52,8 +52,14 @@ ConnectionString RemoteCommandTargeterMock::connectionString() { return _connectionStringReturnValue; } -StatusWith<HostAndPort> RemoteCommandTargeterMock::findHost(const ReadPreferenceSetting& readPref, - Milliseconds maxWait) { +StatusWith<HostAndPort> RemoteCommandTargeterMock::findHost(OperationContext* txn, + const ReadPreferenceSetting& readPref) { + return _findHostReturnValue; +} + +StatusWith<HostAndPort> RemoteCommandTargeterMock::findHostWithMaxWait( + const ReadPreferenceSetting& readPref, Milliseconds maxTime) { + return _findHostReturnValue; } diff --git a/src/mongo/client/remote_command_targeter_mock.h b/src/mongo/client/remote_command_targeter_mock.h index d4c8835b619..6183a635f61 100644 --- a/src/mongo/client/remote_command_targeter_mock.h +++ b/src/mongo/client/remote_command_targeter_mock.h @@ -54,8 +54,11 @@ public: * Returns the return value last set by setFindHostReturnValue. * Returns ErrorCodes::InternalError if setFindHostReturnValue was never called. */ - StatusWith<HostAndPort> findHost(const ReadPreferenceSetting& readPref, - Milliseconds maxWait) override; + StatusWith<HostAndPort> findHostWithMaxWait(const ReadPreferenceSetting& readPref, + Milliseconds maxWait) override; + + StatusWith<HostAndPort> findHost(OperationContext* txn, + const ReadPreferenceSetting& readPref) override; /** * No-op for the mock. diff --git a/src/mongo/client/remote_command_targeter_rs.cpp b/src/mongo/client/remote_command_targeter_rs.cpp index 4d42f613613..b3d1e603031 100644 --- a/src/mongo/client/remote_command_targeter_rs.cpp +++ b/src/mongo/client/remote_command_targeter_rs.cpp @@ -36,6 +36,7 @@ #include "mongo/client/connection_string.h" #include "mongo/client/read_preference.h" #include "mongo/client/replica_set_monitor.h" +#include "mongo/db/operation_context.h" #include "mongo/util/assert_util.h" #include "mongo/util/log.h" #include "mongo/util/mongoutils/str.h" @@ -59,11 +60,34 @@ ConnectionString RemoteCommandTargeterRS::connectionString() { return fassertStatusOK(28712, ConnectionString::parse(_rsMonitor->getServerAddress())); } -StatusWith<HostAndPort> RemoteCommandTargeterRS::findHost(const ReadPreferenceSetting& readPref, - Milliseconds maxWait) { +StatusWith<HostAndPort> RemoteCommandTargeterRS::findHostWithMaxWait( + const ReadPreferenceSetting& readPref, Milliseconds maxWait) { return _rsMonitor->getHostOrRefresh(readPref, maxWait); } +StatusWith<HostAndPort> RemoteCommandTargeterRS::findHost(OperationContext* txn, + const ReadPreferenceSetting& readPref) { + auto clock = txn->getServiceContext()->getFastClockSource(); + auto startDate = clock->now(); + while (true) { + const auto interruptStatus = txn->checkForInterruptNoAssert(); + if (!interruptStatus.isOK()) { + return interruptStatus; + } + const auto host = _rsMonitor->getHostOrRefresh(readPref, Milliseconds::zero()); + if (host.getStatus() != ErrorCodes::FailedToSatisfyReadPreference) { + return host; + } + // Enforce a 20-second ceiling on the time spent looking for a host. This conforms with the + // behavior used throughout mongos prior to version 3.4, but is not fundamentally desirable. + // See comment in remote_command_targeter.h for details. + if (clock->now() - startDate > Seconds{20}) { + return host; + } + sleepFor(Milliseconds{500}); + } +} + void RemoteCommandTargeterRS::markHostNotMaster(const HostAndPort& host) { invariant(_rsMonitor); diff --git a/src/mongo/client/remote_command_targeter_rs.h b/src/mongo/client/remote_command_targeter_rs.h index 28decd97232..9917613834e 100644 --- a/src/mongo/client/remote_command_targeter_rs.h +++ b/src/mongo/client/remote_command_targeter_rs.h @@ -52,8 +52,11 @@ public: ConnectionString connectionString() override; - StatusWith<HostAndPort> findHost(const ReadPreferenceSetting& readPref, - Milliseconds maxWait) override; + StatusWith<HostAndPort> findHost(OperationContext* txn, + const ReadPreferenceSetting& readPref) override; + + StatusWith<HostAndPort> findHostWithMaxWait(const ReadPreferenceSetting& readPref, + Milliseconds maxWait) override; void markHostNotMaster(const HostAndPort& host) override; diff --git a/src/mongo/client/remote_command_targeter_standalone.cpp b/src/mongo/client/remote_command_targeter_standalone.cpp index 45123540752..a5afaef951a 100644 --- a/src/mongo/client/remote_command_targeter_standalone.cpp +++ b/src/mongo/client/remote_command_targeter_standalone.cpp @@ -42,11 +42,16 @@ ConnectionString RemoteCommandTargeterStandalone::connectionString() { return ConnectionString(_hostAndPort); } -StatusWith<HostAndPort> RemoteCommandTargeterStandalone::findHost( +StatusWith<HostAndPort> RemoteCommandTargeterStandalone::findHostWithMaxWait( const ReadPreferenceSetting& readPref, Milliseconds maxWait) { return _hostAndPort; } +StatusWith<HostAndPort> RemoteCommandTargeterStandalone::findHost( + OperationContext* txn, const ReadPreferenceSetting& readPref) { + return _hostAndPort; +} + void RemoteCommandTargeterStandalone::markHostNotMaster(const HostAndPort& host) { dassert(host == _hostAndPort); } diff --git a/src/mongo/client/remote_command_targeter_standalone.h b/src/mongo/client/remote_command_targeter_standalone.h index 603f8167bd0..ab7ae099243 100644 --- a/src/mongo/client/remote_command_targeter_standalone.h +++ b/src/mongo/client/remote_command_targeter_standalone.h @@ -43,8 +43,11 @@ public: ConnectionString connectionString() override; - StatusWith<HostAndPort> findHost(const ReadPreferenceSetting& readPref, - Milliseconds maxWait) override; + StatusWith<HostAndPort> findHost(OperationContext* txn, + const ReadPreferenceSetting& readPref) override; + + StatusWith<HostAndPort> findHostWithMaxWait(const ReadPreferenceSetting& readPref, + Milliseconds maxWait) override; void markHostNotMaster(const HostAndPort& host) override; diff --git a/src/mongo/client/replica_set_monitor.h b/src/mongo/client/replica_set_monitor.h index 3be879a06c8..1b2db2c906f 100644 --- a/src/mongo/client/replica_set_monitor.h +++ b/src/mongo/client/replica_set_monitor.h @@ -80,6 +80,8 @@ public: * wait for one to become available for up to the specified time and periodically refresh * the view of the set. The call may return with an error earlier than the specified value, * if none of the known hosts for the set are reachable within some number of attempts. + * Note that if a maxWait of 0ms is specified, this method may still attempt to contact + * every host in the replica set up to one time. * * Known errors are: * FailedToSatisfyReadPreference, if node cannot be found, which matches the read preference. diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp index 2be1bcbab01..26c5a8a209d 100644 --- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp +++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp @@ -198,7 +198,7 @@ Status MigrationChunkClonerSourceLegacy::startClone(OperationContext* txn) { } auto recipientShard = recipientShardStatus.getValue(); - auto shardHostStatus = recipientShard->getTargeter()->findHost( + auto shardHostStatus = recipientShard->getTargeter()->findHostNoWait( ReadPreferenceSetting{ReadPreference::PrimaryOnly}); if (!shardHostStatus.isOK()) { return shardHostStatus.getStatus(); diff --git a/src/mongo/s/balancer/migration_manager.cpp b/src/mongo/s/balancer/migration_manager.cpp index b73199ec848..ba4a488ffcb 100644 --- a/src/mongo/s/balancer/migration_manager.cpp +++ b/src/mongo/s/balancer/migration_manager.cpp @@ -327,8 +327,7 @@ shared_ptr<Notification<Status>> MigrationManager::_schedule( const auto fromShard = fromShardStatus.getValue(); auto fromHostStatus = - fromShard->getTargeter()->findHost(ReadPreferenceSetting{ReadPreference::PrimaryOnly}, - RemoteCommandTargeter::selectFindHostMaxWaitTime(txn)); + fromShard->getTargeter()->findHost(txn, ReadPreferenceSetting{ReadPreference::PrimaryOnly}); if (!fromHostStatus.isOK()) { return std::make_shared<Notification<Status>>(std::move(fromHostStatus.getStatus())); } diff --git a/src/mongo/s/catalog/replset/sharding_catalog_manager_impl.cpp b/src/mongo/s/catalog/replset/sharding_catalog_manager_impl.cpp index f58c323fd12..88c7d4c482f 100644 --- a/src/mongo/s/catalog/replset/sharding_catalog_manager_impl.cpp +++ b/src/mongo/s/catalog/replset/sharding_catalog_manager_impl.cpp @@ -325,8 +325,7 @@ StatusWith<Shard::CommandResponse> ShardingCatalogManagerImpl::_runCommandForAdd RemoteCommandTargeter* targeter, const std::string& dbName, const BSONObj& cmdObj) { - auto host = targeter->findHost(ReadPreferenceSetting{ReadPreference::PrimaryOnly}, - RemoteCommandTargeter::selectFindHostMaxWaitTime(txn)); + auto host = targeter->findHost(txn, ReadPreferenceSetting{ReadPreference::PrimaryOnly}); if (!host.isOK()) { return host.getStatus(); } @@ -1712,8 +1711,8 @@ void ShardingCatalogManagerImpl::_scheduleAddShardTask( // Schedule the shardIdentity upsert request to run immediately, and track the handle. - auto swHost = targeter->findHost(ReadPreferenceSetting{ReadPreference::PrimaryOnly}, - Milliseconds(kDefaultFindHostMaxWaitTime)); + auto swHost = targeter->findHostWithMaxWait(ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + Milliseconds(kDefaultFindHostMaxWaitTime)); if (!swHost.isOK()) { // A 3.2 mongos must have previously successfully communicated with hosts in this shard, // so a failure to find a host here is probably transient, and it is safe to retry. diff --git a/src/mongo/s/client/shard_remote.cpp b/src/mongo/s/client/shard_remote.cpp index 3dbae46be66..200f3fc30ad 100644 --- a/src/mongo/s/client/shard_remote.cpp +++ b/src/mongo/s/client/shard_remote.cpp @@ -176,8 +176,7 @@ Shard::HostWithResponse ShardRemote::_runCommand(OperationContext* txn, if (getId() == "config") { readPrefWithMinOpTime.minOpTime = grid.configOpTime(); } - const auto host = _targeter->findHost(readPrefWithMinOpTime, - RemoteCommandTargeter::selectFindHostMaxWaitTime(txn)); + const auto host = _targeter->findHost(txn, readPrefWithMinOpTime); if (!host.isOK()) { return Shard::HostWithResponse(boost::none, host.getStatus()); } @@ -244,8 +243,7 @@ StatusWith<Shard::QueryResponse> ShardRemote::_exhaustiveFindOnConfig( ReadPreferenceSetting readPrefWithMinOpTime(readPref); readPrefWithMinOpTime.minOpTime = grid.configOpTime(); - const auto host = _targeter->findHost(readPrefWithMinOpTime, - RemoteCommandTargeter::selectFindHostMaxWaitTime(txn)); + const auto host = _targeter->findHost(txn, readPrefWithMinOpTime); if (!host.isOK()) { return host.getStatus(); } diff --git a/src/mongo/s/commands/cluster_get_last_error_cmd.cpp b/src/mongo/s/commands/cluster_get_last_error_cmd.cpp index 56bfa66b369..dbeb35a673e 100644 --- a/src/mongo/s/commands/cluster_get_last_error_cmd.cpp +++ b/src/mongo/s/commands/cluster_get_last_error_cmd.cpp @@ -117,7 +117,7 @@ public: break; } auto shard = shardStatus.getValue(); - auto swHostAndPort = shard->getTargeter()->findHost(readPref); + auto swHostAndPort = shard->getTargeter()->findHostNoWait(readPref); if (!swHostAndPort.isOK()) { status = swHostAndPort.getStatus(); break; diff --git a/src/mongo/s/commands/cluster_write_cmd.cpp b/src/mongo/s/commands/cluster_write_cmd.cpp index ea63a7d020a..925edaddf62 100644 --- a/src/mongo/s/commands/cluster_write_cmd.cpp +++ b/src/mongo/s/commands/cluster_write_cmd.cpp @@ -271,7 +271,7 @@ private: if (!shardStatus.isOK()) { return shardStatus.getStatus(); } - auto swHostAndPort = shardStatus.getValue()->getTargeter()->findHost(readPref); + auto swHostAndPort = shardStatus.getValue()->getTargeter()->findHostNoWait(readPref); if (!swHostAndPort.isOK()) { return swHostAndPort.getStatus(); } diff --git a/src/mongo/s/query/async_results_merger.cpp b/src/mongo/s/query/async_results_merger.cpp index 85cc8454dd4..c95a0fe2482 100644 --- a/src/mongo/s/query/async_results_merger.cpp +++ b/src/mongo/s/query/async_results_merger.cpp @@ -685,8 +685,7 @@ Status AsyncResultsMerger::RemoteCursorData::resolveShardIdToHostAndPort( } // TODO: Pass down an OperationContext* to use here. - auto findHostStatus = shard->getTargeter()->findHost( - readPref, RemoteCommandTargeter::selectFindHostMaxWaitTime(nullptr)); + auto findHostStatus = shard->getTargeter()->findHostWithMaxWait(readPref, Seconds{20}); if (!findHostStatus.isOK()) { return findHostStatus.getStatus(); } diff --git a/src/mongo/s/server.cpp b/src/mongo/s/server.cpp index cd98e0cc6dc..79b149de6d7 100644 --- a/src/mongo/s/server.cpp +++ b/src/mongo/s/server.cpp @@ -131,6 +131,7 @@ static ExitCode initService(); // prior execution of mongo initializers or the existence of threads. static void cleanupTask() { { + auto serviceContext = getGlobalServiceContext(); Client::initThreadIfNotAlready(); Client& client = cc(); ServiceContext::UniqueOperationContext uniqueTxn; @@ -140,6 +141,9 @@ static void cleanupTask() { txn = uniqueTxn.get(); } + if (serviceContext) + serviceContext->setKillAllOperations(); + auto cursorManager = grid.getCursorManager(); cursorManager->shutdown(); grid.getExecutorPool()->shutdownAndJoin(); diff --git a/src/mongo/s/write_ops/batch_write_exec.cpp b/src/mongo/s/write_ops/batch_write_exec.cpp index 34f399267f4..6a6395eea13 100644 --- a/src/mongo/s/write_ops/batch_write_exec.cpp +++ b/src/mongo/s/write_ops/batch_write_exec.cpp @@ -190,7 +190,7 @@ void BatchWriteExec::executeBatch(OperationContext* txn, } else { auto shard = shardStatus.getValue(); - auto swHostAndPort = shard->getTargeter()->findHost(readPref); + auto swHostAndPort = shard->getTargeter()->findHostNoWait(readPref); if (!swHostAndPort.isOK()) { // Record a resolve failure |