diff options
author | Esha Maharishi <esha.maharishi@mongodb.come> | 2016-02-24 16:52:36 -0500 |
---|---|---|
committer | Esha Maharishi <esha.maharishi@mongodb.come> | 2016-02-24 17:08:53 -0500 |
commit | 241bcd889e3b5923e96844eebd380f044737a5ff (patch) | |
tree | f66639beca04ea00e85ec7640819617bd57a213a | |
parent | e7c8e17220ef32befa8673bd776ba381b1911496 (diff) | |
download | mongo-241bcd889e3b5923e96844eebd380f044737a5ff.tar.gz |
SERVER-22427 add logging for 'Operation timed out' errors
-rw-r--r-- | src/mongo/executor/network_interface_asio.cpp | 18 | ||||
-rw-r--r-- | src/mongo/executor/network_interface_asio.h | 9 | ||||
-rw-r--r-- | src/mongo/executor/thread_pool_task_executor.cpp | 12 | ||||
-rw-r--r-- | src/mongo/s/client/shard_registry.cpp | 6 |
4 files changed, 34 insertions, 11 deletions
diff --git a/src/mongo/executor/network_interface_asio.cpp b/src/mongo/executor/network_interface_asio.cpp index ce7ec0f13b9..96ba61a2dd4 100644 --- a/src/mongo/executor/network_interface_asio.cpp +++ b/src/mongo/executor/network_interface_asio.cpp @@ -295,10 +295,10 @@ void NetworkInterfaceASIO::startCommand(const TaskExecutor::CallbackHandle& cbHa // timeout duration - but make no stronger assumption. It is thus possible that // we have already exceeded the timeout. In this case we timeout the operation // manually. - return _completeOperation(op, - {ErrorCodes::ExceededTimeLimit, - "Remote command timed out while waiting to get a " - "connection from the pool."}); + std::stringstream msg; + msg << "Remote command timed out while waiting to get a connection from the " + << "pool, took " << getConnectionDuration; + return _completeOperation(op, {ErrorCodes::ExceededTimeLimit, msg.str()}); } // The above conditional guarantees that the adjusted timeout will never underflow. @@ -318,7 +318,7 @@ void NetworkInterfaceASIO::startCommand(const TaskExecutor::CallbackHandle& cbHa } op->_timeoutAlarm->asyncWait( - [this, op, access, generation, requestId](std::error_code ec) { + [this, op, access, generation, requestId, adjustedTimeout](std::error_code ec) { if (!ec) { // We must pass a check for safe access before using op inside the // callback or we may attempt access on an invalid pointer. @@ -328,12 +328,14 @@ void NetworkInterfaceASIO::startCommand(const TaskExecutor::CallbackHandle& cbHa return; } - LOG(2) << "Operation " << requestId << " timed out."; + LOG(2) << "Operation " << requestId << " timed out" + << ", adjusted timeout after getting connection from pool was " + << adjustedTimeout << ", op was " << op->toString(); op->timeOut_inlock(); } else { - LOG(2) << "Failed to time operation " << requestId - << " out: " << ec.message(); + LOG(2) << "Failed to time operation " << requestId << ", op was " + << op->toString() << ", out: " << ec.message(); } }); } diff --git a/src/mongo/executor/network_interface_asio.h b/src/mongo/executor/network_interface_asio.h index 3a175830dc8..b4d1d05021d 100644 --- a/src/mongo/executor/network_interface_asio.h +++ b/src/mongo/executor/network_interface_asio.h @@ -443,9 +443,12 @@ private: if (op->canceled()) return _completeOperation(op, Status(ErrorCodes::CallbackCanceled, "Callback canceled")); - if (op->timedOut()) - return _completeOperation(op, - Status(ErrorCodes::ExceededTimeLimit, "Operation timed out")); + if (op->timedOut()) { + str::stream msg; + msg << "Operation timed out" + << ", request was " << op->_request.toString(); + return _completeOperation(op, Status(ErrorCodes::ExceededTimeLimit, msg)); + } if (ec) return _networkErrorCallback(op, ec); diff --git a/src/mongo/executor/thread_pool_task_executor.cpp b/src/mongo/executor/thread_pool_task_executor.cpp index ea43b5e09eb..9b9660ca5a0 100644 --- a/src/mongo/executor/thread_pool_task_executor.cpp +++ b/src/mongo/executor/thread_pool_task_executor.cpp @@ -266,6 +266,11 @@ StatusWith<TaskExecutor::CallbackHandle> ThreadPoolTaskExecutor::scheduleWorkAt( } namespace { + +// If the request received a connection from the pool but failed in its execution, +// convert the raw Status in cbData to a StatusWith<RemoteCommandResponse> so that the callback, +// which expects a StatusWith<RemoteCommandResponse> as part of RemoteCommandCallbackArgs, +// can be run despite a RemoteCommandResponse never having been created. void remoteCommandFinished(const TaskExecutor::CallbackArgs& cbData, const TaskExecutor::RemoteCommandCallbackFn& cb, const RemoteCommandRequest& request, @@ -280,6 +285,10 @@ void remoteCommandFinished(const TaskExecutor::CallbackArgs& cbData, } } +// If the request failed to receive a connection from the pool, +// convert the raw Status in cbData to a StatusWith<RemoteCommandResponse> so that the callback, +// which expects a StatusWith<RemoteCommandResponse> as part of RemoteCommandCallbackArgs, +// can be run despite a RemoteCommandResponse never having been created. void remoteCommandFailedEarly(const TaskExecutor::CallbackArgs& cbData, const TaskExecutor::RemoteCommandCallbackFn& cb, const RemoteCommandRequest& request) { @@ -298,6 +307,9 @@ StatusWith<TaskExecutor::CallbackHandle> ThreadPoolTaskExecutor::scheduleRemoteC } else { scheduledRequest.expirationDate = _net->now() + scheduledRequest.timeout; } + + // In case the request fails to even get a connection from the pool, + // we wrap the callback in a method that prepares its input parameters. auto wq = makeSingletonWorkQueue([scheduledRequest, cb](const CallbackArgs& cbData) { remoteCommandFailedEarly(cbData, cb, scheduledRequest); }); diff --git a/src/mongo/s/client/shard_registry.cpp b/src/mongo/s/client/shard_registry.cpp index e985392f555..16c09248686 100644 --- a/src/mongo/s/client/shard_registry.cpp +++ b/src/mongo/s/client/shard_registry.cpp @@ -590,6 +590,9 @@ StatusWith<ShardRegistry::QueryResponse> ShardRegistry::_exhaustiveFindOnConfig( updateReplSetMonitor(targeter, host.getValue(), status); if (!status.isOK()) { + if (status.compareCode(ErrorCodes::ExceededTimeLimit)) { + LOG(0) << "Operation timed out with status " << status; + } return status; } @@ -810,6 +813,9 @@ StatusWith<ShardRegistry::CommandResponse> ShardRegistry::_runCommandWithMetadat if (!responseStatus.isOK()) { updateReplSetMonitor(targeter, host.getValue(), responseStatus.getStatus()); + if (responseStatus.getStatus().compareCode(ErrorCodes::ExceededTimeLimit)) { + LOG(0) << "Operation timed out with status " << responseStatus.getStatus(); + } return responseStatus.getStatus(); } |