summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEsha Maharishi <esha.maharishi@mongodb.come>2016-02-24 16:52:36 -0500
committerEsha Maharishi <esha.maharishi@mongodb.come>2016-02-24 17:08:53 -0500
commit241bcd889e3b5923e96844eebd380f044737a5ff (patch)
treef66639beca04ea00e85ec7640819617bd57a213a
parente7c8e17220ef32befa8673bd776ba381b1911496 (diff)
downloadmongo-241bcd889e3b5923e96844eebd380f044737a5ff.tar.gz
SERVER-22427 add logging for 'Operation timed out' errors
-rw-r--r--src/mongo/executor/network_interface_asio.cpp18
-rw-r--r--src/mongo/executor/network_interface_asio.h9
-rw-r--r--src/mongo/executor/thread_pool_task_executor.cpp12
-rw-r--r--src/mongo/s/client/shard_registry.cpp6
4 files changed, 34 insertions, 11 deletions
diff --git a/src/mongo/executor/network_interface_asio.cpp b/src/mongo/executor/network_interface_asio.cpp
index ce7ec0f13b9..96ba61a2dd4 100644
--- a/src/mongo/executor/network_interface_asio.cpp
+++ b/src/mongo/executor/network_interface_asio.cpp
@@ -295,10 +295,10 @@ void NetworkInterfaceASIO::startCommand(const TaskExecutor::CallbackHandle& cbHa
// timeout duration - but make no stronger assumption. It is thus possible that
// we have already exceeded the timeout. In this case we timeout the operation
// manually.
- return _completeOperation(op,
- {ErrorCodes::ExceededTimeLimit,
- "Remote command timed out while waiting to get a "
- "connection from the pool."});
+ std::stringstream msg;
+ msg << "Remote command timed out while waiting to get a connection from the "
+ << "pool, took " << getConnectionDuration;
+ return _completeOperation(op, {ErrorCodes::ExceededTimeLimit, msg.str()});
}
// The above conditional guarantees that the adjusted timeout will never underflow.
@@ -318,7 +318,7 @@ void NetworkInterfaceASIO::startCommand(const TaskExecutor::CallbackHandle& cbHa
}
op->_timeoutAlarm->asyncWait(
- [this, op, access, generation, requestId](std::error_code ec) {
+ [this, op, access, generation, requestId, adjustedTimeout](std::error_code ec) {
if (!ec) {
// We must pass a check for safe access before using op inside the
// callback or we may attempt access on an invalid pointer.
@@ -328,12 +328,14 @@ void NetworkInterfaceASIO::startCommand(const TaskExecutor::CallbackHandle& cbHa
return;
}
- LOG(2) << "Operation " << requestId << " timed out.";
+ LOG(2) << "Operation " << requestId << " timed out"
+ << ", adjusted timeout after getting connection from pool was "
+ << adjustedTimeout << ", op was " << op->toString();
op->timeOut_inlock();
} else {
- LOG(2) << "Failed to time operation " << requestId
- << " out: " << ec.message();
+ LOG(2) << "Failed to time operation " << requestId << ", op was "
+ << op->toString() << ", out: " << ec.message();
}
});
}
diff --git a/src/mongo/executor/network_interface_asio.h b/src/mongo/executor/network_interface_asio.h
index 3a175830dc8..b4d1d05021d 100644
--- a/src/mongo/executor/network_interface_asio.h
+++ b/src/mongo/executor/network_interface_asio.h
@@ -443,9 +443,12 @@ private:
if (op->canceled())
return _completeOperation(op,
Status(ErrorCodes::CallbackCanceled, "Callback canceled"));
- if (op->timedOut())
- return _completeOperation(op,
- Status(ErrorCodes::ExceededTimeLimit, "Operation timed out"));
+ if (op->timedOut()) {
+ str::stream msg;
+ msg << "Operation timed out"
+ << ", request was " << op->_request.toString();
+ return _completeOperation(op, Status(ErrorCodes::ExceededTimeLimit, msg));
+ }
if (ec)
return _networkErrorCallback(op, ec);
diff --git a/src/mongo/executor/thread_pool_task_executor.cpp b/src/mongo/executor/thread_pool_task_executor.cpp
index ea43b5e09eb..9b9660ca5a0 100644
--- a/src/mongo/executor/thread_pool_task_executor.cpp
+++ b/src/mongo/executor/thread_pool_task_executor.cpp
@@ -266,6 +266,11 @@ StatusWith<TaskExecutor::CallbackHandle> ThreadPoolTaskExecutor::scheduleWorkAt(
}
namespace {
+
+// If the request received a connection from the pool but failed in its execution,
+// convert the raw Status in cbData to a StatusWith<RemoteCommandResponse> so that the callback,
+// which expects a StatusWith<RemoteCommandResponse> as part of RemoteCommandCallbackArgs,
+// can be run despite a RemoteCommandResponse never having been created.
void remoteCommandFinished(const TaskExecutor::CallbackArgs& cbData,
const TaskExecutor::RemoteCommandCallbackFn& cb,
const RemoteCommandRequest& request,
@@ -280,6 +285,10 @@ void remoteCommandFinished(const TaskExecutor::CallbackArgs& cbData,
}
}
+// If the request failed to receive a connection from the pool,
+// convert the raw Status in cbData to a StatusWith<RemoteCommandResponse> so that the callback,
+// which expects a StatusWith<RemoteCommandResponse> as part of RemoteCommandCallbackArgs,
+// can be run despite a RemoteCommandResponse never having been created.
void remoteCommandFailedEarly(const TaskExecutor::CallbackArgs& cbData,
const TaskExecutor::RemoteCommandCallbackFn& cb,
const RemoteCommandRequest& request) {
@@ -298,6 +307,9 @@ StatusWith<TaskExecutor::CallbackHandle> ThreadPoolTaskExecutor::scheduleRemoteC
} else {
scheduledRequest.expirationDate = _net->now() + scheduledRequest.timeout;
}
+
+ // In case the request fails to even get a connection from the pool,
+ // we wrap the callback in a method that prepares its input parameters.
auto wq = makeSingletonWorkQueue([scheduledRequest, cb](const CallbackArgs& cbData) {
remoteCommandFailedEarly(cbData, cb, scheduledRequest);
});
diff --git a/src/mongo/s/client/shard_registry.cpp b/src/mongo/s/client/shard_registry.cpp
index e985392f555..16c09248686 100644
--- a/src/mongo/s/client/shard_registry.cpp
+++ b/src/mongo/s/client/shard_registry.cpp
@@ -590,6 +590,9 @@ StatusWith<ShardRegistry::QueryResponse> ShardRegistry::_exhaustiveFindOnConfig(
updateReplSetMonitor(targeter, host.getValue(), status);
if (!status.isOK()) {
+ if (status.compareCode(ErrorCodes::ExceededTimeLimit)) {
+ LOG(0) << "Operation timed out with status " << status;
+ }
return status;
}
@@ -810,6 +813,9 @@ StatusWith<ShardRegistry::CommandResponse> ShardRegistry::_runCommandWithMetadat
if (!responseStatus.isOK()) {
updateReplSetMonitor(targeter, host.getValue(), responseStatus.getStatus());
+ if (responseStatus.getStatus().compareCode(ErrorCodes::ExceededTimeLimit)) {
+ LOG(0) << "Operation timed out with status " << responseStatus.getStatus();
+ }
return responseStatus.getStatus();
}