summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Carey <jcarey@argv.me>2016-12-12 15:22:01 -0500
committerJason Carey <jcarey@argv.me>2016-12-13 16:09:47 -0500
commit5e103c4f5583e2566a45d740225dc250baacfbd7 (patch)
treebc23152b120a3ed38c453bd42cd04fff09e9e94a
parent63fa79b30347475962f1ca7ba8858c57baf33925 (diff)
downloadmongo-5e103c4f5583e2566a45d740225dc250baacfbd7.tar.gz
SERVER-27388 Add NetworkInterfaceExceededTimeLimitr3.4.1-rc0r3.4.1
max_time_ms_sharded.js uses a fail point to trigger immediate ExceededTimeLimit returns from commands. This triggers during connection establishment via the connection hook, which returns ExceededTimeLimit to the connection pool and causes us to spin on connection creation. The fix involves providing our own internal exceeded time limit that doesn't collide with the remote value. (cherry picked from commit 5d32ae99ab086e696bc9c5610c9f93220481e596)
-rw-r--r--src/mongo/base/error_codes.err1
-rw-r--r--src/mongo/executor/connection_pool.cpp50
-rw-r--r--src/mongo/executor/connection_pool_test.cpp4
-rw-r--r--src/mongo/executor/connection_pool_test_fixture.cpp4
-rw-r--r--src/mongo/executor/network_interface_asio.cpp8
-rw-r--r--src/mongo/executor/network_interface_asio.h5
-rw-r--r--src/mongo/executor/network_interface_asio_command.cpp9
-rw-r--r--src/mongo/executor/network_interface_asio_operation.cpp9
8 files changed, 52 insertions, 38 deletions
diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err
index 54bc47d0182..280f83133a9 100644
--- a/src/mongo/base/error_codes.err
+++ b/src/mongo/base/error_codes.err
@@ -199,6 +199,7 @@ error_code("OBSOLETE_ReceivedOpReplyMessage", 198)
error_code("ReplicaSetMonitorRemoved", 199)
error_code("ChunkRangeCleanupPending", 200)
error_code("CannotBuildIndexKeys", 201)
+error_code("NetworkInterfaceExceededTimeLimit", 202)
# Non-sequential error codes (for compatibility only)
error_code("SocketException", 9001)
diff --git a/src/mongo/executor/connection_pool.cpp b/src/mongo/executor/connection_pool.cpp
index 92fa55aeaf3..188772d8aee 100644
--- a/src/mongo/executor/connection_pool.cpp
+++ b/src/mongo/executor/connection_pool.cpp
@@ -360,7 +360,7 @@ void ConnectionPool::SpecificPool::returnConnection(ConnectionInterface* connPtr
// If we've exceeded the time limit, start a new connect, rather than
// failing all operations. We do this because the various callers have
// their own time limit which is unrelated to our internal one.
- if (status.code() == ErrorCodes::ExceededTimeLimit) {
+ if (status.code() == ErrorCodes::NetworkInterfaceExceededTimeLimit) {
spawnConnections(lk);
return;
}
@@ -542,29 +542,29 @@ void ConnectionPool::SpecificPool::spawnConnections(stdx::unique_lock<stdx::mute
// Run the setup callback
lk.unlock();
- connPtr->setup(_parent->_options.refreshTimeout,
- [this](ConnectionInterface* connPtr, Status status) {
- connPtr->indicateUsed();
-
- stdx::unique_lock<stdx::mutex> lk(_parent->_mutex);
-
- auto conn = takeFromProcessingPool(connPtr);
-
- if (conn->getGeneration() != _generation) {
- // If the host and port was dropped, let the
- // connection lapse
- } else if (status.isOK()) {
- addToReady(lk, std::move(conn));
- } else if (status.code() == ErrorCodes::ExceededTimeLimit) {
- // If we've exceeded the time limit, restart the connect, rather than
- // failing all operations. We do this because the various callers
- // have their own time limit which is unrelated to our internal one.
- spawnConnections(lk);
- } else {
- // If the setup failed, cascade the failure edge
- processFailure(status, std::move(lk));
- }
- });
+ connPtr->setup(
+ _parent->_options.refreshTimeout, [this](ConnectionInterface* connPtr, Status status) {
+ connPtr->indicateUsed();
+
+ stdx::unique_lock<stdx::mutex> lk(_parent->_mutex);
+
+ auto conn = takeFromProcessingPool(connPtr);
+
+ if (conn->getGeneration() != _generation) {
+ // If the host and port was dropped, let the
+ // connection lapse
+ } else if (status.isOK()) {
+ addToReady(lk, std::move(conn));
+ } else if (status.code() == ErrorCodes::NetworkInterfaceExceededTimeLimit) {
+ // If we've exceeded the time limit, restart the connect, rather than
+ // failing all operations. We do this because the various callers
+ // have their own time limit which is unrelated to our internal one.
+ spawnConnections(lk);
+ } else {
+ // If the setup failed, cascade the failure edge
+ processFailure(status, std::move(lk));
+ }
+ });
// Note that this assumes that the refreshTimeout is sound for the
// setupTimeout
@@ -663,7 +663,7 @@ void ConnectionPool::SpecificPool::updateStateInLock() {
_requests.pop();
lk.unlock();
- cb(Status(ErrorCodes::ExceededTimeLimit,
+ cb(Status(ErrorCodes::NetworkInterfaceExceededTimeLimit,
"Couldn't get a connection within the time limit"));
lk.lock();
} else {
diff --git a/src/mongo/executor/connection_pool_test.cpp b/src/mongo/executor/connection_pool_test.cpp
index 8b7a492db5e..6a857602ca6 100644
--- a/src/mongo/executor/connection_pool_test.cpp
+++ b/src/mongo/executor/connection_pool_test.cpp
@@ -867,7 +867,7 @@ TEST_F(ConnectionPoolTest, SetupTimeoutsDontTimeoutUnrelatedRequests) {
ASSERT(conn1);
ASSERT(!conn1->isOK());
- ASSERT(conn1->getStatus().code() == ErrorCodes::ExceededTimeLimit);
+ ASSERT(conn1->getStatus().code() == ErrorCodes::NetworkInterfaceExceededTimeLimit);
ASSERT(!conn2);
}
@@ -920,7 +920,7 @@ TEST_F(ConnectionPoolTest, RefreshTimeoutsDontTimeoutRequests) {
ASSERT(conn1);
ASSERT(!conn1->isOK());
- ASSERT(conn1->getStatus().code() == ErrorCodes::ExceededTimeLimit);
+ ASSERT(conn1->getStatus().code() == ErrorCodes::NetworkInterfaceExceededTimeLimit);
ASSERT(!conn2);
}
diff --git a/src/mongo/executor/connection_pool_test_fixture.cpp b/src/mongo/executor/connection_pool_test_fixture.cpp
index 53badbbe1f3..8c193f39e06 100644
--- a/src/mongo/executor/connection_pool_test_fixture.cpp
+++ b/src/mongo/executor/connection_pool_test_fixture.cpp
@@ -160,7 +160,7 @@ void ConnectionImpl::setup(Milliseconds timeout, SetupCallback cb) {
_setupCallback = std::move(cb);
_timer.setTimeout(timeout, [this] {
- _setupCallback(this, Status(ErrorCodes::ExceededTimeLimit, "timeout"));
+ _setupCallback(this, Status(ErrorCodes::NetworkInterfaceExceededTimeLimit, "timeout"));
});
_setupQueue.push_back(this);
@@ -176,7 +176,7 @@ void ConnectionImpl::refresh(Milliseconds timeout, RefreshCallback cb) {
_refreshCallback = std::move(cb);
_timer.setTimeout(timeout, [this] {
- _refreshCallback(this, Status(ErrorCodes::ExceededTimeLimit, "timeout"));
+ _refreshCallback(this, Status(ErrorCodes::NetworkInterfaceExceededTimeLimit, "timeout"));
});
_refreshQueue.push_back(this);
diff --git a/src/mongo/executor/network_interface_asio.cpp b/src/mongo/executor/network_interface_asio.cpp
index 74e677acb6d..1fec733704d 100644
--- a/src/mongo/executor/network_interface_asio.cpp
+++ b/src/mongo/executor/network_interface_asio.cpp
@@ -287,6 +287,9 @@ Status NetworkInterfaceASIO::startCommand(const TaskExecutor::CallbackHandle& cb
Status status = wasPreviouslyCanceled
? Status(ErrorCodes::CallbackCanceled, "Callback canceled")
: swConn.getStatus();
+ if (status.code() == ErrorCodes::NetworkInterfaceExceededTimeLimit) {
+ status = Status(ErrorCodes::ExceededTimeLimit, status.reason());
+ }
if (status.code() == ErrorCodes::ExceededTimeLimit) {
_numTimedOutOps.fetchAndAdd(1);
}
@@ -363,8 +366,9 @@ Status NetworkInterfaceASIO::startCommand(const TaskExecutor::CallbackHandle& cb
msg << "Remote command timed out while waiting to get a connection from the "
<< "pool, took " << getConnectionDuration << ", timeout was set to "
<< timeout;
- auto rs = ResponseStatus(
- ErrorCodes::ExceededTimeLimit, msg.str(), getConnectionDuration);
+ auto rs = ResponseStatus(ErrorCodes::NetworkInterfaceExceededTimeLimit,
+ msg.str(),
+ getConnectionDuration);
return _completeOperation(op, rs);
}
diff --git a/src/mongo/executor/network_interface_asio.h b/src/mongo/executor/network_interface_asio.h
index 3fcd2e42771..59547b4db60 100644
--- a/src/mongo/executor/network_interface_asio.h
+++ b/src/mongo/executor/network_interface_asio.h
@@ -286,7 +286,7 @@ private:
AsyncCommand* command();
- void finish(const TaskExecutor::ResponseStatus& status);
+ void finish(TaskExecutor::ResponseStatus&& status);
const RemoteCommandRequest& request() const;
@@ -427,7 +427,8 @@ private:
str::stream msg;
msg << "Operation timed out"
<< ", request was " << op->_request.toString();
- auto rs = ResponseStatus(ErrorCodes::ExceededTimeLimit, msg, now() - op->start());
+ auto rs = ResponseStatus(
+ ErrorCodes::NetworkInterfaceExceededTimeLimit, msg, now() - op->start());
return _completeOperation(op, rs);
} else if (ec)
return _networkErrorCallback(op, ec);
diff --git a/src/mongo/executor/network_interface_asio_command.cpp b/src/mongo/executor/network_interface_asio_command.cpp
index 036b2954ee0..c72a7b74505 100644
--- a/src/mongo/executor/network_interface_asio_command.cpp
+++ b/src/mongo/executor/network_interface_asio_command.cpp
@@ -283,7 +283,8 @@ void NetworkInterfaceASIO::_completeOperation(AsyncOp* op, ResponseStatus resp)
op->_timeoutAlarm->cancel();
}
- if (resp.status.code() == ErrorCodes::ExceededTimeLimit) {
+ if (resp.status.code() == ErrorCodes::ExceededTimeLimit ||
+ resp.status.code() == ErrorCodes::NetworkInterfaceExceededTimeLimit) {
_numTimedOutOps.fetchAndAdd(1);
}
@@ -293,7 +294,7 @@ void NetworkInterfaceASIO::_completeOperation(AsyncOp* op, ResponseStatus resp)
// If we fail during connection, we won't be able to access any of op's members after
// calling finish(), so we return here.
log() << "Failed to connect to " << op->request().target << " - " << resp.status;
- op->finish(resp);
+ op->finish(std::move(resp));
return;
}
@@ -305,7 +306,7 @@ void NetworkInterfaceASIO::_completeOperation(AsyncOp* op, ResponseStatus resp)
log() << "Failed asio heartbeat to " << op->request().target << " - "
<< redact(resp.status);
_numFailedOps.fetchAndAdd(1);
- op->finish(resp);
+ op->finish(std::move(resp));
return;
}
@@ -337,7 +338,7 @@ void NetworkInterfaceASIO::_completeOperation(AsyncOp* op, ResponseStatus resp)
_inProgress.erase(iter);
}
- op->finish(resp);
+ op->finish(std::move(resp));
MONGO_ASIO_INVARIANT(static_cast<bool>(ownedOp), "Invalid AsyncOp", op);
diff --git a/src/mongo/executor/network_interface_asio_operation.cpp b/src/mongo/executor/network_interface_asio_operation.cpp
index f56c3be6e16..b300080ce76 100644
--- a/src/mongo/executor/network_interface_asio_operation.cpp
+++ b/src/mongo/executor/network_interface_asio_operation.cpp
@@ -191,13 +191,20 @@ NetworkInterfaceASIO::AsyncCommand* NetworkInterfaceASIO::AsyncOp::command() {
return _command.get_ptr();
}
-void NetworkInterfaceASIO::AsyncOp::finish(const ResponseStatus& rs) {
+void NetworkInterfaceASIO::AsyncOp::finish(ResponseStatus&& rs) {
// We never hold the access lock when we call finish from NetworkInterfaceASIO.
_transitionToState(AsyncOp::State::kFinished);
LOG(2) << "Request " << _request.id << " finished with response: "
<< redact(rs.isOK() ? rs.data.toString() : rs.status.toString());
+ // Our own internally generated time outs have a different error code to allow us to retry
+ // internal timeouts. But the outside world (and our tests) still expect the one true
+ // ExceededTimeLimit, so we convert back here so they get what they expect.
+ if (!rs.isOK() && rs.status.code() == ErrorCodes::NetworkInterfaceExceededTimeLimit) {
+ rs.status = Status(ErrorCodes::ExceededTimeLimit, rs.status.reason());
+ }
+
// Calling the completion handler may invalidate state in this op, so do it last.
_onFinish(rs);
}