summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- buildscripts/resmokeconfig/suites/sharding_last_stable_mongos.yml 3
-rw-r--r-- jstests/sharding/cursor_timeout.js 27
-rw-r--r-- src/mongo/db/clientcursor.cpp 4
-rw-r--r-- src/mongo/s/cluster_cursor_stats.cpp 25
-rw-r--r-- src/mongo/s/query/cluster_cursor_cleanup_job.cpp 7
-rw-r--r-- src/mongo/s/query/cluster_cursor_manager.cpp 27
-rw-r--r-- src/mongo/s/query/cluster_cursor_manager.h 23
7 files changed, 90 insertions, 26 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos.yml
index 562cefc1d1e..166bc7a35bc 100644
--- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos.yml
@@ -36,6 +36,9 @@ selector:
# TODO Assumes shardCollection can handle the collation option; enable when 3.4 becomes
# 'last-stable'.
- jstests/sharding/shard_collection_basic.js
+ # TODO Requires mongos to have the 'clientCursorMonitorFrequencySecs' server parameter; enable
+ # when 3.4 becomes 'last-stable'.
+ - jstests/sharding/cursor_timeout.js
executor:
js_test:
diff --git a/jstests/sharding/cursor_timeout.js b/jstests/sharding/cursor_timeout.js
index cea17d93dcc..90097261eb9 100644
--- a/jstests/sharding/cursor_timeout.js
+++ b/jstests/sharding/cursor_timeout.js
@@ -15,8 +15,14 @@
shards: 2,
other: {
chunkSize: 1,
- shardOptions: {setParameter: "cursorTimeoutMillis=1000"},
- mongosOptions: {setParameter: "cursorTimeoutMillis=1000"}
+ shardOptions: {
+ verbose: 1,
+ setParameter: {cursorTimeoutMillis: 1000, clientCursorMonitorFrequencySecs: 1}
+ },
+ mongosOptions: {
+ verbose: 1,
+ setParameter: {cursorTimeoutMillis: 1000, clientCursorMonitorFrequencySecs: 1}
+ }
}
});
@@ -66,9 +72,20 @@
cursorWithNoTimeout.next();
// Wait until the idle cursor background job has killed the cursors that do not have the "no
- // timeout" flag set. We use the "cursorTimeoutMillis" setParameter above to reduce the amount
- // of time we need to wait here.
- sleep(5000);
+ // timeout" flag set. We use the "cursorTimeoutMillis" and "clientCursorMonitorFrequencySecs"
+ // setParameters above to reduce the amount of time we need to wait here.
+ assert.soon(function() {
+ return coll.getDB().serverStatus().metrics.cursor.timedOut > 0;
+ }, "sharded cursor failed to time out", 5000);
+
+ // Wait for the shard to have two open cursors on it (shardedCursorWithNoTimeout and
+ // cursorWithNoTimeout).
+ // We cannot reliably use metrics.cursor.timedOut here, because this will be 2 if
+ // shardedCursorWithTimeout is killed for timing out on the shard, and 1 if
+ // shardedCursorWithTimeout is killed by a killCursors command from the mongos.
+ assert.soon(function() {
+ return shardColl.getDB().serverStatus().metrics.cursor.open.total == 2;
+ }, "cursor failed to time out", 5000);
assert.throws(function() {
shardedCursorWithTimeout.itcount();
diff --git a/src/mongo/db/clientcursor.cpp b/src/mongo/db/clientcursor.cpp
index a4f80e44319..4cc2dd643a4 100644
--- a/src/mongo/db/clientcursor.cpp
+++ b/src/mongo/db/clientcursor.cpp
@@ -71,6 +71,7 @@ static ServerStatusMetricField<Counter64> dCursorStatusTimedout("cursor.timedOut
&cursorStatsTimedOut);
MONGO_EXPORT_SERVER_PARAMETER(cursorTimeoutMillis, int, 10 * 60 * 1000 /* 10 minutes */);
+MONGO_EXPORT_SERVER_PARAMETER(clientCursorMonitorFrequencySecs, int, 4);
long long ClientCursor::totalOpen() {
return cursorStatsOpen.get();
@@ -263,7 +264,6 @@ public:
void run() {
Client::initThread("clientcursormon");
Timer t;
- const int Secs = 4;
while (!inShutdown()) {
{
const ServiceContext::UniqueOperationContext txnPtr = cc().makeOperationContext();
@@ -271,7 +271,7 @@ public:
cursorStatsTimedOut.increment(
CursorManager::timeoutCursorsGlobal(&txn, t.millisReset()));
}
- sleepsecs(Secs);
+ sleepsecs(clientCursorMonitorFrequencySecs);
}
}
};
diff --git a/src/mongo/s/cluster_cursor_stats.cpp b/src/mongo/s/cluster_cursor_stats.cpp
index 03ba9fc26d0..db72b245795 100644
--- a/src/mongo/s/cluster_cursor_stats.cpp
+++ b/src/mongo/s/cluster_cursor_stats.cpp
@@ -41,18 +41,23 @@ namespace {
class ClusterCursorStats final : public ServerStatusMetric {
public:
- ClusterCursorStats() : ServerStatusMetric("cursor.open") {}
+ ClusterCursorStats() : ServerStatusMetric("cursor") {}
void appendAtLeaf(BSONObjBuilder& b) const final {
- BSONObjBuilder openBob(b.subobjStart(_leafName));
- auto stats = grid.getCursorManager()->stats();
-
- openBob.append("multiTarget", static_cast<long long>(stats.cursorsSharded));
- openBob.append("singleTarget", static_cast<long long>(stats.cursorsNotSharded));
- openBob.append("pinned", static_cast<long long>(stats.cursorsPinned));
- openBob.append("total",
- static_cast<long long>(stats.cursorsSharded + stats.cursorsNotSharded));
- openBob.done();
+ BSONObjBuilder cursorBob(b.subobjStart(_leafName));
+ cursorBob.append("timedOut",
+ static_cast<long long>(grid.getCursorManager()->cursorsTimedOut()));
+ {
+ BSONObjBuilder openBob(cursorBob.subobjStart("open"));
+ auto stats = grid.getCursorManager()->stats();
+ openBob.append("multiTarget", static_cast<long long>(stats.cursorsSharded));
+ openBob.append("singleTarget", static_cast<long long>(stats.cursorsNotSharded));
+ openBob.append("pinned", static_cast<long long>(stats.cursorsPinned));
+ openBob.append("total",
+ static_cast<long long>(stats.cursorsSharded + stats.cursorsNotSharded));
+ openBob.doneFast();
+ }
+ cursorBob.done();
}
} clusterCursorStats;
diff --git a/src/mongo/s/query/cluster_cursor_cleanup_job.cpp b/src/mongo/s/query/cluster_cursor_cleanup_job.cpp
index a7e4159c9b5..068d06ac98a 100644
--- a/src/mongo/s/query/cluster_cursor_cleanup_job.cpp
+++ b/src/mongo/s/query/cluster_cursor_cleanup_job.cpp
@@ -51,6 +51,9 @@ ExportedServerParameter<long long, ServerParameterType::kStartupAndRuntime>
"cursorTimeoutMillis",
&cursorTimeoutMillis);
+// Interval, in seconds, between successive runs of ClusterCursorCleanupJob.
+MONGO_EXPORT_SERVER_PARAMETER(clientCursorMonitorFrequencySecs, long long, 4);
+
} // namespace
ClusterCursorCleanupJob clusterCursorCleanupJob;
@@ -67,8 +70,8 @@ void ClusterCursorCleanupJob::run() {
while (!inShutdown()) {
manager->killMortalCursorsInactiveSince(Date_t::now() -
Milliseconds(cursorTimeoutMillis.load()));
- manager->reapZombieCursors();
- sleepFor(Seconds(4));
+ manager->incrementCursorsTimedOut(manager->reapZombieCursors());
+ sleepsecs(clientCursorMonitorFrequencySecs);
}
}
diff --git a/src/mongo/s/query/cluster_cursor_manager.cpp b/src/mongo/s/query/cluster_cursor_manager.cpp
index 3c84fa7fa54..924a472d7a5 100644
--- a/src/mongo/s/query/cluster_cursor_manager.cpp
+++ b/src/mongo/s/query/cluster_cursor_manager.cpp
@@ -332,6 +332,7 @@ void ClusterCursorManager::killMortalCursorsInactiveSince(Date_t cutoff) {
CursorEntry& entry = cursorIdEntryPair.second;
if (entry.getLifetimeType() == CursorLifetime::Mortal &&
entry.getLastActive() <= cutoff) {
+ entry.setInactive();
log() << "Marking cursor id " << cursorIdEntryPair.first
<< " for deletion, idle since " << entry.getLastActive().toString();
entry.setKillPending();
@@ -350,13 +351,22 @@ void ClusterCursorManager::killAllCursors() {
}
}
-void ClusterCursorManager::reapZombieCursors() {
+std::size_t ClusterCursorManager::reapZombieCursors() {
+ struct CursorDescriptor {
+ CursorDescriptor(NamespaceString ns, CursorId cursorId, bool isInactive)
+ : ns(std::move(ns)), cursorId(cursorId), isInactive(isInactive) {}
+
+ NamespaceString ns;
+ CursorId cursorId;
+ bool isInactive;
+ };
+
// List all zombie cursors under the manager lock, and kill them one-by-one while not holding
// the lock (ClusterClientCursor::kill() is blocking, so we don't want to hold a lock while
// issuing the kill).
stdx::unique_lock<stdx::mutex> lk(_mutex);
- std::vector<std::pair<NamespaceString, CursorId>> zombieCursorDescriptors;
+ std::vector<CursorDescriptor> zombieCursorDescriptors;
for (auto& nsContainerPair : _namespaceToContainerMap) {
const NamespaceString& nss = nsContainerPair.first;
for (auto& cursorIdEntryPair : nsContainerPair.second.entryMap) {
@@ -365,13 +375,15 @@ void ClusterCursorManager::reapZombieCursors() {
if (!entry.getKillPending()) {
continue;
}
- zombieCursorDescriptors.emplace_back(nss, cursorId);
+ zombieCursorDescriptors.emplace_back(nss, cursorId, entry.isInactive());
}
}
- for (auto& namespaceCursorIdPair : zombieCursorDescriptors) {
+ std::size_t cursorsTimedOut = 0;
+
+ for (auto& cursorDescriptor : zombieCursorDescriptors) {
StatusWith<std::unique_ptr<ClusterClientCursor>> zombieCursor =
- detachCursor_inlock(namespaceCursorIdPair.first, namespaceCursorIdPair.second);
+ detachCursor_inlock(cursorDescriptor.ns, cursorDescriptor.cursorId);
if (!zombieCursor.isOK()) {
// Cursor in use, or has already been deleted.
continue;
@@ -381,7 +393,12 @@ void ClusterCursorManager::reapZombieCursors() {
zombieCursor.getValue()->kill();
zombieCursor.getValue().reset();
lk.lock();
+
+ if (cursorDescriptor.isInactive) {
+ ++cursorsTimedOut;
+ }
}
+ return cursorsTimedOut;
}
ClusterCursorManager::Stats ClusterCursorManager::stats() const {
diff --git a/src/mongo/s/query/cluster_cursor_manager.h b/src/mongo/s/query/cluster_cursor_manager.h
index 0561163b26b..7770cc741c8 100644
--- a/src/mongo/s/query/cluster_cursor_manager.h
+++ b/src/mongo/s/query/cluster_cursor_manager.h
@@ -309,7 +309,7 @@ public:
/**
* Attempts to perform a blocking kill and deletion of all non-pinned cursors that are marked
- * as 'kill pending'.
+ * as 'kill pending'. Returns how many of the reaped cursors had been marked as inactive.
*
* If no other non-const methods are called simultaneously, it is guaranteed that this method
* will delete all non-pinned cursors marked as 'kill pending'. Otherwise, no such guarantee is
@@ -318,7 +318,7 @@ public:
*
* Can block.
*/
- void reapZombieCursors();
+ std::size_t reapZombieCursors();
/**
* Returns the number of open cursors on a ClusterCursorManager, broken down by type.
@@ -340,6 +340,14 @@ public:
*/
boost::optional<NamespaceString> getNamespaceForCursorId(CursorId cursorId) const;
+ void incrementCursorsTimedOut(size_t inc) {
+ _cursorsTimedOut += inc;
+ }
+
+ size_t cursorsTimedOut() const {
+ return _cursorsTimedOut;
+ }
+
private:
class CursorEntry;
using CursorEntryMap = std::unordered_map<CursorId, CursorEntry>;
@@ -410,6 +418,10 @@ private:
return _killPending;
}
+ bool isInactive() const {
+ return _isInactive;
+ }
+
CursorType getCursorType() const {
return _cursorType;
}
@@ -447,6 +459,10 @@ private:
_killPending = true;
}
+ void setInactive() {
+ _isInactive = true;
+ }
+
void setLastActive(Date_t lastActive) {
_lastActive = lastActive;
}
@@ -454,6 +470,7 @@ private:
private:
std::unique_ptr<ClusterClientCursor> _cursor;
bool _killPending = false;
+ bool _isInactive = false;
CursorType _cursorType = CursorType::NamespaceNotSharded;
CursorLifetime _cursorLifetime = CursorLifetime::Mortal;
Date_t _lastActive;
@@ -508,6 +525,8 @@ private:
// when the last cursor on the given namespace is destroyed.
std::unordered_map<NamespaceString, CursorEntryContainer, NamespaceString::Hasher>
_namespaceToContainerMap;
+
+ size_t _cursorsTimedOut = 0;
};
} // namespace