author     Maddie Zechar <mez2113@columbia.edu>    2023-02-07 00:26:39 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>    2023-02-07 01:35:20 +0000
commit     967a6745ba487edaff558e36fdc572c6ca066fea (patch)
tree       47f9bf12fe59259e194deb63ee9b2d5736c7a551
parent     3229175bb05be607fab3bef19e4ce1b17ab6f0e4 (diff)
download   mongo-967a6745ba487edaff558e36fdc572c6ca066fea.tar.gz
SERVER-71882 Add serverStatus metric for the number of entries evicted from the telemetry store
-rw-r--r--  jstests/telemetry/telemetry_server_status_metrics.js | 80
-rw-r--r--  src/mongo/db/query/classic_plan_cache.h | 2
-rw-r--r--  src/mongo/db/query/lru_key_value.h | 28
-rw-r--r--  src/mongo/db/query/lru_key_value_test.cpp | 8
-rw-r--r--  src/mongo/db/query/partitioned_cache.h | 32
-rw-r--r--  src/mongo/db/query/plan_cache.h | 6
-rw-r--r--  src/mongo/db/query/sbe_plan_cache.h | 5
-rw-r--r--  src/mongo/db/query/telemetry.cpp | 9
-rw-r--r--  src/mongo/db/query/telemetry.h | 14
-rw-r--r--  src/mongo/db/query/telemetry_store_test.cpp | 23
10 files changed, 154 insertions, 53 deletions
diff --git a/jstests/telemetry/telemetry_server_status_metrics.js b/jstests/telemetry/telemetry_server_status_metrics.js
new file mode 100644
index 00000000000..2f31d4bd9ba
--- /dev/null
+++ b/jstests/telemetry/telemetry_server_status_metrics.js
@@ -0,0 +1,80 @@
+/**
+ * Test the telemetry related serverStatus metrics.
+ */
+load('jstests/libs/analyze_plan.js');
+load("jstests/libs/feature_flag_util.js");
+
+(function() {
+"use strict";
+
+if (!FeatureFlagUtil.isEnabled(db, "Telemetry")) {
+ return;
+}
+
+function runTestWithMongodOptions(options, test, resetCacheSize) {
+ const conn = MongoRunner.runMongod(options);
+ const testDB = conn.getDB('test');
+ const coll = testDB[jsTestName()];
+
+ test(conn, testDB, coll, resetCacheSize);
+
+ MongoRunner.stopMongod(conn);
+}
+
+// Test serverStatus metric which counts the number of evicted
+// entries.
+function evictionTest(conn, testDB, coll, resetCacheSize) {
+ const evictedBefore = testDB.serverStatus().metrics.telemetry.numEvicted;
+ assert.eq(evictedBefore, 0);
+ for (var i = 0; i < 100; i++) {
+ let query = {};
+ for (var j = 0; j < 25; ++j) {
+ query["foo.field.xyz." + i + "." + j] = 1;
+ query["bar.field.xyz." + i + "." + j] = 2;
+ query["baz.field.xyz." + i + "." + j] = 3;
+ }
+ coll.aggregate([{$match: query}]).itcount();
+ }
+ if (!resetCacheSize) {
+ const evictedAfter = testDB.serverStatus().metrics.telemetry.numEvicted;
+ assert.gt(evictedAfter, 0);
+ return;
+ }
+ // Make sure the number of evicted entries increases when the cache size is reset, which
+ // forces out the least recently used entries to meet the new, smaller size requirement.
+ assert.eq(testDB.serverStatus().metrics.telemetry.numEvicted, 0);
+ assert.commandWorked(
+ testDB.adminCommand({setParameter: 1, internalQueryConfigureTelemetryCacheSize: "2MB"}));
+ const evictedAfter = testDB.serverStatus().metrics.telemetry.numEvicted;
+ assert.gt(evictedAfter, 0);
+}
+
+/**
+ * In this configuration, every query is sampled. Each query has a key (~2200-2300 bytes) and a
+ * value (208 bytes). With a cache size of 3MB, there are 1024 partitions, each of max size ~3072
+ * bytes. Each partition will only be able to fit one entry, i.e., a partition with one entry is
+ * considered full. When a second query shape/key falls into an already full partition, it will
+ * have to evict the original entry.
+ *
+ */
+runTestWithMongodOptions({
+ setParameter: {
+ internalQueryConfigureTelemetryCacheSize: "3MB",
+ internalQueryConfigureTelemetrySamplingRate: 2147483647
+ },
+},
+ evictionTest,
+ false);
+/**
+ * In this configuration, every query is sampled. Due to the large initial cache size, entries
+ * should only be evicted once the cache size is reset after telemetry metric collection finishes.
+ */
+runTestWithMongodOptions({
+ setParameter: {
+ internalQueryConfigureTelemetryCacheSize: "10MB",
+ internalQueryConfigureTelemetrySamplingRate: 2147483647
+ },
+},
+ evictionTest,
+ true);
+}()); \ No newline at end of file
diff --git a/src/mongo/db/query/classic_plan_cache.h b/src/mongo/db/query/classic_plan_cache.h
index 8d2e6889bfb..be87fc41763 100644
--- a/src/mongo/db/query/classic_plan_cache.h
+++ b/src/mongo/db/query/classic_plan_cache.h
@@ -235,7 +235,7 @@ using PlanCacheEntry = PlanCacheEntryBase<SolutionCacheData, plan_cache_debug_in
using CachedSolution = CachedPlanHolder<SolutionCacheData, plan_cache_debug_info::DebugInfo>;
struct BudgetEstimator {
- size_t operator()(const std::shared_ptr<const PlanCacheEntry>&) {
+ size_t operator()(const PlanCacheKey&, const std::shared_ptr<const PlanCacheEntry>&) {
return 1;
}
};
diff --git a/src/mongo/db/query/lru_key_value.h b/src/mongo/db/query/lru_key_value.h
index f4aec9fafbf..265379d4c18 100644
--- a/src/mongo/db/query/lru_key_value.h
+++ b/src/mongo/db/query/lru_key_value.h
@@ -28,7 +28,6 @@
*/
#pragma once
-
#include <fmt/format.h>
#include <list>
#include <memory>
@@ -45,18 +44,18 @@ namespace mongo {
* or any other value defined by the template parameter 'Estimator'.
* The 'Estimator' must be deterministic and always return the same value for the same entry.
*/
-template <typename V, typename Estimator>
+template <class K, class V, typename Estimator>
class LRUBudgetTracker {
public:
LRUBudgetTracker(size_t maxBudget) : _max(maxBudget), _current(0) {}
- void onAdd(const V& v) {
- _current += _estimator(v);
+ void onAdd(const K& k, const V& v) {
+ _current += _estimator(k, v);
}
- void onRemove(const V& v) {
+ void onRemove(const K& k, const V& v) {
using namespace fmt::literals;
- size_t budget = _estimator(v);
+ size_t budget = _estimator(k, v);
tassert(5968300,
"LRU budget underflow: current={}, budget={} "_format(_current, budget),
_current >= budget);
@@ -104,7 +103,7 @@ private:
*/
template <class K,
class V,
- class BudgetEstimator,
+ class KeyValueBudgetEstimator,
class KeyHasher = std::hash<K>,
class Eq = std::equal_to<K>>
class LRUKeyValue {
@@ -140,12 +139,12 @@ public:
KVMapConstIt i = _kvMap.find(key);
if (i != _kvMap.end()) {
KVListIt found = i->second;
- _budgetTracker.onRemove(found->second);
+ _budgetTracker.onRemove(key, found->second);
_kvMap.erase(i);
_kvList.erase(found);
}
- _budgetTracker.onAdd(entry);
+ _budgetTracker.onAdd(key, entry);
_kvList.push_front(std::make_pair(key, std::move(entry)));
_kvMap[key] = _kvList.begin();
@@ -183,7 +182,7 @@ public:
return false;
}
KVListIt found = i->second;
- _budgetTracker.onRemove(found->second);
+ _budgetTracker.onRemove(key, found->second);
_kvMap.erase(i);
_kvList.erase(found);
return true;
@@ -198,7 +197,7 @@ public:
size_t removed = 0;
for (auto it = _kvList.begin(); it != _kvList.end();) {
if (predicate(it->first, *it->second)) {
- _budgetTracker.onRemove(it->second);
+ _budgetTracker.onRemove(it->first, it->second);
_kvMap.erase(it->first);
it = _kvList.erase(it);
++removed;
@@ -213,9 +212,9 @@ public:
* Deletes all entries in the kv-store.
*/
void clear() {
- _budgetTracker.onClear();
_kvList.clear();
_kvMap.clear();
+ _budgetTracker.onClear();
}
/**
@@ -262,7 +261,7 @@ private:
while (_budgetTracker.isOverBudget()) {
invariant(!_kvList.empty());
- _budgetTracker.onRemove(_kvList.back().second);
+ _budgetTracker.onRemove(_kvList.back().first, _kvList.back().second);
_kvMap.erase(_kvList.back().first);
_kvList.pop_back();
@@ -272,13 +271,14 @@ private:
return nEvicted;
}
- LRUBudgetTracker<V, BudgetEstimator> _budgetTracker;
+ LRUBudgetTracker<K, V, KeyValueBudgetEstimator> _budgetTracker;
// (K, V) pairs are stored in this std::list. They are sorted in order of use, where the front
// is the most recently used and the back is the least recently used.
mutable KVList _kvList;
// Maps from a key to the corresponding std::list entry.
+ // TODO: SERVER-73659 LRUKeyValue should track and include the size of _kvMap in overall budget.
mutable KVMap _kvMap;
};
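The essence of the lru_key_value.h change is that the budget estimator now receives the key alongside the value, so key storage can be charged against the budget as well. A minimal, self-contained sketch of that pattern with invented names follows; the real LRUBudgetTracker additionally tasserts on underflow and exposes onClear()/isOverBudget() as shown in the diff above.

#include <cstddef>
#include <iostream>
#include <string>

// Estimator charges both key bytes and value bytes against the budget.
struct StringKeyValueEstimator {
    std::size_t operator()(const std::string& key, const std::string& value) const {
        return key.size() + value.size();
    }
};

template <class K, class V, class Estimator>
class BudgetTracker {
public:
    explicit BudgetTracker(std::size_t maxBudget) : _max(maxBudget) {}

    void onAdd(const K& k, const V& v) {
        _current += _estimator(k, v);
    }
    void onRemove(const K& k, const V& v) {
        _current -= _estimator(k, v);
    }
    bool isOverBudget() const {
        return _current > _max;
    }

private:
    std::size_t _max;
    std::size_t _current = 0;
    Estimator _estimator;
};

int main() {
    BudgetTracker<std::string, std::string, StringKeyValueEstimator> tracker(16);
    tracker.onAdd("shape1", "metrics");                             // 6 + 7 = 13 bytes used
    std::cout << std::boolalpha << tracker.isOverBudget() << "\n";  // false
    tracker.onAdd("shape2", "metrics");                             // 26 bytes used, budget is 16
    std::cout << tracker.isOverBudget() << "\n";                    // true: caller evicts LRU entries
    return 0;
}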
diff --git a/src/mongo/db/query/lru_key_value_test.cpp b/src/mongo/db/query/lru_key_value_test.cpp
index 3ebf47267fb..a92f68f39fa 100644
--- a/src/mongo/db/query/lru_key_value_test.cpp
+++ b/src/mongo/db/query/lru_key_value_test.cpp
@@ -64,15 +64,15 @@ struct ValueType {
struct TrivialBudgetEstimator {
static constexpr size_t kSize = 1;
- size_t operator()(const ValueType&) {
+ size_t operator()(const int&, const ValueType&) {
return kSize;
}
- size_t operator()(const std::shared_ptr<int>&) {
+ size_t operator()(const int&, const std::unique_ptr<int>&) {
return kSize;
}
- size_t operator()(const std::unique_ptr<int>&) {
+ size_t operator()(const int&, const std::shared_ptr<int>) {
return kSize;
}
};
@@ -87,7 +87,7 @@ struct NonTrivialEntry {
};
struct NonTrivialBudgetEstimator {
- size_t operator()(const std::shared_ptr<NonTrivialEntry>& value) {
+ size_t operator()(const int& key, const std::shared_ptr<NonTrivialEntry> value) {
return value->budgetSize;
}
};
diff --git a/src/mongo/db/query/partitioned_cache.h b/src/mongo/db/query/partitioned_cache.h
index caf8d806a14..a5b1f1218b6 100644
--- a/src/mongo/db/query/partitioned_cache.h
+++ b/src/mongo/db/query/partitioned_cache.h
@@ -43,7 +43,7 @@ namespace mongo {
*/
template <class KeyType,
class ValueType,
- class BudgetEstimator,
+ class KeyBudgetEstimator,
class Partitioner,
class KeyHasher = std::hash<KeyType>,
class Eq = std::equal_to<KeyType>>
@@ -53,7 +53,7 @@ private:
PartitionedCache& operator=(const PartitionedCache&) = delete;
public:
- using Lru = LRUKeyValue<KeyType, ValueType, BudgetEstimator, KeyHasher, Eq>;
+ using Lru = LRUKeyValue<KeyType, ValueType, KeyBudgetEstimator, KeyHasher, Eq>;
using Partition = typename Partitioned<Lru, Partitioner>::OnePartition;
using PartitionId = typename Partitioned<Lru, Partitioner>::PartitionId;
@@ -69,14 +69,20 @@ public:
}
~PartitionedCache() = default;
-
- void put(const KeyType& key, ValueType value) {
+ /**
+ * Inserts the provided <key, value> into the partition associated with that key. Returns the
+ * number of older entries evicted to fit this new one.
+ */
+ size_t put(const KeyType& key, ValueType value) {
auto partition = _partitionedCache->lockOnePartition(key);
- partition->add(key, std::move(value));
+ return partition->add(key, std::move(value));
}
-
- void put(const KeyType& key, ValueType value, Partition& partition) {
- partition->add(key, std::move(value));
+ /**
+ * Inserts the provided <key, value> into the specified partition. Returns the number of older
+ * entries evicted to fit this new one.
+ */
+ size_t put(const KeyType& key, ValueType value, Partition& partition) {
+ return partition->add(key, std::move(value));
}
StatusWith<ValueType*> lookup(const KeyType& key) const {
@@ -134,13 +140,17 @@ public:
/**
* Reset total cache size. If the size is set to a smaller value than before, enough entries are
- * evicted in order to ensure that the cache fits within the new budget.
+ * evicted in order to ensure that the cache fits within the new budget. Returns the number of
+ * entries evicted.
*/
- void reset(size_t cacheSize) {
+ size_t reset(size_t cacheSize) {
+ size_t numEvicted = 0;
for (size_t partitionId = 0; partitionId < _numPartitions; ++partitionId) {
auto lockedPartition = _partitionedCache->lockOnePartitionById(partitionId);
- lockedPartition->reset(cacheSize / _numPartitions);
+ numEvicted += lockedPartition->reset(cacheSize / _numPartitions);
}
+
+ return numEvicted;
}
/**
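To show why put() and reset() now return counts, here is a rough usage-level sketch with hypothetical types and sizes (not the server API) of a caller summing evictions across partitions when the overall budget shrinks, mirroring what telemetry.cpp does below:

#include <cstddef>
#include <iostream>
#include <vector>

// Toy stand-in for one LRU partition: holds a number of fixed-size entries.
struct FakePartition {
    std::size_t entries = 0;
    static constexpr std::size_t kBytesPerEntry = 300;  // assumed per-entry budget

    // Shrink to the new per-partition budget; return how many entries were dropped.
    std::size_t reset(std::size_t budget) {
        std::size_t fit = budget / kBytesPerEntry;
        std::size_t evicted = entries > fit ? entries - fit : 0;
        entries -= evicted;
        return evicted;
    }
};

int main() {
    std::vector<FakePartition> partitions(4);
    for (auto& p : partitions)
        p.entries = 10;                         // the store currently holds 40 entries

    const std::size_t newCacheSize = 4800;      // shrink the whole cache
    std::size_t numEvicted = 0;
    for (auto& p : partitions)
        numEvicted += p.reset(newCacheSize / partitions.size());  // 1200 bytes, i.e. 4 entries each

    std::cout << numEvicted << std::endl;       // 24; the caller feeds this into a counter
    return 0;
}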
diff --git a/src/mongo/db/query/plan_cache.h b/src/mongo/db/query/plan_cache.h
index a7db20699f3..5746cf90441 100644
--- a/src/mongo/db/query/plan_cache.h
+++ b/src/mongo/db/query/plan_cache.h
@@ -294,7 +294,7 @@ private:
*/
template <class KeyType,
class CachedPlanType,
- class BudgetEstimator,
+ class KeyBudgetEstimator,
class DebugInfoType,
class Partitioner,
class KeyHasher = std::hash<KeyType>>
@@ -305,7 +305,7 @@ class PlanCacheBase
// cache entries out of the lock, therefore it is illegal to mutate the pieces of a cache
// entry that can be cloned whether you are holding a lock or not.
std::shared_ptr<const PlanCacheEntryBase<CachedPlanType, DebugInfoType>>,
- BudgetEstimator,
+ KeyBudgetEstimator,
Partitioner,
KeyHasher> {
private:
@@ -316,7 +316,7 @@ public:
using Base =
PartitionedCache<KeyType,
std::shared_ptr<const PlanCacheEntryBase<CachedPlanType, DebugInfoType>>,
- BudgetEstimator,
+ KeyBudgetEstimator,
Partitioner,
KeyHasher>;
using Entry = PlanCacheEntryBase<CachedPlanType, DebugInfoType>;
diff --git a/src/mongo/db/query/sbe_plan_cache.h b/src/mongo/db/query/sbe_plan_cache.h
index 8e6a145683d..79f1ed8e355 100644
--- a/src/mongo/db/query/sbe_plan_cache.h
+++ b/src/mongo/db/query/sbe_plan_cache.h
@@ -216,7 +216,10 @@ struct CachedSbePlan {
using PlanCacheEntry = PlanCacheEntryBase<CachedSbePlan, plan_cache_debug_info::DebugInfoSBE>;
struct BudgetEstimator {
- size_t operator()(const std::shared_ptr<const PlanCacheEntry>& entry) {
+ size_t operator()(const sbe::PlanCacheKey& key,
+ const std::shared_ptr<const PlanCacheEntry>& entry) {
+ // TODO: SERVER-73649 include size of underlying query shape and size of int_32 key hash in
+ // total size estimation.
return entry->estimatedEntrySizeBytes;
}
};
diff --git a/src/mongo/db/query/telemetry.cpp b/src/mongo/db/query/telemetry.cpp
index c1524d988d5..78d7511cc6d 100644
--- a/src/mongo/db/query/telemetry.cpp
+++ b/src/mongo/db/query/telemetry.cpp
@@ -57,9 +57,10 @@ bool isTelemetryEnabled() {
return feature_flags::gFeatureFlagTelemetry.isEnabledAndIgnoreFCV();
}
-
namespace {
+CounterMetric telemetryEvictedMetric("telemetry.numEvicted");
+
/**
* Cap the telemetry store size.
*/
@@ -129,7 +130,8 @@ public:
auto& telemetryStoreManager = telemetryStoreDecoration(serviceCtx);
auto&& telemetryStore = telemetryStoreManager->getTelemetryStore();
- telemetryStore.reset(cappedSize);
+ size_t numEvicted = telemetryStore.reset(cappedSize);
+ telemetryEvictedMetric.increment(numEvicted);
}
};
@@ -234,7 +236,8 @@ public:
if (statusWithMetrics.isOK()) {
metrics = statusWithMetrics.getValue();
} else {
- telemetryStore.put(telemetryKey, {}, partitionLock);
+ size_t numEvicted = telemetryStore.put(telemetryKey, {}, partitionLock);
+ telemetryEvictedMetric.increment(numEvicted);
auto newMetrics = partitionLock->get(telemetryKey);
+ // This can happen if the budget is immediately exceeded. Specifically, if there is
// not enough room for a single new entry if the number of partitions is too high
diff --git a/src/mongo/db/query/telemetry.h b/src/mongo/db/query/telemetry.h
index c8fb6aec5d4..91aceac1806 100644
--- a/src/mongo/db/query/telemetry.h
+++ b/src/mongo/db/query/telemetry.h
@@ -36,6 +36,7 @@
#include "mongo/db/query/plan_explainer.h"
#include "mongo/db/query/util/memory_util.h"
#include "mongo/db/service_context.h"
+#include <cstdint>
namespace mongo {
@@ -150,21 +151,16 @@ struct TelemetryPartitioner {
}
};
-/**
- * Average key size used to pad the metrics size. We store a cached redaction of the key in the
- * TelemetryMetrics object.
- */
-const size_t kAverageKeySize = 100;
-struct ComputeEntrySize {
- size_t operator()(const TelemetryMetrics& entry) {
- return sizeof(TelemetryMetrics) + kAverageKeySize;
+struct TelemetryStoreEntryBudgetor {
+ size_t operator()(const BSONObj& key, const TelemetryMetrics& value) {
+ return sizeof(TelemetryMetrics) + sizeof(BSONObj) + key.objsize();
}
};
using TelemetryStore = PartitionedCache<BSONObj,
TelemetryMetrics,
- ComputeEntrySize,
+ TelemetryStoreEntryBudgetor,
TelemetryPartitioner,
SimpleBSONObjComparator::Hasher,
SimpleBSONObjComparator::EqualTo>;
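The new TelemetryStoreEntryBudgetor replaces the fixed kAverageKeySize padding with the actual key size. The following illustrative computation (the struct sizes are assumptions, not measured server values) shows roughly what one entry is charged and how that lines up with the EvictEntries test below:

#include <cstddef>
#include <iostream>

int main() {
    // Assumed sizes: sizeof(TelemetryMetrics) and sizeof(BSONObj) vary by build;
    // the figures below roughly match the numbers quoted in the tests.
    const std::size_t kTelemetryMetricsSize = 208;  // value struct
    const std::size_t kBsonObjHandleSize = 16;      // BSONObj handle stored with the entry
    const std::size_t kKeyObjSize = 46;             // key.objsize() for a small query shape

    // TelemetryStoreEntryBudgetor charges value struct + key handle + key bytes.
    const std::size_t perEntry = kTelemetryMetricsSize + kBsonObjHandleSize + kKeyObjSize;
    std::cout << "per-entry budget ~" << perEntry << " bytes\n";                  // ~270

    // A 2400-byte store split into 2 partitions gives 1200 bytes per partition,
    // i.e. room for 4 such entries each, matching the EvictEntries expectation of 8.
    std::cout << "entries per 1200-byte partition: " << 1200 / perEntry << "\n";  // 4
    return 0;
}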
diff --git a/src/mongo/db/query/telemetry_store_test.cpp b/src/mongo/db/query/telemetry_store_test.cpp
index 0b81a1409c3..5e5237ff0bf 100644
--- a/src/mongo/db/query/telemetry_store_test.cpp
+++ b/src/mongo/db/query/telemetry_store_test.cpp
@@ -40,13 +40,6 @@ namespace mongo::telemetry {
class TelemetryStoreTest : public ServiceContextTest {};
TEST_F(TelemetryStoreTest, BasicUsage) {
- // Turning on the flag at runtime will crash as telemetry store registerer (which creates the
- // telemetry store) is called at start up and if flag is off, the telemetry store will have
- // never been created. Thus, instead of turning on the flag at runtime and crashing, we skip the
- // test if telemetry feature flag is off.
- if (!feature_flags::gFeatureFlagTelemetry.isEnabledAndIgnoreFCV()) {
- return;
- }
TelemetryStore telStore{5000000, 1000};
auto getMetrics = [&](BSONObj& key) {
@@ -101,4 +94,20 @@ TEST_F(TelemetryStoreTest, BasicUsage) {
ASSERT_EQ(numKeys, 2);
}
+
+TEST_F(TelemetryStoreTest, EvictEntries) {
+ // This creates a telemetry store with 2 partitions, each with a size of 1200 bytes.
+ TelemetryStore telStore{2400, 2};
+
+ for (int i = 0; i < 10; i++) {
+ auto query = BSON("query" + std::to_string(i) << 1 << "xEquals" << 42);
+ telStore.put(query, TelemetryMetrics{});
+ }
+ int numKeys = 0;
+ telStore.forEach([&](const BSONObj& key, const TelemetryMetrics& entry) { numKeys++; });
+ // Given the size of the bson keys (~46 bytes) and values (~208 bytes), each partition (1200
+ // bytes) can hold at most 4 entries.
+ ASSERT_EQ(numKeys, 8);
+}
+
} // namespace mongo::telemetry