author    Misha Tyulenev <misha.tyulenev@mongodb.com>    2022-11-15 18:17:51 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com>    2022-11-15 19:42:57 +0000
commit    356921365a763fd365437ff979a6719731d08f7e (patch)
tree      713b2ff9e6a56ff5f71c9c19d67468f3660461a8
parent    919b31026d9d3c629bfd5f29556e3a532395dc62 (diff)
SERVER-70856 load stats by path name
-rw-r--r--   jstests/cqf/analyze/ce_histogram.js                 92
-rw-r--r--   src/mongo/db/query/ce/stats_cache_loader_impl.cpp   10
-rw-r--r--   src/mongo/db/query/ce/stats_catalog.cpp              2
3 files changed, 50 insertions(+), 54 deletions(-)
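
In short: histogram documents live in the per-collection statistics collection db.system.statistics.<collName> and are keyed by their path name in _id, but before this change StatsCacheLoaderImpl never attached a filter to its find, so it always loaded the first histogram regardless of the requested path. A minimal shell sketch of the behavior this commit enables; the collection name, fields, and data below are illustrative, not taken from the commit:

    // Illustrative sketch only: build histograms on two paths of one collection,
    // then look each one up by path name, i.e. by _id, as the loader now does.
    assert.commandWorked(
        db.adminCommand({setParameter: 1, internalQueryFrameworkControl: "tryBonsai"}));

    const coll = db.stats_by_path_demo;  // hypothetical collection
    coll.drop();
    assert.commandWorked(coll.insertMany([{int: 1, dbl: 1.5}, {int: 2, dbl: 2.5}]));

    assert.commandWorked(db.runCommand({analyze: "stats_by_path_demo", key: "int"}));
    assert.commandWorked(db.runCommand({analyze: "stats_by_path_demo", key: "dbl"}));

    // One histogram document per analyzed path, keyed by the path name.
    const stats = db.system.statistics.stats_by_path_demo;
    printjson(stats.find({_id: "int"}).toArray());  // only the "int" histogram
    printjson(stats.find({_id: "dbl"}).toArray());  // only the "dbl" histogram
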
diff --git a/jstests/cqf/analyze/ce_histogram.js b/jstests/cqf/analyze/ce_histogram.js
index 956368bf785..d3326d8ed8f 100644
--- a/jstests/cqf/analyze/ce_histogram.js
+++ b/jstests/cqf/analyze/ce_histogram.js
@@ -17,6 +17,7 @@ load('jstests/aggregation/extras/utils.js'); // For assertArrayEq.
load("jstests/libs/optimizer_utils.js"); // For checkCascadesOptimizerEnabled.
load("jstests/libs/sbe_util.js"); // For checkSBEEnabled.
+const collName = "ce_histogram";
const fields = ["int", "dbl", "str", "date"];
const tolerance = 0.01;
@@ -99,67 +100,60 @@ function verifyCEForMatch({coll, predicate, expected, hint}) {
* produced for this test.
*/
function verifyCEForNDV(ndv) {
- /**
- * For this test we create one collection with an index for each field. We use a new
- * collection name for each field because, until SERVER-70856 is fixed, we can't have
- * multiple histograms on a collection:
- * there is no logic to correctly filter on field name, which means we will always retrieve
- * the first histogram generated for the collection (regardless of which field we care
- * about), even though we have correct histograms in the system collection for all fields.
- *
- * TODO: rewrite this test to reuse the same collection once SERVER-70856 is addressed.
- */
- for (const field of fields) {
- // We can't use forceBonsai here because the new optimizer doesn't know how to handle the
- // analyze command.
- assert.commandWorked(
- db.adminCommand({setParameter: 1, internalQueryFrameworkControl: "tryBonsai"}));
+ const coll = db[collName];
+ coll.drop();
- const collName = `ce_histogram_${field}`;
- const coll = db[collName];
- coll.drop();
+ const expectedHistograms = [];
+ for (const field of fields) {
assert.commandWorked(coll.createIndex({[field]: 1}));
-
- const expectedHistograms = [];
expectedHistograms.push(
{_id: field, statistics: {documents: 0, scalarHistogram: {buckets: [], bounds: []}}});
+ }
- // Set up test collection and initialize the expected histograms in order to validate basic
- // histogram construction. We generate 'ndv' distinct values for each 'field', such that the
- // 'i'th distinct value has a frequency of 'i'. Because we have a small number of distinct
- // values, we expect to have one bucket per distinct value.
- _id = 0;
- let cumulativeCount = 0;
- let allDocs = [];
- for (let val = 1; val <= ndv; val++) {
- const docs = generateDocs(val);
- assert.commandWorked(coll.insertMany(docs));
- cumulativeCount += docs.length;
- for (const expectedHistogram of expectedHistograms) {
- const field = expectedHistogram._id;
- const {statistics} = expectedHistogram;
- statistics.documents = cumulativeCount;
- statistics.scalarHistogram.buckets.push({
- boundaryCount: val,
- rangeCount: 0,
- cumulativeCount,
- rangeDistincts: 0,
- cumulativeDistincts: val
- });
- statistics.scalarHistogram.bounds.push(docs[0][field]);
- }
- allDocs = allDocs.concat(docs);
+ // Set up test collection and initialize the expected histograms in order to validate basic
+ // histogram construction. We generate 'ndv' distinct values for each 'field', such that the
+ // 'i'th distinct value has a frequency of 'i'. Because we have a small number of distinct
+ // values, we expect to have one bucket per distinct value.
+ _id = 0;
+ let cumulativeCount = 0;
+ let allDocs = [];
+ for (let val = 1; val <= ndv; val++) {
+ const docs = generateDocs(val);
+ assert.commandWorked(coll.insertMany(docs));
+ cumulativeCount += docs.length;
+ for (const expectedHistogram of expectedHistograms) {
+ const field = expectedHistogram._id;
+ const {statistics} = expectedHistogram;
+ statistics.documents = cumulativeCount;
+ statistics.scalarHistogram.buckets.push({
+ boundaryCount: val,
+ rangeCount: 0,
+ cumulativeCount,
+ rangeDistincts: 0,
+ cumulativeDistincts: val
+ });
+ statistics.scalarHistogram.bounds.push(docs[0][field]);
}
+ allDocs = allDocs.concat(docs);
+ }
- // Set up histogram for test collection.
- const stats = db.system.statistics[collName];
+ // Set up histogram for test collection.
+ const stats = db.system.statistics[collName];
+ for (const field of fields) {
+ // We can't use forceBonsai here because the new optimizer doesn't know how to handle the
+ // analyze command.
+ assert.commandWorked(
+ db.adminCommand({setParameter: 1, internalQueryFrameworkControl: "tryBonsai"}));
const res = db.runCommand({analyze: collName, key: field});
assert.commandWorked(res);
// Validate histograms.
- const actualHistograms = stats.aggregate().toArray();
- assertArrayEq({actual: actualHistograms, expected: expectedHistograms});
+ const actualHistograms = stats.aggregate([{$match: {_id: field}}]).toArray();
+ const isField = (elem) => elem._id === field;
+
+ assertArrayEq(
+ {actual: actualHistograms, expected: expectedHistograms.filter(isField)});
// We need to set the CE query knob to use histograms and force the use of the new optimizer
// to ensure that we use histograms to estimate CE here.
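
For reference, the test's generateDocs helper is defined earlier in ce_histogram.js and does not appear in this hunk. From how the loop above consumes it (generateDocs(val) contributes 'val' documents, the global _id counter supplies keys, and docs[0][field] becomes the bucket bound for each of the four fields), a hypothetical reconstruction could look like this:

    // Hypothetical sketch, not part of the diff: return 'val' documents that all
    // carry the same distinct value per field, so the value introduced at step
    // 'val' has frequency 'val' and adds exactly one bucket per field histogram.
    function generateDocs(val) {
        const docs = [];
        for (let i = 0; i < val; i++) {
            docs.push({
                _id: _id++,  // test-global counter, reset to 0 before the loop
                int: val,
                dbl: val + 0.1,
                str: `str_${val}`,
                date: new Date(2022, 0, val),
            });
        }
        return docs;
    }
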
diff --git a/src/mongo/db/query/ce/stats_cache_loader_impl.cpp b/src/mongo/db/query/ce/stats_cache_loader_impl.cpp
index f3106410c19..c0d80664e9c 100644
--- a/src/mongo/db/query/ce/stats_cache_loader_impl.cpp
+++ b/src/mongo/db/query/ce/stats_cache_loader_impl.cpp
@@ -38,6 +38,8 @@
#include "mongo/logv2/log.h"
#include "mongo/stdx/thread.h"
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
+
namespace mongo {
@@ -49,14 +51,14 @@ SemiFuture<StatsCacheVal> StatsCacheLoaderImpl::getStats(OperationContext* opCtx
NamespaceString statsNss(statsPath.first.db(), statsColl);
DBDirectClient client(opCtx);
- auto pathFilter = BSON("path" << statsPath.second);
FindCommandRequest findRequest{statsNss};
- // findRequest.setFilter(pathFilter);
- BSONObj result;
+ BSONObj filter = BSON("_id" << statsPath.second);
+ LOGV2_DEBUG(7085600, 1, "findRequest filter", "filter"_attr = filter.toString());
+ findRequest.setFilter(filter.getOwned());
try {
- auto cursor = client.find(findRequest);
+ auto cursor = client.find(std::move(findRequest));
if (!cursor) {
uasserted(ErrorCodes::OperationFailed,
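
The filter the loader now attaches, BSON("_id" << statsPath.second), matches the _id keying exercised by the test above, so the resulting request is equivalent to a plain find command on the statistics namespace. A shell sketch with placeholder names (the real namespace comes from statsNss, built from the database of statsPath.first and statsColl):

    // Equivalent of the loader's new filtered find; "ce_histogram" and "int" are
    // placeholders for the collection and path being loaded.
    const res = db.runCommand({
        find: "system.statistics.ce_histogram",
        filter: {_id: "int"},
    });
    assert.commandWorked(res);
    printjson(res.cursor.firstBatch);  // at most one histogram document per path

    // The new LOGV2_DEBUG(7085600) line logs the filter at severity 1 under the
    // query component; raise the verbosity to see it in the server log.
    db.setLogLevel(1, "query");
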
diff --git a/src/mongo/db/query/ce/stats_catalog.cpp b/src/mongo/db/query/ce/stats_catalog.cpp
index 2582d1f34e5..d8b65d09e72 100644
--- a/src/mongo/db/query/ce/stats_catalog.cpp
+++ b/src/mongo/db/query/ce/stats_catalog.cpp
@@ -57,7 +57,7 @@ StatsCatalog::StatsCatalog(ServiceContext* service,
ThreadPool::Options options;
options.poolName = "StatsCache";
options.minThreads = 0;
- options.maxThreads = 1;
+ options.maxThreads = 2;
return options;
}())),
_statsCache(service, std::move(statsCacheLoader), *_executor, 1000) {