From 356921365a763fd365437ff979a6719731d08f7e Mon Sep 17 00:00:00 2001
From: Misha Tyulenev
Date: Tue, 15 Nov 2022 18:17:51 +0000
Subject: SERVER-70856 load stats by path name

---
 jstests/cqf/analyze/ce_histogram.js               | 92 +++++++++++------------
 src/mongo/db/query/ce/stats_cache_loader_impl.cpp | 10 ++-
 src/mongo/db/query/ce/stats_catalog.cpp           |  2 +-
 3 files changed, 50 insertions(+), 54 deletions(-)

diff --git a/jstests/cqf/analyze/ce_histogram.js b/jstests/cqf/analyze/ce_histogram.js
index 956368bf785..d3326d8ed8f 100644
--- a/jstests/cqf/analyze/ce_histogram.js
+++ b/jstests/cqf/analyze/ce_histogram.js
@@ -17,6 +17,7 @@
 load('jstests/aggregation/extras/utils.js');  // For assertArrayEq.
 load("jstests/libs/optimizer_utils.js");      // For checkCascadesOptimizerEnabled.
 load("jstests/libs/sbe_util.js");             // For checkSBEEnabled.
 
+const collName = "ce_histogram";
 const fields = ["int", "dbl", "str", "date"];
 const tolerance = 0.01;
 
@@ -99,67 +100,60 @@ function verifyCEForMatch({coll, predicate, expected, hint}) {
  * produced for this test.
  */
 function verifyCEForNDV(ndv) {
-    /**
-     * For this test we create one collection and with an index for each field. We use a new
-     * collection name for each field because until SERVER-70856 is fixed we can't have multiple
-     * histograms on a collection because
-     * there is no logic to correctly filter on field name, which means we will always retrieve the
-     * first histogram generated for the collection (regardless of which field we care about), even
-     * though we have correct histograms in the system collection for all fields.
-     *
-     * TODO: rewrite this test to reuse the same collection SERVER-70856 is addressed.
-     */
-    for (const field of fields) {
-        // We can't use forceBonsai here because the new optimizer doesn't know how to handle the
-        // analyze command.
-        assert.commandWorked(
-            db.adminCommand({setParameter: 1, internalQueryFrameworkControl: "tryBonsai"}));
+    const coll = db[collName];
+    coll.drop();
 
-        const collName = `ce_histogram_${field}`;
-        const coll = db[collName];
-        coll.drop();
+    const expectedHistograms = [];
 
+    for (const field of fields) {
         assert.commandWorked(coll.createIndex({[field]: 1}));
-
-        const expectedHistograms = [];
         expectedHistograms.push(
             {_id: field, statistics: {documents: 0, scalarHistogram: {buckets: [], bounds: []}}});
+    }
 
     // Set up test collection and initialize the expected histograms in order to validate basic
    // histogram construction.
    // We generate 'ndv' distinct values for each 'field', such that the
    // 'i'th distinct value has a frequency of 'i'. Because we have a small number of distinct
    // values, we expect to have one bucket per distinct value.
     _id = 0;
     let cumulativeCount = 0;
     let allDocs = [];
     for (let val = 1; val <= ndv; val++) {
         const docs = generateDocs(val);
         assert.commandWorked(coll.insertMany(docs));
         cumulativeCount += docs.length;
         for (const expectedHistogram of expectedHistograms) {
             const field = expectedHistogram._id;
             const {statistics} = expectedHistogram;
             statistics.documents = cumulativeCount;
             statistics.scalarHistogram.buckets.push({
                 boundaryCount: val,
                 rangeCount: 0,
                 cumulativeCount,
                 rangeDistincts: 0,
                 cumulativeDistincts: val
             });
             statistics.scalarHistogram.bounds.push(docs[0][field]);
         }
         allDocs = allDocs.concat(docs);
     }
 
     // Set up histogram for test collection.
     const stats = db.system.statistics[collName];
+    for (const field of fields) {
+        // We can't use forceBonsai here because the new optimizer doesn't know how to handle the
+        // analyze command.
+        assert.commandWorked(
+            db.adminCommand({setParameter: 1, internalQueryFrameworkControl: "tryBonsai"}));
         const res = db.runCommand({analyze: collName, key: field});
         assert.commandWorked(res);
 
         // Validate histograms.
+        const actualHistograms = stats.aggregate([{$match: {_id: field}}]).toArray();
+        const isField = (elem) => elem._id === field;
+
+        assertArrayEq(
+            {actual: actualHistograms, expected: expectedHistograms.filter(isField)});
 
         // We need to set the CE query knob to use histograms and force the use of the new optimizer
         // to ensure that we use histograms to estimate CE here.
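Note on the test-side contract this change relies on: after SERVER-70856, a single
collection can carry one histogram per analyzed path, with each histogram stored as
its own document in the per-collection statistics namespace, keyed by the path name
in '_id'. A minimal shell sketch of that behavior (the 'intStats' name and the
assertions are illustrative, not part of the patch):

    // Build histograms for two different paths on the same collection.
    assert.commandWorked(db.runCommand({analyze: "ce_histogram", key: "int"}));
    assert.commandWorked(db.runCommand({analyze: "ce_histogram", key: "dbl"}));

    // Each analyzed path gets its own stats document, keyed by _id, so a
    // filter on _id returns exactly one histogram per path.
    const intStats = db.system.statistics.ce_histogram.find({_id: "int"}).toArray();
    assert.eq(1, intStats.length);
    assert.eq("int", intStats[0]._id);
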
diff --git a/src/mongo/db/query/ce/stats_cache_loader_impl.cpp b/src/mongo/db/query/ce/stats_cache_loader_impl.cpp
index f3106410c19..c0d80664e9c 100644
--- a/src/mongo/db/query/ce/stats_cache_loader_impl.cpp
+++ b/src/mongo/db/query/ce/stats_cache_loader_impl.cpp
@@ -38,6 +38,8 @@
 #include "mongo/logv2/log.h"
 #include "mongo/stdx/thread.h"
 
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
+
 namespace mongo {
 
@@ -49,14 +51,14 @@ SemiFuture<StatsCacheVal> StatsCacheLoaderImpl::getStats(OperationContext* opCtx,
     NamespaceString statsNss(statsPath.first.db(), statsColl);
     DBDirectClient client(opCtx);
 
-    auto pathFilter = BSON("path" << statsPath.second);
     FindCommandRequest findRequest{statsNss};
-    // findRequest.setFilter(pathFilter);
-    BSONObj result;
+    BSONObj filter = BSON("_id" << statsPath.second);
+    LOGV2_DEBUG(7085600, 1, "findRequest filter", "filter"_attr = filter.toString());
+    findRequest.setFilter(filter.getOwned());
 
     try {
-        auto cursor = client.find(findRequest);
+        auto cursor = client.find(std::move(findRequest));
 
         if (!cursor) {
             uasserted(ErrorCodes::OperationFailed,
diff --git a/src/mongo/db/query/ce/stats_catalog.cpp b/src/mongo/db/query/ce/stats_catalog.cpp
index 2582d1f34e5..d8b65d09e72 100644
--- a/src/mongo/db/query/ce/stats_catalog.cpp
+++ b/src/mongo/db/query/ce/stats_catalog.cpp
@@ -57,7 +57,7 @@ StatsCatalog::StatsCatalog(ServiceContext* service,
               ThreadPool::Options options;
               options.poolName = "StatsCache";
               options.minThreads = 0;
-              options.maxThreads = 1;
+              options.maxThreads = 2;
               return options;
           }())),
       _statsCache(service, std::move(statsCacheLoader), *_executor, 1000) {
-- 
cgit v1.2.1
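Note on the loader-side change: before this patch the filter was commented out, so
the DBDirectClient find returned whatever document came first in the stats
namespace, regardless of the requested path; now the loader filters on '_id', which
holds the path name. A sketch of the per-path document shape that both the test and
the loader assume, using the ndv = 1 case from the test above (the concrete values
are illustrative):

    // One per-path stats document, as built up in expectedHistograms in the
    // test. The loader's BSON("_id" << statsPath.second) filter is the shell
    // equivalent of find({_id: "int"}) against this namespace.
    const statsDoc = {
        _id: "int",  // the analyzed path
        statistics: {
            documents: 1,  // one document inserted for ndv = 1
            scalarHistogram: {
                buckets: [{
                    boundaryCount: 1,
                    rangeCount: 0,
                    cumulativeCount: 1,
                    rangeDistincts: 0,
                    cumulativeDistincts: 1
                }],
                bounds: [1]  // the single distinct 'int' value
            }
        }
    };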