summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlyssa Wagenmaker <alyssa.wagenmaker@mongodb.com>2023-02-01 19:08:22 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-02-02 18:29:15 +0000
commit55e4a05daabba24a6ec976a69335823384ed981b (patch)
tree54d5bf04939ae157164cc878ba67664fd876273b
parentf5c68f8f3efb3281f2ffad462640c38268deb319 (diff)
downloadmongo-55e4a05daabba24a6ec976a69335823384ed981b.tar.gz
SERVER-73009 Compute clustered scan direction when plan is hinted or cached
-rw-r--r--jstests/noPassthrough/clustered_collection_sorted_scan.js76
-rw-r--r--src/mongo/db/query/planner_access.h2
-rw-r--r--src/mongo/db/query/query_planner.cpp88
3 files changed, 102 insertions, 64 deletions
diff --git a/jstests/noPassthrough/clustered_collection_sorted_scan.js b/jstests/noPassthrough/clustered_collection_sorted_scan.js
index 103f7094b9a..0888012917a 100644
--- a/jstests/noPassthrough/clustered_collection_sorted_scan.js
+++ b/jstests/noPassthrough/clustered_collection_sorted_scan.js
@@ -17,7 +17,7 @@ Random.setRandomSeed();
const testConnection =
MongoRunner.runMongod({setParameter: {supportArbitraryClusterKeyIndex: true}});
const testDb = testConnection.getDB('local');
-const collectionSize = 200;
+const collectionSize = 10;
const clusteredCollName = "clustered_index_sorted_scan_coll";
const clusterField = "clusterKey";
@@ -40,20 +40,20 @@ for (let i = 0; i < collectionSize; ++i) {
assert.commandWorked(nonClusteredColl.insert({[clusterField]: i, a: a}));
}
-function runTest(isClustered, hasFilter, direction) {
+function runTest(isClustered, hasFilter, hasHint, direction) {
let tsColl = isClustered ? clusteredColl : nonClusteredColl;
- let query;
- if (hasFilter) {
- query = tsColl.find({[clusterField]: {$gt: -1}}).sort({[clusterField]: direction});
- } else {
- query = tsColl.find().sort({[clusterField]: direction});
- }
+ const filter = hasFilter ? {[clusterField]: {$gt: -1}} : {};
+ const sort = {[clusterField]: direction};
+ const hint = hasHint ? {[clusterField]: 1} : {};
+
+ let query = tsColl.find(filter).sort(sort).hint(hint);
function formatParamsAndPlan(plan) {
let params = {
isClustered: isClustered ? "true" : "false",
hasFilter: hasFilter ? "true" : "false",
+ hasHint: hasHint ? "true" : "false",
direction: direction ? "forward" : "backward",
};
@@ -76,7 +76,7 @@ function runTest(isClustered, hasFilter, direction) {
assert(!planHasStage(testDb, plan, "SORT"), "Unexpected sort in " + formatParamsAndPlan(plan));
}
-function testCollations() {
+function testCollations(direction) {
let strCollName = clusteredCollName + "_str";
// Generate a clustered collection for the remainder of the testing
@@ -94,7 +94,7 @@ function testCollations() {
// Because the collations don't match, we can't use the clustered index
// to provide a sort
let plan = tsColl.find()
- .sort({[clusterField]: 1})
+ .sort({[clusterField]: direction})
.collation({locale: "fo", caseLevel: true})
.explain();
assert(planHasStage(testDb, plan, "SORT"), "Expected sort in " + tojson(plan));
@@ -102,24 +102,26 @@ function testCollations() {
// However, if we can exclude strings, we don't need an explicit sort even
// if the collations don't match
plan = tsColl.find({[clusterField]: {$gt: -1}})
- .sort({[clusterField]: 1})
+ .sort({[clusterField]: direction})
.collation({locale: "fo", caseLevel: true})
.explain();
assert(!planHasStage(testDb, plan, "SORT"), "Unxpected sort in " + tojson(plan));
tsColl.drop();
}
-function testMinMax(direction) {
+function testMinMax() {
+ // Min and max are only supported on forward collection scans.
+ const direction = 1;
// Min and max should be between 0 and collection size
- const minResult = 10; // inclusive
- const maxResult = 20; // not inclusive
+ const minResult = 5; // inclusive
+ const maxResult = 8; // not inclusive
const resultCount = maxResult - minResult;
let normalCursor = nonClusteredColl.find()
.hint({[clusterField]: 1})
.min({[clusterField]: minResult})
.max({[clusterField]: maxResult})
- .sort({[clusterField]: 1});
+ .sort({[clusterField]: direction});
let normalResult = normalCursor.toArray();
assert.eq(normalResult.length,
resultCount,
@@ -129,7 +131,7 @@ function testMinMax(direction) {
.hint({[clusterField]: 1})
.min({[clusterField]: minResult})
.max({[clusterField]: maxResult})
- .sort({[clusterField]: 1});
+ .sort({[clusterField]: direction});
let clusterResult = clusterCursor.toArray();
assert.eq(clusterResult.length,
resultCount,
@@ -141,24 +143,38 @@ function testMinMax(direction) {
}
// Ensure that the plan gets cached correctly
-function testPlanCache() {
+function testPlanCache(direction) {
clusteredColl.getPlanCache().clear();
const indexName = "_a";
assert.commandWorked(clusteredColl.createIndex({a: 1}, {name: indexName}));
+ const filter = {a: {$gt: -1}};
+ const projection = {_id: 0, [clusterField]: 1};
+ const sort = {[clusterField]: direction};
+
// Because of the _a index above, we should have two alternatves -- filter via the
// index then a blocking sort, or filter during a collection scan. Because of the blocking
// sort and the fact that "a" doesnt actually filter anything, we expect the
// collection scan to win.
- let query = clusteredColl.find({a: {$gt: -1}}).sort({[clusterField]: -1});
-
- let plan = query.explain();
+ let plan = clusteredColl.find(filter, projection).sort(sort).explain();
assert(plan.queryPlanner.rejectedPlans.length > 0, tojson(plan));
assert(planHasStage(testDb, plan, "COLLSCAN"), "Expected COLLSCAN in " + tojson(plan));
- // Run the query to ensure it's cached
- query.toArray();
+ let nonClusteredResults = nonClusteredColl.find(filter, projection).sort(sort).toArray();
+ assert.eq(nonClusteredResults.length, collectionSize);
+
+ // Now run the query and verify that the results are expected. Run it a few times so that the
+ // cached plan will be used.
+ assert.eq(nonClusteredResults,
+ clusteredColl.find(filter, projection).sort(sort).toArray(),
+ tojson(plan));
+ assert.eq(nonClusteredResults,
+ clusteredColl.find(filter, projection).sort(sort).toArray(),
+ tojson(plan));
+ assert.eq(nonClusteredResults,
+ clusteredColl.find(filter, projection).sort(sort).toArray(),
+ tojson(plan));
// Verify that there's a cache entry for this query
let cacheEntries = clusteredColl.getPlanCache().list();
@@ -171,16 +187,20 @@ function testPlanCache() {
// Actually run all the tests:
for (let isClustered = 0; isClustered <= 1; isClustered++) {
for (let hasFilter = 0; hasFilter <= 1; hasFilter++) {
- runTest(isClustered, hasFilter, /* direction = */ 1);
- runTest(isClustered, hasFilter, /* direction = */ -1);
+ for (let hasHint = 0; hasHint <= 1; hasHint++) {
+ runTest(isClustered, hasFilter, hasHint, /* direction = */ 1);
+ runTest(isClustered, hasFilter, hasHint, /* direction = */ -1);
+ }
}
}
-testCollations();
-testMinMax(/* direction = */ 1);
-testMinMax(/* direction = */ -1);
+testCollations(/* direction = */ 1);
+testCollations(/* direction = */ -1);
+
+testMinMax();
-testPlanCache();
+testPlanCache(/* direction = */ 1);
+testPlanCache(/* direction = */ -1);
// If we're sorting on multiple columns, we still need an explicit sort
let plan = clusteredColl.find().sort({[clusterField]: 1, a: 1}).explain();
diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h
index bd9c856ffab..03e9773a057 100644
--- a/src/mongo/db/query/planner_access.h
+++ b/src/mongo/db/query/planner_access.h
@@ -106,7 +106,7 @@ public:
static std::unique_ptr<QuerySolutionNode> makeCollectionScan(const CanonicalQuery& query,
bool tailable,
const QueryPlannerParams& params,
- int direction = 1);
+ int direction);
/**
* Return a plan that uses the provided index as a proxy for a collection scan.
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index c258c9e6867..10a5f86b464 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -469,12 +469,46 @@ static BSONObj finishMaxObj(const IndexEntry& indexEntry,
}
}
+bool providesSort(const CanonicalQuery& query, const BSONObj& kp) {
+ return query.getFindCommandRequest().getSort().isPrefixOf(
+ kp, SimpleBSONElementComparator::kInstance);
+}
+
+/**
+ * Determine whether this query has a sort that can be provided by the clustered index, if so, which
+ * direction the scan should be. If the collection is not clustered, or the sort cannot be provided,
+ * returns 'boost::none'.
+ */
+boost::optional<int> determineClusteredScanDirection(const CanonicalQuery& query,
+ const QueryPlannerParams& params) {
+ if (params.clusteredInfo && query.getSortPattern() &&
+ CollatorInterface::collatorsMatch(params.clusteredCollectionCollator,
+ query.getCollator())) {
+ auto kp = clustered_util::getSortPattern(params.clusteredInfo->getIndexSpec());
+ if (providesSort(query, kp)) {
+ return 1;
+ } else if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) {
+ return -1;
+ }
+ }
+
+ return boost::none;
+}
+
+/**
+ * Determine the direction of the scan needed for the query. Defaults to 1 unless this is a
+ * clustered collection and we have a sort that can be provided by the clustered index.
+ */
+int determineCollscanDirection(const CanonicalQuery& query, const QueryPlannerParams& params) {
+ return determineClusteredScanDirection(query, params).value_or(1);
+}
+
std::unique_ptr<QuerySolution> buildCollscanSoln(const CanonicalQuery& query,
bool tailable,
const QueryPlannerParams& params,
- int direction = 1) {
- std::unique_ptr<QuerySolutionNode> solnRoot(
- QueryPlannerAccess::makeCollectionScan(query, tailable, params, direction));
+ boost::optional<int> direction = boost::none) {
+ std::unique_ptr<QuerySolutionNode> solnRoot(QueryPlannerAccess::makeCollectionScan(
+ query, tailable, params, direction.value_or(determineCollscanDirection(query, params))));
return QueryPlannerAnalysis::analyzeDataAccess(query, params, std::move(solnRoot));
}
@@ -491,11 +525,6 @@ std::unique_ptr<QuerySolution> buildWholeIXSoln(
return QueryPlannerAnalysis::analyzeDataAccess(query, params, std::move(solnRoot));
}
-bool providesSort(const CanonicalQuery& query, const BSONObj& kp) {
- return query.getFindCommandRequest().getSort().isPrefixOf(
- kp, SimpleBSONElementComparator::kInstance);
-}
-
StatusWith<std::unique_ptr<PlanCacheIndexTree>> QueryPlanner::cacheDataFromTaggedTree(
const MatchExpression* const taggedTree, const vector<IndexEntry>& relevantIndices) {
if (!taggedTree) {
@@ -661,7 +690,7 @@ StatusWith<std::unique_ptr<QuerySolution>> QueryPlanner::planFromCache(
} else if (SolutionCacheData::COLLSCAN_SOLN == winnerCacheData.solnType) {
// The cached solution is a collection scan. We don't cache collscans
// with tailable==true, hence the false below.
- auto soln = buildCollscanSoln(query, false, params);
+ auto soln = buildCollscanSoln(query, false, params, winnerCacheData.wholeIXSolnDir);
if (!soln) {
return Status(ErrorCodes::NoQueryExecutionPlans,
"plan cache error: collection scan soln");
@@ -1260,32 +1289,19 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
// The base index is sorted on some key, so it's possible we might want to use
// a collection scan to provide the sort requested
- if (params.clusteredInfo) {
- if (CollatorInterface::collatorsMatch(params.clusteredCollectionCollator,
- query.getCollator())) {
- auto kp = clustered_util::getSortPattern(params.clusteredInfo->getIndexSpec());
- int direction = 0;
- if (providesSort(query, kp)) {
- direction = 1;
- } else if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) {
- direction = -1;
- }
-
- if (direction != 0) {
- auto soln = buildCollscanSoln(query, isTailable, params, direction);
- if (soln) {
- LOGV2_DEBUG(6082401,
- 5,
- "Planner: outputting soln that uses clustered index to "
- "provide sort");
- SolutionCacheData* scd = new SolutionCacheData();
- scd->solnType = SolutionCacheData::COLLSCAN_SOLN;
- scd->wholeIXSolnDir = direction;
+ if (auto direction = determineClusteredScanDirection(query, params)) {
+ auto soln = buildCollscanSoln(query, isTailable, params, direction);
+ if (soln) {
+ LOGV2_DEBUG(6082401,
+ 5,
+ "Planner: outputting soln that uses clustered index to "
+ "provide sort");
+ SolutionCacheData* scd = new SolutionCacheData();
+ scd->solnType = SolutionCacheData::COLLSCAN_SOLN;
+ scd->wholeIXSolnDir = *direction;
- soln->cacheData.reset(scd);
- out.push_back(std::move(soln));
- }
- }
+ soln->cacheData.reset(scd);
+ out.push_back(std::move(soln));
}
}
}
@@ -1349,7 +1365,8 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
}
if (possibleToCollscan && (collscanRequested || collScanRequired)) {
- auto collscan = buildCollscanSoln(query, isTailable, params);
+ auto direction = determineCollscanDirection(query, params);
+ auto collscan = buildCollscanSoln(query, isTailable, params, direction);
if (!collscan && collScanRequired) {
return Status(ErrorCodes::NoQueryExecutionPlans,
"Failed to build collection scan soln");
@@ -1361,6 +1378,7 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
"collectionScan"_attr = redact(collscan->toString()));
SolutionCacheData* scd = new SolutionCacheData();
scd->solnType = SolutionCacheData::COLLSCAN_SOLN;
+ scd->wholeIXSolnDir = direction;
collscan->cacheData.reset(scd);
out.push_back(std::move(collscan));
}