From 55e4a05daabba24a6ec976a69335823384ed981b Mon Sep 17 00:00:00 2001 From: Alyssa Wagenmaker Date: Wed, 1 Feb 2023 19:08:22 +0000 Subject: SERVER-73009 Compute clustered scan direction when plan is hinted or cached --- .../clustered_collection_sorted_scan.js | 76 ++++++++++++------- src/mongo/db/query/planner_access.h | 2 +- src/mongo/db/query/query_planner.cpp | 88 +++++++++++++--------- 3 files changed, 102 insertions(+), 64 deletions(-) diff --git a/jstests/noPassthrough/clustered_collection_sorted_scan.js b/jstests/noPassthrough/clustered_collection_sorted_scan.js index 103f7094b9a..0888012917a 100644 --- a/jstests/noPassthrough/clustered_collection_sorted_scan.js +++ b/jstests/noPassthrough/clustered_collection_sorted_scan.js @@ -17,7 +17,7 @@ Random.setRandomSeed(); const testConnection = MongoRunner.runMongod({setParameter: {supportArbitraryClusterKeyIndex: true}}); const testDb = testConnection.getDB('local'); -const collectionSize = 200; +const collectionSize = 10; const clusteredCollName = "clustered_index_sorted_scan_coll"; const clusterField = "clusterKey"; @@ -40,20 +40,20 @@ for (let i = 0; i < collectionSize; ++i) { assert.commandWorked(nonClusteredColl.insert({[clusterField]: i, a: a})); } -function runTest(isClustered, hasFilter, direction) { +function runTest(isClustered, hasFilter, hasHint, direction) { let tsColl = isClustered ? clusteredColl : nonClusteredColl; - let query; - if (hasFilter) { - query = tsColl.find({[clusterField]: {$gt: -1}}).sort({[clusterField]: direction}); - } else { - query = tsColl.find().sort({[clusterField]: direction}); - } + const filter = hasFilter ? {[clusterField]: {$gt: -1}} : {}; + const sort = {[clusterField]: direction}; + const hint = hasHint ? {[clusterField]: 1} : {}; + + let query = tsColl.find(filter).sort(sort).hint(hint); function formatParamsAndPlan(plan) { let params = { isClustered: isClustered ? "true" : "false", hasFilter: hasFilter ? "true" : "false", + hasHint: hasHint ? "true" : "false", direction: direction ? "forward" : "backward", }; @@ -76,7 +76,7 @@ function runTest(isClustered, hasFilter, direction) { assert(!planHasStage(testDb, plan, "SORT"), "Unexpected sort in " + formatParamsAndPlan(plan)); } -function testCollations() { +function testCollations(direction) { let strCollName = clusteredCollName + "_str"; // Generate a clustered collection for the remainder of the testing @@ -94,7 +94,7 @@ function testCollations() { // Because the collations don't match, we can't use the clustered index // to provide a sort let plan = tsColl.find() - .sort({[clusterField]: 1}) + .sort({[clusterField]: direction}) .collation({locale: "fo", caseLevel: true}) .explain(); assert(planHasStage(testDb, plan, "SORT"), "Expected sort in " + tojson(plan)); @@ -102,24 +102,26 @@ function testCollations() { // However, if we can exclude strings, we don't need an explicit sort even // if the collations don't match plan = tsColl.find({[clusterField]: {$gt: -1}}) - .sort({[clusterField]: 1}) + .sort({[clusterField]: direction}) .collation({locale: "fo", caseLevel: true}) .explain(); assert(!planHasStage(testDb, plan, "SORT"), "Unxpected sort in " + tojson(plan)); tsColl.drop(); } -function testMinMax(direction) { +function testMinMax() { + // Min and max are only supported on forward collection scans. + const direction = 1; // Min and max should be between 0 and collection size - const minResult = 10; // inclusive - const maxResult = 20; // not inclusive + const minResult = 5; // inclusive + const maxResult = 8; // not inclusive const resultCount = maxResult - minResult; let normalCursor = nonClusteredColl.find() .hint({[clusterField]: 1}) .min({[clusterField]: minResult}) .max({[clusterField]: maxResult}) - .sort({[clusterField]: 1}); + .sort({[clusterField]: direction}); let normalResult = normalCursor.toArray(); assert.eq(normalResult.length, resultCount, @@ -129,7 +131,7 @@ function testMinMax(direction) { .hint({[clusterField]: 1}) .min({[clusterField]: minResult}) .max({[clusterField]: maxResult}) - .sort({[clusterField]: 1}); + .sort({[clusterField]: direction}); let clusterResult = clusterCursor.toArray(); assert.eq(clusterResult.length, resultCount, @@ -141,24 +143,38 @@ function testMinMax(direction) { } // Ensure that the plan gets cached correctly -function testPlanCache() { +function testPlanCache(direction) { clusteredColl.getPlanCache().clear(); const indexName = "_a"; assert.commandWorked(clusteredColl.createIndex({a: 1}, {name: indexName})); + const filter = {a: {$gt: -1}}; + const projection = {_id: 0, [clusterField]: 1}; + const sort = {[clusterField]: direction}; + // Because of the _a index above, we should have two alternatves -- filter via the // index then a blocking sort, or filter during a collection scan. Because of the blocking // sort and the fact that "a" doesnt actually filter anything, we expect the // collection scan to win. - let query = clusteredColl.find({a: {$gt: -1}}).sort({[clusterField]: -1}); - - let plan = query.explain(); + let plan = clusteredColl.find(filter, projection).sort(sort).explain(); assert(plan.queryPlanner.rejectedPlans.length > 0, tojson(plan)); assert(planHasStage(testDb, plan, "COLLSCAN"), "Expected COLLSCAN in " + tojson(plan)); - // Run the query to ensure it's cached - query.toArray(); + let nonClusteredResults = nonClusteredColl.find(filter, projection).sort(sort).toArray(); + assert.eq(nonClusteredResults.length, collectionSize); + + // Now run the query and verify that the results are expected. Run it a few times so that the + // cached plan will be used. + assert.eq(nonClusteredResults, + clusteredColl.find(filter, projection).sort(sort).toArray(), + tojson(plan)); + assert.eq(nonClusteredResults, + clusteredColl.find(filter, projection).sort(sort).toArray(), + tojson(plan)); + assert.eq(nonClusteredResults, + clusteredColl.find(filter, projection).sort(sort).toArray(), + tojson(plan)); // Verify that there's a cache entry for this query let cacheEntries = clusteredColl.getPlanCache().list(); @@ -171,16 +187,20 @@ function testPlanCache() { // Actually run all the tests: for (let isClustered = 0; isClustered <= 1; isClustered++) { for (let hasFilter = 0; hasFilter <= 1; hasFilter++) { - runTest(isClustered, hasFilter, /* direction = */ 1); - runTest(isClustered, hasFilter, /* direction = */ -1); + for (let hasHint = 0; hasHint <= 1; hasHint++) { + runTest(isClustered, hasFilter, hasHint, /* direction = */ 1); + runTest(isClustered, hasFilter, hasHint, /* direction = */ -1); + } } } -testCollations(); -testMinMax(/* direction = */ 1); -testMinMax(/* direction = */ -1); +testCollations(/* direction = */ 1); +testCollations(/* direction = */ -1); + +testMinMax(); -testPlanCache(); +testPlanCache(/* direction = */ 1); +testPlanCache(/* direction = */ -1); // If we're sorting on multiple columns, we still need an explicit sort let plan = clusteredColl.find().sort({[clusterField]: 1, a: 1}).explain(); diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h index bd9c856ffab..03e9773a057 100644 --- a/src/mongo/db/query/planner_access.h +++ b/src/mongo/db/query/planner_access.h @@ -106,7 +106,7 @@ public: static std::unique_ptr makeCollectionScan(const CanonicalQuery& query, bool tailable, const QueryPlannerParams& params, - int direction = 1); + int direction); /** * Return a plan that uses the provided index as a proxy for a collection scan. diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index c258c9e6867..10a5f86b464 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -469,12 +469,46 @@ static BSONObj finishMaxObj(const IndexEntry& indexEntry, } } +bool providesSort(const CanonicalQuery& query, const BSONObj& kp) { + return query.getFindCommandRequest().getSort().isPrefixOf( + kp, SimpleBSONElementComparator::kInstance); +} + +/** + * Determine whether this query has a sort that can be provided by the clustered index, if so, which + * direction the scan should be. If the collection is not clustered, or the sort cannot be provided, + * returns 'boost::none'. + */ +boost::optional determineClusteredScanDirection(const CanonicalQuery& query, + const QueryPlannerParams& params) { + if (params.clusteredInfo && query.getSortPattern() && + CollatorInterface::collatorsMatch(params.clusteredCollectionCollator, + query.getCollator())) { + auto kp = clustered_util::getSortPattern(params.clusteredInfo->getIndexSpec()); + if (providesSort(query, kp)) { + return 1; + } else if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) { + return -1; + } + } + + return boost::none; +} + +/** + * Determine the direction of the scan needed for the query. Defaults to 1 unless this is a + * clustered collection and we have a sort that can be provided by the clustered index. + */ +int determineCollscanDirection(const CanonicalQuery& query, const QueryPlannerParams& params) { + return determineClusteredScanDirection(query, params).value_or(1); +} + std::unique_ptr buildCollscanSoln(const CanonicalQuery& query, bool tailable, const QueryPlannerParams& params, - int direction = 1) { - std::unique_ptr solnRoot( - QueryPlannerAccess::makeCollectionScan(query, tailable, params, direction)); + boost::optional direction = boost::none) { + std::unique_ptr solnRoot(QueryPlannerAccess::makeCollectionScan( + query, tailable, params, direction.value_or(determineCollscanDirection(query, params)))); return QueryPlannerAnalysis::analyzeDataAccess(query, params, std::move(solnRoot)); } @@ -491,11 +525,6 @@ std::unique_ptr buildWholeIXSoln( return QueryPlannerAnalysis::analyzeDataAccess(query, params, std::move(solnRoot)); } -bool providesSort(const CanonicalQuery& query, const BSONObj& kp) { - return query.getFindCommandRequest().getSort().isPrefixOf( - kp, SimpleBSONElementComparator::kInstance); -} - StatusWith> QueryPlanner::cacheDataFromTaggedTree( const MatchExpression* const taggedTree, const vector& relevantIndices) { if (!taggedTree) { @@ -661,7 +690,7 @@ StatusWith> QueryPlanner::planFromCache( } else if (SolutionCacheData::COLLSCAN_SOLN == winnerCacheData.solnType) { // The cached solution is a collection scan. We don't cache collscans // with tailable==true, hence the false below. - auto soln = buildCollscanSoln(query, false, params); + auto soln = buildCollscanSoln(query, false, params, winnerCacheData.wholeIXSolnDir); if (!soln) { return Status(ErrorCodes::NoQueryExecutionPlans, "plan cache error: collection scan soln"); @@ -1260,32 +1289,19 @@ StatusWith>> QueryPlanner::plan( // The base index is sorted on some key, so it's possible we might want to use // a collection scan to provide the sort requested - if (params.clusteredInfo) { - if (CollatorInterface::collatorsMatch(params.clusteredCollectionCollator, - query.getCollator())) { - auto kp = clustered_util::getSortPattern(params.clusteredInfo->getIndexSpec()); - int direction = 0; - if (providesSort(query, kp)) { - direction = 1; - } else if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) { - direction = -1; - } - - if (direction != 0) { - auto soln = buildCollscanSoln(query, isTailable, params, direction); - if (soln) { - LOGV2_DEBUG(6082401, - 5, - "Planner: outputting soln that uses clustered index to " - "provide sort"); - SolutionCacheData* scd = new SolutionCacheData(); - scd->solnType = SolutionCacheData::COLLSCAN_SOLN; - scd->wholeIXSolnDir = direction; + if (auto direction = determineClusteredScanDirection(query, params)) { + auto soln = buildCollscanSoln(query, isTailable, params, direction); + if (soln) { + LOGV2_DEBUG(6082401, + 5, + "Planner: outputting soln that uses clustered index to " + "provide sort"); + SolutionCacheData* scd = new SolutionCacheData(); + scd->solnType = SolutionCacheData::COLLSCAN_SOLN; + scd->wholeIXSolnDir = *direction; - soln->cacheData.reset(scd); - out.push_back(std::move(soln)); - } - } + soln->cacheData.reset(scd); + out.push_back(std::move(soln)); } } } @@ -1349,7 +1365,8 @@ StatusWith>> QueryPlanner::plan( } if (possibleToCollscan && (collscanRequested || collScanRequired)) { - auto collscan = buildCollscanSoln(query, isTailable, params); + auto direction = determineCollscanDirection(query, params); + auto collscan = buildCollscanSoln(query, isTailable, params, direction); if (!collscan && collScanRequired) { return Status(ErrorCodes::NoQueryExecutionPlans, "Failed to build collection scan soln"); @@ -1361,6 +1378,7 @@ StatusWith>> QueryPlanner::plan( "collectionScan"_attr = redact(collscan->toString())); SolutionCacheData* scd = new SolutionCacheData(); scd->solnType = SolutionCacheData::COLLSCAN_SOLN; + scd->wholeIXSolnDir = direction; collscan->cacheData.reset(scd); out.push_back(std::move(collscan)); } -- cgit v1.2.1