diff options
author | Mihai Andrei <mihai.andrei@10gen.com> | 2022-04-12 23:04:07 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-04-13 01:25:51 +0000 |
commit | b74d02e64d4c0e5d6a55022b53cbb3abb2c04fba (patch) | |
tree | 3fc5b11a86e947289750e75cac2335afe6116855 | |
parent | feedcb7bb0942188ffc3a56881bed64ac0111ec5 (diff) | |
download | mongo-b74d02e64d4c0e5d6a55022b53cbb3abb2c04fba.tar.gz |
SERVER-64443 Verify that replanning works for HashJoin
-rw-r--r-- | jstests/noPassthrough/lookup_pushdown_cache.js | 66 | ||||
-rw-r--r-- | jstests/noPassthrough/plan_cache_replan_group_lookup.js | 200 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/planner_analysis.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.h | 5 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_cached_solution_planner.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_lookup.cpp | 18 |
7 files changed, 180 insertions, 117 deletions
diff --git a/jstests/noPassthrough/lookup_pushdown_cache.js b/jstests/noPassthrough/lookup_pushdown_cache.js deleted file mode 100644 index 24fd84959f8..00000000000 --- a/jstests/noPassthrough/lookup_pushdown_cache.js +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Tests basic functionality of integrating plan cache with lowered $lookup. Currently only the - * stages below group/lookup get cached in the classic cache. - */ -(function() { -"use strict"; - -load("jstests/libs/profiler.js"); // For 'getLatestProfilerEntry'. - -const conn = MongoRunner.runMongod({setParameter: {featureFlagSBELookupPushdown: true}}); -assert.neq(null, conn, "mongod was unable to start up"); -const name = "lookup_pushdown"; -const foreignCollName = "foreign_lookup_pushdown"; -let db = conn.getDB(name); -let coll = db[name]; -let foreignColl = db[foreignCollName]; - -function verifyPlanCache({query, isActive, planCacheKey}) { - const cacheEntries = coll.aggregate([{$planCacheStats: {}}]).toArray(); - assert.eq(cacheEntries.length, 1); - const cacheEntry = cacheEntries[0]; - // TODO(SERVER-61507): Convert the assertion to SBE cache once lowered $lookup integrates - // with SBE plan cache. - assert.eq(cacheEntry.version, 1); - assert.docEq(cacheEntry.createdFromQuery.query, query); - assert.eq(cacheEntry.isActive, isActive); - if (planCacheKey) { - assert.eq(cacheEntry.planCacheKey, planCacheKey); - } - return cacheEntry; -} - -// Create two indices to make sure the query gets multi-planned, so that the query subtree will -// be saved in the classic cache. -assert.commandWorked(coll.createIndexes([{a: 1, b: 1}, {a: 1, c: 1}])); -assert.commandWorked(coll.insert([{a: 1}, {a: 2}])); -assert.commandWorked(foreignColl.insert([{c: 1}, {c: 2}])); -const query = { - a: {$gt: 1} -}; -const pipeline = [ - {$match: query}, - {$lookup: {from: foreignCollName, localField: "a", foreignField: "c", as: "c_out"}} -]; - -// First run should create an inactive cache entry. -assert.eq(1, coll.aggregate(pipeline).itcount()); -const cacheEntry = verifyPlanCache({query, isActive: false}); -const planCacheKey = cacheEntry.planCacheKey; - -// Second run should mark the cache entry active. -assert.eq(1, coll.aggregate(pipeline).itcount()); -verifyPlanCache({query, planCacheKey, isActive: true}); - -// Third run should use the active cached entry. -assert.commandWorked(db.setProfilingLevel(2)); -assert.eq(1, coll.aggregate(pipeline).itcount()); -const profileEntry = getLatestProfilerEntry(db, {}); -assert.eq(planCacheKey, profileEntry.planCacheKey); - -// Explain output should show the same plan cache key. -const explain = coll.explain().aggregate(pipeline); -assert.eq(planCacheKey, explain.queryPlanner.planCacheKey); - -MongoRunner.stopMongod(conn); -}()); diff --git a/jstests/noPassthrough/plan_cache_replan_group_lookup.js b/jstests/noPassthrough/plan_cache_replan_group_lookup.js index de6d9c1a42c..be3c13b3eb7 100644 --- a/jstests/noPassthrough/plan_cache_replan_group_lookup.js +++ b/jstests/noPassthrough/plan_cache_replan_group_lookup.js @@ -19,14 +19,6 @@ const coll = db.plan_cache_replan_group_lookup; const foreignCollName = "foreign"; coll.drop(); -if (checkSBEEnabled(db, ["featureFlagSbePlanCache"])) { - jsTest.log("Skipping test because SBE and SBE plan cache are both enabled."); - MongoRunner.stopMongod(conn); - return; -} -// The test should have the same caching behavior whether or not aggregation stages are being -// lowered into SBE. - function getPlansForCacheEntry(match) { const matchingCacheEntries = coll.getPlanCache().list([{$match: match}]); assert.eq(matchingCacheEntries.length, 1, coll.getPlanCache().list()); @@ -39,16 +31,34 @@ function planHasIxScanStageForKey(planStats, keyPattern) { return false; } - return bsonWoCompare(keyPattern, stage.keyPattern) == 0; + return bsonWoCompare(keyPattern, stage.keyPattern) === 0; } -function assertCacheUsage(multiPlanning, activeCacheEntry, cachedIndex) { +function assertCacheUsage( + multiPlanning, cacheEntryIsActive, cachedIndex, pipeline, aggOptions = {}) { const profileObj = getLatestProfilerEntry(db, {op: "command", ns: coll.getFullName()}); const queryHash = profileObj.queryHash; + const planCacheKey = profileObj.planCacheKey; assert.eq(multiPlanning, !!profileObj.fromMultiPlanner); const entry = getPlansForCacheEntry({queryHash: queryHash}); - assert.eq(activeCacheEntry, entry.isActive); + // TODO(SERVER-61507): Convert the assertion to SBE cache once lowered $lookup integrates + // with SBE plan cache. + assert.eq(entry.version, 1); + assert.eq(cacheEntryIsActive, entry.isActive); + + // If the entry is active, we should have a plan cache key. + if (entry.isActive) { + assert(entry.planCacheKey); + } + if (planCacheKey) { + assert.eq(entry.planCacheKey, planCacheKey); + const explain = coll.explain().aggregate(pipeline, aggOptions); + const explainKey = explain.hasOwnProperty("queryPlanner") + ? explain.queryPlanner.planCacheKey + : explain.stages[0].$cursor.queryPlanner.planCacheKey; + assert.eq(explainKey, entry.planCacheKey); + } assert.eq(planHasIxScanStageForKey(getCachedPlan(entry.cachedPlan), cachedIndex), true, entry); } @@ -69,6 +79,20 @@ for (let i = 1000; i < 1100; i++) { assert.commandWorked(coll.createIndex({a: 1})); assert.commandWorked(coll.createIndex({b: 1})); +function setUpActiveCacheEntry(pipeline, cachedIndex) { + // For the first run, the query should go through multiplanning and create inactive cache entry. + assert.eq(2, coll.aggregate(pipeline).toArray()[0].n); + assertCacheUsage(true /*multiPlanning*/, false /*cacheEntryIsActive*/, cachedIndex, pipeline); + + // After the second run, the inactive cache entry should be promoted to an active entry. + assert.eq(2, coll.aggregate(pipeline).toArray()[0].n); + assertCacheUsage(true /*multiPlanning*/, true /*cacheEntryIsActive*/, cachedIndex, pipeline); + + // For the third run, the active cached query should be used. + assert.eq(2, coll.aggregate(pipeline).toArray()[0].n); + assertCacheUsage(false /*multiPlanning*/, true /*cacheEntryIsActive*/, cachedIndex, pipeline); +} + function testFn(aIndexPipeline, bIndexPipeline, setUpFn = undefined, @@ -83,26 +107,43 @@ function testFn(aIndexPipeline, explainFn(bIndexPipeline); } - // For the first run, the query should go through multiplanning and create inactive cache entry. - assert.eq(2, coll.aggregate(aIndexPipeline).toArray()[0].n); - assertCacheUsage(true /*multiPlanning*/, false /*activeCacheEntry*/, {a: 1} /*cachedIndex*/); - - // After the second run, the inactive cache entry should be promoted to an active entry. - assert.eq(2, coll.aggregate(aIndexPipeline).toArray()[0].n); - assertCacheUsage(true /*multiPlanning*/, true /*activeCacheEntry*/, {a: 1} /*cachedIndex*/); - - // For the third run, the active cached query should be used. - assert.eq(2, coll.aggregate(aIndexPipeline).toArray()[0].n); - assertCacheUsage(false /*multiPlanning*/, true /*activeCacheEntry*/, {a: 1} /*cachedIndex*/); + setUpActiveCacheEntry(aIndexPipeline, {a: 1} /* cachedIndex */); // Now run the other pipeline, which has the same query shape but is faster with a different // index. It should trigger re-planning of the query. assert.eq(3, coll.aggregate(bIndexPipeline).toArray()[0].n); - assertCacheUsage(true /*multiPlanning*/, true /*activeCacheEntry*/, {b: 1} /*cachedIndex*/); + + // TODO(SERVER-65345): When the SBE plan cache is enabled, we should be using the classic plan + // cache for queries with pushed down $lookup and $group stages because these stages aren't + // currently supported in the SBE plan cache. However, it appears that we are generating SBE + // plan cache keys in the presence of pushed down $lookup and $group stages. The generated + // plan cache keys are slightly different, and as a result, we will not be able to use the + // active cache entry above, even though the $match queries from both pipelines have the same + // shape. Once this bug is fixed, the block below should be deleted. + if (checkSBEEnabled(db, ["featureFlagSbePlanCache"])) { + // As stated above, we will not be able to reuse the plan cache entry that we've generated + // for 'aIndexPipeline'. As such, we have to run the query an extra time to set up an + // active cache entry for 'bIndexPipeline'. + assertCacheUsage(true /*multiPlanning*/, + false /*cacheEntryIsActive*/, + {b: 1} /*cachedIndex*/, + bIndexPipeline); + assert.eq(3, coll.aggregate(bIndexPipeline).toArray()[0].n); + } // The other pipeline again, The cache should be used now. + assertCacheUsage(true /*multiPlanning*/, + true /*cacheEntryIsActive*/, + {b: 1} /*cachedIndex*/, + bIndexPipeline); + + // Run it once again so that the cache entry is reused. assert.eq(3, coll.aggregate(bIndexPipeline).toArray()[0].n); - assertCacheUsage(false /*multiPlanning*/, true /*activeCacheEntry*/, {b: 1} /*cachedIndex*/); + assertCacheUsage(false /*multiPlanning*/, + true /*cacheEntryIsActive*/, + {b: 1} /*cachedIndex*/, + bIndexPipeline); + if (tearDownFn) { tearDownFn(); } @@ -141,11 +182,11 @@ function dropLookupForeignColl() { } const lookupPushdownEnabled = checkSBEEnabled(db, ["featureFlagSBELookupPushdown"]); -function verifyCorrectLookupAlgorithmUsed(targetJoinAlgorithm, pipeline, options = {}) { +function verifyCorrectLookupAlgorithmUsed(targetJoinAlgorithm, pipeline, aggOptions = {}) { if (!lookupPushdownEnabled) { return; } - const explain = coll.explain().aggregate(pipeline, options); + const explain = coll.explain().aggregate(pipeline, aggOptions); const eqLookupNodes = getAggPlanStages(explain, "EQ_LOOKUP"); // Verify via explain that $lookup was lowered and appropriate $lookup algorithm was chosen. @@ -170,9 +211,46 @@ testFn(aLookup, assert.commandWorked(db[foreignCollName].createIndex({foreignKey: 1})); }, dropLookupForeignColl, - (pipeline) => verifyCorrectLookupAlgorithmUsed("IndexedLoopJoin", pipeline)); - -// TODO SERVER-64443: Verify that replanning works when HashLookupStage is implemented. + (pipeline) => + verifyCorrectLookupAlgorithmUsed("IndexedLoopJoin", pipeline, {allowDiskUse: false})); + +// HJ. +testFn(aLookup, bLookup, () => { + createLookupForeignColl(); +}, dropLookupForeignColl, (pipeline) => verifyCorrectLookupAlgorithmUsed("HashJoin", pipeline, { + allowDiskUse: true + })); + +// Verify that a cached plan which initially uses an INLJ will use HJ once the index is dropped and +// the foreign collection is dropped, and NLJ when 'allowDiskUse' is set to 'false'. + +// For the first run, the query should go through multiplanning and create inactive cache entry. +createLookupForeignColl(); +assert.commandWorked(db[foreignCollName].createIndex({foreignKey: 1})); +verifyCorrectLookupAlgorithmUsed("IndexedLoopJoin", aLookup, {allowDiskUse: true}); +setUpActiveCacheEntry(aLookup, {a: 1} /* cachedIndex */); + +// Drop the index. This should result in using the active plan, but switching to HJ. +assert.commandWorked(db[foreignCollName].dropIndex({foreignKey: 1})); +verifyCorrectLookupAlgorithmUsed("HashJoin", aLookup, {allowDiskUse: true}); +assert.eq(2, coll.aggregate(aLookup).toArray()[0].n); +assertCacheUsage( + false /*multiPlanning*/, true /*cacheEntryIsActive*/, {a: 1} /*cachedIndex*/, aLookup); + +// Set 'allowDiskUse' to 'false'. This should still result in using the active plan, but switching +// to NLJ. +verifyCorrectLookupAlgorithmUsed("NestedLoopJoin", aLookup, {allowDiskUse: false}); +assert.eq(2, coll.aggregate(aLookup).toArray()[0].n); +assertCacheUsage( + false /*multiPlanning*/, true /*cacheEntryIsActive*/, {a: 1} /*cachedIndex*/, aLookup); + +// Drop the foreign collection. This should still result in using the active plan with a special +// empty collection plan. +dropLookupForeignColl(); +verifyCorrectLookupAlgorithmUsed("NonExistentForeignCollection", aLookup, {allowDiskUse: true}); +assert.eq(2, coll.aggregate(aLookup).toArray()[0].n); +assertCacheUsage( + false /*multiPlanning*/, true /*cacheEntryIsActive*/, {a: 1} /*cachedIndex*/, aLookup); // Verify that changing the plan for the right side does not trigger a replan. const foreignColl = db[foreignCollName]; @@ -204,13 +282,21 @@ function runQuery(options = {}) { } // Verify that we are using IndexedLoopJoin. -verifyCorrectLookupAlgorithmUsed("IndexedLoopJoin", avoidReplanPipeline); +verifyCorrectLookupAlgorithmUsed("IndexedLoopJoin", avoidReplanPipeline, {allowDiskUse: false}); -runQuery(); -assertCacheUsage(true /*multiPlanning*/, false /*activeCacheEntry*/, {b: 1} /*cachedIndex*/); +runQuery({allowDiskUse: false}); +assertCacheUsage(true /*multiPlanning*/, + false /*activeCacheEntry*/, + {b: 1} /*cachedIndex*/, + avoidReplanPipeline, + {allowDiskUse: false}); -runQuery(); -assertCacheUsage(true /*multiPlanning*/, true /*activeCacheEntry*/, {b: 1} /*cachedIndex*/); +runQuery({allowDiskUse: false}); +assertCacheUsage(true /*multiPlanning*/, + true /*activeCacheEntry*/, + {b: 1} /*cachedIndex*/, + avoidReplanPipeline, + {allowDiskUse: false}); // After dropping the index on the right-hand side, we should NOT replan the cached query. We // will, however, choose a different join algorithm. @@ -220,18 +306,54 @@ assert.commandWorked(foreignColl.dropIndex({c: 1})); verifyCorrectLookupAlgorithmUsed("NestedLoopJoin", avoidReplanPipeline, {allowDiskUse: false}); runQuery({allowDiskUse: false}); -assertCacheUsage(false /*multiPlanning*/, true /*activeCacheEntry*/, {b: 1} /*cachedIndex*/); +assertCacheUsage(false /*multiPlanning*/, + true /*activeCacheEntry*/, + {b: 1} /*cachedIndex*/, + avoidReplanPipeline, + {allowDiskUse: false}); + runQuery({allowDiskUse: false}); -assertCacheUsage(false /*multiPlanning*/, true /*activeCacheEntry*/, {b: 1} /*cachedIndex*/); +assertCacheUsage(false /*multiPlanning*/, + true /*activeCacheEntry*/, + {b: 1} /*cachedIndex*/, + avoidReplanPipeline, + {allowDiskUse: false}); // Run with 'allowDiskUse: true'. This should now use HashJoin, and we should still avoid // replanning the cached query. - verifyCorrectLookupAlgorithmUsed("HashJoin", avoidReplanPipeline, {allowDiskUse: true}); +// TODO(SERVER-65345): When the SBE plan cache is enabled, we will encode the 'allowDiskUse' +// option when constructing the plan cache key. As such, we will not be able to reuse the cache +// entry generated above to execute a HashJoin. +if (checkSBEEnabled(db, ["featureFlagSbePlanCache"])) { + runQuery({allowDiskUse: true}); + assertCacheUsage(true /*multiPlanning*/, + false /*activeCacheEntry*/, + {b: 1} /*cachedIndex*/, + avoidReplanPipeline, + {allowDiskUse: true}); + + runQuery({allowDiskUse: true}); + assertCacheUsage(true /*multiPlanning*/, + true /*activeCacheEntry*/, + {b: 1} /*cachedIndex*/, + avoidReplanPipeline, + {allowDiskUse: true}); +} + runQuery({allowDiskUse: true}); -assertCacheUsage(false /*multiPlanning*/, true /*activeCacheEntry*/, {b: 1} /*cachedIndex*/); +assertCacheUsage(false /*multiPlanning*/, + true /*activeCacheEntry*/, + {b: 1} /*cachedIndex*/, + avoidReplanPipeline, + {allowDiskUse: true}); runQuery({allowDiskUse: true}); -assertCacheUsage(false /*multiPlanning*/, true /*activeCacheEntry*/, {b: 1} /*cachedIndex*/); +assertCacheUsage(false /*multiPlanning*/, + true /*activeCacheEntry*/, + {b: 1} /*cachedIndex*/, + avoidReplanPipeline, + {allowDiskUse: true}); + MongoRunner.stopMongod(conn); }()); diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 4d4ed564928..2a5fcad2cc8 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -588,7 +588,7 @@ public: } /** - * Returns a reference to the main collection that is targetted by this query. + * Returns a reference to the main collection that is targeted by this query. */ virtual const CollectionPtr& getMainCollection() const = 0; diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp index eea3c702a61..654fa0f56ca 100644 --- a/src/mongo/db/query/planner_analysis.cpp +++ b/src/mongo/db/query/planner_analysis.cpp @@ -670,7 +670,9 @@ void QueryPlannerAnalysis::determineLookupStrategy( return boost::none; }(); - if (foreignIndex) { + if (!foreignCollItr->second.exists) { + eqLookupNode->lookupStrategy = EqLookupNode::LookupStrategy::kNonExistentForeignCollection; + } else if (foreignIndex) { eqLookupNode->lookupStrategy = EqLookupNode::LookupStrategy::kIndexedLoopJoin; eqLookupNode->idxEntry = foreignIndex; } else if (allowDiskUse && isEligibleForHashJoin(foreignCollItr->second)) { diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 6446fe42cc5..694f5d7c681 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -1462,6 +1462,9 @@ struct EqLookupNode : public QuerySolutionNode { // Execute the join by iterating over the foreign collection for each local key. kNestedLoopJoin, + + // Create a plan for a non existent foreign collection. + kNonExistentForeignCollection, }; static StringData serializeLookupStrategy(LookupStrategy strategy) { @@ -1472,6 +1475,8 @@ struct EqLookupNode : public QuerySolutionNode { return "IndexedLoopJoin"; case EqLookupNode::LookupStrategy::kNestedLoopJoin: return "NestedLoopJoin"; + case EqLookupNode::LookupStrategy::kNonExistentForeignCollection: + return "NonExistentForeignCollection"; default: uasserted(6357204, "Unknown $lookup strategy type"); } diff --git a/src/mongo/db/query/sbe_cached_solution_planner.cpp b/src/mongo/db/query/sbe_cached_solution_planner.cpp index b0ec16dac8c..ace203172f3 100644 --- a/src/mongo/db/query/sbe_cached_solution_planner.cpp +++ b/src/mongo/db/query/sbe_cached_solution_planner.cpp @@ -170,9 +170,9 @@ CandidatePlans CachedSolutionPlanner::replan(bool shouldCache, std::string reaso // The plan drawn from the cache is being discarded, and should no longer be registered with the // yield policy. _yieldPolicy->clearRegisteredPlans(); - const auto& mainColl = _collections.getMainCollection(); if (shouldCache) { + const auto& mainColl = _collections.getMainCollection(); // Deactivate the current cache entry. auto cache = CollectionQueryInfo::get(mainColl).getPlanCache(); cache->deactivate(plan_cache_key_factory::make<mongo::PlanCacheKey>(_cq, mainColl)); diff --git a/src/mongo/db/query/sbe_stage_builder_lookup.cpp b/src/mongo/db/query/sbe_stage_builder_lookup.cpp index 90b868b4b72..82dbee56d99 100644 --- a/src/mongo/db/query/sbe_stage_builder_lookup.cpp +++ b/src/mongo/db/query/sbe_stage_builder_lookup.cpp @@ -999,18 +999,18 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder -> std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> { const auto& foreignColl = _collections.lookupCollection(NamespaceString(eqLookupNode->foreignCollection)); - // When foreign collection doesn't exist, we create stages that simply append empty arrays - // to each local document and do not consider the case that foreign collection may be - // created during the query, since we cannot easily create dynamic plan stages and it has - // messier semantics. Builds a project stage that projects an empty array for each local - // document. - if (!foreignColl) { - return buildNonExistentForeignCollLookupStage( - std::move(localStage), eqLookupNode->nodeId(), _slotIdGenerator); - } boost::optional<SlotId> collatorSlot = _state.data->env->getSlotIfExists("collator"_sd); switch (eqLookupNode->lookupStrategy) { + // When foreign collection doesn't exist, we create stages that simply append empty + // arrays to each local document and do not consider the case that foreign collection + // may be created during the query, since we cannot easily create dynamic plan stages + // and it has messier semantics. Builds a project stage that projects an empty array for + // each local document. + case EqLookupNode::LookupStrategy::kNonExistentForeignCollection: { + return buildNonExistentForeignCollLookupStage( + std::move(localStage), eqLookupNode->nodeId(), _slotIdGenerator); + } case EqLookupNode::LookupStrategy::kIndexedLoopJoin: { tassert( 6357201, |