summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenis Grebennicov <denis.grebennicov@mongodb.com>2022-07-15 13:59:06 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-07-15 14:43:46 +0000
commit84bbe8a09bad398a08a899405ad353c07c018993 (patch)
tree5d4caf5d5a2d00fce35796df5ef19e72bef52a32
parent8f895e99ade9d42f015595a60500bb694b9027ce (diff)
downloadmongo-84bbe8a09bad398a08a899405ad353c07c018993.tar.gz
SERVER-66935 Invalidate $lookup plan cache when foreign collection size changes
-rw-r--r--jstests/noPassthrough/plan_cache_replan_group_lookup.js91
-rw-r--r--src/mongo/db/exec/plan_cache_util.cpp2
-rw-r--r--src/mongo/db/query/get_executor.cpp18
-rw-r--r--src/mongo/db/query/plan_explainer_sbe.cpp2
-rw-r--r--src/mongo/db/query/planner_analysis.cpp22
-rw-r--r--src/mongo/db/query/planner_analysis.h6
-rw-r--r--src/mongo/db/query/query_planner.cpp2
-rw-r--r--src/mongo/db/query/query_planner_test_lib.cpp2
-rw-r--r--src/mongo/db/query/query_solution.h6
-rw-r--r--src/mongo/db/query/query_solution_test.cpp21
-rw-r--r--src/mongo/db/query/sbe_cached_solution_planner.cpp91
-rw-r--r--src/mongo/db/query/sbe_cached_solution_planner.h6
-rw-r--r--src/mongo/db/query/sbe_stage_builder.h5
-rw-r--r--src/mongo/db/query/sbe_stage_builder_lookup.cpp3
-rw-r--r--src/mongo/db/query/sbe_stage_builder_lookup_test.cpp3
15 files changed, 216 insertions, 64 deletions
diff --git a/jstests/noPassthrough/plan_cache_replan_group_lookup.js b/jstests/noPassthrough/plan_cache_replan_group_lookup.js
index 5da23cf502f..e5657c35535 100644
--- a/jstests/noPassthrough/plan_cache_replan_group_lookup.js
+++ b/jstests/noPassthrough/plan_cache_replan_group_lookup.js
@@ -461,6 +461,97 @@ assertCacheUsage(false /*multiPlanning*/,
"b_1" /*cachedIndexName*/,
avoidReplanLookupPipeline);
+/**
+ * Tests if replanning and cache invalidation are performed for hash-join plans, when foreign
+ * collection size increases.
+ *
+ * 'singleSolution' indicates whether the initial aggregation pipeline run will result in a single
+ * solution.
+ */
+function testReplanningAndCacheInvalidationOnForeignCollSizeIncrease(singleSolution) {
+ if (!sbeFullEnabled) {
+ return;
+ }
+
+ const coll = db.plan_cache_replan_group_lookup_coll_resize;
+ const foreignColl = db.plan_cache_replan_group_lookup_coll_resize_foreign;
+
+ const pipeline = [
+ {$match: {a: 2, b: 2}},
+ {$lookup: {from: foreignColl.getName(), localField: "a", foreignField: "a", as: "out"}}
+ ];
+
+ function runLookup() {
+ assert.eq([{_id: 2, a: 2, b: 2, out: [{_id: 1, a: 2, b: 1}]}],
+ coll.aggregate(pipeline).toArray());
+ }
+
+ // Asserts that the plan cache has only one entry and checks if it has a hash_lookup stage.
+ function assertPlanCacheEntry(shouldHaveHashLookup) {
+ const entries = coll.getPlanCache().list();
+ assert.eq(entries.length, 1, entries);
+ assert(entries[0].isActive, entries[0]);
+ const hasHashLookup = entries[0].cachedPlan.stages.includes("hash_lookup");
+ assert.eq(shouldHaveHashLookup, hasHashLookup, entries[0]);
+ }
+
+ // Set maximum number of documents in the foreign collection to 5.
+ const initialMaxNoOfDocuments =
+ assert
+ .commandWorked(db.adminCommand(
+ {getParameter: 1, internalQueryCollectionMaxNoOfDocumentsToChooseHashJoin: 1}))
+ .internalQueryCollectionMaxNoOfDocumentsToChooseHashJoin;
+ assert.commandWorked(db.adminCommand(
+ {setParameter: 1, internalQueryCollectionMaxNoOfDocumentsToChooseHashJoin: 5}));
+
+ coll.drop();
+ foreignColl.drop();
+
+ if (!singleSolution) {
+ assert.commandWorked(coll.createIndex({a: 1}));
+ assert.commandWorked(coll.createIndex({b: 1}));
+ }
+
+ assert.commandWorked(coll.insert({_id: 1, a: 1, b: 1}));
+ assert.commandWorked(coll.insert({_id: 2, a: 2, b: 2}));
+ assert.commandWorked(coll.insert({_id: 3, a: 2, b: 3}));
+ assert.commandWorked(coll.insert({_id: 4, a: 3, b: 4}));
+ assert.commandWorked(coll.insert({_id: 5, a: 5, b: 5}));
+ assert.commandWorked(coll.insert({_id: 6, a: 6, b: 6}));
+ assert.commandWorked(coll.insert({_id: 7, a: 6, b: 7}));
+ assert.commandWorked(foreignColl.insert({_id: 1, a: 2, b: 1}));
+ assert.commandWorked(foreignColl.insert({_id: 2, a: 3, b: 2}));
+
+ // Ensure that plan cache entry has a plan with hash-join in it.
+ runLookup();
+ runLookup();
+ assertPlanCacheEntry(true /* shouldHaveHashLookup */);
+ verifyCorrectLookupAlgorithmUsed("HashJoin", pipeline);
+
+ // Increase the size of the foreign collection
+ assert.commandWorked(foreignColl.insert({a: 3, b: 3}));
+ assert.commandWorked(foreignColl.insert({a: 5, b: 4}));
+ assert.commandWorked(foreignColl.insert({a: 6, b: 5}));
+ assert.commandWorked(foreignColl.insert({a: 6, b: 6}));
+ assert.commandWorked(foreignColl.insert({a: 7, b: 7}));
+
+ // Ensure that plan cache entry does not have a plan with hash-join in it.
+ runLookup();
+ runLookup();
+ assertPlanCacheEntry(false /* shouldHaveHashLookup */);
+
+ // Ensure that hash-join is no longer used in the plan.
+ verifyCorrectLookupAlgorithmUsed("NestedLoopJoin", pipeline);
+
+ // Reset the 'internalQueryCollectionMaxNoOfDocumentsToChooseHashJoin' knob.
+ assert.commandWorked(db.adminCommand({
+ setParameter: 1,
+ internalQueryCollectionMaxNoOfDocumentsToChooseHashJoin: initialMaxNoOfDocuments
+ }));
+}
+testReplanningAndCacheInvalidationOnForeignCollSizeIncrease(true /* singleSolution */);
+testReplanningAndCacheInvalidationOnForeignCollSizeIncrease(false /* singleSolution */);
+
// Disable $lookup pushdown. This should not invalidate the cache entry, but it should prevent
// $lookup from being pushed down.
assert.commandWorked(
diff --git a/src/mongo/db/exec/plan_cache_util.cpp b/src/mongo/db/exec/plan_cache_util.cpp
index 1bd6f9fe75d..34e819890f4 100644
--- a/src/mongo/db/exec/plan_cache_util.cpp
+++ b/src/mongo/db/exec/plan_cache_util.cpp
@@ -176,7 +176,7 @@ plan_cache_debug_info::DebugInfoSBE buildDebugInfo(const QuerySolution* solution
}
case STAGE_EQ_LOOKUP: {
auto eln = static_cast<const EqLookupNode*>(node);
- auto& secondaryStats = debugInfo.secondaryStats[eln->foreignCollection];
+ auto& secondaryStats = debugInfo.secondaryStats[eln->foreignCollection.toString()];
if (eln->lookupStrategy == EqLookupNode::LookupStrategy::kIndexedLoopJoin) {
tassert(6466200, "Index join lookup should have an index entry", eln->idxEntry);
secondaryStats.indexesUsed.push_back(eln->idxEntry->identifier.catalogName);
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index ae3c0d20c5f..c6a54e26102 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -1247,7 +1247,8 @@ std::unique_ptr<sbe::RuntimePlanner> makeRuntimePlannerIfNeeded(
boost::optional<size_t> decisionWorks,
bool needsSubplanning,
PlanYieldPolicySBE* yieldPolicy,
- size_t plannerOptions) {
+ size_t plannerOptions,
+ const stage_builder::PlanStageData& planStageData) {
// If we have multiple solutions, we always need to do the runtime planning.
if (numSolutions > 1) {
invariant(!needsSubplanning && !decisionWorks);
@@ -1279,15 +1280,15 @@ std::unique_ptr<sbe::RuntimePlanner> makeRuntimePlannerIfNeeded(
invariant(numSolutions == 1);
- // If we have a single solution but it was created from a cached plan, we will need to do the
- // runtime planning to check if the cached plan still performs efficiently, or requires
- // re-planning. The 'decisionWorks' is used to determine whether the existing cache entry should
- // be evicted, and the query re-planned.
- if (decisionWorks) {
+ // If we have a single solution and the plan is not pinned or plan contains a hash_lookup stage,
+ // we will need we will need to do the runtime planning to check if the cached plan still
+ // performs efficiently, or requires re-planning.
+ const bool hasHashLookup = !planStageData.foreignHashJoinCollections.empty();
+ if (decisionWorks || hasHashLookup) {
QueryPlannerParams plannerParams;
plannerParams.options = plannerOptions;
return std::make_unique<sbe::CachedSolutionPlanner>(
- opCtx, collections, *canonicalQuery, plannerParams, *decisionWorks, yieldPolicy);
+ opCtx, collections, *canonicalQuery, plannerParams, decisionWorks, yieldPolicy);
}
// Runtime planning is not required.
@@ -1344,7 +1345,8 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getSlotBasedExe
planningResult->decisionWorks(),
planningResult->needsSubplanning(),
yieldPolicy.get(),
- plannerParams.options)) {
+ plannerParams.options,
+ roots[0].second)) {
// Do the runtime planning and pick the best candidate plan.
auto candidates = planner->plan(std::move(solutions), std::move(roots));
diff --git a/src/mongo/db/query/plan_explainer_sbe.cpp b/src/mongo/db/query/plan_explainer_sbe.cpp
index 05e9e472fcb..fea5b81cea4 100644
--- a/src/mongo/db/query/plan_explainer_sbe.cpp
+++ b/src/mongo/db/query/plan_explainer_sbe.cpp
@@ -172,7 +172,7 @@ void statsToBSON(const QuerySolutionNode* node,
case STAGE_EQ_LOOKUP: {
auto eln = static_cast<const EqLookupNode*>(node);
- bob->append("foreignCollection", eln->foreignCollection);
+ bob->append("foreignCollection", eln->foreignCollection.toString());
bob->append("localField", eln->joinFieldLocal.fullPath());
bob->append("foreignField", eln->joinFieldForeign.fullPath());
bob->append("asField", eln->joinField.fullPath());
diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp
index ce58c70ab64..d5e8f0acc28 100644
--- a/src/mongo/db/query/planner_analysis.cpp
+++ b/src/mongo/db/query/planner_analysis.cpp
@@ -644,18 +644,6 @@ void removeInclusionProjectionBelowGroupRecursive(QuerySolutionNode* solnRoot) {
}
}
-// Checks if the foreign collection is eligible for the hash join algorithm. We conservatively
-// choose the hash join algorithm for cases when the hash table is unlikely to spill data.
-bool isEligibleForHashJoin(const SecondaryCollectionInfo& foreignCollInfo) {
- return !internalQueryDisableLookupExecutionUsingHashJoin.load() && foreignCollInfo.exists &&
- foreignCollInfo.noOfRecords <=
- internalQueryCollectionMaxNoOfDocumentsToChooseHashJoin.load() &&
- foreignCollInfo.approximateDataSizeBytes <=
- internalQueryCollectionMaxDataSizeBytesToChooseHashJoin.load() &&
- foreignCollInfo.storageSizeBytes <=
- internalQueryCollectionMaxStorageSizeBytesToChooseHashJoin.load();
-}
-
// Determines whether 'index' is eligible for executing the right side of a pushed down $lookup over
// 'foreignField'.
bool isIndexEligibleForRightSideOfLookupPushdown(const IndexEntry& index,
@@ -667,6 +655,16 @@ bool isIndexEligibleForRightSideOfLookupPushdown(const IndexEntry& index,
}
} // namespace
+bool QueryPlannerAnalysis::isEligibleForHashJoin(const SecondaryCollectionInfo& foreignCollInfo) {
+ return !internalQueryDisableLookupExecutionUsingHashJoin.load() && foreignCollInfo.exists &&
+ foreignCollInfo.noOfRecords <=
+ internalQueryCollectionMaxNoOfDocumentsToChooseHashJoin.load() &&
+ foreignCollInfo.approximateDataSizeBytes <=
+ internalQueryCollectionMaxDataSizeBytesToChooseHashJoin.load() &&
+ foreignCollInfo.storageSizeBytes <=
+ internalQueryCollectionMaxStorageSizeBytesToChooseHashJoin.load();
+}
+
// static
std::unique_ptr<QuerySolution> QueryPlannerAnalysis::removeInclusionProjectionBelowGroup(
std::unique_ptr<QuerySolution> soln) {
diff --git a/src/mongo/db/query/planner_analysis.h b/src/mongo/db/query/planner_analysis.h
index 7ba6e8feeca..3233ef82d7b 100644
--- a/src/mongo/db/query/planner_analysis.h
+++ b/src/mongo/db/query/planner_analysis.h
@@ -147,6 +147,12 @@ public:
const std::map<NamespaceString, SecondaryCollectionInfo>& collectionsInfo,
bool allowDiskUse,
const CollatorInterface* collator);
+
+ /**
+ * Checks if the foreign collection is eligible for the hash join algorithm. We conservatively
+ * choose the hash join algorithm for cases when the hash table is unlikely to spill to disk.
+ */
+ static bool isEligibleForHashJoin(const SecondaryCollectionInfo& foreignCollInfo);
};
} // namespace mongo
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index 5ec4c27f84a..b7df376979a 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -1412,7 +1412,7 @@ std::unique_ptr<QuerySolution> QueryPlanner::extendWithAggPipeline(
query.getCollator());
auto eqLookupNode =
std::make_unique<EqLookupNode>(std::move(solnForAgg),
- lookupStage->getFromNs().toString(),
+ lookupStage->getFromNs(),
lookupStage->getLocalField()->fullPath(),
lookupStage->getForeignField()->fullPath(),
lookupStage->getAsField().fullPath(),
diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp
index e3e045a721c..ce1754ba817 100644
--- a/src/mongo/db/query/query_planner_test_lib.cpp
+++ b/src/mongo/db/query/query_planner_test_lib.cpp
@@ -1395,7 +1395,7 @@ Status QueryPlannerTestLib::solutionMatches(const BSONObj& testSoln,
<< testSoln.toString()};
}
- if (expectedForeignCollection.str() != actualEqLookupNode->foreignCollection) {
+ if (expectedForeignCollection.str() != actualEqLookupNode->foreignCollection.toString()) {
return {
ErrorCodes::Error{6267502},
str::stream() << "Test solution 'foreignCollection' does not match actual; test "
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 0965d1cd507..5f379bc5b05 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -1479,7 +1479,7 @@ struct EqLookupNode : public QuerySolutionNode {
}
EqLookupNode(std::unique_ptr<QuerySolutionNode> child,
- const std::string& foreignCollection,
+ const NamespaceString& foreignCollection,
const FieldPath& joinFieldLocal,
const FieldPath& joinFieldForeign,
const FieldPath& joinField,
@@ -1528,9 +1528,9 @@ struct EqLookupNode : public QuerySolutionNode {
std::unique_ptr<QuerySolutionNode> clone() const final;
/**
- * The foreign (inner) collection namespace name.
+ * The foreign (inner) collection namespace string.
*/
- std::string foreignCollection;
+ NamespaceString foreignCollection;
/**
* The local (outer) join field.
diff --git a/src/mongo/db/query/query_solution_test.cpp b/src/mongo/db/query/query_solution_test.cpp
index 55003d3f8c1..522c2e108b9 100644
--- a/src/mongo/db/query/query_solution_test.cpp
+++ b/src/mongo/db/query/query_solution_test.cpp
@@ -1134,7 +1134,7 @@ TEST(QuerySolutionTest, EqLookupNodeWithIndexScan) {
scanNode->bounds.endKey = BSON("a" << 1 << "b" << 1);
EqLookupNode node(std::move(scanNode),
- "col",
+ NamespaceString("db.col"),
"local",
"foreign",
"as",
@@ -1169,7 +1169,7 @@ TEST(QuerySolutionTest, EqLookupNodeWithIndexScanFieldOverwrite) {
<< "1");
EqLookupNode node(std::move(scanNode),
- "col",
+ NamespaceString("db.col"),
"local",
"foreign",
"b",
@@ -1242,7 +1242,7 @@ TEST(QuerySolutionTest, FieldAvailabilityOutputStreamOperator) {
TEST(QuerySolutionTest, GetSecondaryNamespaceVectorOverSingleEqLookupNode) {
auto scanNode = std::make_unique<IndexScanNode>(buildSimpleIndexEntry(BSON("a" << 1)));
const NamespaceString mainNss("db.main");
- const auto foreignColl = "db.col";
+ const NamespaceString foreignColl("db.col");
auto root = std::make_unique<EqLookupNode>(std::move(scanNode),
foreignColl,
"local",
@@ -1257,7 +1257,7 @@ TEST(QuerySolutionTest, GetSecondaryNamespaceVectorOverSingleEqLookupNode) {
qs.setRoot(std::move(root));
// The output vector should only contain 'foreignColl'.
- std::vector<NamespaceStringOrUUID> expectedNssVector{NamespaceString(foreignColl)};
+ std::vector<NamespaceStringOrUUID> expectedNssVector{foreignColl};
assertNamespaceVectorsAreEqual(qs.getAllSecondaryNamespaces(mainNss), expectedNssVector);
}
@@ -1265,7 +1265,7 @@ TEST(QuerySolutionTest, GetSecondaryNamespaceVectorDeduplicatesMainNss) {
auto scanNode = std::make_unique<IndexScanNode>(buildSimpleIndexEntry(BSON("a" << 1)));
const NamespaceString mainNss("db.main");
auto root = std::make_unique<EqLookupNode>(std::move(scanNode),
- mainNss.toString(),
+ mainNss,
"local",
"remote",
"b",
@@ -1286,8 +1286,8 @@ TEST(QuerySolutionTest, GetSecondaryNamespaceVectorDeduplicatesMainNss) {
TEST(QuerySolutionTest, GetSecondaryNamespaceVectorOverNestedEqLookupNodes) {
auto scanNode = std::make_unique<IndexScanNode>(buildSimpleIndexEntry(BSON("a" << 1)));
const NamespaceString mainNss("db.main");
- const auto foreignCollOne = "db.col";
- const auto foreignCollTwo = "db.foo";
+ const NamespaceString foreignCollOne("db.col");
+ const NamespaceString foreignCollTwo("db.foo");
auto childEqLookupNode =
std::make_unique<EqLookupNode>(std::move(scanNode),
foreignCollOne,
@@ -1314,15 +1314,14 @@ TEST(QuerySolutionTest, GetSecondaryNamespaceVectorOverNestedEqLookupNodes) {
// The foreign collections are unique, so our output vector should contain both of them. Note
// that because 'getAllSecondaryNamespaces' uses a set internally, these namespaces are
// expected to be in sorted order in the output vector.
- std::vector<NamespaceStringOrUUID> expectedNssVector{NamespaceString(foreignCollOne),
- NamespaceString(foreignCollTwo)};
+ std::vector<NamespaceStringOrUUID> expectedNssVector{foreignCollOne, foreignCollTwo};
assertNamespaceVectorsAreEqual(qs.getAllSecondaryNamespaces(mainNss), expectedNssVector);
}
TEST(QuerySolutionTest, GetSecondaryNamespaceVectorDeduplicatesNestedEqLookupNodes) {
auto scanNode = std::make_unique<IndexScanNode>(buildSimpleIndexEntry(BSON("a" << 1)));
const NamespaceString mainNss("db.main");
- const auto foreignColl = "db.col";
+ const NamespaceString foreignColl("db.col");
auto childEqLookupNode =
std::make_unique<EqLookupNode>(std::move(scanNode),
foreignColl,
@@ -1348,7 +1347,7 @@ TEST(QuerySolutionTest, GetSecondaryNamespaceVectorDeduplicatesNestedEqLookupNod
// Both nodes reference the same foreign collection. Therefore, our output vector should contain
// a single copy of that namespace.
- std::vector<NamespaceStringOrUUID> expectedNssVector{NamespaceString(foreignColl)};
+ std::vector<NamespaceStringOrUUID> expectedNssVector{foreignColl};
assertNamespaceVectorsAreEqual(qs.getAllSecondaryNamespaces(mainNss), expectedNssVector);
}
} // namespace
diff --git a/src/mongo/db/query/sbe_cached_solution_planner.cpp b/src/mongo/db/query/sbe_cached_solution_planner.cpp
index 416610f401f..72f53591409 100644
--- a/src/mongo/db/query/sbe_cached_solution_planner.cpp
+++ b/src/mongo/db/query/sbe_cached_solution_planner.cpp
@@ -37,6 +37,7 @@
#include "mongo/db/query/explain.h"
#include "mongo/db/query/get_executor.h"
#include "mongo/db/query/plan_cache_key_factory.h"
+#include "mongo/db/query/planner_analysis.h"
#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/sbe_multi_planner.h"
#include "mongo/db/query/stage_builder_util.h"
@@ -50,29 +51,70 @@ namespace mongo::sbe {
CandidatePlans CachedSolutionPlanner::plan(
std::vector<std::unique_ptr<QuerySolution>> solutions,
std::vector<std::pair<std::unique_ptr<PlanStage>, stage_builder::PlanStageData>> roots) {
-
- // If the cached plan is accepted we'd like to keep the results from the trials even if there
- // are parts of agg pipelines being lowered into SBE, so we run the trial with the extended
- // plan. This works because TrialRunTracker, attached to HashAgg stage in $group queries, tracks
- // as "results" the results of its child stage. For $lookup queries, the TrialRunTracker will
- // only track the number of reads from the local side. Thus, we can use the number of reads the
- // plan was cached with during multiplanning even though multiplanning ran trials of
- // pre-extended plans.
- //
- // When "featureFlagSbeFull" is enabled we use the SBE plan cache. The SBE plan cache stores the
- // entire plan, including the part for any agg pipeline pushed down to SBE. Therefore, this
- // logic is only necessary when "featureFlagSbeFull" is disabled.
- if (!_cq.pipeline().empty() && !feature_flags::gFeatureFlagSbeFull.isEnabledAndIgnoreFCV()) {
- _yieldPolicy->clearRegisteredPlans();
+ if (!_cq.pipeline().empty()) {
+ // When "featureFlagSbeFull" is enabled we use the SBE plan cache. If the plan cache is
+ // enabled we'd like to check if there is any foreign collection in the hash_lookup stage
+ // that is no longer eligible for it. In this case we invalidate the cache and immediately
+ // replan without ever running a trial period.
auto secondaryCollectionsInfo =
fillOutSecondaryCollectionsInformation(_opCtx, _collections, &_cq);
- solutions[0] = QueryPlanner::extendWithAggPipeline(
- _cq, std::move(solutions[0]), secondaryCollectionsInfo);
- roots[0] = stage_builder::buildSlotBasedExecutableTree(
- _opCtx, _collections, _cq, *solutions[0], _yieldPolicy);
+
+ if (feature_flags::gFeatureFlagSbeFull.isEnabledAndIgnoreFCV()) {
+ for (const auto foreignCollection : roots[0].second.foreignHashJoinCollections) {
+ const auto collectionInfo = secondaryCollectionsInfo.find(foreignCollection);
+ tassert(6693500,
+ "Foreign collection must be present in the collections info",
+ collectionInfo != secondaryCollectionsInfo.end());
+ tassert(6693501, "Foreign collection must exist", collectionInfo->second.exists);
+
+ if (!QueryPlannerAnalysis::isEligibleForHashJoin(collectionInfo->second)) {
+ return replan(/* shouldCache */ true,
+ str::stream() << "Foreign collection " << foreignCollection
+ << " is not eligible for hash join anymore");
+ }
+ }
+ } else {
+ // The SBE plan cache is not enabled. If the cached plan is accepted we'd like to keep
+ // the results from the trials even if there are parts of agg pipelines being lowered
+ // into SBE, so we run the trial with the extended plan. This works because
+ // TrialRunTracker, attached to HashAgg stage in $group queries, tracks as "results" the
+ // results of its child stage. For $lookup queries, the TrialRunTracker will only track
+ // the number of reads from the local side. Thus, we can use the number of reads the
+ // plan was cached with during multiplanning even though multiplanning ran trials of
+ // pre-extended plans.
+ //
+ // The SBE plan cache stores the entire plan, including the part for any agg pipeline
+ // pushed down to SBE. Therefore, this logic is only necessary when "featureFlagSbeFull"
+ // is disabled.
+ _yieldPolicy->clearRegisteredPlans();
+ solutions[0] = QueryPlanner::extendWithAggPipeline(
+ _cq, std::move(solutions[0]), secondaryCollectionsInfo);
+ roots[0] = stage_builder::buildSlotBasedExecutableTree(
+ _opCtx, _collections, _cq, *solutions[0], _yieldPolicy);
+ }
}
- const size_t maxReadsBeforeReplan = internalQueryCacheEvictionRatio * _decisionReads;
+ // If the '_decisionReads' is not present then we do not run a trial period, keeping the current
+ // plan.
+ if (!_decisionReads) {
+ const auto status = prepareExecutionPlan(
+ roots[0].first.get(), &roots[0].second, true /* preparingFromCache */);
+ uassertStatusOK(status);
+ bool exitedEarly;
+
+ // Discarding SlotAccessor pointers as they will be reacquired later.
+ std::tie(std::ignore, std::ignore, exitedEarly) = status.getValue();
+ tassert(
+ 6693502, "TrialRunTracker is not attached therefore can not exit early", !exitedEarly);
+ return {makeVector(plan_ranker::CandidatePlan{std::move(solutions[0]),
+ std::move(roots[0].first),
+ std::move(roots[0].second),
+ false /* exitedEarly*/,
+ Status::OK()}),
+ 0};
+ }
+
+ const size_t maxReadsBeforeReplan = internalQueryCacheEvictionRatio * _decisionReads.get();
// In cached solution planning we collect execution stats with an upper bound on reads allowed
// per trial run computed based on previous decision reads. If the trial run ends before
@@ -122,14 +164,14 @@ CandidatePlans CachedSolutionPlanner::plan(
"Evicting cache entry for a query and replanning it since the number of required reads "
"mismatch the number of cached reads",
"maxReadsBeforeReplan"_attr = maxReadsBeforeReplan,
- "decisionReads"_attr = _decisionReads,
+ "decisionReads"_attr = *_decisionReads,
"query"_attr = redact(_cq.toStringShort()),
"planSummary"_attr = explainer->getPlanSummary());
return replan(
true,
str::stream()
<< "cached plan was less efficient than expected: expected trial execution to take "
- << _decisionReads << " reads but it took at least " << numReads << " reads");
+ << *_decisionReads << " reads but it took at least " << numReads << " reads");
}
plan_ranker::CandidatePlan CachedSolutionPlanner::collectExecutionStatsForCachedPlan(
@@ -209,6 +251,13 @@ CandidatePlans CachedSolutionPlanner::replan(bool shouldCache, std::string reaso
auto solutions = uassertStatusOK(std::move(statusWithMultiPlanSolns));
if (solutions.size() == 1) {
+ if (!_cq.pipeline().empty()) {
+ auto secondaryCollectionsInfo =
+ fillOutSecondaryCollectionsInformation(_opCtx, _collections, &_cq);
+ solutions[0] = QueryPlanner::extendWithAggPipeline(
+ _cq, std::move(solutions[0]), secondaryCollectionsInfo);
+ }
+
// Only one possible plan. Build the stages from the solution.
auto [root, data] = buildExecutableTree(*solutions[0]);
auto status = prepareExecutionPlan(root.get(), &data);
diff --git a/src/mongo/db/query/sbe_cached_solution_planner.h b/src/mongo/db/query/sbe_cached_solution_planner.h
index 0fa3d1cbb14..5070c4f25c8 100644
--- a/src/mongo/db/query/sbe_cached_solution_planner.h
+++ b/src/mongo/db/query/sbe_cached_solution_planner.h
@@ -48,7 +48,7 @@ public:
const MultipleCollectionAccessor& collections,
const CanonicalQuery& cq,
const QueryPlannerParams& queryParams,
- size_t decisionReads,
+ boost::optional<size_t> decisionReads,
PlanYieldPolicySBE* yieldPolicy)
: BaseRuntimePlanner{opCtx, collections, cq, queryParams, yieldPolicy},
_decisionReads{decisionReads} {}
@@ -96,7 +96,7 @@ private:
CandidatePlans replan(bool shouldCache, std::string reason) const;
// The number of physical reads taken to decide on a winning plan when the plan was first
- // cached.
- const size_t _decisionReads;
+ // cached. boost::none in case planing will not be based on the trial run logic.
+ const boost::optional<size_t> _decisionReads;
};
} // namespace mongo::sbe
diff --git a/src/mongo/db/query/sbe_stage_builder.h b/src/mongo/db/query/sbe_stage_builder.h
index 7abd0e2fa46..ab928f14329 100644
--- a/src/mongo/db/query/sbe_stage_builder.h
+++ b/src/mongo/db/query/sbe_stage_builder.h
@@ -374,6 +374,10 @@ struct PlanStageData {
// every index used by the plan.
std::vector<IndexBoundsEvaluationInfo> indexBoundsEvaluationInfos;
+ // Stores all namespaces involved in the build side of a hash join plan. Needed to check if the
+ // plan should be evicted as the size of the foreign namespace changes.
+ stdx::unordered_set<NamespaceString> foreignHashJoinCollections;
+
private:
// This copy function copies data from 'other' but will not create a copy of its
// RuntimeEnvironment and CompileCtx.
@@ -397,6 +401,7 @@ private:
inputParamToSlotMap = other.inputParamToSlotMap;
variableIdToSlotMap = other.variableIdToSlotMap;
indexBoundsEvaluationInfos = other.indexBoundsEvaluationInfos;
+ foreignHashJoinCollections = other.foreignHashJoinCollections;
}
};
diff --git a/src/mongo/db/query/sbe_stage_builder_lookup.cpp b/src/mongo/db/query/sbe_stage_builder_lookup.cpp
index 4525bedd4f7..a83614792e9 100644
--- a/src/mongo/db/query/sbe_stage_builder_lookup.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_lookup.cpp
@@ -1053,6 +1053,9 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildLookupResultObject(
std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildLookup(
const QuerySolutionNode* root, const PlanStageReqs& reqs) {
const auto eqLookupNode = static_cast<const EqLookupNode*>(root);
+ if (eqLookupNode->lookupStrategy == EqLookupNode::LookupStrategy::kHashJoin) {
+ _state.data->foreignHashJoinCollections.emplace(eqLookupNode->foreignCollection);
+ }
// $lookup creates its own output documents.
_shouldProduceRecordIdSlot = false;
diff --git a/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp b/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp
index 36e7fee3c09..2a2b8041ff0 100644
--- a/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp
@@ -119,9 +119,8 @@ public:
localScanNode->name = _nss.toString();
// Construct logical query solution.
- auto foreignCollName = _foreignNss.toString();
auto lookupNode = std::make_unique<EqLookupNode>(std::move(localScanNode),
- foreignCollName,
+ _foreignNss,
localKey,
foreignKey,
asKey,