diff options
author | Denis Grebennicov <denis.grebennicov@mongodb.com> | 2022-07-15 13:59:06 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-07-15 14:43:46 +0000 |
commit | 84bbe8a09bad398a08a899405ad353c07c018993 (patch) | |
tree | 5d4caf5d5a2d00fce35796df5ef19e72bef52a32 /src | |
parent | 8f895e99ade9d42f015595a60500bb694b9027ce (diff) | |
download | mongo-84bbe8a09bad398a08a899405ad353c07c018993.tar.gz |
SERVER-66935 Invalidate $lookup plan cache when foreign collection size changes
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/exec/plan_cache_util.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 18 | ||||
-rw-r--r-- | src/mongo/db/query/plan_explainer_sbe.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/planner_analysis.cpp | 22 | ||||
-rw-r--r-- | src/mongo/db/query/planner_analysis.h | 6 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_test_lib.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.h | 6 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution_test.cpp | 21 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_cached_solution_planner.cpp | 91 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_cached_solution_planner.h | 6 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.h | 5 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_lookup.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_lookup_test.cpp | 3 |
14 files changed, 125 insertions, 64 deletions
diff --git a/src/mongo/db/exec/plan_cache_util.cpp b/src/mongo/db/exec/plan_cache_util.cpp index 1bd6f9fe75d..34e819890f4 100644 --- a/src/mongo/db/exec/plan_cache_util.cpp +++ b/src/mongo/db/exec/plan_cache_util.cpp @@ -176,7 +176,7 @@ plan_cache_debug_info::DebugInfoSBE buildDebugInfo(const QuerySolution* solution } case STAGE_EQ_LOOKUP: { auto eln = static_cast<const EqLookupNode*>(node); - auto& secondaryStats = debugInfo.secondaryStats[eln->foreignCollection]; + auto& secondaryStats = debugInfo.secondaryStats[eln->foreignCollection.toString()]; if (eln->lookupStrategy == EqLookupNode::LookupStrategy::kIndexedLoopJoin) { tassert(6466200, "Index join lookup should have an index entry", eln->idxEntry); secondaryStats.indexesUsed.push_back(eln->idxEntry->identifier.catalogName); diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index ae3c0d20c5f..c6a54e26102 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -1247,7 +1247,8 @@ std::unique_ptr<sbe::RuntimePlanner> makeRuntimePlannerIfNeeded( boost::optional<size_t> decisionWorks, bool needsSubplanning, PlanYieldPolicySBE* yieldPolicy, - size_t plannerOptions) { + size_t plannerOptions, + const stage_builder::PlanStageData& planStageData) { // If we have multiple solutions, we always need to do the runtime planning. if (numSolutions > 1) { invariant(!needsSubplanning && !decisionWorks); @@ -1279,15 +1280,15 @@ std::unique_ptr<sbe::RuntimePlanner> makeRuntimePlannerIfNeeded( invariant(numSolutions == 1); - // If we have a single solution but it was created from a cached plan, we will need to do the - // runtime planning to check if the cached plan still performs efficiently, or requires - // re-planning. The 'decisionWorks' is used to determine whether the existing cache entry should - // be evicted, and the query re-planned. - if (decisionWorks) { + // If we have a single solution and the plan is not pinned or plan contains a hash_lookup stage, + // we will need we will need to do the runtime planning to check if the cached plan still + // performs efficiently, or requires re-planning. + const bool hasHashLookup = !planStageData.foreignHashJoinCollections.empty(); + if (decisionWorks || hasHashLookup) { QueryPlannerParams plannerParams; plannerParams.options = plannerOptions; return std::make_unique<sbe::CachedSolutionPlanner>( - opCtx, collections, *canonicalQuery, plannerParams, *decisionWorks, yieldPolicy); + opCtx, collections, *canonicalQuery, plannerParams, decisionWorks, yieldPolicy); } // Runtime planning is not required. @@ -1344,7 +1345,8 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getSlotBasedExe planningResult->decisionWorks(), planningResult->needsSubplanning(), yieldPolicy.get(), - plannerParams.options)) { + plannerParams.options, + roots[0].second)) { // Do the runtime planning and pick the best candidate plan. auto candidates = planner->plan(std::move(solutions), std::move(roots)); diff --git a/src/mongo/db/query/plan_explainer_sbe.cpp b/src/mongo/db/query/plan_explainer_sbe.cpp index 05e9e472fcb..fea5b81cea4 100644 --- a/src/mongo/db/query/plan_explainer_sbe.cpp +++ b/src/mongo/db/query/plan_explainer_sbe.cpp @@ -172,7 +172,7 @@ void statsToBSON(const QuerySolutionNode* node, case STAGE_EQ_LOOKUP: { auto eln = static_cast<const EqLookupNode*>(node); - bob->append("foreignCollection", eln->foreignCollection); + bob->append("foreignCollection", eln->foreignCollection.toString()); bob->append("localField", eln->joinFieldLocal.fullPath()); bob->append("foreignField", eln->joinFieldForeign.fullPath()); bob->append("asField", eln->joinField.fullPath()); diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp index ce58c70ab64..d5e8f0acc28 100644 --- a/src/mongo/db/query/planner_analysis.cpp +++ b/src/mongo/db/query/planner_analysis.cpp @@ -644,18 +644,6 @@ void removeInclusionProjectionBelowGroupRecursive(QuerySolutionNode* solnRoot) { } } -// Checks if the foreign collection is eligible for the hash join algorithm. We conservatively -// choose the hash join algorithm for cases when the hash table is unlikely to spill data. -bool isEligibleForHashJoin(const SecondaryCollectionInfo& foreignCollInfo) { - return !internalQueryDisableLookupExecutionUsingHashJoin.load() && foreignCollInfo.exists && - foreignCollInfo.noOfRecords <= - internalQueryCollectionMaxNoOfDocumentsToChooseHashJoin.load() && - foreignCollInfo.approximateDataSizeBytes <= - internalQueryCollectionMaxDataSizeBytesToChooseHashJoin.load() && - foreignCollInfo.storageSizeBytes <= - internalQueryCollectionMaxStorageSizeBytesToChooseHashJoin.load(); -} - // Determines whether 'index' is eligible for executing the right side of a pushed down $lookup over // 'foreignField'. bool isIndexEligibleForRightSideOfLookupPushdown(const IndexEntry& index, @@ -667,6 +655,16 @@ bool isIndexEligibleForRightSideOfLookupPushdown(const IndexEntry& index, } } // namespace +bool QueryPlannerAnalysis::isEligibleForHashJoin(const SecondaryCollectionInfo& foreignCollInfo) { + return !internalQueryDisableLookupExecutionUsingHashJoin.load() && foreignCollInfo.exists && + foreignCollInfo.noOfRecords <= + internalQueryCollectionMaxNoOfDocumentsToChooseHashJoin.load() && + foreignCollInfo.approximateDataSizeBytes <= + internalQueryCollectionMaxDataSizeBytesToChooseHashJoin.load() && + foreignCollInfo.storageSizeBytes <= + internalQueryCollectionMaxStorageSizeBytesToChooseHashJoin.load(); +} + // static std::unique_ptr<QuerySolution> QueryPlannerAnalysis::removeInclusionProjectionBelowGroup( std::unique_ptr<QuerySolution> soln) { diff --git a/src/mongo/db/query/planner_analysis.h b/src/mongo/db/query/planner_analysis.h index 7ba6e8feeca..3233ef82d7b 100644 --- a/src/mongo/db/query/planner_analysis.h +++ b/src/mongo/db/query/planner_analysis.h @@ -147,6 +147,12 @@ public: const std::map<NamespaceString, SecondaryCollectionInfo>& collectionsInfo, bool allowDiskUse, const CollatorInterface* collator); + + /** + * Checks if the foreign collection is eligible for the hash join algorithm. We conservatively + * choose the hash join algorithm for cases when the hash table is unlikely to spill to disk. + */ + static bool isEligibleForHashJoin(const SecondaryCollectionInfo& foreignCollInfo); }; } // namespace mongo diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 5ec4c27f84a..b7df376979a 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -1412,7 +1412,7 @@ std::unique_ptr<QuerySolution> QueryPlanner::extendWithAggPipeline( query.getCollator()); auto eqLookupNode = std::make_unique<EqLookupNode>(std::move(solnForAgg), - lookupStage->getFromNs().toString(), + lookupStage->getFromNs(), lookupStage->getLocalField()->fullPath(), lookupStage->getForeignField()->fullPath(), lookupStage->getAsField().fullPath(), diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp index e3e045a721c..ce1754ba817 100644 --- a/src/mongo/db/query/query_planner_test_lib.cpp +++ b/src/mongo/db/query/query_planner_test_lib.cpp @@ -1395,7 +1395,7 @@ Status QueryPlannerTestLib::solutionMatches(const BSONObj& testSoln, << testSoln.toString()}; } - if (expectedForeignCollection.str() != actualEqLookupNode->foreignCollection) { + if (expectedForeignCollection.str() != actualEqLookupNode->foreignCollection.toString()) { return { ErrorCodes::Error{6267502}, str::stream() << "Test solution 'foreignCollection' does not match actual; test " diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 0965d1cd507..5f379bc5b05 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -1479,7 +1479,7 @@ struct EqLookupNode : public QuerySolutionNode { } EqLookupNode(std::unique_ptr<QuerySolutionNode> child, - const std::string& foreignCollection, + const NamespaceString& foreignCollection, const FieldPath& joinFieldLocal, const FieldPath& joinFieldForeign, const FieldPath& joinField, @@ -1528,9 +1528,9 @@ struct EqLookupNode : public QuerySolutionNode { std::unique_ptr<QuerySolutionNode> clone() const final; /** - * The foreign (inner) collection namespace name. + * The foreign (inner) collection namespace string. */ - std::string foreignCollection; + NamespaceString foreignCollection; /** * The local (outer) join field. diff --git a/src/mongo/db/query/query_solution_test.cpp b/src/mongo/db/query/query_solution_test.cpp index 55003d3f8c1..522c2e108b9 100644 --- a/src/mongo/db/query/query_solution_test.cpp +++ b/src/mongo/db/query/query_solution_test.cpp @@ -1134,7 +1134,7 @@ TEST(QuerySolutionTest, EqLookupNodeWithIndexScan) { scanNode->bounds.endKey = BSON("a" << 1 << "b" << 1); EqLookupNode node(std::move(scanNode), - "col", + NamespaceString("db.col"), "local", "foreign", "as", @@ -1169,7 +1169,7 @@ TEST(QuerySolutionTest, EqLookupNodeWithIndexScanFieldOverwrite) { << "1"); EqLookupNode node(std::move(scanNode), - "col", + NamespaceString("db.col"), "local", "foreign", "b", @@ -1242,7 +1242,7 @@ TEST(QuerySolutionTest, FieldAvailabilityOutputStreamOperator) { TEST(QuerySolutionTest, GetSecondaryNamespaceVectorOverSingleEqLookupNode) { auto scanNode = std::make_unique<IndexScanNode>(buildSimpleIndexEntry(BSON("a" << 1))); const NamespaceString mainNss("db.main"); - const auto foreignColl = "db.col"; + const NamespaceString foreignColl("db.col"); auto root = std::make_unique<EqLookupNode>(std::move(scanNode), foreignColl, "local", @@ -1257,7 +1257,7 @@ TEST(QuerySolutionTest, GetSecondaryNamespaceVectorOverSingleEqLookupNode) { qs.setRoot(std::move(root)); // The output vector should only contain 'foreignColl'. - std::vector<NamespaceStringOrUUID> expectedNssVector{NamespaceString(foreignColl)}; + std::vector<NamespaceStringOrUUID> expectedNssVector{foreignColl}; assertNamespaceVectorsAreEqual(qs.getAllSecondaryNamespaces(mainNss), expectedNssVector); } @@ -1265,7 +1265,7 @@ TEST(QuerySolutionTest, GetSecondaryNamespaceVectorDeduplicatesMainNss) { auto scanNode = std::make_unique<IndexScanNode>(buildSimpleIndexEntry(BSON("a" << 1))); const NamespaceString mainNss("db.main"); auto root = std::make_unique<EqLookupNode>(std::move(scanNode), - mainNss.toString(), + mainNss, "local", "remote", "b", @@ -1286,8 +1286,8 @@ TEST(QuerySolutionTest, GetSecondaryNamespaceVectorDeduplicatesMainNss) { TEST(QuerySolutionTest, GetSecondaryNamespaceVectorOverNestedEqLookupNodes) { auto scanNode = std::make_unique<IndexScanNode>(buildSimpleIndexEntry(BSON("a" << 1))); const NamespaceString mainNss("db.main"); - const auto foreignCollOne = "db.col"; - const auto foreignCollTwo = "db.foo"; + const NamespaceString foreignCollOne("db.col"); + const NamespaceString foreignCollTwo("db.foo"); auto childEqLookupNode = std::make_unique<EqLookupNode>(std::move(scanNode), foreignCollOne, @@ -1314,15 +1314,14 @@ TEST(QuerySolutionTest, GetSecondaryNamespaceVectorOverNestedEqLookupNodes) { // The foreign collections are unique, so our output vector should contain both of them. Note // that because 'getAllSecondaryNamespaces' uses a set internally, these namespaces are // expected to be in sorted order in the output vector. - std::vector<NamespaceStringOrUUID> expectedNssVector{NamespaceString(foreignCollOne), - NamespaceString(foreignCollTwo)}; + std::vector<NamespaceStringOrUUID> expectedNssVector{foreignCollOne, foreignCollTwo}; assertNamespaceVectorsAreEqual(qs.getAllSecondaryNamespaces(mainNss), expectedNssVector); } TEST(QuerySolutionTest, GetSecondaryNamespaceVectorDeduplicatesNestedEqLookupNodes) { auto scanNode = std::make_unique<IndexScanNode>(buildSimpleIndexEntry(BSON("a" << 1))); const NamespaceString mainNss("db.main"); - const auto foreignColl = "db.col"; + const NamespaceString foreignColl("db.col"); auto childEqLookupNode = std::make_unique<EqLookupNode>(std::move(scanNode), foreignColl, @@ -1348,7 +1347,7 @@ TEST(QuerySolutionTest, GetSecondaryNamespaceVectorDeduplicatesNestedEqLookupNod // Both nodes reference the same foreign collection. Therefore, our output vector should contain // a single copy of that namespace. - std::vector<NamespaceStringOrUUID> expectedNssVector{NamespaceString(foreignColl)}; + std::vector<NamespaceStringOrUUID> expectedNssVector{foreignColl}; assertNamespaceVectorsAreEqual(qs.getAllSecondaryNamespaces(mainNss), expectedNssVector); } } // namespace diff --git a/src/mongo/db/query/sbe_cached_solution_planner.cpp b/src/mongo/db/query/sbe_cached_solution_planner.cpp index 416610f401f..72f53591409 100644 --- a/src/mongo/db/query/sbe_cached_solution_planner.cpp +++ b/src/mongo/db/query/sbe_cached_solution_planner.cpp @@ -37,6 +37,7 @@ #include "mongo/db/query/explain.h" #include "mongo/db/query/get_executor.h" #include "mongo/db/query/plan_cache_key_factory.h" +#include "mongo/db/query/planner_analysis.h" #include "mongo/db/query/query_planner.h" #include "mongo/db/query/sbe_multi_planner.h" #include "mongo/db/query/stage_builder_util.h" @@ -50,29 +51,70 @@ namespace mongo::sbe { CandidatePlans CachedSolutionPlanner::plan( std::vector<std::unique_ptr<QuerySolution>> solutions, std::vector<std::pair<std::unique_ptr<PlanStage>, stage_builder::PlanStageData>> roots) { - - // If the cached plan is accepted we'd like to keep the results from the trials even if there - // are parts of agg pipelines being lowered into SBE, so we run the trial with the extended - // plan. This works because TrialRunTracker, attached to HashAgg stage in $group queries, tracks - // as "results" the results of its child stage. For $lookup queries, the TrialRunTracker will - // only track the number of reads from the local side. Thus, we can use the number of reads the - // plan was cached with during multiplanning even though multiplanning ran trials of - // pre-extended plans. - // - // When "featureFlagSbeFull" is enabled we use the SBE plan cache. The SBE plan cache stores the - // entire plan, including the part for any agg pipeline pushed down to SBE. Therefore, this - // logic is only necessary when "featureFlagSbeFull" is disabled. - if (!_cq.pipeline().empty() && !feature_flags::gFeatureFlagSbeFull.isEnabledAndIgnoreFCV()) { - _yieldPolicy->clearRegisteredPlans(); + if (!_cq.pipeline().empty()) { + // When "featureFlagSbeFull" is enabled we use the SBE plan cache. If the plan cache is + // enabled we'd like to check if there is any foreign collection in the hash_lookup stage + // that is no longer eligible for it. In this case we invalidate the cache and immediately + // replan without ever running a trial period. auto secondaryCollectionsInfo = fillOutSecondaryCollectionsInformation(_opCtx, _collections, &_cq); - solutions[0] = QueryPlanner::extendWithAggPipeline( - _cq, std::move(solutions[0]), secondaryCollectionsInfo); - roots[0] = stage_builder::buildSlotBasedExecutableTree( - _opCtx, _collections, _cq, *solutions[0], _yieldPolicy); + + if (feature_flags::gFeatureFlagSbeFull.isEnabledAndIgnoreFCV()) { + for (const auto foreignCollection : roots[0].second.foreignHashJoinCollections) { + const auto collectionInfo = secondaryCollectionsInfo.find(foreignCollection); + tassert(6693500, + "Foreign collection must be present in the collections info", + collectionInfo != secondaryCollectionsInfo.end()); + tassert(6693501, "Foreign collection must exist", collectionInfo->second.exists); + + if (!QueryPlannerAnalysis::isEligibleForHashJoin(collectionInfo->second)) { + return replan(/* shouldCache */ true, + str::stream() << "Foreign collection " << foreignCollection + << " is not eligible for hash join anymore"); + } + } + } else { + // The SBE plan cache is not enabled. If the cached plan is accepted we'd like to keep + // the results from the trials even if there are parts of agg pipelines being lowered + // into SBE, so we run the trial with the extended plan. This works because + // TrialRunTracker, attached to HashAgg stage in $group queries, tracks as "results" the + // results of its child stage. For $lookup queries, the TrialRunTracker will only track + // the number of reads from the local side. Thus, we can use the number of reads the + // plan was cached with during multiplanning even though multiplanning ran trials of + // pre-extended plans. + // + // The SBE plan cache stores the entire plan, including the part for any agg pipeline + // pushed down to SBE. Therefore, this logic is only necessary when "featureFlagSbeFull" + // is disabled. + _yieldPolicy->clearRegisteredPlans(); + solutions[0] = QueryPlanner::extendWithAggPipeline( + _cq, std::move(solutions[0]), secondaryCollectionsInfo); + roots[0] = stage_builder::buildSlotBasedExecutableTree( + _opCtx, _collections, _cq, *solutions[0], _yieldPolicy); + } } - const size_t maxReadsBeforeReplan = internalQueryCacheEvictionRatio * _decisionReads; + // If the '_decisionReads' is not present then we do not run a trial period, keeping the current + // plan. + if (!_decisionReads) { + const auto status = prepareExecutionPlan( + roots[0].first.get(), &roots[0].second, true /* preparingFromCache */); + uassertStatusOK(status); + bool exitedEarly; + + // Discarding SlotAccessor pointers as they will be reacquired later. + std::tie(std::ignore, std::ignore, exitedEarly) = status.getValue(); + tassert( + 6693502, "TrialRunTracker is not attached therefore can not exit early", !exitedEarly); + return {makeVector(plan_ranker::CandidatePlan{std::move(solutions[0]), + std::move(roots[0].first), + std::move(roots[0].second), + false /* exitedEarly*/, + Status::OK()}), + 0}; + } + + const size_t maxReadsBeforeReplan = internalQueryCacheEvictionRatio * _decisionReads.get(); // In cached solution planning we collect execution stats with an upper bound on reads allowed // per trial run computed based on previous decision reads. If the trial run ends before @@ -122,14 +164,14 @@ CandidatePlans CachedSolutionPlanner::plan( "Evicting cache entry for a query and replanning it since the number of required reads " "mismatch the number of cached reads", "maxReadsBeforeReplan"_attr = maxReadsBeforeReplan, - "decisionReads"_attr = _decisionReads, + "decisionReads"_attr = *_decisionReads, "query"_attr = redact(_cq.toStringShort()), "planSummary"_attr = explainer->getPlanSummary()); return replan( true, str::stream() << "cached plan was less efficient than expected: expected trial execution to take " - << _decisionReads << " reads but it took at least " << numReads << " reads"); + << *_decisionReads << " reads but it took at least " << numReads << " reads"); } plan_ranker::CandidatePlan CachedSolutionPlanner::collectExecutionStatsForCachedPlan( @@ -209,6 +251,13 @@ CandidatePlans CachedSolutionPlanner::replan(bool shouldCache, std::string reaso auto solutions = uassertStatusOK(std::move(statusWithMultiPlanSolns)); if (solutions.size() == 1) { + if (!_cq.pipeline().empty()) { + auto secondaryCollectionsInfo = + fillOutSecondaryCollectionsInformation(_opCtx, _collections, &_cq); + solutions[0] = QueryPlanner::extendWithAggPipeline( + _cq, std::move(solutions[0]), secondaryCollectionsInfo); + } + // Only one possible plan. Build the stages from the solution. auto [root, data] = buildExecutableTree(*solutions[0]); auto status = prepareExecutionPlan(root.get(), &data); diff --git a/src/mongo/db/query/sbe_cached_solution_planner.h b/src/mongo/db/query/sbe_cached_solution_planner.h index 0fa3d1cbb14..5070c4f25c8 100644 --- a/src/mongo/db/query/sbe_cached_solution_planner.h +++ b/src/mongo/db/query/sbe_cached_solution_planner.h @@ -48,7 +48,7 @@ public: const MultipleCollectionAccessor& collections, const CanonicalQuery& cq, const QueryPlannerParams& queryParams, - size_t decisionReads, + boost::optional<size_t> decisionReads, PlanYieldPolicySBE* yieldPolicy) : BaseRuntimePlanner{opCtx, collections, cq, queryParams, yieldPolicy}, _decisionReads{decisionReads} {} @@ -96,7 +96,7 @@ private: CandidatePlans replan(bool shouldCache, std::string reason) const; // The number of physical reads taken to decide on a winning plan when the plan was first - // cached. - const size_t _decisionReads; + // cached. boost::none in case planing will not be based on the trial run logic. + const boost::optional<size_t> _decisionReads; }; } // namespace mongo::sbe diff --git a/src/mongo/db/query/sbe_stage_builder.h b/src/mongo/db/query/sbe_stage_builder.h index 7abd0e2fa46..ab928f14329 100644 --- a/src/mongo/db/query/sbe_stage_builder.h +++ b/src/mongo/db/query/sbe_stage_builder.h @@ -374,6 +374,10 @@ struct PlanStageData { // every index used by the plan. std::vector<IndexBoundsEvaluationInfo> indexBoundsEvaluationInfos; + // Stores all namespaces involved in the build side of a hash join plan. Needed to check if the + // plan should be evicted as the size of the foreign namespace changes. + stdx::unordered_set<NamespaceString> foreignHashJoinCollections; + private: // This copy function copies data from 'other' but will not create a copy of its // RuntimeEnvironment and CompileCtx. @@ -397,6 +401,7 @@ private: inputParamToSlotMap = other.inputParamToSlotMap; variableIdToSlotMap = other.variableIdToSlotMap; indexBoundsEvaluationInfos = other.indexBoundsEvaluationInfos; + foreignHashJoinCollections = other.foreignHashJoinCollections; } }; diff --git a/src/mongo/db/query/sbe_stage_builder_lookup.cpp b/src/mongo/db/query/sbe_stage_builder_lookup.cpp index 4525bedd4f7..a83614792e9 100644 --- a/src/mongo/db/query/sbe_stage_builder_lookup.cpp +++ b/src/mongo/db/query/sbe_stage_builder_lookup.cpp @@ -1053,6 +1053,9 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildLookupResultObject( std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildLookup( const QuerySolutionNode* root, const PlanStageReqs& reqs) { const auto eqLookupNode = static_cast<const EqLookupNode*>(root); + if (eqLookupNode->lookupStrategy == EqLookupNode::LookupStrategy::kHashJoin) { + _state.data->foreignHashJoinCollections.emplace(eqLookupNode->foreignCollection); + } // $lookup creates its own output documents. _shouldProduceRecordIdSlot = false; diff --git a/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp b/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp index 36e7fee3c09..2a2b8041ff0 100644 --- a/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp +++ b/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp @@ -119,9 +119,8 @@ public: localScanNode->name = _nss.toString(); // Construct logical query solution. - auto foreignCollName = _foreignNss.toString(); auto lookupNode = std::make_unique<EqLookupNode>(std::move(localScanNode), - foreignCollName, + _foreignNss, localKey, foreignKey, asKey, |