diff options
author | Anton Korshunov <anton.korshunov@mongodb.com> | 2021-03-02 13:03:45 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-03-13 07:52:14 +0000 |
commit | d362ea53d66d85750b0cb63b69168e1b3a4a330e (patch) | |
tree | ad4a34c4c31ba25b252d52ddf854fd3f23550bbc /src/mongo/db/query | |
parent | a90fee27ae85894ae6e4522251fe0ea35ef473c7 (diff) | |
download | mongo-d362ea53d66d85750b0cb63b69168e1b3a4a330e.tar.gz |
SERVER-54322 Text query plans are not shown properly in SBE explain
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r-- | src/mongo/db/query/classic_stage_builder.cpp | 58 | ||||
-rw-r--r-- | src/mongo/db/query/classic_stage_builder.h | 2 | ||||
-rw-r--r-- | src/mongo/db/query/explain.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/query/plan_executor_impl.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/query/plan_explainer_impl.cpp | 20 | ||||
-rw-r--r-- | src/mongo/db/query/plan_explainer_sbe.cpp | 12 | ||||
-rw-r--r-- | src/mongo/db/query/planner_access.cpp | 120 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_test_lib.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.cpp | 100 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.h | 94 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.cpp | 183 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.h | 2 | ||||
-rw-r--r-- | src/mongo/db/query/stage_types.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/query/stage_types.h | 1 |
14 files changed, 363 insertions, 236 deletions
diff --git a/src/mongo/db/query/classic_stage_builder.cpp b/src/mongo/db/query/classic_stage_builder.cpp index 3d52f8b3a9f..c26e0777765 100644 --- a/src/mongo/db/query/classic_stage_builder.cpp +++ b/src/mongo/db/query/classic_stage_builder.cpp @@ -59,7 +59,8 @@ #include "mongo/db/exec/skip.h" #include "mongo/db/exec/sort.h" #include "mongo/db/exec/sort_key_generator.h" -#include "mongo/db/exec/text.h" +#include "mongo/db/exec/text_match.h" +#include "mongo/db/exec/text_or.h" #include "mongo/db/index/fts_access_method.h" #include "mongo/db/matcher/extensions_callback_real.h" #include "mongo/db/record_id_helpers.h" @@ -270,26 +271,47 @@ std::unique_ptr<PlanStage> ClassicStageBuilder::build(const QuerySolutionNode* r return std::make_unique<GeoNear2DSphereStage>( params, expCtx, _ws, _collection, s2Index); } - case STAGE_TEXT: { - const TextNode* node = static_cast<const TextNode*>(root); - invariant(_collection); - const IndexDescriptor* desc = _collection->getIndexCatalog()->findIndexByName( - _opCtx, node->index.identifier.catalogName); - invariant(desc); - const FTSAccessMethod* fam = static_cast<const FTSAccessMethod*>( - _collection->getIndexCatalog()->getEntry(desc)->accessMethod()); - invariant(fam); + case STAGE_TEXT_OR: { + tassert(5432204, + "text index key prefix must be defined before processing TEXT_OR node", + _ftsKeyPrefixSize); + + auto node = static_cast<const TextOrNode*>(root); + auto ret = std::make_unique<TextOrStage>( + expCtx, *_ftsKeyPrefixSize, _ws, node->filter.get(), _collection); + for (auto childNode : root->children) { + ret->addChild(build(childNode)); + } + return ret; + } + case STAGE_TEXT_MATCH: { + auto node = static_cast<const TextMatchNode*>(root); + tassert(5432200, "collection object is not provided", _collection); + auto catalog = _collection->getIndexCatalog(); + tassert(5432201, "index catalog is unavailable", catalog); + auto desc = catalog->findIndexByName(_opCtx, node->index.identifier.catalogName); + tassert(5432202, + str::stream() << "no index named '" << node->index.identifier.catalogName + << "' found in catalog", + catalog); + auto fam = static_cast<const FTSAccessMethod*>(catalog->getEntry(desc)->accessMethod()); + tassert(5432203, "access method for index is not defined", fam); - TextStageParams params(fam->getSpec()); - params.index = desc; - params.indexPrefix = node->indexPrefix; // We assume here that node->ftsQuery is an FTSQueryImpl, not an FTSQueryNoop. In // practice, this means that it is illegal to use the StageBuilder on a QuerySolution // created by planning a query that contains "no-op" expressions. - params.query = static_cast<FTSQueryImpl&>(*node->ftsQuery); - params.wantTextScore = _cq.metadataDeps()[DocumentMetadataFields::kTextScore]; - return std::make_unique<TextStage>( - expCtx, _collection, params, _ws, node->filter.get()); + TextMatchParams params{desc, + fam->getSpec(), + node->indexPrefix, + static_cast<const FTSQueryImpl&>(*node->ftsQuery)}; + + // Children of this node may need to know about the key prefix size, so we'll set it + // here before recursively descending into procession child nodes, and will reset once a + // text sub-tree is constructed. + _ftsKeyPrefixSize.emplace(params.spec.numExtraBefore()); + ON_BLOCK_EXIT([&] { _ftsKeyPrefixSize = {}; }); + + return std::make_unique<TextMatchStage>(expCtx, build(root->children[0]), params, _ws); } case STAGE_SHARDING_FILTER: { const ShardingFilterNode* fn = static_cast<const ShardingFilterNode*>(root); @@ -392,8 +414,6 @@ std::unique_ptr<PlanStage> ClassicStageBuilder::build(const QuerySolutionNode* r case STAGE_QUEUED_DATA: case STAGE_RECORD_STORE_FAST_COUNT: case STAGE_SUBPLAN: - case STAGE_TEXT_MATCH: - case STAGE_TEXT_OR: case STAGE_TRIAL: case STAGE_UNKNOWN: case STAGE_UPDATE: { diff --git a/src/mongo/db/query/classic_stage_builder.h b/src/mongo/db/query/classic_stage_builder.h index 1c63e6714b5..c99531cae95 100644 --- a/src/mongo/db/query/classic_stage_builder.h +++ b/src/mongo/db/query/classic_stage_builder.h @@ -49,5 +49,7 @@ public: private: WorkingSet* _ws; + + boost::optional<size_t> _ftsKeyPrefixSize; }; } // namespace mongo::stage_builder diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp index b800af89597..de24431948a 100644 --- a/src/mongo/db/query/explain.cpp +++ b/src/mongo/db/query/explain.cpp @@ -41,7 +41,6 @@ #include "mongo/db/exec/multi_plan.h" #include "mongo/db/exec/near.h" #include "mongo/db/exec/sort.h" -#include "mongo/db/exec/text.h" #include "mongo/db/exec/working_set_common.h" #include "mongo/db/keypattern.h" #include "mongo/db/pipeline/plan_executor_pipeline.h" diff --git a/src/mongo/db/query/plan_executor_impl.cpp b/src/mongo/db/query/plan_executor_impl.cpp index 1e5516d09a9..3d89b63a4ce 100644 --- a/src/mongo/db/query/plan_executor_impl.cpp +++ b/src/mongo/db/query/plan_executor_impl.cpp @@ -50,7 +50,6 @@ #include "mongo/db/exec/plan_stats.h" #include "mongo/db/exec/sort.h" #include "mongo/db/exec/subplan.h" -#include "mongo/db/exec/text.h" #include "mongo/db/exec/trial_stage.h" #include "mongo/db/exec/working_set.h" #include "mongo/db/exec/working_set_common.h" diff --git a/src/mongo/db/query/plan_explainer_impl.cpp b/src/mongo/db/query/plan_explainer_impl.cpp index 1d3e156af13..f75c88d140b 100644 --- a/src/mongo/db/query/plan_explainer_impl.cpp +++ b/src/mongo/db/query/plan_explainer_impl.cpp @@ -45,7 +45,7 @@ #include "mongo/db/exec/plan_stats.h" #include "mongo/db/exec/sort.h" #include "mongo/db/exec/subplan.h" -#include "mongo/db/exec/text.h" +#include "mongo/db/exec/text_match.h" #include "mongo/db/exec/trial_stage.h" #include "mongo/db/keypattern.h" #include "mongo/db/query/explain.h" @@ -84,8 +84,8 @@ void addStageSummaryStr(const PlanStage* stage, StringBuilder& sb) { const IndexScanStats* spec = static_cast<const IndexScanStats*>(specific); const KeyPattern keyPattern{spec->keyPattern}; sb << " " << keyPattern; - } else if (STAGE_TEXT == stage->stageType()) { - const TextStats* spec = static_cast<const TextStats*>(specific); + } else if (STAGE_TEXT_MATCH == stage->stageType()) { + const TextMatchStats* spec = static_cast<const TextMatchStats*>(specific); const KeyPattern keyPattern{spec->indexPrefix}; sb << " " << keyPattern; } @@ -460,15 +460,13 @@ void statsToBSON(const PlanStageStats& stats, bob->appendNumber("dupsTested", static_cast<long long>(spec->dupsTested)); bob->appendNumber("dupsDropped", static_cast<long long>(spec->dupsDropped)); } - } else if (STAGE_TEXT == stats.stageType) { - TextStats* spec = static_cast<TextStats*>(stats.specific.get()); + } else if (STAGE_TEXT_MATCH == stats.stageType) { + TextMatchStats* spec = static_cast<TextMatchStats*>(stats.specific.get()); bob->append("indexPrefix", spec->indexPrefix); bob->append("indexName", spec->indexName); bob->append("parsedTextQuery", spec->parsedTextQuery); bob->append("textIndexVersion", spec->textIndexVersion); - } else if (STAGE_TEXT_MATCH == stats.stageType) { - TextMatchStats* spec = static_cast<TextMatchStats*>(stats.specific.get()); if (verbosity >= ExplainOptions::Verbosity::kExecStats) { bob->appendNumber("docsRejected", static_cast<long long>(spec->docsRejected)); @@ -685,10 +683,10 @@ void PlanExplainerImpl::getSummaryStats(PlanSummaryStats* statsOut) const { const DistinctScanStats* distinctScanStats = static_cast<const DistinctScanStats*>(distinctScan->getSpecificStats()); statsOut->indexesUsed.insert(distinctScanStats->indexName); - } else if (STAGE_TEXT == stages[i]->stageType()) { - const TextStage* textStage = static_cast<const TextStage*>(stages[i]); - const TextStats* textStats = - static_cast<const TextStats*>(textStage->getSpecificStats()); + } else if (STAGE_TEXT_MATCH == stages[i]->stageType()) { + const TextMatchStage* textStage = static_cast<const TextMatchStage*>(stages[i]); + const TextMatchStats* textStats = + static_cast<const TextMatchStats*>(textStage->getSpecificStats()); statsOut->indexesUsed.insert(textStats->indexName); } else if (STAGE_GEO_NEAR_2D == stages[i]->stageType() || STAGE_GEO_NEAR_2DSPHERE == stages[i]->stageType()) { diff --git a/src/mongo/db/query/plan_explainer_sbe.cpp b/src/mongo/db/query/plan_explainer_sbe.cpp index 08f3ddcad0b..bca674394c4 100644 --- a/src/mongo/db/query/plan_explainer_sbe.cpp +++ b/src/mongo/db/query/plan_explainer_sbe.cpp @@ -158,8 +158,8 @@ void statsToBSON(const QuerySolutionNode* node, bob->append("sortPattern", smn->sort); break; } - case STAGE_TEXT: { - auto tn = static_cast<const TextNode*>(node); + case STAGE_TEXT_MATCH: { + auto tn = static_cast<const TextMatchNode*>(node); bob->append("indexPrefix", tn->indexPrefix); bob->append("indexName", tn->index.identifier.catalogName); @@ -388,8 +388,8 @@ std::string PlanExplainerSBE::getPlanSummary() const { sb << " " << keyPattern; break; } - case STAGE_TEXT: { - auto tn = static_cast<const TextNode*>(node); + case STAGE_TEXT_MATCH: { + auto tn = static_cast<const TextMatchNode*>(node); const KeyPattern keyPattern{tn->indexPrefix}; sb << " " << keyPattern; break; @@ -460,8 +460,8 @@ void PlanExplainerSBE::getSummaryStats(PlanSummaryStats* statsOut) const { statsOut->indexesUsed.insert(ixn->index.identifier.catalogName); break; } - case STAGE_TEXT: { - auto tn = static_cast<const TextNode*>(node); + case STAGE_TEXT_MATCH: { + auto tn = static_cast<const TextMatchNode*>(node); statsOut->indexesUsed.insert(tn->index.identifier.catalogName); break; } diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp index 56893e7ab11..46762805cec 100644 --- a/src/mongo/db/query/planner_access.cpp +++ b/src/mongo/db/query/planner_access.cpp @@ -40,6 +40,9 @@ #include "mongo/base/owned_pointer_vector.h" #include "mongo/bson/simple_bsonobj_comparator.h" #include "mongo/db/bson/dotted_path_support.h" +#include "mongo/db/fts/fts_index_format.h" +#include "mongo/db/fts/fts_query_noop.h" +#include "mongo/db/fts/fts_spec.h" #include "mongo/db/matcher/expression_array.h" #include "mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/expression_text.h" @@ -65,7 +68,7 @@ namespace dps = ::mongo::dotted_path_support; * Text node functors. */ bool isTextNode(const QuerySolutionNode* node) { - return STAGE_TEXT == node->getType(); + return STAGE_TEXT_MATCH == node->getType(); } /** @@ -346,9 +349,10 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeLeafNode( // We must not keep the expression node around. *tightnessOut = IndexBoundsBuilder::EXACT; auto textExpr = static_cast<const TextMatchExpressionBase*>(expr); - auto ret = std::make_unique<TextNode>(index); - ret->ftsQuery = textExpr->getFTSQuery().clone(); - + auto ret = std::make_unique<TextMatchNode>( + index, + textExpr->getFTSQuery().clone(), + query.metadataDeps()[DocumentMetadataFields::kTextScore]); // Count the number of prefix fields before the "text" field. for (auto&& keyPatternElt : ret->index.keyPattern) { // We know that the only key pattern with a type of String is the _fts field @@ -415,7 +419,7 @@ bool QueryPlannerAccess::shouldMergeWithLeaf(const MatchExpression* expr, // by adding a filter to the special leaf type. // - if (STAGE_TEXT == type) { + if (STAGE_TEXT_MATCH == type) { // Currently only one text predicate is allowed, but to be safe, make sure that we // do not try to merge two text predicates. return MatchExpression::AND == mergeType && MatchExpression::TEXT != exprType; @@ -469,8 +473,8 @@ void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingSt const StageType type = node->getType(); - if (STAGE_TEXT == type) { - auto textNode = static_cast<TextNode*>(node); + if (STAGE_TEXT_MATCH == type) { + auto textNode = static_cast<TextMatchNode*>(node); if (pos < textNode->numPrefixFields) { // This predicate is assigned to one of the prefix fields of the text index. Such @@ -568,12 +572,108 @@ void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingSt } } +void buildTextSubPlan(TextMatchNode* tn) { + tassert(5432205, "text match node is null", tn); + tassert(5432206, "text match node already has children", tn->children.empty()); + tassert(5432207, "text search query is not provided", tn->ftsQuery.get()); + + auto query = dynamic_cast<const fts::FTSQueryImpl*>(tn->ftsQuery.get()); + // If we're unable to cast to FTSQueryImpl, then the given query must be an FTSQueryNoop, which + // is only used for testing the QueryPlanner and never tries to execute the query, so we don't + // need to construct an entire text sub-plan. Moreover, to compute index bounds we need a list + // of terms, which can only be obtain from FTSQueryImpl. + if (!query) { + return; + } + + // If the query requires the "textScore" field or involves multiple search terms, a TEXT_OR or + // OR stage is needed. Otherwise, we can use a single index scan directly. + const bool needOrStage = tn->wantTextScore || query->getTermsForBounds().size() > 1; + + tassert(5432208, + "failed to obtain text index version", + tn->index.infoObj.hasField("textIndexVersion")); + const auto textIndexVersion = + static_cast<fts::TextIndexVersion>(tn->index.infoObj["textIndexVersion"].numberInt()); + + // Get all the index scans for each term in our query. + std::vector<std::unique_ptr<QuerySolutionNode>> indexScanList; + indexScanList.reserve(query->getTermsForBounds().size()); + for (const auto& term : query->getTermsForBounds()) { + auto ixscan = std::make_unique<IndexScanNode>(tn->index); + ixscan->bounds.startKey = fts::FTSIndexFormat::getIndexKey( + fts::MAX_WEIGHT, term, tn->indexPrefix, textIndexVersion); + ixscan->bounds.endKey = + fts::FTSIndexFormat::getIndexKey(0, term, tn->indexPrefix, textIndexVersion); + ixscan->bounds.boundInclusion = BoundInclusion::kIncludeBothStartAndEndKeys; + ixscan->bounds.isSimpleRange = true; + ixscan->direction = -1; + ixscan->shouldDedup = tn->index.multikey; + + // If we will be adding a TEXT_OR or OR stage, then it is responsible for applying the + // filter. Otherwise, the index scan applies the filter. + if (!needOrStage && tn->filter) { + ixscan->filter = tn->filter->shallowClone(); + } + + indexScanList.push_back(std::move(ixscan)); + } + + // In case the query didn't have any search term, we can simply use an EOF sub-plan, as no + // results can be returned in this case anyway. + if (indexScanList.empty()) { + indexScanList.push_back(std::make_unique<EofNode>()); + } + + // Build the union of the index scans as a TEXT_OR or an OR stage, depending on whether the + // projection requires the "textScore" $meta field. + if (tn->wantTextScore) { + // We use a TEXT_OR stage to get the union of the results from the index scans and then + // compute their text scores. This is a blocking operation. + auto textScorer = std::make_unique<TextOrNode>(); + textScorer->filter = std::move(tn->filter); + for (auto&& ixscan : indexScanList) { + textScorer->children.push_back(ixscan.release()); + } + + tn->children.push_back(textScorer.release()); + } else { + // Because we don't need the text score, we can use a non-blocking OR stage to get the union + // of the index scans or use the index scan directly if there is only one. + auto textSearcher = [&]() -> std::unique_ptr<QuerySolutionNode> { + if (indexScanList.size() == 1) { + tassert(5397400, + "If there is only one index scan and we do not need textScore, needOrStage " + "should be false", + !needOrStage); + return std::move(indexScanList[0]); + } else { + auto orTextSearcher = std::make_unique<OrNode>(); + orTextSearcher->filter = std::move(tn->filter); + for (auto&& ixscan : indexScanList) { + orTextSearcher->children.push_back(ixscan.release()); + } + return std::move(orTextSearcher); + } + }(); + + // Unlike the TEXT_OR stage, the OR stage does not fetch the documents that it outputs. We + // add our own FETCH stage to satisfy the requirement of the TEXT_MATCH stage that its + // WorkingSetMember inputs have fetched data. + auto fetchNode = std::make_unique<FetchNode>(); + fetchNode->children.push_back(textSearcher.release()); + + tn->children.push_back(fetchNode.release()); + } +} + void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntry& index) { - TextNode* tn = static_cast<TextNode*>(node); + auto tn = static_cast<TextMatchNode*>(node); // If there's no prefix, the filter is already on the node and the index prefix is null. // We can just return. if (!tn->numPrefixFields) { + buildTextSubPlan(tn); return; } @@ -648,6 +748,8 @@ void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntr } tn->indexPrefix = prefixBob.obj(); + + buildTextSubPlan(tn); } bool QueryPlannerAccess::orNeedsFetch(const ScanBuildingState* scanState) { @@ -698,7 +800,7 @@ void QueryPlannerAccess::finishAndOutputLeaf(ScanBuildingState* scanState, void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntry& index) { const StageType type = node->getType(); - if (STAGE_TEXT == type) { + if (STAGE_TEXT_MATCH == type) { return finishTextNode(node, index); } diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp index 63f94f14293..9be90219927 100644 --- a/src/mongo/db/query/query_planner_test_lib.cpp +++ b/src/mongo/db/query/query_planner_test_lib.cpp @@ -387,9 +387,9 @@ bool QueryPlannerTestLib::solutionMatches(const BSONObj& testSoln, } return true; - } else if (STAGE_TEXT == trueSoln->getType()) { + } else if (STAGE_TEXT_MATCH == trueSoln->getType()) { // {text: {search: "somestr", language: "something", filter: {blah: 1}}} - const TextNode* node = static_cast<const TextNode*>(trueSoln); + const TextMatchNode* node = static_cast<const TextMatchNode*>(trueSoln); BSONElement el = testSoln["text"]; if (el.eoo() || !el.isABSONObj()) { return false; diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index d86f6c5865f..cf11752de25 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -168,44 +168,6 @@ void QuerySolution::setRoot(std::unique_ptr<QuerySolutionNode> root) { } // -// TextNode -// - -void TextNode::appendToString(str::stream* ss, int indent) const { - addIndent(ss, indent); - *ss << "TEXT\n"; - addIndent(ss, indent + 1); - *ss << "name = " << index.identifier.catalogName << '\n'; - addIndent(ss, indent + 1); - *ss << "keyPattern = " << index.keyPattern.toString() << '\n'; - addIndent(ss, indent + 1); - *ss << "query = " << ftsQuery->getQuery() << '\n'; - addIndent(ss, indent + 1); - *ss << "language = " << ftsQuery->getLanguage() << '\n'; - addIndent(ss, indent + 1); - *ss << "caseSensitive= " << ftsQuery->getCaseSensitive() << '\n'; - addIndent(ss, indent + 1); - *ss << "diacriticSensitive= " << ftsQuery->getDiacriticSensitive() << '\n'; - addIndent(ss, indent + 1); - *ss << "indexPrefix = " << indexPrefix.toString() << '\n'; - if (nullptr != filter) { - addIndent(ss, indent + 1); - *ss << " filter = " << filter->debugString(); - } - addCommon(ss, indent); -} - -QuerySolutionNode* TextNode::clone() const { - TextNode* copy = new TextNode(this->index); - cloneBaseData(copy); - - copy->ftsQuery = this->ftsQuery->clone(); - copy->indexPrefix = this->indexPrefix; - - return copy; -} - -// // CollectionScanNode // @@ -1374,4 +1336,66 @@ QuerySolutionNode* EofNode::clone() const { return copy; } +// +// TextOrNode +// +void TextOrNode::appendToString(str::stream* ss, int indent) const { + addIndent(ss, indent); + *ss << "TEXT_OR\n"; + if (nullptr != filter) { + addIndent(ss, indent + 1); + *ss << " filter = " << filter->debugString() << '\n'; + } + addCommon(ss, indent); + for (size_t i = 0; i < children.size(); ++i) { + addIndent(ss, indent + 1); + *ss << "Child " << i << ":\n"; + children[i]->appendToString(ss, indent + 2); + *ss << '\n'; + } +} + +QuerySolutionNode* TextOrNode::clone() const { + auto copy = std::make_unique<TextOrNode>(); + cloneBaseData(copy.get()); + copy->dedup = this->dedup; + return copy.release(); +} + +// +// TextMatchNode +// +void TextMatchNode::appendToString(str::stream* ss, int indent) const { + addIndent(ss, indent); + *ss << "TEXT_MATCH\n"; + addIndent(ss, indent + 1); + *ss << "name = " << index.identifier.catalogName << '\n'; + addIndent(ss, indent + 1); + *ss << "keyPattern = " << index.keyPattern.toString() << '\n'; + addIndent(ss, indent + 1); + *ss << "query = " << ftsQuery->getQuery() << '\n'; + addIndent(ss, indent + 1); + *ss << "language = " << ftsQuery->getLanguage() << '\n'; + addIndent(ss, indent + 1); + *ss << "caseSensitive= " << ftsQuery->getCaseSensitive() << '\n'; + addIndent(ss, indent + 1); + *ss << "diacriticSensitive= " << ftsQuery->getDiacriticSensitive() << '\n'; + addIndent(ss, indent + 1); + *ss << "indexPrefix = " << indexPrefix.toString() << '\n'; + addIndent(ss, indent + 1); + *ss << "wantTextScorex = " << wantTextScore << '\n'; + if (nullptr != filter) { + addIndent(ss, indent + 1); + *ss << " filter = " << filter->debugString(); + } + addCommon(ss, indent); +} + +QuerySolutionNode* TextMatchNode::clone() const { + auto copy = std::make_unique<TextMatchNode>(index, ftsQuery->clone(), wantTextScore); + cloneBaseData(copy.get()); + copy->indexPrefix = indexPrefix; + return copy.release(); +} + } // namespace mongo diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 5a7f6a0db09..30f5e6c3e09 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -405,46 +405,6 @@ private: std::unique_ptr<QuerySolutionNode> _root; }; -struct TextNode : public QuerySolutionNodeWithSortSet { - TextNode(IndexEntry index) : index(std::move(index)) {} - - virtual ~TextNode() {} - - virtual StageType getType() const { - return STAGE_TEXT; - } - - virtual void appendToString(str::stream* ss, int indent) const; - - // Text's return is LOC_AND_OBJ so it's fetched and has all fields. - bool fetched() const { - return true; - } - FieldAvailability getFieldAvailability(const std::string& field) const { - return FieldAvailability::kFullyProvided; - } - bool sortedByDiskLoc() const { - return false; - } - - QuerySolutionNode* clone() const; - - IndexEntry index; - std::unique_ptr<fts::FTSQuery> ftsQuery; - - // The number of fields in the prefix of the text index. For example, if the key pattern is - // - // { a: 1, b: 1, _fts: "text", _ftsx: 1, c: 1 } - // - // then the number of prefix fields is 2, because of "a" and "b". - size_t numPrefixFields = 0u; - - // "Prefix" fields of a text index can handle equality predicates. We group them with the - // text node while creating the text leaf node and convert them into a BSONObj index prefix - // when we finish the text leaf node. - BSONObj indexPrefix; -}; - struct CollectionScanNode : public QuerySolutionNodeWithSortSet { CollectionScanNode(); virtual ~CollectionScanNode() {} @@ -1278,4 +1238,58 @@ struct EofNode : public QuerySolutionNodeWithSortSet { QuerySolutionNode* clone() const; }; + +struct TextOrNode : public OrNode { + TextOrNode() {} + + StageType getType() const override { + return STAGE_TEXT_OR; + } + + void appendToString(str::stream* ss, int indent) const override; + QuerySolutionNode* clone() const override; +}; + +struct TextMatchNode : public QuerySolutionNodeWithSortSet { + TextMatchNode(IndexEntry index, std::unique_ptr<fts::FTSQuery> ftsQuery, bool wantTextScore) + : index(std::move(index)), ftsQuery(std::move(ftsQuery)), wantTextScore(wantTextScore) {} + + StageType getType() const override { + return STAGE_TEXT_MATCH; + } + + void appendToString(str::stream* ss, int indent) const override; + + // Text's return is LOC_AND_OBJ so it's fetched and has all fields. + bool fetched() const { + return true; + } + FieldAvailability getFieldAvailability(const std::string& field) const { + return FieldAvailability::kFullyProvided; + } + bool sortedByDiskLoc() const override { + return false; + } + + QuerySolutionNode* clone() const override; + + IndexEntry index; + std::unique_ptr<fts::FTSQuery> ftsQuery; + + // The number of fields in the prefix of the text index. For example, if the key pattern is + // + // { a: 1, b: 1, _fts: "text", _ftsx: 1, c: 1 } + // + // then the number of prefix fields is 2, because of "a" and "b". + size_t numPrefixFields = 0u; + + // "Prefix" fields of a text index can handle equality predicates. We group them with the + // text node while creating the text leaf node and convert them into a BSONObj index prefix + // when we finish the text leaf node. + BSONObj indexPrefix; + + // True, if we need to compute text scores. + bool wantTextScore; +}; + } // namespace mongo diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index 6de6f645bc8..6b99a116b3d 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -44,7 +44,6 @@ #include "mongo/db/exec/sbe/stages/scan.h" #include "mongo/db/exec/sbe/stages/sort.h" #include "mongo/db/exec/sbe/stages/sorted_merge.h" -#include "mongo/db/exec/sbe/stages/text_match.h" #include "mongo/db/exec/sbe/stages/traverse.h" #include "mongo/db/exec/sbe/stages/union.h" #include "mongo/db/exec/sbe/stages/unique.h" @@ -208,7 +207,6 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateEofPlan( } } // namespace - std::unique_ptr<sbe::RuntimeEnvironment> makeRuntimeEnvironment( const CanonicalQuery& cq, OperationContext* opCtx, @@ -285,6 +283,36 @@ sbe::LockAcquisitionCallback makeLockAcquisitionCallback(bool checkNodeCanServeR opCtx, coll.getNss(), true)); }; } + +std::unique_ptr<fts::FTSMatcher> makeFtsMatcher(OperationContext* opCtx, + const CollectionPtr& collection, + const std::string& indexName, + const fts::FTSQuery* ftsQuery) { + auto desc = collection->getIndexCatalog()->findIndexByName(opCtx, indexName); + tassert(5432209, + str::stream() << "index descriptor not found for index named '" << indexName + << "' in collection '" << collection->ns() << "'", + desc); + + auto entry = collection->getIndexCatalog()->getEntry(desc); + tassert(5432210, + str::stream() << "index entry not found for index named '" << indexName + << "' in collection '" << collection->ns() << "'", + entry); + + auto accessMethod = static_cast<const FTSAccessMethod*>(entry->accessMethod()); + tassert(5432211, + str::stream() << "access method is not defined for index named '" << indexName + << "' in collection '" << collection->ns() << "'", + accessMethod); + + // We assume here that node->ftsQuery is an FTSQueryImpl, not an FTSQueryNoop. In practice, this + // means that it is illegal to use the StageBuilder on a QuerySolution created by planning a + // query that contains "no-op" expressions. + auto query = dynamic_cast<const fts::FTSQueryImpl*>(ftsQuery); + tassert(5432220, "expected FTSQueryImpl", query); + return std::make_unique<fts::FTSMatcher>(*query, accessMethod->getSpec()); +} } // namespace SlotBasedStageBuilder::SlotBasedStageBuilder(OperationContext* opCtx, @@ -1074,116 +1102,56 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder return {std::move(stage), std::move(outputs)}; } -std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildText( +std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildTextMatch( const QuerySolutionNode* root, const PlanStageReqs& reqs) { - invariant(_collection); - invariant(!reqs.getIndexKeyBitset()); - - // At present, makeLoopJoinForFetch() doesn't have the necessary logic for producing an - // oplogTsSlot, so assert that the caller doesn't need oplogTsSlot. - invariant(!reqs.has(kOplogTs)); - - auto textNode = static_cast<const TextNode*>(root); - - auto&& indexName = textNode->index.identifier.catalogName; - const auto desc = _collection->getIndexCatalog()->findIndexByName(_opCtx, indexName); - invariant(desc); - const auto accessMethod = static_cast<const FTSAccessMethod*>( - _collection->getIndexCatalog()->getEntry(desc)->accessMethod()); - invariant(accessMethod); - auto&& ftsSpec = accessMethod->getSpec(); - - // We assume here that node->ftsQuery is an FTSQueryImpl, not an FTSQueryNoop. In practice, this - // means that it is illegal to use the StageBuilder on a QuerySolution created by planning a - // query that contains "no-op" expressions. - auto ftsQuery = static_cast<fts::FTSQueryImpl&>(*textNode->ftsQuery); - - // A vector of the output slots for each index scan stage. Each stage outputs a record id and a - // record, so we expect each inner vector to be of length two. - std::vector<sbe::value::SlotVector> ixscanOutputSlots; - - const bool forward = true; - const bool inclusive = true; - auto makeKeyString = [&](const BSONObj& bsonKey) { - return std::make_unique<KeyString::Value>( - IndexEntryComparison::makeKeyStringFromBSONKeyForSeek( - bsonKey, - accessMethod->getSortedDataInterface()->getKeyStringVersion(), - accessMethod->getSortedDataInterface()->getOrdering(), - forward, - inclusive)); - }; - - std::vector<std::unique_ptr<sbe::PlanStage>> indexScanList; - for (const auto& term : ftsQuery.getTermsForBounds()) { - // TODO: Should we scan in the opposite direction? - auto startKeyBson = fts::FTSIndexFormat::getIndexKey( - 0, term, textNode->indexPrefix, ftsSpec.getTextIndexVersion()); - auto endKeyBson = fts::FTSIndexFormat::getIndexKey( - fts::MAX_WEIGHT, term, textNode->indexPrefix, ftsSpec.getTextIndexVersion()); - - auto&& [recordIdSlot, ixscan] = - generateSingleIntervalIndexScan(_collection, - indexName, - forward, - makeKeyString(startKeyBson), - makeKeyString(endKeyBson), - sbe::IndexKeysInclusionSet{}, - sbe::makeSV(), - boost::none, // recordSlot - &_slotIdGenerator, - _yieldPolicy, - root->nodeId(), - _lockAcquisitionCallback); - indexScanList.push_back(std::move(ixscan)); - ixscanOutputSlots.push_back(sbe::makeSV(recordIdSlot)); - } - - // If we don't have any index scan stages, produce an EOF plan. - if (indexScanList.empty()) { - return generateEofPlan(root->nodeId(), reqs, &_slotIdGenerator); - } - - PlanStageSlots outputs; - - // Union will output a slot for the record id and another for the record. - auto recordIdSlot = _slotIdGenerator.generate(); - auto unionOutputSlots = sbe::makeSV(recordIdSlot); - - // Index scan output slots become the input slots to the union. - auto stage = sbe::makeS<sbe::UnionStage>( - std::move(indexScanList), ixscanOutputSlots, unionOutputSlots, root->nodeId()); + tassert(5432212, "no collection object", _collection); + tassert(5432213, "index keys requsted for text match node", !reqs.getIndexKeyBitset()); + tassert(5432214, "oplogTs requsted for text match node", !reqs.has(kOplogTs)); + tassert(5432215, + str::stream() << "text match node must have one child, but got " + << root->children.size(), + root->children.size() == 1); + // TextMatchNode guarantees to produce a fetched sub-plan, but it doesn't fetch itself. Instead, + // its child sub-plan must be fully fetched, and a text match plan is constructed under this + // assumption. + tassert(5432216, "text match input must be fetched", root->children[0]->fetched()); + + auto textNode = static_cast<const TextMatchNode*>(root); - // TODO: If text score metadata is requested, then we should sum over the text scores inside the - // index keys for a given document. This will require expression evaluation to be able to - // extract the score directly from the key string. + auto childReqs = reqs.copy().set(kResult); + auto [stage, outputs] = build(textNode->children[0], childReqs); + tassert(5432217, "result slot is not produced by text match sub-plan", outputs.has(kResult)); + + // Create an FTS 'matcher' to apply 'ftsQuery' to matching documents. + auto matcher = makeFtsMatcher( + _opCtx, _collection, textNode->index.identifier.catalogName, textNode->ftsQuery.get()); + + // Build an 'ftsMatch' expression to match a document stored in the 'kResult' slot using the + // 'matcher' instance. + auto ftsMatch = + makeFunction("ftsMatch", + makeConstant(sbe::value::TypeTags::ftsMatcher, + sbe::value::bitcastFrom<fts::FTSMatcher*>(matcher.release())), + makeVariable(outputs.get(kResult))); + + // Wrap the 'ftsMatch' expression into an 'if' expression to ensure that it can be applied only + // to a document. + auto filter = + sbe::makeE<sbe::EIf>(makeFunction("isObject", makeVariable(outputs.get(kResult))), + std::move(ftsMatch), + sbe::makeE<sbe::EFail>(ErrorCodes::Error{4623400}, + "textmatch requires input to be an object")); + + // Add a filter stage to apply 'ftsQuery' to matching documents and discard documents which do + // not match. stage = - sbe::makeS<sbe::UniqueStage>(std::move(stage), sbe::makeSV(recordIdSlot), root->nodeId()); - - sbe::value::SlotId resultSlot; - std::tie(resultSlot, recordIdSlot, stage) = - makeLoopJoinForFetch(std::move(stage), recordIdSlot, root->nodeId()); - - // Add a special stage to apply 'ftsQuery' to matching documents, and then add a FilterStage to - // discard documents which do not match. - auto textMatchResultSlot = _slotIdGenerator.generate(); - stage = sbe::makeS<sbe::TextMatchStage>( - std::move(stage), ftsQuery, ftsSpec, resultSlot, textMatchResultSlot, root->nodeId()); - - // Filter based on the contents of the slot filled out by the TextMatchStage. - stage = sbe::makeS<sbe::FilterStage<false>>( - std::move(stage), sbe::makeE<sbe::EVariable>(textMatchResultSlot), root->nodeId()); - - outputs.set(kResult, resultSlot); - outputs.set(kRecordId, recordIdSlot); + sbe::makeS<sbe::FilterStage<false>>(std::move(stage), std::move(filter), root->nodeId()); if (reqs.has(kReturnKey)) { // Assign the 'returnKeySlot' to be the empty object. outputs.set(kReturnKey, _slotIdGenerator.generate()); - stage = sbe::makeProjectStage(std::move(stage), - root->nodeId(), - outputs.get(kReturnKey), - sbe::makeE<sbe::EFunction>("newObj", sbe::makeEs())); + stage = sbe::makeProjectStage( + std::move(stage), root->nodeId(), outputs.get(kReturnKey), makeFunction("newObj")); } return {std::move(stage), std::move(outputs)}; @@ -1643,7 +1611,10 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder {STAGE_PROJECTION_DEFAULT, &SlotBasedStageBuilder::buildProjectionDefault}, {STAGE_PROJECTION_COVERED, &SlotBasedStageBuilder::buildProjectionCovered}, {STAGE_OR, &SlotBasedStageBuilder::buildOr}, - {STAGE_TEXT, &SlotBasedStageBuilder::buildText}, + // In SBE TEXT_OR behaves like a regular OR. All the work to support "textScore" + // metadata is done outside of TEXT_OR, unlike the legacy implementation. + {STAGE_TEXT_OR, &SlotBasedStageBuilder::buildOr}, + {STAGE_TEXT_MATCH, &SlotBasedStageBuilder::buildTextMatch}, {STAGE_RETURN_KEY, &SlotBasedStageBuilder::buildReturnKey}, {STAGE_EOF, &SlotBasedStageBuilder::buildEof}, {STAGE_AND_HASH, &SlotBasedStageBuilder::buildAndHash}, diff --git a/src/mongo/db/query/sbe_stage_builder.h b/src/mongo/db/query/sbe_stage_builder.h index 67b43d11682..240d44d7830 100644 --- a/src/mongo/db/query/sbe_stage_builder.h +++ b/src/mongo/db/query/sbe_stage_builder.h @@ -304,7 +304,7 @@ private: std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildOr( const QuerySolutionNode* root, const PlanStageReqs& reqs); - std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildText( + std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildTextMatch( const QuerySolutionNode* root, const PlanStageReqs& reqs); std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildReturnKey( diff --git a/src/mongo/db/query/stage_types.cpp b/src/mongo/db/query/stage_types.cpp index b5f1f16c106..6d3b0f1fd2b 100644 --- a/src/mongo/db/query/stage_types.cpp +++ b/src/mongo/db/query/stage_types.cpp @@ -68,7 +68,6 @@ StringData stageTypeToString(StageType stageType) { {STAGE_SORT_KEY_GENERATOR, "SORT_KEY_GENERATOR"_sd}, {STAGE_SORT_MERGE, "SORT_MERGE"_sd}, {STAGE_SUBPLAN, "SUBPLAN"_sd}, - {STAGE_TEXT, "TEXT"_sd}, {STAGE_TEXT_OR, "TEXT_OR"_sd}, {STAGE_TEXT_MATCH, "TEXT_MATCH"_sd}, {STAGE_TRIAL, "TRIAL"_sd}, diff --git a/src/mongo/db/query/stage_types.h b/src/mongo/db/query/stage_types.h index 882d0bf032b..a0dc411028b 100644 --- a/src/mongo/db/query/stage_types.h +++ b/src/mongo/db/query/stage_types.h @@ -113,7 +113,6 @@ enum StageType { STAGE_SUBPLAN, // Stages for running text search. - STAGE_TEXT, STAGE_TEXT_OR, STAGE_TEXT_MATCH, |