summaryrefslogtreecommitdiff
path: root/src/mongo/db/query
diff options
context:
space:
mode:
authorAnton Korshunov <anton.korshunov@mongodb.com>2021-03-02 13:03:45 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-03-13 07:52:14 +0000
commitd362ea53d66d85750b0cb63b69168e1b3a4a330e (patch)
treead4a34c4c31ba25b252d52ddf854fd3f23550bbc /src/mongo/db/query
parenta90fee27ae85894ae6e4522251fe0ea35ef473c7 (diff)
downloadmongo-d362ea53d66d85750b0cb63b69168e1b3a4a330e.tar.gz
SERVER-54322 Text query plans are not shown properly in SBE explain
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r--src/mongo/db/query/classic_stage_builder.cpp58
-rw-r--r--src/mongo/db/query/classic_stage_builder.h2
-rw-r--r--src/mongo/db/query/explain.cpp1
-rw-r--r--src/mongo/db/query/plan_executor_impl.cpp1
-rw-r--r--src/mongo/db/query/plan_explainer_impl.cpp20
-rw-r--r--src/mongo/db/query/plan_explainer_sbe.cpp12
-rw-r--r--src/mongo/db/query/planner_access.cpp120
-rw-r--r--src/mongo/db/query/query_planner_test_lib.cpp4
-rw-r--r--src/mongo/db/query/query_solution.cpp100
-rw-r--r--src/mongo/db/query/query_solution.h94
-rw-r--r--src/mongo/db/query/sbe_stage_builder.cpp183
-rw-r--r--src/mongo/db/query/sbe_stage_builder.h2
-rw-r--r--src/mongo/db/query/stage_types.cpp1
-rw-r--r--src/mongo/db/query/stage_types.h1
14 files changed, 363 insertions, 236 deletions
diff --git a/src/mongo/db/query/classic_stage_builder.cpp b/src/mongo/db/query/classic_stage_builder.cpp
index 3d52f8b3a9f..c26e0777765 100644
--- a/src/mongo/db/query/classic_stage_builder.cpp
+++ b/src/mongo/db/query/classic_stage_builder.cpp
@@ -59,7 +59,8 @@
#include "mongo/db/exec/skip.h"
#include "mongo/db/exec/sort.h"
#include "mongo/db/exec/sort_key_generator.h"
-#include "mongo/db/exec/text.h"
+#include "mongo/db/exec/text_match.h"
+#include "mongo/db/exec/text_or.h"
#include "mongo/db/index/fts_access_method.h"
#include "mongo/db/matcher/extensions_callback_real.h"
#include "mongo/db/record_id_helpers.h"
@@ -270,26 +271,47 @@ std::unique_ptr<PlanStage> ClassicStageBuilder::build(const QuerySolutionNode* r
return std::make_unique<GeoNear2DSphereStage>(
params, expCtx, _ws, _collection, s2Index);
}
- case STAGE_TEXT: {
- const TextNode* node = static_cast<const TextNode*>(root);
- invariant(_collection);
- const IndexDescriptor* desc = _collection->getIndexCatalog()->findIndexByName(
- _opCtx, node->index.identifier.catalogName);
- invariant(desc);
- const FTSAccessMethod* fam = static_cast<const FTSAccessMethod*>(
- _collection->getIndexCatalog()->getEntry(desc)->accessMethod());
- invariant(fam);
+ case STAGE_TEXT_OR: {
+ // '_ftsKeyPrefixSize' is set by the STAGE_TEXT_MATCH case right before it builds its
+ // child sub-tree, so it must already hold a value when a TEXT_OR node is reached.
+ tassert(5432204,
+ "text index key prefix must be defined before processing TEXT_OR node",
+ _ftsKeyPrefixSize);
+
+ auto node = static_cast<const TextOrNode*>(root);
+ auto ret = std::make_unique<TextOrStage>(
+ expCtx, *_ftsKeyPrefixSize, _ws, node->filter.get(), _collection);
+ // Recursively build every child solution node and attach it to the TEXT_OR stage.
+ for (auto childNode : root->children) {
+ ret->addChild(build(childNode));
+ }
+ return ret;
+ }
+ case STAGE_TEXT_MATCH: {
+ auto node = static_cast<const TextMatchNode*>(root);
+ tassert(5432200, "collection object is not provided", _collection);
+ auto catalog = _collection->getIndexCatalog();
+ tassert(5432201, "index catalog is unavailable", catalog);
+ auto desc = catalog->findIndexByName(_opCtx, node->index.identifier.catalogName);
+ // Validate the descriptor returned by the lookup (not the catalog, which was already
+ // checked above) before it is passed to getEntry() below.
+ tassert(5432202,
+ str::stream() << "no index named '" << node->index.identifier.catalogName
+ << "' found in catalog",
+ desc);
+ auto fam = static_cast<const FTSAccessMethod*>(catalog->getEntry(desc)->accessMethod());
+ tassert(5432203, "access method for index is not defined", fam);
- TextStageParams params(fam->getSpec());
- params.index = desc;
- params.indexPrefix = node->indexPrefix;
// We assume here that node->ftsQuery is an FTSQueryImpl, not an FTSQueryNoop. In
// practice, this means that it is illegal to use the StageBuilder on a QuerySolution
// created by planning a query that contains "no-op" expressions.
- params.query = static_cast<FTSQueryImpl&>(*node->ftsQuery);
- params.wantTextScore = _cq.metadataDeps()[DocumentMetadataFields::kTextScore];
- return std::make_unique<TextStage>(
- expCtx, _collection, params, _ws, node->filter.get());
+ TextMatchParams params{desc,
+ fam->getSpec(),
+ node->indexPrefix,
+ static_cast<const FTSQueryImpl&>(*node->ftsQuery)};
+
+ // Children of this node may need to know about the key prefix size, so we'll set it
+ // here before recursively descending into processing child nodes, and will reset once a
+ // text sub-tree is constructed.
+ _ftsKeyPrefixSize.emplace(params.spec.numExtraBefore());
+ ON_BLOCK_EXIT([&] { _ftsKeyPrefixSize = {}; });
+
+ return std::make_unique<TextMatchStage>(expCtx, build(root->children[0]), params, _ws);
}
case STAGE_SHARDING_FILTER: {
const ShardingFilterNode* fn = static_cast<const ShardingFilterNode*>(root);
@@ -392,8 +414,6 @@ std::unique_ptr<PlanStage> ClassicStageBuilder::build(const QuerySolutionNode* r
case STAGE_QUEUED_DATA:
case STAGE_RECORD_STORE_FAST_COUNT:
case STAGE_SUBPLAN:
- case STAGE_TEXT_MATCH:
- case STAGE_TEXT_OR:
case STAGE_TRIAL:
case STAGE_UNKNOWN:
case STAGE_UPDATE: {
diff --git a/src/mongo/db/query/classic_stage_builder.h b/src/mongo/db/query/classic_stage_builder.h
index 1c63e6714b5..c99531cae95 100644
--- a/src/mongo/db/query/classic_stage_builder.h
+++ b/src/mongo/db/query/classic_stage_builder.h
@@ -49,5 +49,7 @@ public:
private:
WorkingSet* _ws;
+
+ boost::optional<size_t> _ftsKeyPrefixSize;
};
} // namespace mongo::stage_builder
diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp
index b800af89597..de24431948a 100644
--- a/src/mongo/db/query/explain.cpp
+++ b/src/mongo/db/query/explain.cpp
@@ -41,7 +41,6 @@
#include "mongo/db/exec/multi_plan.h"
#include "mongo/db/exec/near.h"
#include "mongo/db/exec/sort.h"
-#include "mongo/db/exec/text.h"
#include "mongo/db/exec/working_set_common.h"
#include "mongo/db/keypattern.h"
#include "mongo/db/pipeline/plan_executor_pipeline.h"
diff --git a/src/mongo/db/query/plan_executor_impl.cpp b/src/mongo/db/query/plan_executor_impl.cpp
index 1e5516d09a9..3d89b63a4ce 100644
--- a/src/mongo/db/query/plan_executor_impl.cpp
+++ b/src/mongo/db/query/plan_executor_impl.cpp
@@ -50,7 +50,6 @@
#include "mongo/db/exec/plan_stats.h"
#include "mongo/db/exec/sort.h"
#include "mongo/db/exec/subplan.h"
-#include "mongo/db/exec/text.h"
#include "mongo/db/exec/trial_stage.h"
#include "mongo/db/exec/working_set.h"
#include "mongo/db/exec/working_set_common.h"
diff --git a/src/mongo/db/query/plan_explainer_impl.cpp b/src/mongo/db/query/plan_explainer_impl.cpp
index 1d3e156af13..f75c88d140b 100644
--- a/src/mongo/db/query/plan_explainer_impl.cpp
+++ b/src/mongo/db/query/plan_explainer_impl.cpp
@@ -45,7 +45,7 @@
#include "mongo/db/exec/plan_stats.h"
#include "mongo/db/exec/sort.h"
#include "mongo/db/exec/subplan.h"
-#include "mongo/db/exec/text.h"
+#include "mongo/db/exec/text_match.h"
#include "mongo/db/exec/trial_stage.h"
#include "mongo/db/keypattern.h"
#include "mongo/db/query/explain.h"
@@ -84,8 +84,8 @@ void addStageSummaryStr(const PlanStage* stage, StringBuilder& sb) {
const IndexScanStats* spec = static_cast<const IndexScanStats*>(specific);
const KeyPattern keyPattern{spec->keyPattern};
sb << " " << keyPattern;
- } else if (STAGE_TEXT == stage->stageType()) {
- const TextStats* spec = static_cast<const TextStats*>(specific);
+ } else if (STAGE_TEXT_MATCH == stage->stageType()) {
+ const TextMatchStats* spec = static_cast<const TextMatchStats*>(specific);
const KeyPattern keyPattern{spec->indexPrefix};
sb << " " << keyPattern;
}
@@ -460,15 +460,13 @@ void statsToBSON(const PlanStageStats& stats,
bob->appendNumber("dupsTested", static_cast<long long>(spec->dupsTested));
bob->appendNumber("dupsDropped", static_cast<long long>(spec->dupsDropped));
}
- } else if (STAGE_TEXT == stats.stageType) {
- TextStats* spec = static_cast<TextStats*>(stats.specific.get());
+ } else if (STAGE_TEXT_MATCH == stats.stageType) {
+ TextMatchStats* spec = static_cast<TextMatchStats*>(stats.specific.get());
bob->append("indexPrefix", spec->indexPrefix);
bob->append("indexName", spec->indexName);
bob->append("parsedTextQuery", spec->parsedTextQuery);
bob->append("textIndexVersion", spec->textIndexVersion);
- } else if (STAGE_TEXT_MATCH == stats.stageType) {
- TextMatchStats* spec = static_cast<TextMatchStats*>(stats.specific.get());
if (verbosity >= ExplainOptions::Verbosity::kExecStats) {
bob->appendNumber("docsRejected", static_cast<long long>(spec->docsRejected));
@@ -685,10 +683,10 @@ void PlanExplainerImpl::getSummaryStats(PlanSummaryStats* statsOut) const {
const DistinctScanStats* distinctScanStats =
static_cast<const DistinctScanStats*>(distinctScan->getSpecificStats());
statsOut->indexesUsed.insert(distinctScanStats->indexName);
- } else if (STAGE_TEXT == stages[i]->stageType()) {
- const TextStage* textStage = static_cast<const TextStage*>(stages[i]);
- const TextStats* textStats =
- static_cast<const TextStats*>(textStage->getSpecificStats());
+ } else if (STAGE_TEXT_MATCH == stages[i]->stageType()) {
+ const TextMatchStage* textStage = static_cast<const TextMatchStage*>(stages[i]);
+ const TextMatchStats* textStats =
+ static_cast<const TextMatchStats*>(textStage->getSpecificStats());
statsOut->indexesUsed.insert(textStats->indexName);
} else if (STAGE_GEO_NEAR_2D == stages[i]->stageType() ||
STAGE_GEO_NEAR_2DSPHERE == stages[i]->stageType()) {
diff --git a/src/mongo/db/query/plan_explainer_sbe.cpp b/src/mongo/db/query/plan_explainer_sbe.cpp
index 08f3ddcad0b..bca674394c4 100644
--- a/src/mongo/db/query/plan_explainer_sbe.cpp
+++ b/src/mongo/db/query/plan_explainer_sbe.cpp
@@ -158,8 +158,8 @@ void statsToBSON(const QuerySolutionNode* node,
bob->append("sortPattern", smn->sort);
break;
}
- case STAGE_TEXT: {
- auto tn = static_cast<const TextNode*>(node);
+ case STAGE_TEXT_MATCH: {
+ auto tn = static_cast<const TextMatchNode*>(node);
bob->append("indexPrefix", tn->indexPrefix);
bob->append("indexName", tn->index.identifier.catalogName);
@@ -388,8 +388,8 @@ std::string PlanExplainerSBE::getPlanSummary() const {
sb << " " << keyPattern;
break;
}
- case STAGE_TEXT: {
- auto tn = static_cast<const TextNode*>(node);
+ case STAGE_TEXT_MATCH: {
+ auto tn = static_cast<const TextMatchNode*>(node);
const KeyPattern keyPattern{tn->indexPrefix};
sb << " " << keyPattern;
break;
@@ -460,8 +460,8 @@ void PlanExplainerSBE::getSummaryStats(PlanSummaryStats* statsOut) const {
statsOut->indexesUsed.insert(ixn->index.identifier.catalogName);
break;
}
- case STAGE_TEXT: {
- auto tn = static_cast<const TextNode*>(node);
+ case STAGE_TEXT_MATCH: {
+ auto tn = static_cast<const TextMatchNode*>(node);
statsOut->indexesUsed.insert(tn->index.identifier.catalogName);
break;
}
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index 56893e7ab11..46762805cec 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -40,6 +40,9 @@
#include "mongo/base/owned_pointer_vector.h"
#include "mongo/bson/simple_bsonobj_comparator.h"
#include "mongo/db/bson/dotted_path_support.h"
+#include "mongo/db/fts/fts_index_format.h"
+#include "mongo/db/fts/fts_query_noop.h"
+#include "mongo/db/fts/fts_spec.h"
#include "mongo/db/matcher/expression_array.h"
#include "mongo/db/matcher/expression_geo.h"
#include "mongo/db/matcher/expression_text.h"
@@ -65,7 +68,7 @@ namespace dps = ::mongo::dotted_path_support;
* Text node functors.
*/
bool isTextNode(const QuerySolutionNode* node) {
- return STAGE_TEXT == node->getType();
+ return STAGE_TEXT_MATCH == node->getType();
}
/**
@@ -346,9 +349,10 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeLeafNode(
// We must not keep the expression node around.
*tightnessOut = IndexBoundsBuilder::EXACT;
auto textExpr = static_cast<const TextMatchExpressionBase*>(expr);
- auto ret = std::make_unique<TextNode>(index);
- ret->ftsQuery = textExpr->getFTSQuery().clone();
-
+ auto ret = std::make_unique<TextMatchNode>(
+ index,
+ textExpr->getFTSQuery().clone(),
+ query.metadataDeps()[DocumentMetadataFields::kTextScore]);
// Count the number of prefix fields before the "text" field.
for (auto&& keyPatternElt : ret->index.keyPattern) {
// We know that the only key pattern with a type of String is the _fts field
@@ -415,7 +419,7 @@ bool QueryPlannerAccess::shouldMergeWithLeaf(const MatchExpression* expr,
// by adding a filter to the special leaf type.
//
- if (STAGE_TEXT == type) {
+ if (STAGE_TEXT_MATCH == type) {
// Currently only one text predicate is allowed, but to be safe, make sure that we
// do not try to merge two text predicates.
return MatchExpression::AND == mergeType && MatchExpression::TEXT != exprType;
@@ -469,8 +473,8 @@ void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingSt
const StageType type = node->getType();
- if (STAGE_TEXT == type) {
- auto textNode = static_cast<TextNode*>(node);
+ if (STAGE_TEXT_MATCH == type) {
+ auto textNode = static_cast<TextMatchNode*>(node);
if (pos < textNode->numPrefixFields) {
// This predicate is assigned to one of the prefix fields of the text index. Such
@@ -568,12 +572,108 @@ void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingSt
}
}
+/**
+ * Builds the sub-plan which feeds the given TEXT_MATCH node and attaches it as the node's child.
+ *
+ * One index scan is created per term returned by FTSQueryImpl::getTermsForBounds(), or a single
+ * EOF node if there are no terms. If 'tn->wantTextScore' is set, the scans are placed under a
+ * TEXT_OR node. Otherwise the single scan, or an OR node over several scans, is placed under a
+ * FETCH node. If 'tn->ftsQuery' is not an FTSQueryImpl, no children are added.
+ *
+ * Raises a tassert if 'tn' is null, already has children, has no 'ftsQuery', or if the index info
+ * object has no "textIndexVersion" field.
+ */
+void buildTextSubPlan(TextMatchNode* tn) {
+ tassert(5432205, "text match node is null", tn);
+ tassert(5432206, "text match node already has children", tn->children.empty());
+ tassert(5432207, "text search query is not provided", tn->ftsQuery.get());
+
+ auto query = dynamic_cast<const fts::FTSQueryImpl*>(tn->ftsQuery.get());
+ // If we're unable to cast to FTSQueryImpl, then the given query must be an FTSQueryNoop, which
+ // is only used for testing the QueryPlanner and never tries to execute the query, so we don't
+ // need to construct an entire text sub-plan. Moreover, to compute index bounds we need a list
+ // of terms, which can only be obtained from FTSQueryImpl.
+ if (!query) {
+ return;
+ }
+
+ // If the query requires the "textScore" field or involves multiple search terms, a TEXT_OR or
+ // OR stage is needed. Otherwise, we can use a single index scan directly.
+ const bool needOrStage = tn->wantTextScore || query->getTermsForBounds().size() > 1;
+
+ tassert(5432208,
+ "failed to obtain text index version",
+ tn->index.infoObj.hasField("textIndexVersion"));
+ const auto textIndexVersion =
+ static_cast<fts::TextIndexVersion>(tn->index.infoObj["textIndexVersion"].numberInt());
+
+ // Get all the index scans for each term in our query.
+ std::vector<std::unique_ptr<QuerySolutionNode>> indexScanList;
+ indexScanList.reserve(query->getTermsForBounds().size());
+ for (const auto& term : query->getTermsForBounds()) {
+ auto ixscan = std::make_unique<IndexScanNode>(tn->index);
+ ixscan->bounds.startKey = fts::FTSIndexFormat::getIndexKey(
+ fts::MAX_WEIGHT, term, tn->indexPrefix, textIndexVersion);
+ ixscan->bounds.endKey =
+ fts::FTSIndexFormat::getIndexKey(0, term, tn->indexPrefix, textIndexVersion);
+ ixscan->bounds.boundInclusion = BoundInclusion::kIncludeBothStartAndEndKeys;
+ ixscan->bounds.isSimpleRange = true;
+ ixscan->direction = -1;
+ ixscan->shouldDedup = tn->index.multikey;
+
+ // If we will be adding a TEXT_OR or OR stage, then it is responsible for applying the
+ // filter. Otherwise, the index scan applies the filter.
+ if (!needOrStage && tn->filter) {
+ ixscan->filter = tn->filter->shallowClone();
+ }
+
+ indexScanList.push_back(std::move(ixscan));
+ }
+
+ // In case the query didn't have any search term, we can simply use an EOF sub-plan, as no
+ // results can be returned in this case anyway.
+ if (indexScanList.empty()) {
+ indexScanList.push_back(std::make_unique<EofNode>());
+ }
+
+ // Build the union of the index scans as a TEXT_OR or an OR stage, depending on whether the
+ // projection requires the "textScore" $meta field.
+ if (tn->wantTextScore) {
+ // We use a TEXT_OR stage to get the union of the results from the index scans and then
+ // compute their text scores. This is a blocking operation.
+ auto textScorer = std::make_unique<TextOrNode>();
+ textScorer->filter = std::move(tn->filter);
+ for (auto&& ixscan : indexScanList) {
+ textScorer->children.push_back(ixscan.release());
+ }
+
+ tn->children.push_back(textScorer.release());
+ } else {
+ // Because we don't need the text score, we can use a non-blocking OR stage to get the union
+ // of the index scans or use the index scan directly if there is only one.
+ auto textSearcher = [&]() -> std::unique_ptr<QuerySolutionNode> {
+ if (indexScanList.size() == 1) {
+ tassert(5397400,
+ "If there is only one index scan and we do not need textScore, needOrStage "
+ "should be false",
+ !needOrStage);
+ return std::move(indexScanList[0]);
+ } else {
+ auto orTextSearcher = std::make_unique<OrNode>();
+ orTextSearcher->filter = std::move(tn->filter);
+ for (auto&& ixscan : indexScanList) {
+ orTextSearcher->children.push_back(ixscan.release());
+ }
+ return std::move(orTextSearcher);
+ }
+ }();
+
+ // Unlike the TEXT_OR stage, the OR stage does not fetch the documents that it outputs. We
+ // add our own FETCH stage to satisfy the requirement of the TEXT_MATCH stage that its
+ // WorkingSetMember inputs have fetched data.
+ auto fetchNode = std::make_unique<FetchNode>();
+ fetchNode->children.push_back(textSearcher.release());
+
+ tn->children.push_back(fetchNode.release());
+ }
+}
+
void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntry& index) {
- TextNode* tn = static_cast<TextNode*>(node);
+ auto tn = static_cast<TextMatchNode*>(node);
// If there's no prefix, the filter is already on the node and the index prefix is null.
// We can just return.
if (!tn->numPrefixFields) {
+ buildTextSubPlan(tn);
return;
}
@@ -648,6 +748,8 @@ void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntr
}
tn->indexPrefix = prefixBob.obj();
+
+ buildTextSubPlan(tn);
}
bool QueryPlannerAccess::orNeedsFetch(const ScanBuildingState* scanState) {
@@ -698,7 +800,7 @@ void QueryPlannerAccess::finishAndOutputLeaf(ScanBuildingState* scanState,
void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntry& index) {
const StageType type = node->getType();
- if (STAGE_TEXT == type) {
+ if (STAGE_TEXT_MATCH == type) {
return finishTextNode(node, index);
}
diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp
index 63f94f14293..9be90219927 100644
--- a/src/mongo/db/query/query_planner_test_lib.cpp
+++ b/src/mongo/db/query/query_planner_test_lib.cpp
@@ -387,9 +387,9 @@ bool QueryPlannerTestLib::solutionMatches(const BSONObj& testSoln,
}
return true;
- } else if (STAGE_TEXT == trueSoln->getType()) {
+ } else if (STAGE_TEXT_MATCH == trueSoln->getType()) {
// {text: {search: "somestr", language: "something", filter: {blah: 1}}}
- const TextNode* node = static_cast<const TextNode*>(trueSoln);
+ const TextMatchNode* node = static_cast<const TextMatchNode*>(trueSoln);
BSONElement el = testSoln["text"];
if (el.eoo() || !el.isABSONObj()) {
return false;
diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp
index d86f6c5865f..cf11752de25 100644
--- a/src/mongo/db/query/query_solution.cpp
+++ b/src/mongo/db/query/query_solution.cpp
@@ -168,44 +168,6 @@ void QuerySolution::setRoot(std::unique_ptr<QuerySolutionNode> root) {
}
//
-// TextNode
-//
-
-void TextNode::appendToString(str::stream* ss, int indent) const {
- addIndent(ss, indent);
- *ss << "TEXT\n";
- addIndent(ss, indent + 1);
- *ss << "name = " << index.identifier.catalogName << '\n';
- addIndent(ss, indent + 1);
- *ss << "keyPattern = " << index.keyPattern.toString() << '\n';
- addIndent(ss, indent + 1);
- *ss << "query = " << ftsQuery->getQuery() << '\n';
- addIndent(ss, indent + 1);
- *ss << "language = " << ftsQuery->getLanguage() << '\n';
- addIndent(ss, indent + 1);
- *ss << "caseSensitive= " << ftsQuery->getCaseSensitive() << '\n';
- addIndent(ss, indent + 1);
- *ss << "diacriticSensitive= " << ftsQuery->getDiacriticSensitive() << '\n';
- addIndent(ss, indent + 1);
- *ss << "indexPrefix = " << indexPrefix.toString() << '\n';
- if (nullptr != filter) {
- addIndent(ss, indent + 1);
- *ss << " filter = " << filter->debugString();
- }
- addCommon(ss, indent);
-}
-
-QuerySolutionNode* TextNode::clone() const {
- TextNode* copy = new TextNode(this->index);
- cloneBaseData(copy);
-
- copy->ftsQuery = this->ftsQuery->clone();
- copy->indexPrefix = this->indexPrefix;
-
- return copy;
-}
-
-//
// CollectionScanNode
//
@@ -1374,4 +1336,66 @@ QuerySolutionNode* EofNode::clone() const {
return copy;
}
+//
+// TextOrNode
+//
+// Appends a debug description of this node to 'ss': the "TEXT_OR" label, the filter (if any),
+// the common node information, and then each child one level deeper.
+void TextOrNode::appendToString(str::stream* ss, int indent) const {
+ addIndent(ss, indent);
+ *ss << "TEXT_OR\n";
+ if (nullptr != filter) {
+ addIndent(ss, indent + 1);
+ *ss << " filter = " << filter->debugString() << '\n';
+ }
+ addCommon(ss, indent);
+ for (size_t i = 0; i < children.size(); ++i) {
+ addIndent(ss, indent + 1);
+ *ss << "Child " << i << ":\n";
+ children[i]->appendToString(ss, indent + 2);
+ *ss << '\n';
+ }
+}
+
+// Returns a newly allocated copy of this node. The caller takes ownership of the returned
+// pointer, which is released from a unique_ptr.
+QuerySolutionNode* TextOrNode::clone() const {
+ auto copy = std::make_unique<TextOrNode>();
+ cloneBaseData(copy.get());
+ copy->dedup = this->dedup;
+ return copy.release();
+}
+
+//
+// TextMatchNode
+//
+// Appends a debug description of this node to 'ss': the "TEXT_MATCH" label, the index name and
+// key pattern, the text query settings, the index prefix, the 'wantTextScore' flag, the filter
+// (if any) and the common node information.
+void TextMatchNode::appendToString(str::stream* ss, int indent) const {
+ addIndent(ss, indent);
+ *ss << "TEXT_MATCH\n";
+ addIndent(ss, indent + 1);
+ *ss << "name = " << index.identifier.catalogName << '\n';
+ addIndent(ss, indent + 1);
+ *ss << "keyPattern = " << index.keyPattern.toString() << '\n';
+ addIndent(ss, indent + 1);
+ *ss << "query = " << ftsQuery->getQuery() << '\n';
+ addIndent(ss, indent + 1);
+ *ss << "language = " << ftsQuery->getLanguage() << '\n';
+ addIndent(ss, indent + 1);
+ *ss << "caseSensitive= " << ftsQuery->getCaseSensitive() << '\n';
+ addIndent(ss, indent + 1);
+ *ss << "diacriticSensitive= " << ftsQuery->getDiacriticSensitive() << '\n';
+ addIndent(ss, indent + 1);
+ *ss << "indexPrefix = " << indexPrefix.toString() << '\n';
+ addIndent(ss, indent + 1);
+ // The label matches the name of the 'wantTextScore' member it prints.
+ *ss << "wantTextScore = " << wantTextScore << '\n';
+ if (nullptr != filter) {
+ addIndent(ss, indent + 1);
+ *ss << " filter = " << filter->debugString();
+ }
+ addCommon(ss, indent);
+}
+
+// Returns a newly allocated copy of this node. The caller takes ownership of the returned
+// pointer, which is released from a unique_ptr.
+QuerySolutionNode* TextMatchNode::clone() const {
+ auto copy = std::make_unique<TextMatchNode>(index, ftsQuery->clone(), wantTextScore);
+ cloneBaseData(copy.get());
+ // Copy the remaining data members which are not set through the constructor, so that the
+ // clone does not silently fall back to their default values.
+ copy->numPrefixFields = numPrefixFields;
+ copy->indexPrefix = indexPrefix;
+ return copy.release();
+}
+
} // namespace mongo
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 5a7f6a0db09..30f5e6c3e09 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -405,46 +405,6 @@ private:
std::unique_ptr<QuerySolutionNode> _root;
};
-struct TextNode : public QuerySolutionNodeWithSortSet {
- TextNode(IndexEntry index) : index(std::move(index)) {}
-
- virtual ~TextNode() {}
-
- virtual StageType getType() const {
- return STAGE_TEXT;
- }
-
- virtual void appendToString(str::stream* ss, int indent) const;
-
- // Text's return is LOC_AND_OBJ so it's fetched and has all fields.
- bool fetched() const {
- return true;
- }
- FieldAvailability getFieldAvailability(const std::string& field) const {
- return FieldAvailability::kFullyProvided;
- }
- bool sortedByDiskLoc() const {
- return false;
- }
-
- QuerySolutionNode* clone() const;
-
- IndexEntry index;
- std::unique_ptr<fts::FTSQuery> ftsQuery;
-
- // The number of fields in the prefix of the text index. For example, if the key pattern is
- //
- // { a: 1, b: 1, _fts: "text", _ftsx: 1, c: 1 }
- //
- // then the number of prefix fields is 2, because of "a" and "b".
- size_t numPrefixFields = 0u;
-
- // "Prefix" fields of a text index can handle equality predicates. We group them with the
- // text node while creating the text leaf node and convert them into a BSONObj index prefix
- // when we finish the text leaf node.
- BSONObj indexPrefix;
-};
-
struct CollectionScanNode : public QuerySolutionNodeWithSortSet {
CollectionScanNode();
virtual ~CollectionScanNode() {}
@@ -1278,4 +1238,58 @@ struct EofNode : public QuerySolutionNodeWithSortSet {
QuerySolutionNode* clone() const;
};
+
+// Query solution node of type STAGE_TEXT_OR. It derives from OrNode and overrides only the
+// stage type, the debug string and clone().
+struct TextOrNode : public OrNode {
+ TextOrNode() {}
+
+ StageType getType() const override {
+ return STAGE_TEXT_OR;
+ }
+
+ void appendToString(str::stream* ss, int indent) const override;
+ QuerySolutionNode* clone() const override;
+};
+
+// Query solution node of type STAGE_TEXT_MATCH. It holds the text index entry, the parsed text
+// query, the index prefix, and whether text scores need to be computed.
+struct TextMatchNode : public QuerySolutionNodeWithSortSet {
+ TextMatchNode(IndexEntry index, std::unique_ptr<fts::FTSQuery> ftsQuery, bool wantTextScore)
+ : index(std::move(index)), ftsQuery(std::move(ftsQuery)), wantTextScore(wantTextScore) {}
+
+ StageType getType() const override {
+ return STAGE_TEXT_MATCH;
+ }
+
+ void appendToString(str::stream* ss, int indent) const override;
+
+ // Text's return is LOC_AND_OBJ so it's fetched and has all fields.
+ bool fetched() const override {
+ return true;
+ }
+ FieldAvailability getFieldAvailability(const std::string& field) const override {
+ return FieldAvailability::kFullyProvided;
+ }
+ bool sortedByDiskLoc() const override {
+ return false;
+ }
+
+ QuerySolutionNode* clone() const override;
+
+ IndexEntry index;
+ std::unique_ptr<fts::FTSQuery> ftsQuery;
+
+ // The number of fields in the prefix of the text index. For example, if the key pattern is
+ //
+ // { a: 1, b: 1, _fts: "text", _ftsx: 1, c: 1 }
+ //
+ // then the number of prefix fields is 2, because of "a" and "b".
+ size_t numPrefixFields = 0u;
+
+ // "Prefix" fields of a text index can handle equality predicates. We group them with the
+ // text node while creating the text leaf node and convert them into a BSONObj index prefix
+ // when we finish the text leaf node.
+ BSONObj indexPrefix;
+
+ // True, if we need to compute text scores.
+ bool wantTextScore;
+};
+
} // namespace mongo
diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp
index 6de6f645bc8..6b99a116b3d 100644
--- a/src/mongo/db/query/sbe_stage_builder.cpp
+++ b/src/mongo/db/query/sbe_stage_builder.cpp
@@ -44,7 +44,6 @@
#include "mongo/db/exec/sbe/stages/scan.h"
#include "mongo/db/exec/sbe/stages/sort.h"
#include "mongo/db/exec/sbe/stages/sorted_merge.h"
-#include "mongo/db/exec/sbe/stages/text_match.h"
#include "mongo/db/exec/sbe/stages/traverse.h"
#include "mongo/db/exec/sbe/stages/union.h"
#include "mongo/db/exec/sbe/stages/unique.h"
@@ -208,7 +207,6 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateEofPlan(
}
} // namespace
-
std::unique_ptr<sbe::RuntimeEnvironment> makeRuntimeEnvironment(
const CanonicalQuery& cq,
OperationContext* opCtx,
@@ -285,6 +283,36 @@ sbe::LockAcquisitionCallback makeLockAcquisitionCallback(bool checkNodeCanServeR
opCtx, coll.getNss(), true));
};
}
+
+// Looks up the index named 'indexName' in the index catalog of 'collection' and creates an
+// fts::FTSMatcher from 'ftsQuery' and the spec of that index's access method. Raises a tassert
+// if the index descriptor, the catalog entry or the access method is missing, or if 'ftsQuery'
+// is not an fts::FTSQueryImpl.
+// NOTE(review): the access method is static_cast to FTSAccessMethod without a type check,
+// presumably because the caller only passes text indexes. Confirm against callers.
+std::unique_ptr<fts::FTSMatcher> makeFtsMatcher(OperationContext* opCtx,
+ const CollectionPtr& collection,
+ const std::string& indexName,
+ const fts::FTSQuery* ftsQuery) {
+ auto desc = collection->getIndexCatalog()->findIndexByName(opCtx, indexName);
+ tassert(5432209,
+ str::stream() << "index descriptor not found for index named '" << indexName
+ << "' in collection '" << collection->ns() << "'",
+ desc);
+
+ auto entry = collection->getIndexCatalog()->getEntry(desc);
+ tassert(5432210,
+ str::stream() << "index entry not found for index named '" << indexName
+ << "' in collection '" << collection->ns() << "'",
+ entry);
+
+ auto accessMethod = static_cast<const FTSAccessMethod*>(entry->accessMethod());
+ tassert(5432211,
+ str::stream() << "access method is not defined for index named '" << indexName
+ << "' in collection '" << collection->ns() << "'",
+ accessMethod);
+
+ // 'ftsQuery' is expected to be an FTSQueryImpl, not an FTSQueryNoop. In practice, this means
+ // that it is illegal to use the StageBuilder on a QuerySolution created by planning a query
+ // that contains "no-op" expressions. The cast is verified by the tassert below.
+ auto query = dynamic_cast<const fts::FTSQueryImpl*>(ftsQuery);
+ tassert(5432220, "expected FTSQueryImpl", query);
+ return std::make_unique<fts::FTSMatcher>(*query, accessMethod->getSpec());
+}
} // namespace
SlotBasedStageBuilder::SlotBasedStageBuilder(OperationContext* opCtx,
@@ -1074,116 +1102,56 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
return {std::move(stage), std::move(outputs)};
}
-std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildText(
+std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildTextMatch(
const QuerySolutionNode* root, const PlanStageReqs& reqs) {
- invariant(_collection);
- invariant(!reqs.getIndexKeyBitset());
-
- // At present, makeLoopJoinForFetch() doesn't have the necessary logic for producing an
- // oplogTsSlot, so assert that the caller doesn't need oplogTsSlot.
- invariant(!reqs.has(kOplogTs));
-
- auto textNode = static_cast<const TextNode*>(root);
-
- auto&& indexName = textNode->index.identifier.catalogName;
- const auto desc = _collection->getIndexCatalog()->findIndexByName(_opCtx, indexName);
- invariant(desc);
- const auto accessMethod = static_cast<const FTSAccessMethod*>(
- _collection->getIndexCatalog()->getEntry(desc)->accessMethod());
- invariant(accessMethod);
- auto&& ftsSpec = accessMethod->getSpec();
-
- // We assume here that node->ftsQuery is an FTSQueryImpl, not an FTSQueryNoop. In practice, this
- // means that it is illegal to use the StageBuilder on a QuerySolution created by planning a
- // query that contains "no-op" expressions.
- auto ftsQuery = static_cast<fts::FTSQueryImpl&>(*textNode->ftsQuery);
-
- // A vector of the output slots for each index scan stage. Each stage outputs a record id and a
- // record, so we expect each inner vector to be of length two.
- std::vector<sbe::value::SlotVector> ixscanOutputSlots;
-
- const bool forward = true;
- const bool inclusive = true;
- auto makeKeyString = [&](const BSONObj& bsonKey) {
- return std::make_unique<KeyString::Value>(
- IndexEntryComparison::makeKeyStringFromBSONKeyForSeek(
- bsonKey,
- accessMethod->getSortedDataInterface()->getKeyStringVersion(),
- accessMethod->getSortedDataInterface()->getOrdering(),
- forward,
- inclusive));
- };
-
- std::vector<std::unique_ptr<sbe::PlanStage>> indexScanList;
- for (const auto& term : ftsQuery.getTermsForBounds()) {
- // TODO: Should we scan in the opposite direction?
- auto startKeyBson = fts::FTSIndexFormat::getIndexKey(
- 0, term, textNode->indexPrefix, ftsSpec.getTextIndexVersion());
- auto endKeyBson = fts::FTSIndexFormat::getIndexKey(
- fts::MAX_WEIGHT, term, textNode->indexPrefix, ftsSpec.getTextIndexVersion());
-
- auto&& [recordIdSlot, ixscan] =
- generateSingleIntervalIndexScan(_collection,
- indexName,
- forward,
- makeKeyString(startKeyBson),
- makeKeyString(endKeyBson),
- sbe::IndexKeysInclusionSet{},
- sbe::makeSV(),
- boost::none, // recordSlot
- &_slotIdGenerator,
- _yieldPolicy,
- root->nodeId(),
- _lockAcquisitionCallback);
- indexScanList.push_back(std::move(ixscan));
- ixscanOutputSlots.push_back(sbe::makeSV(recordIdSlot));
- }
-
- // If we don't have any index scan stages, produce an EOF plan.
- if (indexScanList.empty()) {
- return generateEofPlan(root->nodeId(), reqs, &_slotIdGenerator);
- }
-
- PlanStageSlots outputs;
-
- // Union will output a slot for the record id and another for the record.
- auto recordIdSlot = _slotIdGenerator.generate();
- auto unionOutputSlots = sbe::makeSV(recordIdSlot);
-
- // Index scan output slots become the input slots to the union.
- auto stage = sbe::makeS<sbe::UnionStage>(
- std::move(indexScanList), ixscanOutputSlots, unionOutputSlots, root->nodeId());
+ tassert(5432212, "no collection object", _collection);
+ tassert(5432213, "index keys requested for text match node", !reqs.getIndexKeyBitset());
+ tassert(5432214, "oplogTs requested for text match node", !reqs.has(kOplogTs));
+ tassert(5432215,
+ str::stream() << "text match node must have one child, but got "
+ << root->children.size(),
+ root->children.size() == 1);
+ // TextMatchNode guarantees to produce a fetched sub-plan, but it doesn't fetch itself. Instead,
+ // its child sub-plan must be fully fetched, and a text match plan is constructed under this
+ // assumption.
+ tassert(5432216, "text match input must be fetched", root->children[0]->fetched());
+
+ auto textNode = static_cast<const TextMatchNode*>(root);
- // TODO: If text score metadata is requested, then we should sum over the text scores inside the
- // index keys for a given document. This will require expression evaluation to be able to
- // extract the score directly from the key string.
+ auto childReqs = reqs.copy().set(kResult);
+ auto [stage, outputs] = build(textNode->children[0], childReqs);
+ tassert(5432217, "result slot is not produced by text match sub-plan", outputs.has(kResult));
+
+ // Create an FTS 'matcher' to apply 'ftsQuery' to matching documents.
+ auto matcher = makeFtsMatcher(
+ _opCtx, _collection, textNode->index.identifier.catalogName, textNode->ftsQuery.get());
+
+ // Build an 'ftsMatch' expression to match a document stored in the 'kResult' slot using the
+ // 'matcher' instance.
+ auto ftsMatch =
+ makeFunction("ftsMatch",
+ makeConstant(sbe::value::TypeTags::ftsMatcher,
+ sbe::value::bitcastFrom<fts::FTSMatcher*>(matcher.release())),
+ makeVariable(outputs.get(kResult)));
+
+ // Wrap the 'ftsMatch' expression into an 'if' expression to ensure that it can be applied only
+ // to a document.
+ auto filter =
+ sbe::makeE<sbe::EIf>(makeFunction("isObject", makeVariable(outputs.get(kResult))),
+ std::move(ftsMatch),
+ sbe::makeE<sbe::EFail>(ErrorCodes::Error{4623400},
+ "textmatch requires input to be an object"));
+
+ // Add a filter stage to apply 'ftsQuery' to matching documents and discard documents which do
+ // not match.
stage =
- sbe::makeS<sbe::UniqueStage>(std::move(stage), sbe::makeSV(recordIdSlot), root->nodeId());
-
- sbe::value::SlotId resultSlot;
- std::tie(resultSlot, recordIdSlot, stage) =
- makeLoopJoinForFetch(std::move(stage), recordIdSlot, root->nodeId());
-
- // Add a special stage to apply 'ftsQuery' to matching documents, and then add a FilterStage to
- // discard documents which do not match.
- auto textMatchResultSlot = _slotIdGenerator.generate();
- stage = sbe::makeS<sbe::TextMatchStage>(
- std::move(stage), ftsQuery, ftsSpec, resultSlot, textMatchResultSlot, root->nodeId());
-
- // Filter based on the contents of the slot filled out by the TextMatchStage.
- stage = sbe::makeS<sbe::FilterStage<false>>(
- std::move(stage), sbe::makeE<sbe::EVariable>(textMatchResultSlot), root->nodeId());
-
- outputs.set(kResult, resultSlot);
- outputs.set(kRecordId, recordIdSlot);
+ sbe::makeS<sbe::FilterStage<false>>(std::move(stage), std::move(filter), root->nodeId());
if (reqs.has(kReturnKey)) {
// Assign the 'returnKeySlot' to be the empty object.
outputs.set(kReturnKey, _slotIdGenerator.generate());
- stage = sbe::makeProjectStage(std::move(stage),
- root->nodeId(),
- outputs.get(kReturnKey),
- sbe::makeE<sbe::EFunction>("newObj", sbe::makeEs()));
+ stage = sbe::makeProjectStage(
+ std::move(stage), root->nodeId(), outputs.get(kReturnKey), makeFunction("newObj"));
}
return {std::move(stage), std::move(outputs)};
@@ -1643,7 +1611,10 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
{STAGE_PROJECTION_DEFAULT, &SlotBasedStageBuilder::buildProjectionDefault},
{STAGE_PROJECTION_COVERED, &SlotBasedStageBuilder::buildProjectionCovered},
{STAGE_OR, &SlotBasedStageBuilder::buildOr},
- {STAGE_TEXT, &SlotBasedStageBuilder::buildText},
+ // In SBE TEXT_OR behaves like a regular OR. All the work to support "textScore"
+ // metadata is done outside of TEXT_OR, unlike the legacy implementation.
+ {STAGE_TEXT_OR, &SlotBasedStageBuilder::buildOr},
+ {STAGE_TEXT_MATCH, &SlotBasedStageBuilder::buildTextMatch},
{STAGE_RETURN_KEY, &SlotBasedStageBuilder::buildReturnKey},
{STAGE_EOF, &SlotBasedStageBuilder::buildEof},
{STAGE_AND_HASH, &SlotBasedStageBuilder::buildAndHash},
diff --git a/src/mongo/db/query/sbe_stage_builder.h b/src/mongo/db/query/sbe_stage_builder.h
index 67b43d11682..240d44d7830 100644
--- a/src/mongo/db/query/sbe_stage_builder.h
+++ b/src/mongo/db/query/sbe_stage_builder.h
@@ -304,7 +304,7 @@ private:
std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildOr(
const QuerySolutionNode* root, const PlanStageReqs& reqs);
- std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildText(
+ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildTextMatch(
const QuerySolutionNode* root, const PlanStageReqs& reqs);
std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildReturnKey(
diff --git a/src/mongo/db/query/stage_types.cpp b/src/mongo/db/query/stage_types.cpp
index b5f1f16c106..6d3b0f1fd2b 100644
--- a/src/mongo/db/query/stage_types.cpp
+++ b/src/mongo/db/query/stage_types.cpp
@@ -68,7 +68,6 @@ StringData stageTypeToString(StageType stageType) {
{STAGE_SORT_KEY_GENERATOR, "SORT_KEY_GENERATOR"_sd},
{STAGE_SORT_MERGE, "SORT_MERGE"_sd},
{STAGE_SUBPLAN, "SUBPLAN"_sd},
- {STAGE_TEXT, "TEXT"_sd},
{STAGE_TEXT_OR, "TEXT_OR"_sd},
{STAGE_TEXT_MATCH, "TEXT_MATCH"_sd},
{STAGE_TRIAL, "TRIAL"_sd},
diff --git a/src/mongo/db/query/stage_types.h b/src/mongo/db/query/stage_types.h
index 882d0bf032b..a0dc411028b 100644
--- a/src/mongo/db/query/stage_types.h
+++ b/src/mongo/db/query/stage_types.h
@@ -113,7 +113,6 @@ enum StageType {
STAGE_SUBPLAN,
// Stages for running text search.
- STAGE_TEXT,
STAGE_TEXT_OR,
STAGE_TEXT_MATCH,