diff options
author | Anton Korshunov <anton.korshunov@mongodb.com> | 2021-03-02 13:03:45 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-03-13 07:52:14 +0000 |
commit | d362ea53d66d85750b0cb63b69168e1b3a4a330e (patch) | |
tree | ad4a34c4c31ba25b252d52ddf854fd3f23550bbc /src/mongo | |
parent | a90fee27ae85894ae6e4522251fe0ea35ef473c7 (diff) | |
download | mongo-d362ea53d66d85750b0cb63b69168e1b3a4a330e.tar.gz |
SERVER-54322 Text query plans are not shown properly in SBE explain
Diffstat (limited to 'src/mongo')
31 files changed, 459 insertions, 797 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index 4286b248b48..66bbd2a78a7 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -1187,7 +1187,6 @@ env.Library( 'exec/sort.cpp', 'exec/sort_key_generator.cpp', 'exec/subplan.cpp', - 'exec/text.cpp', 'exec/text_match.cpp', 'exec/text_or.cpp', 'exec/trial_period_utils.cpp', diff --git a/src/mongo/db/exec/plan_stats.h b/src/mongo/db/exec/plan_stats.h index aad7e562913..d9dd6074bab 100644 --- a/src/mongo/db/exec/plan_stats.h +++ b/src/mongo/db/exec/plan_stats.h @@ -753,11 +753,11 @@ struct UpdateStats : public SpecificStats { BSONObj objInserted; }; -struct TextStats : public SpecificStats { - TextStats() : parsedTextQuery(), textIndexVersion(0) {} +struct TextMatchStats : public SpecificStats { + TextMatchStats() : parsedTextQuery(), textIndexVersion(0) {} std::unique_ptr<SpecificStats> clone() const final { - return std::make_unique<TextStats>(*this); + return std::make_unique<TextMatchStats>(*this); } uint64_t estimateObjectSizeInBytes() const { @@ -774,18 +774,6 @@ struct TextStats : public SpecificStats { // Index keys that precede the "text" index key. 
BSONObj indexPrefix; -}; - -struct TextMatchStats : public SpecificStats { - TextMatchStats() : docsRejected(0) {} - - std::unique_ptr<SpecificStats> clone() const final { - return std::make_unique<TextMatchStats>(*this); - } - - uint64_t estimateObjectSizeInBytes() const { - return sizeof(*this); - } size_t docsRejected; }; diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript index 96ef6fcfeab..0bc3157699e 100644 --- a/src/mongo/db/exec/sbe/SConscript +++ b/src/mongo/db/exec/sbe/SConscript @@ -20,6 +20,7 @@ env.Library( ], LIBDEPS=[ '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/fts/base_fts', '$BUILD_DIR/mongo/db/query/collation/collator_interface', '$BUILD_DIR/mongo/db/query/datetime/date_time_support', '$BUILD_DIR/mongo/db/storage/key_string', @@ -48,7 +49,6 @@ sbeEnv.Library( 'stages/sort.cpp', 'stages/sorted_merge.cpp', 'stages/spool.cpp', - 'stages/text_match.cpp', 'stages/traverse.cpp', 'stages/union.cpp', 'stages/unique.cpp', diff --git a/src/mongo/db/exec/sbe/expressions/expression.cpp b/src/mongo/db/exec/sbe/expressions/expression.cpp index 8783b35d7fd..c9668ea03a3 100644 --- a/src/mongo/db/exec/sbe/expressions/expression.cpp +++ b/src/mongo/db/exec/sbe/expressions/expression.cpp @@ -445,6 +445,7 @@ static stdx::unordered_map<std::string, BuiltinFn> kBuiltinFunctions = { {"reverseArray", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::reverseArray, false}}, {"dateAdd", BuiltinFn{[](size_t n) { return n == 5; }, vm::Builtin::dateAdd, false}}, {"hasNullBytes", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::hasNullBytes, false}}, + {"ftsMatch", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::ftsMatch, false}}, }; /** diff --git a/src/mongo/db/exec/sbe/stages/text_match.cpp b/src/mongo/db/exec/sbe/stages/text_match.cpp deleted file mode 100644 index 23afc8b5aa0..00000000000 --- a/src/mongo/db/exec/sbe/stages/text_match.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/** - * Copyright (C) 2020-present MongoDB, Inc. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * <http://www.mongodb.com/licensing/server-side-public-license>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#include "mongo/platform/basic.h" - -#include "mongo/db/exec/sbe/stages/text_match.h" - -#include "mongo/db/exec/sbe/expressions/expression.h" -#include "mongo/db/exec/sbe/values/bson.h" - -namespace mongo::sbe { - -std::unique_ptr<PlanStage> TextMatchStage::clone() const { - return makeS<TextMatchStage>(_children[0]->clone(), - _ftsMatcher.query(), - _ftsMatcher.spec(), - _inputSlot, - _outputSlot, - _commonStats.nodeId); -} - -void TextMatchStage::prepare(CompileCtx& ctx) { - _children[0]->prepare(ctx); - _inValueAccessor = _children[0]->getAccessor(ctx, _inputSlot); -} - -value::SlotAccessor* TextMatchStage::getAccessor(CompileCtx& ctx, value::SlotId slot) { - if (slot == _outputSlot) { - return &_outValueAccessor; - } - - return _children[0]->getAccessor(ctx, slot); -} - -void TextMatchStage::open(bool reOpen) { - auto optTimer(getOptTimer(_opCtx)); - - _commonStats.opens++; - _children[0]->open(reOpen); -} - -PlanState TextMatchStage::getNext() { - auto optTimer(getOptTimer(_opCtx)); - - auto state = _children[0]->getNext(); - - if (state == PlanState::ADVANCED) { - auto&& [typeTag, value] = _inValueAccessor->getViewOfValue(); - uassert(ErrorCodes::Error(4623400), - "textmatch requires input to be an object", - value::isObject(typeTag)); - BSONObj obj; - if (typeTag == value::TypeTags::bsonObject) { - obj = BSONObj{value::bitcastTo<const char*>(value)}; - } else { - BSONObjBuilder builder; - bson::convertToBsonObj(builder, value::getObjectView(value)); - obj = builder.obj(); - } - const auto matchResult = _ftsMatcher.matches(obj); - _outValueAccessor.reset(value::TypeTags::Boolean, value::bitcastFrom<bool>(matchResult)); - } - - return trackPlanState(state); -} - -void TextMatchStage::close() { - auto optTimer(getOptTimer(_opCtx)); - - _commonStats.closes++; - _children[0]->close(); -} - -std::vector<DebugPrinter::Block> TextMatchStage::debugPrint() const { - // TODO: Add 'textmatch' to the parser so that the debug output can be parsed back to an - // 
execution plan. - auto ret = PlanStage::debugPrint(); - - DebugPrinter::addIdentifier(ret, _inputSlot); - DebugPrinter::addIdentifier(ret, _outputSlot); - - DebugPrinter::addNewLine(ret); - DebugPrinter::addBlocks(ret, _children[0]->debugPrint()); - - return ret; -} - -std::unique_ptr<PlanStageStats> TextMatchStage::getStats(bool includeDebugInfo) const { - auto ret = std::make_unique<PlanStageStats>(_commonStats); - - if (includeDebugInfo) { - BSONObjBuilder bob; - bob.appendNumber("inputSlot", static_cast<long long>(_inputSlot)); - bob.appendNumber("outputSlot", static_cast<long long>(_outputSlot)); - ret->debugInfo = bob.obj(); - } - - ret->children.emplace_back(_children[0]->getStats(includeDebugInfo)); - return ret; -} - -} // namespace mongo::sbe diff --git a/src/mongo/db/exec/sbe/stages/text_match.h b/src/mongo/db/exec/sbe/stages/text_match.h deleted file mode 100644 index 2fc7d98a620..00000000000 --- a/src/mongo/db/exec/sbe/stages/text_match.h +++ /dev/null @@ -1,91 +0,0 @@ -/** - * Copyright (C) 2020-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * <http://www.mongodb.com/licensing/server-side-public-license>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. 
You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/db/exec/sbe/stages/stages.h" -#include "mongo/db/fts/fts_matcher.h" - -namespace mongo::sbe { - -/** - * Special PlanStage for evaluating an FTSMatcher. Reads a BSON object from 'inputSlot' and passes - * it to the FTSMatcher. Fills out 'outputSlot' with the resulting boolean. If 'inputSlot' contains - * a value of any type other than 'bsonObject', throws a UserException. - * - * TODO: Can this be expressed via string manipulation EExpressions? That would eliminate the need - * for this special stage. 
- */ -class TextMatchStage final : public PlanStage { -public: - TextMatchStage(std::unique_ptr<PlanStage> inputStage, - const fts::FTSQueryImpl& ftsQuery, - const fts::FTSSpec& ftsSpec, - value::SlotId inputSlot, - value::SlotId outputSlot, - PlanNodeId planNodeId) - : PlanStage("textmatch", planNodeId), - _ftsMatcher(ftsQuery, ftsSpec), - _inputSlot(inputSlot), - _outputSlot(outputSlot) { - _children.emplace_back(std::move(inputStage)); - } - - std::unique_ptr<PlanStage> clone() const final; - - void prepare(CompileCtx& ctx) final; - - value::SlotAccessor* getAccessor(CompileCtx& ctx, value::SlotId slot) final; - - void open(bool reOpen) final; - - PlanState getNext() final; - - void close() final; - - std::vector<DebugPrinter::Block> debugPrint() const final; - - std::unique_ptr<PlanStageStats> getStats(bool includeDebugInfo) const final; - - const SpecificStats* getSpecificStats() const final { - return nullptr; - } - -private: - // Phrase and negated term matcher. - const fts::FTSMatcher _ftsMatcher; - - const value::SlotId _inputSlot; - const value::SlotId _outputSlot; - - value::SlotAccessor* _inValueAccessor{nullptr}; - value::ViewOfValueAccessor _outValueAccessor; -}; - -} // namespace mongo::sbe diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp index 11fdac4f871..3522679d78f 100644 --- a/src/mongo/db/exec/sbe/values/value.cpp +++ b/src/mongo/db/exec/sbe/values/value.cpp @@ -122,6 +122,11 @@ std::pair<TypeTags, Value> makeCopyShardFilterer(const ShardFilterer& filterer) return {TypeTags::shardFilterer, filter}; } +std::pair<TypeTags, Value> makeCopyFtsMatcher(const fts::FTSMatcher& matcher) { + auto copy = bitcastFrom<fts::FTSMatcher*>(new fts::FTSMatcher(matcher.query(), matcher.spec())); + return {TypeTags::ftsMatcher, copy}; +} + void releaseValue(TypeTags tag, Value val) noexcept { switch (tag) { case TypeTags::NumberDecimal: @@ -164,6 +169,9 @@ void releaseValue(TypeTags tag, Value val) noexcept { case 
TypeTags::shardFilterer: delete getShardFiltererView(val); break; + case TypeTags::ftsMatcher: + delete getFtsMatcherView(val); + break; default: break; } @@ -268,6 +276,9 @@ void writeTagToStream(T& stream, const TypeTags tag) { case TypeTags::bsonJavascript: stream << "bsonJavascript"; break; + case TypeTags::ftsMatcher: + stream << "ftsMatcher"; + break; default: stream << "unknown tag"; break; @@ -490,6 +501,11 @@ void writeValueToStream(T& stream, TypeTags tag, Value val) { case value::TypeTags::bsonJavascript: stream << "Javascript(" << getBsonJavascriptView(val) << ")"; break; + case value::TypeTags::ftsMatcher: { + auto ftsMatcher = getFtsMatcherView(val); + stream << "FtsMatcher(" << ftsMatcher->query().toBSON().toString() << ")"; + break; + } default: MONGO_UNREACHABLE; } @@ -694,7 +710,6 @@ std::size_t hashValue(TypeTags tag, Value val, const CollatorInterface* collator return 0; } - /** * Performs a three-way comparison for any type that has < and == operators. Additionally, * guarantees that the result will be exactlty -1, 0, or 1, which is important, because not all diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h index d24fe5b2bae..31ade47676d 100644 --- a/src/mongo/db/exec/sbe/values/value.h +++ b/src/mongo/db/exec/sbe/values/value.h @@ -44,6 +44,7 @@ #include "mongo/base/data_view.h" #include "mongo/bson/ordering.h" #include "mongo/db/exec/shard_filterer.h" +#include "mongo/db/fts/fts_matcher.h" #include "mongo/db/query/bson_typemask.h" #include "mongo/db/query/collation/collator_interface.h" #include "mongo/platform/decimal128.h" @@ -134,6 +135,9 @@ enum class TypeTags : uint8_t { // Pointer to a collator interface object. collator, + + // Pointer to fts::FTSMatcher for full text search. 
+ ftsMatcher, }; inline constexpr bool isNumber(TypeTags tag) noexcept { @@ -906,6 +910,10 @@ inline CollatorInterface* getCollatorView(Value val) noexcept { return reinterpret_cast<CollatorInterface*>(val); } +inline fts::FTSMatcher* getFtsMatcherView(Value val) noexcept { + return reinterpret_cast<fts::FTSMatcher*>(val); +} + /** * Pattern and flags of Regex are stored in BSON as two C strings written one after another. * @@ -961,6 +969,8 @@ std::pair<TypeTags, Value> makeCopyJsFunction(const JsFunction&); std::pair<TypeTags, Value> makeCopyShardFilterer(const ShardFilterer&); +std::pair<TypeTags, Value> makeCopyFtsMatcher(const fts::FTSMatcher&); + void releaseValue(TypeTags tag, Value val) noexcept; inline std::pair<TypeTags, Value> copyValue(TypeTags tag, Value val) { @@ -1017,6 +1027,8 @@ inline std::pair<TypeTags, Value> copyValue(TypeTags tag, Value val) { return makeCopyBsonRegex(getBsonRegexView(val)); case TypeTags::bsonJavascript: return makeCopyBsonJavascript(getBsonJavascriptView(val)); + case TypeTags::ftsMatcher: + return makeCopyFtsMatcher(*getFtsMatcherView(val)); default: break; } diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp index 3e061ef2f7c..dd207101ee6 100644 --- a/src/mongo/db/exec/sbe/vm/vm.cpp +++ b/src/mongo/db/exec/sbe/vm/vm.cpp @@ -2935,6 +2935,31 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinDateAdd(ArityTy false, value::TypeTags::Date, value::bitcastFrom<int64_t>(resDate.toMillisSinceEpoch())}; } +std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinFtsMatch(ArityType arity) { + invariant(arity == 2); + + auto [matcherOwn, matcherTag, matcherVal] = getFromStack(0); + auto [inputOwn, inputTag, inputVal] = getFromStack(1); + + if (matcherTag != value::TypeTags::ftsMatcher || !value::isObject(inputTag)) { + return {false, value::TypeTags::Nothing, 0}; + } + + auto obj = [inputTag = inputTag, inputVal = inputVal]() { + if (inputTag == value::TypeTags::bsonObject) { + 
return BSONObj{value::bitcastTo<const char*>(inputVal)}; + } + + invariant(inputTag == value::TypeTags::Object); + BSONObjBuilder builder; + bson::convertToBsonObj(builder, value::getObjectView(inputVal)); + return builder.obj(); + }(); + + const bool matches = value::getFtsMatcherView(matcherVal)->matches(obj); + return {false, value::TypeTags::Boolean, value::bitcastFrom<bool>(matches)}; +} + std::tuple<bool, value::TypeTags, value::Value> ByteCode::dispatchBuiltin(Builtin f, ArityType arity) { switch (f) { @@ -3090,6 +3115,8 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::dispatchBuiltin(Builti return builtinGetRegexPattern(arity); case Builtin::getRegexFlags: return builtinGetRegexFlags(arity); + case Builtin::ftsMatch: + return builtinFtsMatch(arity); } MONGO_UNREACHABLE; diff --git a/src/mongo/db/exec/sbe/vm/vm.h b/src/mongo/db/exec/sbe/vm/vm.h index 7a51732a972..b98f3261d12 100644 --- a/src/mongo/db/exec/sbe/vm/vm.h +++ b/src/mongo/db/exec/sbe/vm/vm.h @@ -318,6 +318,7 @@ enum class Builtin : uint8_t { hasNullBytes, getRegexPattern, getRegexFlags, + ftsMatch, }; using SmallArityType = uint8_t; @@ -732,6 +733,7 @@ private: std::tuple<bool, value::TypeTags, value::Value> builtinHasNullBytes(ArityType arity); std::tuple<bool, value::TypeTags, value::Value> builtinGetRegexPattern(ArityType arity); std::tuple<bool, value::TypeTags, value::Value> builtinGetRegexFlags(ArityType arity); + std::tuple<bool, value::TypeTags, value::Value> builtinFtsMatch(ArityType arity); std::tuple<bool, value::TypeTags, value::Value> dispatchBuiltin(Builtin f, ArityType arity); diff --git a/src/mongo/db/exec/stagedebug_cmd.cpp b/src/mongo/db/exec/stagedebug_cmd.cpp index ecc5bdde2a0..7bc6c4f5423 100644 --- a/src/mongo/db/exec/stagedebug_cmd.cpp +++ b/src/mongo/db/exec/stagedebug_cmd.cpp @@ -52,7 +52,6 @@ #include "mongo/db/exec/or.h" #include "mongo/db/exec/skip.h" #include "mongo/db/exec/sort.h" -#include "mongo/db/exec/text.h" #include 
"mongo/db/exec/working_set_common.h" #include "mongo/db/index/fts_access_method.h" #include "mongo/db/jsobj.h" @@ -437,38 +436,6 @@ public: mergeStage->addChild(std::move(subNode)); } return mergeStage.release(); - } else if ("text" == nodeName) { - string search = nodeArgs["search"].String(); - - vector<const IndexDescriptor*> idxMatches; - collection->getIndexCatalog()->findIndexByType(opCtx, "text", idxMatches); - uassert(17194, "Expected exactly one text index", idxMatches.size() == 1); - - const IndexDescriptor* index = idxMatches[0]; - const FTSAccessMethod* fam = dynamic_cast<const FTSAccessMethod*>( - collection->getIndexCatalog()->getEntry(index)->accessMethod()); - invariant(fam); - TextStageParams params(fam->getSpec()); - params.index = index; - - // TODO: Deal with non-empty filters. This is a hack to put in covering information - // that can only be checked for equality. We ignore this now. - Status s = fam->getSpec().getIndexPrefix(BSONObj(), &params.indexPrefix); - if (!s.isOK()) { - return nullptr; - } - - params.spec = fam->getSpec(); - - params.query.setQuery(search); - params.query.setLanguage(fam->getSpec().defaultLanguage().str()); - params.query.setCaseSensitive(TextMatchExpressionBase::kCaseSensitiveDefault); - params.query.setDiacriticSensitive(TextMatchExpressionBase::kDiacriticSensitiveDefault); - if (!params.query.parse(fam->getSpec().getTextIndexVersion()).isOK()) { - return nullptr; - } - - return new TextStage(expCtx.get(), collection, params, workingSet, matcher); } else if ("delete" == nodeName) { uassert(18636, "Delete stage doesn't have a filter (put it on the child)", diff --git a/src/mongo/db/exec/text.cpp b/src/mongo/db/exec/text.cpp deleted file mode 100644 index 78820085967..00000000000 --- a/src/mongo/db/exec/text.cpp +++ /dev/null @@ -1,168 +0,0 @@ -/** - * Copyright (C) 2018-present MongoDB, Inc. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * <http://www.mongodb.com/licensing/server-side-public-license>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#include "mongo/db/exec/text.h" - -#include <memory> -#include <vector> - -#include "mongo/db/exec/fetch.h" -#include "mongo/db/exec/filter.h" -#include "mongo/db/exec/index_scan.h" -#include "mongo/db/exec/or.h" -#include "mongo/db/exec/scoped_timer.h" -#include "mongo/db/exec/text_match.h" -#include "mongo/db/exec/text_or.h" -#include "mongo/db/exec/working_set.h" -#include "mongo/db/fts/fts_index_format.h" -#include "mongo/db/jsobj.h" -#include "mongo/db/query/internal_plans.h" - -namespace mongo { - -using std::string; -using std::unique_ptr; -using std::vector; - - -using fts::FTSIndexFormat; -using fts::MAX_WEIGHT; - -const char* TextStage::kStageType = "TEXT"; - -TextStage::TextStage(ExpressionContext* expCtx, - const CollectionPtr& collection, - const TextStageParams& params, - WorkingSet* ws, - const MatchExpression* filter) - : PlanStage(kStageType, expCtx), _params(params) { - _children.emplace_back( - buildTextTree(expCtx->opCtx, collection, ws, filter, params.wantTextScore)); - _specificStats.indexPrefix = _params.indexPrefix; - _specificStats.indexName = _params.index->indexName(); - _specificStats.parsedTextQuery = _params.query.toBSON(); - _specificStats.textIndexVersion = _params.index->infoObj()["textIndexVersion"].numberInt(); -} - -bool TextStage::isEOF() { - return child()->isEOF(); -} - -PlanStage::StageState TextStage::doWork(WorkingSetID* out) { - if (isEOF()) { - return PlanStage::IS_EOF; - } - - return child()->work(out); -} - -unique_ptr<PlanStageStats> TextStage::getStats() { - _commonStats.isEOF = isEOF(); - - unique_ptr<PlanStageStats> ret = std::make_unique<PlanStageStats>(_commonStats, STAGE_TEXT); - ret->specific = std::make_unique<TextStats>(_specificStats); - ret->children.emplace_back(child()->getStats()); - return ret; -} - -const SpecificStats* TextStage::getSpecificStats() const { - return &_specificStats; -} - -unique_ptr<PlanStage> TextStage::buildTextTree(OperationContext* opCtx, - const CollectionPtr& collection, - 
WorkingSet* ws, - const MatchExpression* filter, - bool wantTextScore) const { - // If the query requires the "textScore" field or involves multiple search terms, a TEXT_OR or - // OR stage is needed. Otherwise, we can use a single index scan directly. - const bool needOrStage = wantTextScore || _params.query.getTermsForBounds().size() > 1; - const MatchExpression* emptyFilter = nullptr; - // Get all the index scans for each term in our query. - std::vector<std::unique_ptr<PlanStage>> indexScanList; - for (const auto& term : _params.query.getTermsForBounds()) { - IndexScanParams ixparams(opCtx, _params.index); - ixparams.bounds.startKey = FTSIndexFormat::getIndexKey( - MAX_WEIGHT, term, _params.indexPrefix, _params.spec.getTextIndexVersion()); - ixparams.bounds.endKey = FTSIndexFormat::getIndexKey( - 0, term, _params.indexPrefix, _params.spec.getTextIndexVersion()); - ixparams.bounds.boundInclusion = BoundInclusion::kIncludeBothStartAndEndKeys; - ixparams.bounds.isSimpleRange = true; - ixparams.direction = -1; - ixparams.shouldDedup = _params.index->getEntry()->isMultikey(); - - // If we will be adding a TEXT_OR or OR stage, then it is responsible for applying the - // filter. Otherwise, the index scan applies the filter. - indexScanList.push_back(std::make_unique<IndexScan>( - expCtx(), collection, ixparams, ws, needOrStage ? emptyFilter : filter)); - } - - // Build the union of the index scans as a TEXT_OR or an OR stage, depending on whether the - // projection requires the "textScore" $meta field. - std::unique_ptr<PlanStage> textMatchStage; - if (wantTextScore) { - // We use a TEXT_OR stage to get the union of the results from the index scans and then - // compute their text scores. This is a blocking operation. 
- auto textScorer = - std::make_unique<TextOrStage>(expCtx(), _params.spec, ws, filter, collection); - - textScorer->addChildren(std::move(indexScanList)); - - textMatchStage = std::make_unique<TextMatchStage>( - expCtx(), std::move(textScorer), _params.query, _params.spec, ws); - } else { - // Because we don't need the text score, we can use a non-blocking OR stage to get the union - // of the index scans or use the index scan directly if there is only one. - std::unique_ptr<mongo::PlanStage> textSearcher; - if (indexScanList.size() == 1) { - tassert(5397400, - "If there is only one index scan and we do not need textScore, needOrStage " - "should be false", - !needOrStage); - textSearcher = std::move(indexScanList[0]); - } else { - auto orTextSearcher = std::make_unique<OrStage>(expCtx(), ws, true, filter); - orTextSearcher->addChildren(std::move(indexScanList)); - textSearcher = std::move(orTextSearcher); - } - - // Unlike the TEXT_OR stage, the OR stage does not fetch the documents that it outputs. We - // add our own FETCH stage to satisfy the requirement of the TEXT_MATCH stage that its - // WorkingSetMember inputs have fetched data. - auto fetchStage = std::make_unique<FetchStage>( - expCtx(), ws, std::move(textSearcher), emptyFilter, collection); - - textMatchStage = std::make_unique<TextMatchStage>( - expCtx(), std::move(fetchStage), _params.query, _params.spec, ws); - } - - return textMatchStage; -} - -} // namespace mongo diff --git a/src/mongo/db/exec/text.h b/src/mongo/db/exec/text.h deleted file mode 100644 index 040ecb9f424..00000000000 --- a/src/mongo/db/exec/text.h +++ /dev/null @@ -1,112 +0,0 @@ -/** - * Copyright (C) 2018-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * <http://www.mongodb.com/licensing/server-side-public-license>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include <memory> - -#include "mongo/db/exec/plan_stage.h" -#include "mongo/db/exec/working_set.h" -#include "mongo/db/fts/fts_query_impl.h" -#include "mongo/db/fts/fts_spec.h" -#include "mongo/db/fts/fts_util.h" -#include "mongo/db/index/index_descriptor.h" - -namespace mongo { - -using fts::FTSQueryImpl; -using fts::FTSSpec; - -class MatchExpression; -class OperationContext; - -struct TextStageParams { - TextStageParams(const FTSSpec& s) : spec(s) {} - - // Text index descriptor. IndexCatalog owns this. - const IndexDescriptor* index; - - // Index spec. - FTSSpec spec; - - // Index keys that precede the "text" index key. - BSONObj indexPrefix; - - // The text query. 
- FTSQueryImpl query; - - // True if we need the text score in the output, because the projection includes the 'textScore' - // metadata field. - bool wantTextScore = true; -}; - -/** - * Implements a blocking stage that returns text search results. - * - * Output type: LOC_AND_OBJ. - */ -class TextStage final : public PlanStage { -public: - TextStage(ExpressionContext* expCtx, - const CollectionPtr& collection, - const TextStageParams& params, - WorkingSet* ws, - const MatchExpression* filter); - - StageState doWork(WorkingSetID* out) final; - bool isEOF() final; - - StageType stageType() const final { - return STAGE_TEXT; - } - - std::unique_ptr<PlanStageStats> getStats(); - - const SpecificStats* getSpecificStats() const final; - - static const char* kStageType; - -private: - /** - * Helper method to built the query execution plan for the text stage. - */ - std::unique_ptr<PlanStage> buildTextTree(OperationContext* opCtx, - const CollectionPtr& collection, - WorkingSet* ws, - const MatchExpression* filter, - bool wantTextScore) const; - - // Parameters of this text stage. - TextStageParams _params; - - // Stats. 
- TextStats _specificStats; -}; - -} // namespace mongo diff --git a/src/mongo/db/exec/text_match.cpp b/src/mongo/db/exec/text_match.cpp index 67b5ccf762d..67468cee843 100644 --- a/src/mongo/db/exec/text_match.cpp +++ b/src/mongo/db/exec/text_match.cpp @@ -47,10 +47,13 @@ const char* TextMatchStage::kStageType = "TEXT_MATCH"; TextMatchStage::TextMatchStage(ExpressionContext* expCtx, unique_ptr<PlanStage> child, - const FTSQueryImpl& query, - const FTSSpec& spec, + const TextMatchParams& params, WorkingSet* ws) - : PlanStage(kStageType, expCtx), _ftsMatcher(query, spec), _ws(ws) { + : PlanStage(kStageType, expCtx), _ftsMatcher(params.query, params.spec), _ws(ws) { + _specificStats.indexPrefix = params.indexPrefix; + _specificStats.indexName = params.index->indexName(); + _specificStats.parsedTextQuery = params.query.toBSON(); + _specificStats.textIndexVersion = params.index->infoObj()["textIndexVersion"].numberInt(); _children.emplace_back(std::move(child)); } diff --git a/src/mongo/db/exec/text_match.h b/src/mongo/db/exec/text_match.h index 9ba74a5ac19..d9018198ad8 100644 --- a/src/mongo/db/exec/text_match.h +++ b/src/mongo/db/exec/text_match.h @@ -36,6 +36,7 @@ #include "mongo/db/fts/fts_matcher.h" #include "mongo/db/fts/fts_query_impl.h" #include "mongo/db/fts/fts_spec.h" +#include "mongo/db/index/index_descriptor.h" namespace mongo { @@ -47,6 +48,26 @@ using fts::FTSSpec; class OperationContext; class RecordID; +struct TextMatchParams { + TextMatchParams(const IndexDescriptor* index, + const FTSSpec& spec, + BSONObj indexPrefix, + const FTSQueryImpl& query) + : index(index), spec(spec), indexPrefix(std::move(indexPrefix)), query(query) {} + + // Text index descriptor. IndexCatalog owns this. + const IndexDescriptor* const index; + + // Index spec. + const FTSSpec spec; + + // Index keys that precede the "text" index key. + const BSONObj indexPrefix; + + // The text query. 
+ const FTSQueryImpl query; +}; + /** * A stage that returns every document in the child that satisfies the FTS text matcher built with * the query parameter. @@ -58,8 +79,7 @@ class TextMatchStage final : public PlanStage { public: TextMatchStage(ExpressionContext* expCtx, std::unique_ptr<PlanStage> child, - const FTSQueryImpl& query, - const FTSSpec& spec, + const TextMatchParams& params, WorkingSet* ws); ~TextMatchStage(); diff --git a/src/mongo/db/exec/text_or.cpp b/src/mongo/db/exec/text_or.cpp index 23e6d0fcbc5..f9f3d012358 100644 --- a/src/mongo/db/exec/text_or.cpp +++ b/src/mongo/db/exec/text_or.cpp @@ -53,12 +53,12 @@ using fts::FTSSpec; const char* TextOrStage::kStageType = "TEXT_OR"; TextOrStage::TextOrStage(ExpressionContext* expCtx, - const FTSSpec& ftsSpec, + size_t keyPrefixSize, WorkingSet* ws, const MatchExpression* filter, const CollectionPtr& collection) : RequiresCollectionStage(kStageType, expCtx, collection), - _ftsSpec(ftsSpec), + _keyPrefixSize(keyPrefixSize), _ws(ws), _scoreIterator(_scores.end()), _filter(filter), @@ -284,7 +284,7 @@ PlanStage::StageState TextOrStage::addTerm(WorkingSetID wsid, WorkingSetID* out) // Locate score within possibly compound key: {prefix,term,score,suffix}. BSONObjIterator keyIt(newKeyData.keyData); - for (unsigned i = 0; i < _ftsSpec.numExtraBefore(); i++) { + for (unsigned i = 0; i < _keyPrefixSize; i++) { keyIt.next(); } diff --git a/src/mongo/db/exec/text_or.h b/src/mongo/db/exec/text_or.h index 2e358bf71ed..87936b531aa 100644 --- a/src/mongo/db/exec/text_or.h +++ b/src/mongo/db/exec/text_or.h @@ -68,7 +68,7 @@ public: }; TextOrStage(ExpressionContext* expCtx, - const FTSSpec& ftsSpec, + size_t keyPrefixSize, WorkingSet* ws, const MatchExpression* filter, const CollectionPtr& collection); @@ -123,8 +123,8 @@ private: */ StageState returnResults(WorkingSetID* out); - // The index spec used to determine where to find the score. 
- FTSSpec _ftsSpec; + // The key prefix length within a possibly compound key: {prefix,term,score,suffix}. + const size_t _keyPrefixSize; // Not owned by us. WorkingSet* _ws; diff --git a/src/mongo/db/query/classic_stage_builder.cpp b/src/mongo/db/query/classic_stage_builder.cpp index 3d52f8b3a9f..c26e0777765 100644 --- a/src/mongo/db/query/classic_stage_builder.cpp +++ b/src/mongo/db/query/classic_stage_builder.cpp @@ -59,7 +59,8 @@ #include "mongo/db/exec/skip.h" #include "mongo/db/exec/sort.h" #include "mongo/db/exec/sort_key_generator.h" -#include "mongo/db/exec/text.h" +#include "mongo/db/exec/text_match.h" +#include "mongo/db/exec/text_or.h" #include "mongo/db/index/fts_access_method.h" #include "mongo/db/matcher/extensions_callback_real.h" #include "mongo/db/record_id_helpers.h" @@ -270,26 +271,47 @@ std::unique_ptr<PlanStage> ClassicStageBuilder::build(const QuerySolutionNode* r return std::make_unique<GeoNear2DSphereStage>( params, expCtx, _ws, _collection, s2Index); } - case STAGE_TEXT: { - const TextNode* node = static_cast<const TextNode*>(root); - invariant(_collection); - const IndexDescriptor* desc = _collection->getIndexCatalog()->findIndexByName( - _opCtx, node->index.identifier.catalogName); - invariant(desc); - const FTSAccessMethod* fam = static_cast<const FTSAccessMethod*>( - _collection->getIndexCatalog()->getEntry(desc)->accessMethod()); - invariant(fam); + case STAGE_TEXT_OR: { + tassert(5432204, + "text index key prefix must be defined before processing TEXT_OR node", + _ftsKeyPrefixSize); + + auto node = static_cast<const TextOrNode*>(root); + auto ret = std::make_unique<TextOrStage>( + expCtx, *_ftsKeyPrefixSize, _ws, node->filter.get(), _collection); + for (auto childNode : root->children) { + ret->addChild(build(childNode)); + } + return ret; + } + case STAGE_TEXT_MATCH: { + auto node = static_cast<const TextMatchNode*>(root); + tassert(5432200, "collection object is not provided", _collection); + auto catalog = 
_collection->getIndexCatalog(); + tassert(5432201, "index catalog is unavailable", catalog); + auto desc = catalog->findIndexByName(_opCtx, node->index.identifier.catalogName); + tassert(5432202, + str::stream() << "no index named '" << node->index.identifier.catalogName + << "' found in catalog", + catalog); + auto fam = static_cast<const FTSAccessMethod*>(catalog->getEntry(desc)->accessMethod()); + tassert(5432203, "access method for index is not defined", fam); - TextStageParams params(fam->getSpec()); - params.index = desc; - params.indexPrefix = node->indexPrefix; // We assume here that node->ftsQuery is an FTSQueryImpl, not an FTSQueryNoop. In // practice, this means that it is illegal to use the StageBuilder on a QuerySolution // created by planning a query that contains "no-op" expressions. - params.query = static_cast<FTSQueryImpl&>(*node->ftsQuery); - params.wantTextScore = _cq.metadataDeps()[DocumentMetadataFields::kTextScore]; - return std::make_unique<TextStage>( - expCtx, _collection, params, _ws, node->filter.get()); + TextMatchParams params{desc, + fam->getSpec(), + node->indexPrefix, + static_cast<const FTSQueryImpl&>(*node->ftsQuery)}; + + // Children of this node may need to know about the key prefix size, so we'll set it + // here before recursively descending into procession child nodes, and will reset once a + // text sub-tree is constructed. 
+ _ftsKeyPrefixSize.emplace(params.spec.numExtraBefore()); + ON_BLOCK_EXIT([&] { _ftsKeyPrefixSize = {}; }); + + return std::make_unique<TextMatchStage>(expCtx, build(root->children[0]), params, _ws); } case STAGE_SHARDING_FILTER: { const ShardingFilterNode* fn = static_cast<const ShardingFilterNode*>(root); @@ -392,8 +414,6 @@ std::unique_ptr<PlanStage> ClassicStageBuilder::build(const QuerySolutionNode* r case STAGE_QUEUED_DATA: case STAGE_RECORD_STORE_FAST_COUNT: case STAGE_SUBPLAN: - case STAGE_TEXT_MATCH: - case STAGE_TEXT_OR: case STAGE_TRIAL: case STAGE_UNKNOWN: case STAGE_UPDATE: { diff --git a/src/mongo/db/query/classic_stage_builder.h b/src/mongo/db/query/classic_stage_builder.h index 1c63e6714b5..c99531cae95 100644 --- a/src/mongo/db/query/classic_stage_builder.h +++ b/src/mongo/db/query/classic_stage_builder.h @@ -49,5 +49,7 @@ public: private: WorkingSet* _ws; + + boost::optional<size_t> _ftsKeyPrefixSize; }; } // namespace mongo::stage_builder diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp index b800af89597..de24431948a 100644 --- a/src/mongo/db/query/explain.cpp +++ b/src/mongo/db/query/explain.cpp @@ -41,7 +41,6 @@ #include "mongo/db/exec/multi_plan.h" #include "mongo/db/exec/near.h" #include "mongo/db/exec/sort.h" -#include "mongo/db/exec/text.h" #include "mongo/db/exec/working_set_common.h" #include "mongo/db/keypattern.h" #include "mongo/db/pipeline/plan_executor_pipeline.h" diff --git a/src/mongo/db/query/plan_executor_impl.cpp b/src/mongo/db/query/plan_executor_impl.cpp index 1e5516d09a9..3d89b63a4ce 100644 --- a/src/mongo/db/query/plan_executor_impl.cpp +++ b/src/mongo/db/query/plan_executor_impl.cpp @@ -50,7 +50,6 @@ #include "mongo/db/exec/plan_stats.h" #include "mongo/db/exec/sort.h" #include "mongo/db/exec/subplan.h" -#include "mongo/db/exec/text.h" #include "mongo/db/exec/trial_stage.h" #include "mongo/db/exec/working_set.h" #include "mongo/db/exec/working_set_common.h" diff --git 
a/src/mongo/db/query/plan_explainer_impl.cpp b/src/mongo/db/query/plan_explainer_impl.cpp index 1d3e156af13..f75c88d140b 100644 --- a/src/mongo/db/query/plan_explainer_impl.cpp +++ b/src/mongo/db/query/plan_explainer_impl.cpp @@ -45,7 +45,7 @@ #include "mongo/db/exec/plan_stats.h" #include "mongo/db/exec/sort.h" #include "mongo/db/exec/subplan.h" -#include "mongo/db/exec/text.h" +#include "mongo/db/exec/text_match.h" #include "mongo/db/exec/trial_stage.h" #include "mongo/db/keypattern.h" #include "mongo/db/query/explain.h" @@ -84,8 +84,8 @@ void addStageSummaryStr(const PlanStage* stage, StringBuilder& sb) { const IndexScanStats* spec = static_cast<const IndexScanStats*>(specific); const KeyPattern keyPattern{spec->keyPattern}; sb << " " << keyPattern; - } else if (STAGE_TEXT == stage->stageType()) { - const TextStats* spec = static_cast<const TextStats*>(specific); + } else if (STAGE_TEXT_MATCH == stage->stageType()) { + const TextMatchStats* spec = static_cast<const TextMatchStats*>(specific); const KeyPattern keyPattern{spec->indexPrefix}; sb << " " << keyPattern; } @@ -460,15 +460,13 @@ void statsToBSON(const PlanStageStats& stats, bob->appendNumber("dupsTested", static_cast<long long>(spec->dupsTested)); bob->appendNumber("dupsDropped", static_cast<long long>(spec->dupsDropped)); } - } else if (STAGE_TEXT == stats.stageType) { - TextStats* spec = static_cast<TextStats*>(stats.specific.get()); + } else if (STAGE_TEXT_MATCH == stats.stageType) { + TextMatchStats* spec = static_cast<TextMatchStats*>(stats.specific.get()); bob->append("indexPrefix", spec->indexPrefix); bob->append("indexName", spec->indexName); bob->append("parsedTextQuery", spec->parsedTextQuery); bob->append("textIndexVersion", spec->textIndexVersion); - } else if (STAGE_TEXT_MATCH == stats.stageType) { - TextMatchStats* spec = static_cast<TextMatchStats*>(stats.specific.get()); if (verbosity >= ExplainOptions::Verbosity::kExecStats) { bob->appendNumber("docsRejected", static_cast<long 
long>(spec->docsRejected)); @@ -685,10 +683,10 @@ void PlanExplainerImpl::getSummaryStats(PlanSummaryStats* statsOut) const { const DistinctScanStats* distinctScanStats = static_cast<const DistinctScanStats*>(distinctScan->getSpecificStats()); statsOut->indexesUsed.insert(distinctScanStats->indexName); - } else if (STAGE_TEXT == stages[i]->stageType()) { - const TextStage* textStage = static_cast<const TextStage*>(stages[i]); - const TextStats* textStats = - static_cast<const TextStats*>(textStage->getSpecificStats()); + } else if (STAGE_TEXT_MATCH == stages[i]->stageType()) { + const TextMatchStage* textStage = static_cast<const TextMatchStage*>(stages[i]); + const TextMatchStats* textStats = + static_cast<const TextMatchStats*>(textStage->getSpecificStats()); statsOut->indexesUsed.insert(textStats->indexName); } else if (STAGE_GEO_NEAR_2D == stages[i]->stageType() || STAGE_GEO_NEAR_2DSPHERE == stages[i]->stageType()) { diff --git a/src/mongo/db/query/plan_explainer_sbe.cpp b/src/mongo/db/query/plan_explainer_sbe.cpp index 08f3ddcad0b..bca674394c4 100644 --- a/src/mongo/db/query/plan_explainer_sbe.cpp +++ b/src/mongo/db/query/plan_explainer_sbe.cpp @@ -158,8 +158,8 @@ void statsToBSON(const QuerySolutionNode* node, bob->append("sortPattern", smn->sort); break; } - case STAGE_TEXT: { - auto tn = static_cast<const TextNode*>(node); + case STAGE_TEXT_MATCH: { + auto tn = static_cast<const TextMatchNode*>(node); bob->append("indexPrefix", tn->indexPrefix); bob->append("indexName", tn->index.identifier.catalogName); @@ -388,8 +388,8 @@ std::string PlanExplainerSBE::getPlanSummary() const { sb << " " << keyPattern; break; } - case STAGE_TEXT: { - auto tn = static_cast<const TextNode*>(node); + case STAGE_TEXT_MATCH: { + auto tn = static_cast<const TextMatchNode*>(node); const KeyPattern keyPattern{tn->indexPrefix}; sb << " " << keyPattern; break; @@ -460,8 +460,8 @@ void PlanExplainerSBE::getSummaryStats(PlanSummaryStats* statsOut) const { 
statsOut->indexesUsed.insert(ixn->index.identifier.catalogName); break; } - case STAGE_TEXT: { - auto tn = static_cast<const TextNode*>(node); + case STAGE_TEXT_MATCH: { + auto tn = static_cast<const TextMatchNode*>(node); statsOut->indexesUsed.insert(tn->index.identifier.catalogName); break; } diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp index 56893e7ab11..46762805cec 100644 --- a/src/mongo/db/query/planner_access.cpp +++ b/src/mongo/db/query/planner_access.cpp @@ -40,6 +40,9 @@ #include "mongo/base/owned_pointer_vector.h" #include "mongo/bson/simple_bsonobj_comparator.h" #include "mongo/db/bson/dotted_path_support.h" +#include "mongo/db/fts/fts_index_format.h" +#include "mongo/db/fts/fts_query_noop.h" +#include "mongo/db/fts/fts_spec.h" #include "mongo/db/matcher/expression_array.h" #include "mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/expression_text.h" @@ -65,7 +68,7 @@ namespace dps = ::mongo::dotted_path_support; * Text node functors. */ bool isTextNode(const QuerySolutionNode* node) { - return STAGE_TEXT == node->getType(); + return STAGE_TEXT_MATCH == node->getType(); } /** @@ -346,9 +349,10 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeLeafNode( // We must not keep the expression node around. *tightnessOut = IndexBoundsBuilder::EXACT; auto textExpr = static_cast<const TextMatchExpressionBase*>(expr); - auto ret = std::make_unique<TextNode>(index); - ret->ftsQuery = textExpr->getFTSQuery().clone(); - + auto ret = std::make_unique<TextMatchNode>( + index, + textExpr->getFTSQuery().clone(), + query.metadataDeps()[DocumentMetadataFields::kTextScore]); // Count the number of prefix fields before the "text" field. for (auto&& keyPatternElt : ret->index.keyPattern) { // We know that the only key pattern with a type of String is the _fts field @@ -415,7 +419,7 @@ bool QueryPlannerAccess::shouldMergeWithLeaf(const MatchExpression* expr, // by adding a filter to the special leaf type. 
// - if (STAGE_TEXT == type) { + if (STAGE_TEXT_MATCH == type) { // Currently only one text predicate is allowed, but to be safe, make sure that we // do not try to merge two text predicates. return MatchExpression::AND == mergeType && MatchExpression::TEXT != exprType; @@ -469,8 +473,8 @@ void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingSt const StageType type = node->getType(); - if (STAGE_TEXT == type) { - auto textNode = static_cast<TextNode*>(node); + if (STAGE_TEXT_MATCH == type) { + auto textNode = static_cast<TextMatchNode*>(node); if (pos < textNode->numPrefixFields) { // This predicate is assigned to one of the prefix fields of the text index. Such @@ -568,12 +572,108 @@ void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingSt } } +void buildTextSubPlan(TextMatchNode* tn) { + tassert(5432205, "text match node is null", tn); + tassert(5432206, "text match node already has children", tn->children.empty()); + tassert(5432207, "text search query is not provided", tn->ftsQuery.get()); + + auto query = dynamic_cast<const fts::FTSQueryImpl*>(tn->ftsQuery.get()); + // If we're unable to cast to FTSQueryImpl, then the given query must be an FTSQueryNoop, which + // is only used for testing the QueryPlanner and never tries to execute the query, so we don't + // need to construct an entire text sub-plan. Moreover, to compute index bounds we need a list + // of terms, which can only be obtain from FTSQueryImpl. + if (!query) { + return; + } + + // If the query requires the "textScore" field or involves multiple search terms, a TEXT_OR or + // OR stage is needed. Otherwise, we can use a single index scan directly. 
+ const bool needOrStage = tn->wantTextScore || query->getTermsForBounds().size() > 1; + + tassert(5432208, + "failed to obtain text index version", + tn->index.infoObj.hasField("textIndexVersion")); + const auto textIndexVersion = + static_cast<fts::TextIndexVersion>(tn->index.infoObj["textIndexVersion"].numberInt()); + + // Get all the index scans for each term in our query. + std::vector<std::unique_ptr<QuerySolutionNode>> indexScanList; + indexScanList.reserve(query->getTermsForBounds().size()); + for (const auto& term : query->getTermsForBounds()) { + auto ixscan = std::make_unique<IndexScanNode>(tn->index); + ixscan->bounds.startKey = fts::FTSIndexFormat::getIndexKey( + fts::MAX_WEIGHT, term, tn->indexPrefix, textIndexVersion); + ixscan->bounds.endKey = + fts::FTSIndexFormat::getIndexKey(0, term, tn->indexPrefix, textIndexVersion); + ixscan->bounds.boundInclusion = BoundInclusion::kIncludeBothStartAndEndKeys; + ixscan->bounds.isSimpleRange = true; + ixscan->direction = -1; + ixscan->shouldDedup = tn->index.multikey; + + // If we will be adding a TEXT_OR or OR stage, then it is responsible for applying the + // filter. Otherwise, the index scan applies the filter. + if (!needOrStage && tn->filter) { + ixscan->filter = tn->filter->shallowClone(); + } + + indexScanList.push_back(std::move(ixscan)); + } + + // In case the query didn't have any search term, we can simply use an EOF sub-plan, as no + // results can be returned in this case anyway. + if (indexScanList.empty()) { + indexScanList.push_back(std::make_unique<EofNode>()); + } + + // Build the union of the index scans as a TEXT_OR or an OR stage, depending on whether the + // projection requires the "textScore" $meta field. + if (tn->wantTextScore) { + // We use a TEXT_OR stage to get the union of the results from the index scans and then + // compute their text scores. This is a blocking operation. 
+ auto textScorer = std::make_unique<TextOrNode>(); + textScorer->filter = std::move(tn->filter); + for (auto&& ixscan : indexScanList) { + textScorer->children.push_back(ixscan.release()); + } + + tn->children.push_back(textScorer.release()); + } else { + // Because we don't need the text score, we can use a non-blocking OR stage to get the union + // of the index scans or use the index scan directly if there is only one. + auto textSearcher = [&]() -> std::unique_ptr<QuerySolutionNode> { + if (indexScanList.size() == 1) { + tassert(5397400, + "If there is only one index scan and we do not need textScore, needOrStage " + "should be false", + !needOrStage); + return std::move(indexScanList[0]); + } else { + auto orTextSearcher = std::make_unique<OrNode>(); + orTextSearcher->filter = std::move(tn->filter); + for (auto&& ixscan : indexScanList) { + orTextSearcher->children.push_back(ixscan.release()); + } + return std::move(orTextSearcher); + } + }(); + + // Unlike the TEXT_OR stage, the OR stage does not fetch the documents that it outputs. We + // add our own FETCH stage to satisfy the requirement of the TEXT_MATCH stage that its + // WorkingSetMember inputs have fetched data. + auto fetchNode = std::make_unique<FetchNode>(); + fetchNode->children.push_back(textSearcher.release()); + + tn->children.push_back(fetchNode.release()); + } +} + void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntry& index) { - TextNode* tn = static_cast<TextNode*>(node); + auto tn = static_cast<TextMatchNode*>(node); // If there's no prefix, the filter is already on the node and the index prefix is null. // We can just return. 
if (!tn->numPrefixFields) { + buildTextSubPlan(tn); return; } @@ -648,6 +748,8 @@ void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntr } tn->indexPrefix = prefixBob.obj(); + + buildTextSubPlan(tn); } bool QueryPlannerAccess::orNeedsFetch(const ScanBuildingState* scanState) { @@ -698,7 +800,7 @@ void QueryPlannerAccess::finishAndOutputLeaf(ScanBuildingState* scanState, void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntry& index) { const StageType type = node->getType(); - if (STAGE_TEXT == type) { + if (STAGE_TEXT_MATCH == type) { return finishTextNode(node, index); } diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp index 63f94f14293..9be90219927 100644 --- a/src/mongo/db/query/query_planner_test_lib.cpp +++ b/src/mongo/db/query/query_planner_test_lib.cpp @@ -387,9 +387,9 @@ bool QueryPlannerTestLib::solutionMatches(const BSONObj& testSoln, } return true; - } else if (STAGE_TEXT == trueSoln->getType()) { + } else if (STAGE_TEXT_MATCH == trueSoln->getType()) { // {text: {search: "somestr", language: "something", filter: {blah: 1}}} - const TextNode* node = static_cast<const TextNode*>(trueSoln); + const TextMatchNode* node = static_cast<const TextMatchNode*>(trueSoln); BSONElement el = testSoln["text"]; if (el.eoo() || !el.isABSONObj()) { return false; diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index d86f6c5865f..cf11752de25 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -168,44 +168,6 @@ void QuerySolution::setRoot(std::unique_ptr<QuerySolutionNode> root) { } // -// TextNode -// - -void TextNode::appendToString(str::stream* ss, int indent) const { - addIndent(ss, indent); - *ss << "TEXT\n"; - addIndent(ss, indent + 1); - *ss << "name = " << index.identifier.catalogName << '\n'; - addIndent(ss, indent + 1); - *ss << "keyPattern = " << 
index.keyPattern.toString() << '\n'; - addIndent(ss, indent + 1); - *ss << "query = " << ftsQuery->getQuery() << '\n'; - addIndent(ss, indent + 1); - *ss << "language = " << ftsQuery->getLanguage() << '\n'; - addIndent(ss, indent + 1); - *ss << "caseSensitive= " << ftsQuery->getCaseSensitive() << '\n'; - addIndent(ss, indent + 1); - *ss << "diacriticSensitive= " << ftsQuery->getDiacriticSensitive() << '\n'; - addIndent(ss, indent + 1); - *ss << "indexPrefix = " << indexPrefix.toString() << '\n'; - if (nullptr != filter) { - addIndent(ss, indent + 1); - *ss << " filter = " << filter->debugString(); - } - addCommon(ss, indent); -} - -QuerySolutionNode* TextNode::clone() const { - TextNode* copy = new TextNode(this->index); - cloneBaseData(copy); - - copy->ftsQuery = this->ftsQuery->clone(); - copy->indexPrefix = this->indexPrefix; - - return copy; -} - -// // CollectionScanNode // @@ -1374,4 +1336,66 @@ QuerySolutionNode* EofNode::clone() const { return copy; } +// +// TextOrNode +// +void TextOrNode::appendToString(str::stream* ss, int indent) const { + addIndent(ss, indent); + *ss << "TEXT_OR\n"; + if (nullptr != filter) { + addIndent(ss, indent + 1); + *ss << " filter = " << filter->debugString() << '\n'; + } + addCommon(ss, indent); + for (size_t i = 0; i < children.size(); ++i) { + addIndent(ss, indent + 1); + *ss << "Child " << i << ":\n"; + children[i]->appendToString(ss, indent + 2); + *ss << '\n'; + } +} + +QuerySolutionNode* TextOrNode::clone() const { + auto copy = std::make_unique<TextOrNode>(); + cloneBaseData(copy.get()); + copy->dedup = this->dedup; + return copy.release(); +} + +// +// TextMatchNode +// +void TextMatchNode::appendToString(str::stream* ss, int indent) const { + addIndent(ss, indent); + *ss << "TEXT_MATCH\n"; + addIndent(ss, indent + 1); + *ss << "name = " << index.identifier.catalogName << '\n'; + addIndent(ss, indent + 1); + *ss << "keyPattern = " << index.keyPattern.toString() << '\n'; + addIndent(ss, indent + 1); + *ss << "query = " 
<< ftsQuery->getQuery() << '\n'; + addIndent(ss, indent + 1); + *ss << "language = " << ftsQuery->getLanguage() << '\n'; + addIndent(ss, indent + 1); + *ss << "caseSensitive= " << ftsQuery->getCaseSensitive() << '\n'; + addIndent(ss, indent + 1); + *ss << "diacriticSensitive= " << ftsQuery->getDiacriticSensitive() << '\n'; + addIndent(ss, indent + 1); + *ss << "indexPrefix = " << indexPrefix.toString() << '\n'; + addIndent(ss, indent + 1); + *ss << "wantTextScorex = " << wantTextScore << '\n'; + if (nullptr != filter) { + addIndent(ss, indent + 1); + *ss << " filter = " << filter->debugString(); + } + addCommon(ss, indent); +} + +QuerySolutionNode* TextMatchNode::clone() const { + auto copy = std::make_unique<TextMatchNode>(index, ftsQuery->clone(), wantTextScore); + cloneBaseData(copy.get()); + copy->indexPrefix = indexPrefix; + return copy.release(); +} + } // namespace mongo diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 5a7f6a0db09..30f5e6c3e09 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -405,46 +405,6 @@ private: std::unique_ptr<QuerySolutionNode> _root; }; -struct TextNode : public QuerySolutionNodeWithSortSet { - TextNode(IndexEntry index) : index(std::move(index)) {} - - virtual ~TextNode() {} - - virtual StageType getType() const { - return STAGE_TEXT; - } - - virtual void appendToString(str::stream* ss, int indent) const; - - // Text's return is LOC_AND_OBJ so it's fetched and has all fields. - bool fetched() const { - return true; - } - FieldAvailability getFieldAvailability(const std::string& field) const { - return FieldAvailability::kFullyProvided; - } - bool sortedByDiskLoc() const { - return false; - } - - QuerySolutionNode* clone() const; - - IndexEntry index; - std::unique_ptr<fts::FTSQuery> ftsQuery; - - // The number of fields in the prefix of the text index. 
For example, if the key pattern is - // - // { a: 1, b: 1, _fts: "text", _ftsx: 1, c: 1 } - // - // then the number of prefix fields is 2, because of "a" and "b". - size_t numPrefixFields = 0u; - - // "Prefix" fields of a text index can handle equality predicates. We group them with the - // text node while creating the text leaf node and convert them into a BSONObj index prefix - // when we finish the text leaf node. - BSONObj indexPrefix; -}; - struct CollectionScanNode : public QuerySolutionNodeWithSortSet { CollectionScanNode(); virtual ~CollectionScanNode() {} @@ -1278,4 +1238,58 @@ struct EofNode : public QuerySolutionNodeWithSortSet { QuerySolutionNode* clone() const; }; + +struct TextOrNode : public OrNode { + TextOrNode() {} + + StageType getType() const override { + return STAGE_TEXT_OR; + } + + void appendToString(str::stream* ss, int indent) const override; + QuerySolutionNode* clone() const override; +}; + +struct TextMatchNode : public QuerySolutionNodeWithSortSet { + TextMatchNode(IndexEntry index, std::unique_ptr<fts::FTSQuery> ftsQuery, bool wantTextScore) + : index(std::move(index)), ftsQuery(std::move(ftsQuery)), wantTextScore(wantTextScore) {} + + StageType getType() const override { + return STAGE_TEXT_MATCH; + } + + void appendToString(str::stream* ss, int indent) const override; + + // Text's return is LOC_AND_OBJ so it's fetched and has all fields. + bool fetched() const { + return true; + } + FieldAvailability getFieldAvailability(const std::string& field) const { + return FieldAvailability::kFullyProvided; + } + bool sortedByDiskLoc() const override { + return false; + } + + QuerySolutionNode* clone() const override; + + IndexEntry index; + std::unique_ptr<fts::FTSQuery> ftsQuery; + + // The number of fields in the prefix of the text index. For example, if the key pattern is + // + // { a: 1, b: 1, _fts: "text", _ftsx: 1, c: 1 } + // + // then the number of prefix fields is 2, because of "a" and "b". 
+ size_t numPrefixFields = 0u; + + // "Prefix" fields of a text index can handle equality predicates. We group them with the + // text node while creating the text leaf node and convert them into a BSONObj index prefix + // when we finish the text leaf node. + BSONObj indexPrefix; + + // True, if we need to compute text scores. + bool wantTextScore; +}; + } // namespace mongo diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index 6de6f645bc8..6b99a116b3d 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -44,7 +44,6 @@ #include "mongo/db/exec/sbe/stages/scan.h" #include "mongo/db/exec/sbe/stages/sort.h" #include "mongo/db/exec/sbe/stages/sorted_merge.h" -#include "mongo/db/exec/sbe/stages/text_match.h" #include "mongo/db/exec/sbe/stages/traverse.h" #include "mongo/db/exec/sbe/stages/union.h" #include "mongo/db/exec/sbe/stages/unique.h" @@ -208,7 +207,6 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateEofPlan( } } // namespace - std::unique_ptr<sbe::RuntimeEnvironment> makeRuntimeEnvironment( const CanonicalQuery& cq, OperationContext* opCtx, @@ -285,6 +283,36 @@ sbe::LockAcquisitionCallback makeLockAcquisitionCallback(bool checkNodeCanServeR opCtx, coll.getNss(), true)); }; } + +std::unique_ptr<fts::FTSMatcher> makeFtsMatcher(OperationContext* opCtx, + const CollectionPtr& collection, + const std::string& indexName, + const fts::FTSQuery* ftsQuery) { + auto desc = collection->getIndexCatalog()->findIndexByName(opCtx, indexName); + tassert(5432209, + str::stream() << "index descriptor not found for index named '" << indexName + << "' in collection '" << collection->ns() << "'", + desc); + + auto entry = collection->getIndexCatalog()->getEntry(desc); + tassert(5432210, + str::stream() << "index entry not found for index named '" << indexName + << "' in collection '" << collection->ns() << "'", + entry); + + auto accessMethod = static_cast<const 
FTSAccessMethod*>(entry->accessMethod()); + tassert(5432211, + str::stream() << "access method is not defined for index named '" << indexName + << "' in collection '" << collection->ns() << "'", + accessMethod); + + // We assume here that node->ftsQuery is an FTSQueryImpl, not an FTSQueryNoop. In practice, this + // means that it is illegal to use the StageBuilder on a QuerySolution created by planning a + // query that contains "no-op" expressions. + auto query = dynamic_cast<const fts::FTSQueryImpl*>(ftsQuery); + tassert(5432220, "expected FTSQueryImpl", query); + return std::make_unique<fts::FTSMatcher>(*query, accessMethod->getSpec()); +} } // namespace SlotBasedStageBuilder::SlotBasedStageBuilder(OperationContext* opCtx, @@ -1074,116 +1102,56 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder return {std::move(stage), std::move(outputs)}; } -std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildText( +std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildTextMatch( const QuerySolutionNode* root, const PlanStageReqs& reqs) { - invariant(_collection); - invariant(!reqs.getIndexKeyBitset()); - - // At present, makeLoopJoinForFetch() doesn't have the necessary logic for producing an - // oplogTsSlot, so assert that the caller doesn't need oplogTsSlot. - invariant(!reqs.has(kOplogTs)); - - auto textNode = static_cast<const TextNode*>(root); - - auto&& indexName = textNode->index.identifier.catalogName; - const auto desc = _collection->getIndexCatalog()->findIndexByName(_opCtx, indexName); - invariant(desc); - const auto accessMethod = static_cast<const FTSAccessMethod*>( - _collection->getIndexCatalog()->getEntry(desc)->accessMethod()); - invariant(accessMethod); - auto&& ftsSpec = accessMethod->getSpec(); - - // We assume here that node->ftsQuery is an FTSQueryImpl, not an FTSQueryNoop. 
In practice, this - // means that it is illegal to use the StageBuilder on a QuerySolution created by planning a - // query that contains "no-op" expressions. - auto ftsQuery = static_cast<fts::FTSQueryImpl&>(*textNode->ftsQuery); - - // A vector of the output slots for each index scan stage. Each stage outputs a record id and a - // record, so we expect each inner vector to be of length two. - std::vector<sbe::value::SlotVector> ixscanOutputSlots; - - const bool forward = true; - const bool inclusive = true; - auto makeKeyString = [&](const BSONObj& bsonKey) { - return std::make_unique<KeyString::Value>( - IndexEntryComparison::makeKeyStringFromBSONKeyForSeek( - bsonKey, - accessMethod->getSortedDataInterface()->getKeyStringVersion(), - accessMethod->getSortedDataInterface()->getOrdering(), - forward, - inclusive)); - }; - - std::vector<std::unique_ptr<sbe::PlanStage>> indexScanList; - for (const auto& term : ftsQuery.getTermsForBounds()) { - // TODO: Should we scan in the opposite direction? - auto startKeyBson = fts::FTSIndexFormat::getIndexKey( - 0, term, textNode->indexPrefix, ftsSpec.getTextIndexVersion()); - auto endKeyBson = fts::FTSIndexFormat::getIndexKey( - fts::MAX_WEIGHT, term, textNode->indexPrefix, ftsSpec.getTextIndexVersion()); - - auto&& [recordIdSlot, ixscan] = - generateSingleIntervalIndexScan(_collection, - indexName, - forward, - makeKeyString(startKeyBson), - makeKeyString(endKeyBson), - sbe::IndexKeysInclusionSet{}, - sbe::makeSV(), - boost::none, // recordSlot - &_slotIdGenerator, - _yieldPolicy, - root->nodeId(), - _lockAcquisitionCallback); - indexScanList.push_back(std::move(ixscan)); - ixscanOutputSlots.push_back(sbe::makeSV(recordIdSlot)); - } - - // If we don't have any index scan stages, produce an EOF plan. - if (indexScanList.empty()) { - return generateEofPlan(root->nodeId(), reqs, &_slotIdGenerator); - } - - PlanStageSlots outputs; - - // Union will output a slot for the record id and another for the record. 
- auto recordIdSlot = _slotIdGenerator.generate(); - auto unionOutputSlots = sbe::makeSV(recordIdSlot); - - // Index scan output slots become the input slots to the union. - auto stage = sbe::makeS<sbe::UnionStage>( - std::move(indexScanList), ixscanOutputSlots, unionOutputSlots, root->nodeId()); + tassert(5432212, "no collection object", _collection); + tassert(5432213, "index keys requsted for text match node", !reqs.getIndexKeyBitset()); + tassert(5432214, "oplogTs requsted for text match node", !reqs.has(kOplogTs)); + tassert(5432215, + str::stream() << "text match node must have one child, but got " + << root->children.size(), + root->children.size() == 1); + // TextMatchNode guarantees to produce a fetched sub-plan, but it doesn't fetch itself. Instead, + // its child sub-plan must be fully fetched, and a text match plan is constructed under this + // assumption. + tassert(5432216, "text match input must be fetched", root->children[0]->fetched()); + + auto textNode = static_cast<const TextMatchNode*>(root); - // TODO: If text score metadata is requested, then we should sum over the text scores inside the - // index keys for a given document. This will require expression evaluation to be able to - // extract the score directly from the key string. + auto childReqs = reqs.copy().set(kResult); + auto [stage, outputs] = build(textNode->children[0], childReqs); + tassert(5432217, "result slot is not produced by text match sub-plan", outputs.has(kResult)); + + // Create an FTS 'matcher' to apply 'ftsQuery' to matching documents. + auto matcher = makeFtsMatcher( + _opCtx, _collection, textNode->index.identifier.catalogName, textNode->ftsQuery.get()); + + // Build an 'ftsMatch' expression to match a document stored in the 'kResult' slot using the + // 'matcher' instance. 
+ auto ftsMatch = + makeFunction("ftsMatch", + makeConstant(sbe::value::TypeTags::ftsMatcher, + sbe::value::bitcastFrom<fts::FTSMatcher*>(matcher.release())), + makeVariable(outputs.get(kResult))); + + // Wrap the 'ftsMatch' expression into an 'if' expression to ensure that it can be applied only + // to a document. + auto filter = + sbe::makeE<sbe::EIf>(makeFunction("isObject", makeVariable(outputs.get(kResult))), + std::move(ftsMatch), + sbe::makeE<sbe::EFail>(ErrorCodes::Error{4623400}, + "textmatch requires input to be an object")); + + // Add a filter stage to apply 'ftsQuery' to matching documents and discard documents which do + // not match. stage = - sbe::makeS<sbe::UniqueStage>(std::move(stage), sbe::makeSV(recordIdSlot), root->nodeId()); - - sbe::value::SlotId resultSlot; - std::tie(resultSlot, recordIdSlot, stage) = - makeLoopJoinForFetch(std::move(stage), recordIdSlot, root->nodeId()); - - // Add a special stage to apply 'ftsQuery' to matching documents, and then add a FilterStage to - // discard documents which do not match. - auto textMatchResultSlot = _slotIdGenerator.generate(); - stage = sbe::makeS<sbe::TextMatchStage>( - std::move(stage), ftsQuery, ftsSpec, resultSlot, textMatchResultSlot, root->nodeId()); - - // Filter based on the contents of the slot filled out by the TextMatchStage. - stage = sbe::makeS<sbe::FilterStage<false>>( - std::move(stage), sbe::makeE<sbe::EVariable>(textMatchResultSlot), root->nodeId()); - - outputs.set(kResult, resultSlot); - outputs.set(kRecordId, recordIdSlot); + sbe::makeS<sbe::FilterStage<false>>(std::move(stage), std::move(filter), root->nodeId()); if (reqs.has(kReturnKey)) { // Assign the 'returnKeySlot' to be the empty object. 
outputs.set(kReturnKey, _slotIdGenerator.generate()); - stage = sbe::makeProjectStage(std::move(stage), - root->nodeId(), - outputs.get(kReturnKey), - sbe::makeE<sbe::EFunction>("newObj", sbe::makeEs())); + stage = sbe::makeProjectStage( + std::move(stage), root->nodeId(), outputs.get(kReturnKey), makeFunction("newObj")); } return {std::move(stage), std::move(outputs)}; @@ -1643,7 +1611,10 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder {STAGE_PROJECTION_DEFAULT, &SlotBasedStageBuilder::buildProjectionDefault}, {STAGE_PROJECTION_COVERED, &SlotBasedStageBuilder::buildProjectionCovered}, {STAGE_OR, &SlotBasedStageBuilder::buildOr}, - {STAGE_TEXT, &SlotBasedStageBuilder::buildText}, + // In SBE TEXT_OR behaves like a regular OR. All the work to support "textScore" + // metadata is done outside of TEXT_OR, unlike the legacy implementation. + {STAGE_TEXT_OR, &SlotBasedStageBuilder::buildOr}, + {STAGE_TEXT_MATCH, &SlotBasedStageBuilder::buildTextMatch}, {STAGE_RETURN_KEY, &SlotBasedStageBuilder::buildReturnKey}, {STAGE_EOF, &SlotBasedStageBuilder::buildEof}, {STAGE_AND_HASH, &SlotBasedStageBuilder::buildAndHash}, diff --git a/src/mongo/db/query/sbe_stage_builder.h b/src/mongo/db/query/sbe_stage_builder.h index 67b43d11682..240d44d7830 100644 --- a/src/mongo/db/query/sbe_stage_builder.h +++ b/src/mongo/db/query/sbe_stage_builder.h @@ -304,7 +304,7 @@ private: std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildOr( const QuerySolutionNode* root, const PlanStageReqs& reqs); - std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildText( + std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildTextMatch( const QuerySolutionNode* root, const PlanStageReqs& reqs); std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildReturnKey( diff --git a/src/mongo/db/query/stage_types.cpp b/src/mongo/db/query/stage_types.cpp index b5f1f16c106..6d3b0f1fd2b 100644 --- a/src/mongo/db/query/stage_types.cpp +++ 
b/src/mongo/db/query/stage_types.cpp @@ -68,7 +68,6 @@ StringData stageTypeToString(StageType stageType) { {STAGE_SORT_KEY_GENERATOR, "SORT_KEY_GENERATOR"_sd}, {STAGE_SORT_MERGE, "SORT_MERGE"_sd}, {STAGE_SUBPLAN, "SUBPLAN"_sd}, - {STAGE_TEXT, "TEXT"_sd}, {STAGE_TEXT_OR, "TEXT_OR"_sd}, {STAGE_TEXT_MATCH, "TEXT_MATCH"_sd}, {STAGE_TRIAL, "TRIAL"_sd}, diff --git a/src/mongo/db/query/stage_types.h b/src/mongo/db/query/stage_types.h index 882d0bf032b..a0dc411028b 100644 --- a/src/mongo/db/query/stage_types.h +++ b/src/mongo/db/query/stage_types.h @@ -113,7 +113,6 @@ enum StageType { STAGE_SUBPLAN, // Stages for running text search. - STAGE_TEXT, STAGE_TEXT_OR, STAGE_TEXT_MATCH, |