diff options
author | Mark Benvenuto <mark.benvenuto@mongodb.com> | 2015-05-15 16:01:42 -0400 |
---|---|---|
committer | Mark Benvenuto <mark.benvenuto@mongodb.com> | 2015-06-02 14:09:25 -0400 |
commit | 2c1e081ac6fa825b0499198799b1345c56329b6d (patch) | |
tree | 4ef88378e0b739bcd275e8567e0c495d839c2140 /src | |
parent | 8bab38cfd8a58c6e5247f44cfe721fa70733e744 (diff) | |
download | mongo-2c1e081ac6fa825b0499198799b1345c56329b6d.tar.gz |
SERVER-18514: Improve Multi-Word Term Search performance with RLP
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/fts/fts_query.cpp | 27 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_query.h | 4 |
2 files changed, 23 insertions, 8 deletions
diff --git a/src/mongo/db/fts/fts_query.cpp b/src/mongo/db/fts/fts_query.cpp index b3384d4b40c..8cc93810f41 100644 --- a/src/mongo/db/fts/fts_query.cpp +++ b/src/mongo/db/fts/fts_query.cpp @@ -60,7 +60,9 @@ namespace mongo { _language = swl.getValue(); _caseSensitive = caseSensitive; - std::unique_ptr<FTSTokenizer> tokenizer(_language->createTokenizer()); + // Build a space delimited list of words to have the FtsTokenizer tokenize + string positiveTermSentence; + string negativeTermSentence; bool inNegation = false; bool inPhrase = false; @@ -78,7 +80,14 @@ namespace mongo { // don't add term } else { - _addTerm( tokenizer.get(), s, inNegation ); + if (inNegation) { + negativeTermSentence.append(s); + negativeTermSentence.push_back(' '); + } + else { + positiveTermSentence.append(s); + positiveTermSentence.push_back(' '); + } } if ( inNegation && !inPhrase ) @@ -119,13 +128,19 @@ namespace mongo { } } + std::unique_ptr<FTSTokenizer> tokenizer(_language->createTokenizer()); + + _addTerms(tokenizer.get(), positiveTermSentence, false); + _addTerms(tokenizer.get(), negativeTermSentence, true); + return Status::OK(); } - void FTSQuery::_addTerm( FTSTokenizer* tokenizer, - const string& token, + void FTSQuery::_addTerms( FTSTokenizer* tokenizer, + const string& sentence, bool negated ) { - tokenizer->reset(token.c_str(), FTSTokenizer::FilterStopWords); + + tokenizer->reset(sentence.c_str(), FTSTokenizer::FilterStopWords); auto& activeTerms = negated ? _negatedTerms : _positiveTerms; @@ -152,7 +167,7 @@ namespace mongo { return; } - tokenizer->reset(token.c_str(), static_cast<FTSTokenizer::Options>( + tokenizer->reset(sentence.c_str(), static_cast<FTSTokenizer::Options>( FTSTokenizer::FilterStopWords | FTSTokenizer::GenerateCaseSensitiveTokens)); diff --git a/src/mongo/db/fts/fts_query.h b/src/mongo/db/fts/fts_query.h index f9ea7f2d1eb..88ca4ce64d0 100644 --- a/src/mongo/db/fts/fts_query.h +++ b/src/mongo/db/fts/fts_query.h @@ -79,8 +79,8 @@ namespace mongo { static const bool caseSensitiveDefault; private: - void _addTerm( FTSTokenizer* tokenizer, - const std::string& token, + void _addTerms( FTSTokenizer* tokenizer, + const std::string& tokens, bool negated ); const FTSLanguage* _language; |