summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mark Benvenuto <mark.benvenuto@mongodb.com> 2015-05-15 16:01:42 -0400
committer Mark Benvenuto <mark.benvenuto@mongodb.com> 2015-06-02 14:09:25 -0400
commit 2c1e081ac6fa825b0499198799b1345c56329b6d (patch)
tree 4ef88378e0b739bcd275e8567e0c495d839c2140
parent 8bab38cfd8a58c6e5247f44cfe721fa70733e744 (diff)
download mongo-2c1e081ac6fa825b0499198799b1345c56329b6d.tar.gz
SERVER-18514: Improve Multi-Word Term Search performance with RLP
-rw-r--r-- src/mongo/db/fts/fts_query.cpp 27
-rw-r--r-- src/mongo/db/fts/fts_query.h 4
2 files changed, 23 insertions, 8 deletions
diff --git a/src/mongo/db/fts/fts_query.cpp b/src/mongo/db/fts/fts_query.cpp
index b3384d4b40c..8cc93810f41 100644
--- a/src/mongo/db/fts/fts_query.cpp
+++ b/src/mongo/db/fts/fts_query.cpp
@@ -60,7 +60,9 @@ namespace mongo {
_language = swl.getValue();
_caseSensitive = caseSensitive;
- std::unique_ptr<FTSTokenizer> tokenizer(_language->createTokenizer());
+ // Build a space delimited list of words to have the FtsTokenizer tokenize
+ string positiveTermSentence;
+ string negativeTermSentence;
bool inNegation = false;
bool inPhrase = false;
@@ -78,7 +80,14 @@ namespace mongo {
// don't add term
}
else {
- _addTerm( tokenizer.get(), s, inNegation );
+ if (inNegation) {
+ negativeTermSentence.append(s);
+ negativeTermSentence.push_back(' ');
+ }
+ else {
+ positiveTermSentence.append(s);
+ positiveTermSentence.push_back(' ');
+ }
}
if ( inNegation && !inPhrase )
@@ -119,13 +128,19 @@ namespace mongo {
}
}
+ std::unique_ptr<FTSTokenizer> tokenizer(_language->createTokenizer());
+
+ _addTerms(tokenizer.get(), positiveTermSentence, false);
+ _addTerms(tokenizer.get(), negativeTermSentence, true);
+
return Status::OK();
}
- void FTSQuery::_addTerm( FTSTokenizer* tokenizer,
- const string& token,
+ void FTSQuery::_addTerms( FTSTokenizer* tokenizer,
+ const string& sentence,
bool negated ) {
- tokenizer->reset(token.c_str(), FTSTokenizer::FilterStopWords);
+
+ tokenizer->reset(sentence.c_str(), FTSTokenizer::FilterStopWords);
auto& activeTerms = negated ? _negatedTerms : _positiveTerms;
@@ -152,7 +167,7 @@ namespace mongo {
return;
}
- tokenizer->reset(token.c_str(), static_cast<FTSTokenizer::Options>(
+ tokenizer->reset(sentence.c_str(), static_cast<FTSTokenizer::Options>(
FTSTokenizer::FilterStopWords
| FTSTokenizer::GenerateCaseSensitiveTokens));
diff --git a/src/mongo/db/fts/fts_query.h b/src/mongo/db/fts/fts_query.h
index f9ea7f2d1eb..88ca4ce64d0 100644
--- a/src/mongo/db/fts/fts_query.h
+++ b/src/mongo/db/fts/fts_query.h
@@ -79,8 +79,8 @@ namespace mongo {
static const bool caseSensitiveDefault;
private:
- void _addTerm( FTSTokenizer* tokenizer,
- const std::string& token,
+ void _addTerms( FTSTokenizer* tokenizer,
+ const std::string& tokens,
bool negated );
const FTSLanguage* _language;