summaryrefslogtreecommitdiff
path: root/src/mongo/db/fts/fts_query.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/fts/fts_query.cpp')
-rw-r--r--src/mongo/db/fts/fts_query.cpp345
1 files changed, 167 insertions, 178 deletions
diff --git a/src/mongo/db/fts/fts_query.cpp b/src/mongo/db/fts/fts_query.cpp
index bbaac9b2f1e..8dec8e29204 100644
--- a/src/mongo/db/fts/fts_query.cpp
+++ b/src/mongo/db/fts/fts_query.cpp
@@ -40,219 +40,208 @@
namespace mongo {
- namespace fts {
+namespace fts {
- using namespace mongoutils;
+using namespace mongoutils;
- using std::set;
- using std::string;
- using std::stringstream;
- using std::vector;
+using std::set;
+using std::string;
+using std::stringstream;
+using std::vector;
- const bool FTSQuery::caseSensitiveDefault = false;
+const bool FTSQuery::caseSensitiveDefault = false;
- Status FTSQuery::parse(const string& query, StringData language, bool caseSensitive,
- TextIndexVersion textIndexVersion) {
- StatusWithFTSLanguage swl = FTSLanguage::make( language, textIndexVersion );
- if ( !swl.getStatus().isOK() ) {
- return swl.getStatus();
- }
- _language = swl.getValue();
- _caseSensitive = caseSensitive;
-
- // Build a space delimited list of words to have the FtsTokenizer tokenize
- string positiveTermSentence;
- string negativeTermSentence;
-
- bool inNegation = false;
- bool inPhrase = false;
-
- unsigned quoteOffset = 0;
-
- FTSQueryParser i(query);
- while ( i.more() ) {
- QueryToken t = i.next();
-
- if ( t.type == QueryToken::TEXT ) {
- string s = t.data.toString();
-
- if ( inPhrase && inNegation ) {
- // don't add term
- }
- else {
- if (inNegation) {
- negativeTermSentence.append(s);
- negativeTermSentence.push_back(' ');
- }
- else {
- positiveTermSentence.append(s);
- positiveTermSentence.push_back(' ');
- }
- }
-
- if ( inNegation && !inPhrase )
- inNegation = false;
+Status FTSQuery::parse(const string& query,
+ StringData language,
+ bool caseSensitive,
+ TextIndexVersion textIndexVersion) {
+ StatusWithFTSLanguage swl = FTSLanguage::make(language, textIndexVersion);
+ if (!swl.getStatus().isOK()) {
+ return swl.getStatus();
+ }
+ _language = swl.getValue();
+ _caseSensitive = caseSensitive;
+
+ // Build a space delimited list of words to have the FtsTokenizer tokenize
+ string positiveTermSentence;
+ string negativeTermSentence;
+
+ bool inNegation = false;
+ bool inPhrase = false;
+
+ unsigned quoteOffset = 0;
+
+ FTSQueryParser i(query);
+ while (i.more()) {
+ QueryToken t = i.next();
+
+ if (t.type == QueryToken::TEXT) {
+ string s = t.data.toString();
+
+ if (inPhrase && inNegation) {
+ // don't add term
+ } else {
+ if (inNegation) {
+ negativeTermSentence.append(s);
+ negativeTermSentence.push_back(' ');
+ } else {
+ positiveTermSentence.append(s);
+ positiveTermSentence.push_back(' ');
}
- else if ( t.type == QueryToken::DELIMITER ) {
- char c = t.data[0];
- if ( c == '-' ) {
- if ( !inPhrase && t.previousWhiteSpace ) {
- // phrases can be negated, and terms not in phrases can be negated.
- // terms in phrases can not be negated.
- inNegation = true;
- }
- }
- else if ( c == '"' ) {
- if ( inPhrase ) {
- // end of a phrase
- unsigned phraseStart = quoteOffset + 1;
- unsigned phraseLength = t.offset - phraseStart;
- StringData phrase = StringData( query ).substr( phraseStart,
- phraseLength );
- if ( inNegation )
- _negatedPhrases.push_back( normalizeString( phrase ) );
- else
- _positivePhrases.push_back( normalizeString( phrase ) );
- inNegation = false;
- inPhrase = false;
- }
- else {
- // start of a phrase
- inPhrase = true;
- quoteOffset = t.offset;
- }
- }
+ }
+
+ if (inNegation && !inPhrase)
+ inNegation = false;
+ } else if (t.type == QueryToken::DELIMITER) {
+ char c = t.data[0];
+ if (c == '-') {
+ if (!inPhrase && t.previousWhiteSpace) {
+ // phrases can be negated, and terms not in phrases can be negated.
+ // terms in phrases can not be negated.
+ inNegation = true;
}
- else {
- invariant( false );
+ } else if (c == '"') {
+ if (inPhrase) {
+ // end of a phrase
+ unsigned phraseStart = quoteOffset + 1;
+ unsigned phraseLength = t.offset - phraseStart;
+ StringData phrase = StringData(query).substr(phraseStart, phraseLength);
+ if (inNegation)
+ _negatedPhrases.push_back(normalizeString(phrase));
+ else
+ _positivePhrases.push_back(normalizeString(phrase));
+ inNegation = false;
+ inPhrase = false;
+ } else {
+ // start of a phrase
+ inPhrase = true;
+ quoteOffset = t.offset;
}
}
-
- std::unique_ptr<FTSTokenizer> tokenizer(_language->createTokenizer());
-
- _addTerms(tokenizer.get(), positiveTermSentence, false);
- _addTerms(tokenizer.get(), negativeTermSentence, true);
-
- return Status::OK();
+ } else {
+ invariant(false);
}
+ }
- void FTSQuery::_addTerms( FTSTokenizer* tokenizer,
- const string& sentence,
- bool negated ) {
-
- tokenizer->reset(sentence.c_str(), FTSTokenizer::FilterStopWords);
+ std::unique_ptr<FTSTokenizer> tokenizer(_language->createTokenizer());
- auto& activeTerms = negated ? _negatedTerms : _positiveTerms;
+ _addTerms(tokenizer.get(), positiveTermSentence, false);
+ _addTerms(tokenizer.get(), negativeTermSentence, true);
- // First, get all the terms for indexing, ie, lower cased words
- // If we are case-insensitive, we can also used this for positive, and negative terms
- // Some terms may be expanded into multiple words in some non-English languages
- while (tokenizer->moveNext()) {
+ return Status::OK();
+}
- string word = tokenizer->get().toString();
+void FTSQuery::_addTerms(FTSTokenizer* tokenizer, const string& sentence, bool negated) {
+ tokenizer->reset(sentence.c_str(), FTSTokenizer::FilterStopWords);
- if (!negated) {
- _termsForBounds.insert(word);
- }
+ auto& activeTerms = negated ? _negatedTerms : _positiveTerms;
- // Compute the string corresponding to 'token' that will be used for the matcher.
- // For case-insensitive queries, this is the same string as 'boundsTerm' computed
- // above.
- if (!_caseSensitive) {
- activeTerms.insert(word);
- }
- }
+ // First, get all the terms for indexing, ie, lower cased words
+ // If we are case-insensitive, we can also used this for positive, and negative terms
+ // Some terms may be expanded into multiple words in some non-English languages
+ while (tokenizer->moveNext()) {
+ string word = tokenizer->get().toString();
- if (!_caseSensitive) {
- return;
- }
+ if (!negated) {
+ _termsForBounds.insert(word);
+ }
- tokenizer->reset(sentence.c_str(), static_cast<FTSTokenizer::Options>(
- FTSTokenizer::FilterStopWords
- | FTSTokenizer::GenerateCaseSensitiveTokens));
+ // Compute the string corresponding to 'token' that will be used for the matcher.
+ // For case-insensitive queries, this is the same string as 'boundsTerm' computed
+ // above.
+ if (!_caseSensitive) {
+ activeTerms.insert(word);
+ }
+ }
- // If we want case-sensitivity, get the case-sensitive token
- while (tokenizer->moveNext()) {
+ if (!_caseSensitive) {
+ return;
+ }
- string word = tokenizer->get().toString();
+ tokenizer->reset(sentence.c_str(),
+ static_cast<FTSTokenizer::Options>(FTSTokenizer::FilterStopWords |
+ FTSTokenizer::GenerateCaseSensitiveTokens));
- activeTerms.insert(word);
- }
- }
+ // If we want case-sensitivity, get the case-sensitive token
+ while (tokenizer->moveNext()) {
+ string word = tokenizer->get().toString();
- string FTSQuery::normalizeString(StringData str) const {
- if (_caseSensitive) {
- return str.toString();
- }
- return tolowerString(str);
- }
+ activeTerms.insert(word);
+ }
+}
- namespace {
- void _debugHelp( stringstream& ss, const set<string>& s, const string& sep ) {
- bool first = true;
- for ( set<string>::const_iterator i = s.begin(); i != s.end(); ++i ) {
- if ( first )
- first = false;
- else
- ss << sep;
- ss << *i;
- }
- }
+string FTSQuery::normalizeString(StringData str) const {
+ if (_caseSensitive) {
+ return str.toString();
+ }
+ return tolowerString(str);
+}
- void _debugHelp( stringstream& ss, const vector<string>& v, const string& sep ) {
- set<string> s( v.begin(), v.end() );
- _debugHelp( ss, s, sep );
- }
+namespace {
+void _debugHelp(stringstream& ss, const set<string>& s, const string& sep) {
+ bool first = true;
+ for (set<string>::const_iterator i = s.begin(); i != s.end(); ++i) {
+ if (first)
+ first = false;
+ else
+ ss << sep;
+ ss << *i;
+ }
+}
- }
+void _debugHelp(stringstream& ss, const vector<string>& v, const string& sep) {
+ set<string> s(v.begin(), v.end());
+ _debugHelp(ss, s, sep);
+}
+}
- string FTSQuery::toString() const {
- stringstream ss;
- ss << "FTSQuery\n";
+string FTSQuery::toString() const {
+ stringstream ss;
+ ss << "FTSQuery\n";
- ss << " terms: ";
- _debugHelp( ss, getPositiveTerms(), ", " );
- ss << "\n";
+ ss << " terms: ";
+ _debugHelp(ss, getPositiveTerms(), ", ");
+ ss << "\n";
- ss << " negated terms: ";
- _debugHelp( ss, getNegatedTerms(), ", " );
- ss << "\n";
+ ss << " negated terms: ";
+ _debugHelp(ss, getNegatedTerms(), ", ");
+ ss << "\n";
- ss << " phrases: ";
- _debugHelp( ss, getPositivePhr(), ", " );
- ss << "\n";
+ ss << " phrases: ";
+ _debugHelp(ss, getPositivePhr(), ", ");
+ ss << "\n";
- ss << " negated phrases: ";
- _debugHelp( ss, getNegatedPhr(), ", " );
- ss << "\n";
+ ss << " negated phrases: ";
+ _debugHelp(ss, getNegatedPhr(), ", ");
+ ss << "\n";
- return ss.str();
- }
+ return ss.str();
+}
- string FTSQuery::debugString() const {
- stringstream ss;
+string FTSQuery::debugString() const {
+ stringstream ss;
- _debugHelp( ss, getPositiveTerms(), "|" );
- ss << "||";
+ _debugHelp(ss, getPositiveTerms(), "|");
+ ss << "||";
- _debugHelp( ss, getNegatedTerms(), "|" );
- ss << "||";
+ _debugHelp(ss, getNegatedTerms(), "|");
+ ss << "||";
- _debugHelp( ss, getPositivePhr(), "|" );
- ss << "||";
+ _debugHelp(ss, getPositivePhr(), "|");
+ ss << "||";
- _debugHelp( ss, getNegatedPhr(), "|" );
+ _debugHelp(ss, getNegatedPhr(), "|");
- return ss.str();
- }
+ return ss.str();
+}
- BSONObj FTSQuery::toBSON() const {
- BSONObjBuilder bob;
- bob.append( "terms", getPositiveTerms() );
- bob.append( "negatedTerms", getNegatedTerms() );
- bob.append( "phrases", getPositivePhr() );
- bob.append( "negatedPhrases", getNegatedPhr() );
- return bob.obj();
- }
- }
+BSONObj FTSQuery::toBSON() const {
+ BSONObjBuilder bob;
+ bob.append("terms", getPositiveTerms());
+ bob.append("negatedTerms", getNegatedTerms());
+ bob.append("phrases", getPositivePhr());
+ bob.append("negatedPhrases", getNegatedPhr());
+ return bob.obj();
+}
+}
}