diff options
Diffstat (limited to 'src/mongo/db/fts/fts_query.cpp')
-rw-r--r-- | src/mongo/db/fts/fts_query.cpp | 345 |
1 files changed, 167 insertions, 178 deletions
diff --git a/src/mongo/db/fts/fts_query.cpp b/src/mongo/db/fts/fts_query.cpp index bbaac9b2f1e..8dec8e29204 100644 --- a/src/mongo/db/fts/fts_query.cpp +++ b/src/mongo/db/fts/fts_query.cpp @@ -40,219 +40,208 @@ namespace mongo { - namespace fts { +namespace fts { - using namespace mongoutils; +using namespace mongoutils; - using std::set; - using std::string; - using std::stringstream; - using std::vector; +using std::set; +using std::string; +using std::stringstream; +using std::vector; - const bool FTSQuery::caseSensitiveDefault = false; +const bool FTSQuery::caseSensitiveDefault = false; - Status FTSQuery::parse(const string& query, StringData language, bool caseSensitive, - TextIndexVersion textIndexVersion) { - StatusWithFTSLanguage swl = FTSLanguage::make( language, textIndexVersion ); - if ( !swl.getStatus().isOK() ) { - return swl.getStatus(); - } - _language = swl.getValue(); - _caseSensitive = caseSensitive; - - // Build a space delimited list of words to have the FtsTokenizer tokenize - string positiveTermSentence; - string negativeTermSentence; - - bool inNegation = false; - bool inPhrase = false; - - unsigned quoteOffset = 0; - - FTSQueryParser i(query); - while ( i.more() ) { - QueryToken t = i.next(); - - if ( t.type == QueryToken::TEXT ) { - string s = t.data.toString(); - - if ( inPhrase && inNegation ) { - // don't add term - } - else { - if (inNegation) { - negativeTermSentence.append(s); - negativeTermSentence.push_back(' '); - } - else { - positiveTermSentence.append(s); - positiveTermSentence.push_back(' '); - } - } - - if ( inNegation && !inPhrase ) - inNegation = false; +Status FTSQuery::parse(const string& query, + StringData language, + bool caseSensitive, + TextIndexVersion textIndexVersion) { + StatusWithFTSLanguage swl = FTSLanguage::make(language, textIndexVersion); + if (!swl.getStatus().isOK()) { + return swl.getStatus(); + } + _language = swl.getValue(); + _caseSensitive = caseSensitive; + + // Build a space delimited list of words to have the FtsTokenizer tokenize + string positiveTermSentence; + string negativeTermSentence; + + bool inNegation = false; + bool inPhrase = false; + + unsigned quoteOffset = 0; + + FTSQueryParser i(query); + while (i.more()) { + QueryToken t = i.next(); + + if (t.type == QueryToken::TEXT) { + string s = t.data.toString(); + + if (inPhrase && inNegation) { + // don't add term + } else { + if (inNegation) { + negativeTermSentence.append(s); + negativeTermSentence.push_back(' '); + } else { + positiveTermSentence.append(s); + positiveTermSentence.push_back(' '); } - else if ( t.type == QueryToken::DELIMITER ) { - char c = t.data[0]; - if ( c == '-' ) { - if ( !inPhrase && t.previousWhiteSpace ) { - // phrases can be negated, and terms not in phrases can be negated. - // terms in phrases can not be negated. - inNegation = true; - } - } - else if ( c == '"' ) { - if ( inPhrase ) { - // end of a phrase - unsigned phraseStart = quoteOffset + 1; - unsigned phraseLength = t.offset - phraseStart; - StringData phrase = StringData( query ).substr( phraseStart, - phraseLength ); - if ( inNegation ) - _negatedPhrases.push_back( normalizeString( phrase ) ); - else - _positivePhrases.push_back( normalizeString( phrase ) ); - inNegation = false; - inPhrase = false; - } - else { - // start of a phrase - inPhrase = true; - quoteOffset = t.offset; - } - } + } + + if (inNegation && !inPhrase) + inNegation = false; + } else if (t.type == QueryToken::DELIMITER) { + char c = t.data[0]; + if (c == '-') { + if (!inPhrase && t.previousWhiteSpace) { + // phrases can be negated, and terms not in phrases can be negated. + // terms in phrases can not be negated. + inNegation = true; } - else { - invariant( false ); + } else if (c == '"') { + if (inPhrase) { + // end of a phrase + unsigned phraseStart = quoteOffset + 1; + unsigned phraseLength = t.offset - phraseStart; + StringData phrase = StringData(query).substr(phraseStart, phraseLength); + if (inNegation) + _negatedPhrases.push_back(normalizeString(phrase)); + else + _positivePhrases.push_back(normalizeString(phrase)); + inNegation = false; + inPhrase = false; + } else { + // start of a phrase + inPhrase = true; + quoteOffset = t.offset; } } - - std::unique_ptr<FTSTokenizer> tokenizer(_language->createTokenizer()); - - _addTerms(tokenizer.get(), positiveTermSentence, false); - _addTerms(tokenizer.get(), negativeTermSentence, true); - - return Status::OK(); + } else { + invariant(false); } + } - void FTSQuery::_addTerms( FTSTokenizer* tokenizer, - const string& sentence, - bool negated ) { - - tokenizer->reset(sentence.c_str(), FTSTokenizer::FilterStopWords); + std::unique_ptr<FTSTokenizer> tokenizer(_language->createTokenizer()); - auto& activeTerms = negated ? _negatedTerms : _positiveTerms; + _addTerms(tokenizer.get(), positiveTermSentence, false); + _addTerms(tokenizer.get(), negativeTermSentence, true); - // First, get all the terms for indexing, ie, lower cased words - // If we are case-insensitive, we can also used this for positive, and negative terms - // Some terms may be expanded into multiple words in some non-English languages - while (tokenizer->moveNext()) { + return Status::OK(); +} - string word = tokenizer->get().toString(); +void FTSQuery::_addTerms(FTSTokenizer* tokenizer, const string& sentence, bool negated) { + tokenizer->reset(sentence.c_str(), FTSTokenizer::FilterStopWords); - if (!negated) { - _termsForBounds.insert(word); - } + auto& activeTerms = negated ? _negatedTerms : _positiveTerms; - // Compute the string corresponding to 'token' that will be used for the matcher. - // For case-insensitive queries, this is the same string as 'boundsTerm' computed - // above. - if (!_caseSensitive) { - activeTerms.insert(word); - } - } + // First, get all the terms for indexing, ie, lower cased words + // If we are case-insensitive, we can also used this for positive, and negative terms + // Some terms may be expanded into multiple words in some non-English languages + while (tokenizer->moveNext()) { + string word = tokenizer->get().toString(); - if (!_caseSensitive) { - return; - } + if (!negated) { + _termsForBounds.insert(word); + } - tokenizer->reset(sentence.c_str(), static_cast<FTSTokenizer::Options>( - FTSTokenizer::FilterStopWords - | FTSTokenizer::GenerateCaseSensitiveTokens)); + // Compute the string corresponding to 'token' that will be used for the matcher. + // For case-insensitive queries, this is the same string as 'boundsTerm' computed + // above. + if (!_caseSensitive) { + activeTerms.insert(word); + } + } - // If we want case-sensitivity, get the case-sensitive token - while (tokenizer->moveNext()) { + if (!_caseSensitive) { + return; + } - string word = tokenizer->get().toString(); + tokenizer->reset(sentence.c_str(), + static_cast<FTSTokenizer::Options>(FTSTokenizer::FilterStopWords | + FTSTokenizer::GenerateCaseSensitiveTokens)); - activeTerms.insert(word); - } - } + // If we want case-sensitivity, get the case-sensitive token + while (tokenizer->moveNext()) { + string word = tokenizer->get().toString(); - string FTSQuery::normalizeString(StringData str) const { - if (_caseSensitive) { - return str.toString(); - } - return tolowerString(str); - } + activeTerms.insert(word); + } +} - namespace { - void _debugHelp( stringstream& ss, const set<string>& s, const string& sep ) { - bool first = true; - for ( set<string>::const_iterator i = s.begin(); i != s.end(); ++i ) { - if ( first ) - first = false; - else - ss << sep; - ss << *i; - } - } +string FTSQuery::normalizeString(StringData str) const { + if (_caseSensitive) { + return str.toString(); + } + return tolowerString(str); +} - void _debugHelp( stringstream& ss, const vector<string>& v, const string& sep ) { - set<string> s( v.begin(), v.end() ); - _debugHelp( ss, s, sep ); - } +namespace { +void _debugHelp(stringstream& ss, const set<string>& s, const string& sep) { + bool first = true; + for (set<string>::const_iterator i = s.begin(); i != s.end(); ++i) { + if (first) + first = false; + else + ss << sep; + ss << *i; + } +} - } +void _debugHelp(stringstream& ss, const vector<string>& v, const string& sep) { + set<string> s(v.begin(), v.end()); + _debugHelp(ss, s, sep); +} +} - string FTSQuery::toString() const { - stringstream ss; - ss << "FTSQuery\n"; +string FTSQuery::toString() const { + stringstream ss; + ss << "FTSQuery\n"; - ss << " terms: "; - _debugHelp( ss, getPositiveTerms(), ", " ); - ss << "\n"; + ss << " terms: "; + _debugHelp(ss, getPositiveTerms(), ", "); + ss << "\n"; - ss << " negated terms: "; - _debugHelp( ss, getNegatedTerms(), ", " ); - ss << "\n"; + ss << " negated terms: "; + _debugHelp(ss, getNegatedTerms(), ", "); + ss << "\n"; - ss << " phrases: "; - _debugHelp( ss, getPositivePhr(), ", " ); - ss << "\n"; + ss << " phrases: "; + _debugHelp(ss, getPositivePhr(), ", "); + ss << "\n"; - ss << " negated phrases: "; - _debugHelp( ss, getNegatedPhr(), ", " ); - ss << "\n"; + ss << " negated phrases: "; + _debugHelp(ss, getNegatedPhr(), ", "); + ss << "\n"; - return ss.str(); - } + return ss.str(); +} - string FTSQuery::debugString() const { - stringstream ss; +string FTSQuery::debugString() const { + stringstream ss; - _debugHelp( ss, getPositiveTerms(), "|" ); - ss << "||"; + _debugHelp(ss, getPositiveTerms(), "|"); + ss << "||"; - _debugHelp( ss, getNegatedTerms(), "|" ); - ss << "||"; + _debugHelp(ss, getNegatedTerms(), "|"); + ss << "||"; - _debugHelp( ss, getPositivePhr(), "|" ); - ss << "||"; + _debugHelp(ss, getPositivePhr(), "|"); + ss << "||"; - _debugHelp( ss, getNegatedPhr(), "|" ); + _debugHelp(ss, getNegatedPhr(), "|"); - return ss.str(); - } + return ss.str(); +} - BSONObj FTSQuery::toBSON() const { - BSONObjBuilder bob; - bob.append( "terms", getPositiveTerms() ); - bob.append( "negatedTerms", getNegatedTerms() ); - bob.append( "phrases", getPositivePhr() ); - bob.append( "negatedPhrases", getNegatedPhr() ); - return bob.obj(); - } - } +BSONObj FTSQuery::toBSON() const { + BSONObjBuilder bob; + bob.append("terms", getPositiveTerms()); + bob.append("negatedTerms", getNegatedTerms()); + bob.append("phrases", getPositivePhr()); + bob.append("negatedPhrases", getNegatedPhr()); + return bob.obj(); +} +} } |