summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Rassi <rassi@10gen.com>2015-03-12 18:47:49 -0400
committerJason Rassi <rassi@10gen.com>2015-03-12 18:55:39 -0400
commit86f3f05da7238cabb8d260677e81a07399a0906b (patch)
treed9072aeb1ae8a3715751bbdfdd87f8761a4342da
parent7cd9cf303c824478f0f6d60cadfcc1a25bdb21f2 (diff)
downloadmongo-86f3f05da7238cabb8d260677e81a07399a0906b.tar.gz
SERVER-17437 Case-sensitive mode for FTSQuery/FTSMatcher
-rw-r--r--src/mongo/db/exec/stagedebug_cmd.cpp1
-rw-r--r--src/mongo/db/exec/text.cpp14
-rw-r--r--src/mongo/db/fts/fts_matcher.cpp125
-rw-r--r--src/mongo/db/fts/fts_matcher.h61
-rw-r--r--src/mongo/db/fts/fts_matcher_test.cpp127
-rw-r--r--src/mongo/db/fts/fts_query.cpp70
-rw-r--r--src/mongo/db/fts/fts_query.h49
-rw-r--r--src/mongo/db/fts/fts_query_test.cpp176
-rw-r--r--src/mongo/db/fts/fts_spec.cpp6
-rw-r--r--src/mongo/db/fts/fts_spec_legacy.cpp4
-rw-r--r--src/mongo/db/fts/fts_util.h9
-rw-r--r--src/mongo/db/fts/stop_words_turkish.txt1
-rw-r--r--src/mongo/db/query/stage_builder.cpp4
13 files changed, 472 insertions, 175 deletions
diff --git a/src/mongo/db/exec/stagedebug_cmd.cpp b/src/mongo/db/exec/stagedebug_cmd.cpp
index 3c512bdd2e3..e1fc8e2b068 100644
--- a/src/mongo/db/exec/stagedebug_cmd.cpp
+++ b/src/mongo/db/exec/stagedebug_cmd.cpp
@@ -418,6 +418,7 @@ namespace mongo {
if (!params.query.parse(search,
fam->getSpec().defaultLanguage().str().c_str(),
+ fts::FTSQuery::caseSensitiveDefault,
fam->getSpec().getTextIndexVersion()).isOK()) {
return NULL;
}
diff --git a/src/mongo/db/exec/text.cpp b/src/mongo/db/exec/text.cpp
index fc876d0f17c..48be5d2e213 100644
--- a/src/mongo/db/exec/text.cpp
+++ b/src/mongo/db/exec/text.cpp
@@ -200,8 +200,10 @@ namespace mongo {
// Get all the index scans for each term in our query.
// TODO it would be more efficient to only have one active scan at a time and create the
// next when each finishes.
- for (size_t i = 0; i < _params.query.getTerms().size(); i++) {
- const string& term = _params.query.getTerms()[i];
+ for (std::set<std::string>::const_iterator it = _params.query.getTermsForBounds().begin();
+ it != _params.query.getTermsForBounds().end();
+ ++it) {
+ const string& term = *it;
IndexScanParams params;
params.bounds.startKey = FTSIndexFormat::getIndexKey(MAX_WEIGHT,
term,
@@ -318,11 +320,9 @@ namespace mongo {
_scoreIterator++;
// Filter for phrases and negated terms
- if (_params.query.hasNonTermPieces()) {
- if (!_ftsMatcher.matchesNonTerm(wsm->obj.value())) {
- _ws->free(textRecordData.wsid);
- return PlanStage::NEED_TIME;
- }
+ if (!_ftsMatcher.matches(wsm->obj.value())) {
+ _ws->free(textRecordData.wsid);
+ return PlanStage::NEED_TIME;
}
// Populate the working set member with the text score and return it.
diff --git a/src/mongo/db/fts/fts_matcher.cpp b/src/mongo/db/fts/fts_matcher.cpp
index e4a38726011..492dbdf7b7b 100644
--- a/src/mongo/db/fts/fts_matcher.cpp
+++ b/src/mongo/db/fts/fts_matcher.cpp
@@ -40,29 +40,87 @@ namespace mongo {
using std::string;
+ /**
+ * Does the string 'phrase' occur in the string 'haystack'? Match is case-insensitive if
+ * 'caseSensitive' is false; otherwise, an exact substring match is performed.
+ */
+ static bool phraseMatches( const string& phrase,
+ const string& haystack,
+ bool caseSensitive ) {
+ if ( caseSensitive ) {
+ return haystack.find( phrase ) != string::npos;
+ }
+ return strcasestr( haystack.c_str(), phrase.c_str() ) != NULL;
+ }
+
FTSMatcher::FTSMatcher( const FTSQuery& query, const FTSSpec& spec )
: _query( query ),
_spec( spec ) {
}
- /*
- * Checks if the obj contains any of the negTerms, if so returns true, otherwise false
- * @param obj, object to be checked
- */
- bool FTSMatcher::hasNegativeTerm(const BSONObj& obj ) const {
- // called during search. deals with the case in which we have a term
- // flagged for exclusion, i.e. "hello -world" we want to remove all
- // results that include "world"
+ bool FTSMatcher::matches( const BSONObj& obj ) const {
+ if ( canSkipPositiveTermCheck() ) {
+ // We can assume that 'obj' has at least one positive term, and dassert as a sanity
+ // check.
+ dassert( hasPositiveTerm( obj ) );
+ }
+ else {
+ if ( !hasPositiveTerm( obj ) ) {
+ return false;
+ }
+ }
+
+ if ( hasNegativeTerm( obj ) ) {
+ return false;
+ }
+ if ( !positivePhrasesMatch( obj ) ) {
+ return false;
+ }
+
+ return negativePhrasesMatch( obj );
+ }
+
+ bool FTSMatcher::hasPositiveTerm( const BSONObj& obj ) const {
+ FTSElementIterator it( _spec, obj );
+
+ while ( it.more() ) {
+ FTSIteratorValue val = it.next();
+ if ( _hasPositiveTerm_string( val._language, val._text ) ) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ bool FTSMatcher::_hasPositiveTerm_string( const FTSLanguage* language,
+ const string& raw ) const {
+ Tokenizer i( *language, raw );
+ Stemmer stemmer( *language );
+ while ( i.more() ) {
+ Token t = i.next();
+ if ( t.type != Token::TEXT ) {
+ continue;
+ }
+ string word = stemmer.stem( _query.normalizeString( t.data ) );
+ if ( _query.getPositiveTerms().count( word ) > 0 ) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ bool FTSMatcher::hasNegativeTerm( const BSONObj& obj ) const {
if ( _query.getNegatedTerms().size() == 0 ) {
return false;
}
- FTSElementIterator it( _spec, obj);
+ FTSElementIterator it( _spec, obj );
while ( it.more() ) {
FTSIteratorValue val = it.next();
- if (_hasNegativeTerm_string( val._language, val._text )) {
+ if ( _hasNegativeTerm_string( val._language, val._text ) ) {
return true;
}
}
@@ -70,35 +128,36 @@ namespace mongo {
return false;
}
- /*
- * Checks if any of the negTerms is in the tokenized string
- * @param raw, the raw string to be tokenized
- */
bool FTSMatcher::_hasNegativeTerm_string( const FTSLanguage* language,
const string& raw ) const {
-
Tokenizer i( *language, raw );
Stemmer stemmer( *language );
while ( i.more() ) {
Token t = i.next();
- if ( t.type != Token::TEXT )
+ if ( t.type != Token::TEXT ) {
continue;
- string word = stemmer.stem( tolowerString( t.data ) );
- if ( _query.getNegatedTerms().count( word ) > 0 )
+ }
+ string word = stemmer.stem( _query.normalizeString( t.data ) );
+ if ( _query.getNegatedTerms().count( word ) > 0 ) {
return true;
+ }
}
return false;
}
- bool FTSMatcher::phrasesMatch( const BSONObj& obj ) const {
- for (unsigned i = 0; i < _query.getPhr().size(); i++ ) {
- if ( !phraseMatch( _query.getPhr()[i], obj ) ) {
+ bool FTSMatcher::positivePhrasesMatch( const BSONObj& obj ) const {
+ for ( size_t i = 0; i < _query.getPositivePhr().size(); i++ ) {
+ if ( !_phraseMatch( _query.getPositivePhr()[i], obj ) ) {
return false;
}
}
- for (unsigned i = 0; i < _query.getNegatedPhr().size(); i++ ) {
- if ( phraseMatch( _query.getNegatedPhr()[i], obj ) ) {
+ return true;
+ }
+
+ bool FTSMatcher::negativePhrasesMatch( const BSONObj& obj ) const {
+ for ( size_t i = 0; i < _query.getNegatedPhr().size(); i++ ) {
+ if ( _phraseMatch( _query.getNegatedPhr()[i], obj ) ) {
return false;
}
}
@@ -106,31 +165,17 @@ namespace mongo {
return true;
}
- /**
- * Checks if phrase is exactly matched in obj, returns true if so, false otherwise
- * @param phrase, the string to be matched
- * @param obj, document in the collection to match against
- */
- bool FTSMatcher::phraseMatch( const string& phrase, const BSONObj& obj ) const {
- FTSElementIterator it( _spec, obj);
+ bool FTSMatcher::_phraseMatch( const string& phrase, const BSONObj& obj ) const {
+ FTSElementIterator it( _spec, obj );
while ( it.more() ) {
FTSIteratorValue val = it.next();
- if (_phraseMatches( phrase, val._text )) {
+ if ( phraseMatches( phrase, val._text, _query.getCaseSensitive() ) ) {
return true;
}
}
return false;
}
-
- /*
- * Looks for phrase in a raw string
- * @param phrase, phrase to match
- * @param haystack, raw string to be parsed
- */
- bool FTSMatcher::_phraseMatches( const string& phrase, const string& haystack ) const {
- return strcasestr( haystack.c_str(), phrase.c_str() ) != NULL;
- }
}
}
diff --git a/src/mongo/db/fts/fts_matcher.h b/src/mongo/db/fts/fts_matcher.h
index 1ddf5f80ebd..058dcc7bcb6 100644
--- a/src/mongo/db/fts/fts_matcher.h
+++ b/src/mongo/db/fts/fts_matcher.h
@@ -44,34 +44,65 @@ namespace mongo {
FTSMatcher( const FTSQuery& query, const FTSSpec& spec );
/**
- * @return true if obj has a negated term
+ * Returns whether 'obj' matches the query. An object is considered to match the query
+ * if all four of the following conditions hold:
+ * 1) The object contains at least one positive term.
+ * 2) The object contains zero negative terms.
+ * 3) The object contains all positive phrases.
+ * 4) The object contains zero negative phrases.
*/
- bool hasNegativeTerm(const BSONObj& obj ) const;
+ bool matches( const BSONObj& obj ) const;
/**
- * @return true if obj is ok by all phrases
- * so all full phrases and no negated
+ * Returns whether 'obj' contains at least one positive term.
*/
- bool phrasesMatch( const BSONObj& obj ) const;
- bool phraseMatch( const std::string& phrase, const BSONObj& obj ) const;
+ bool hasPositiveTerm( const BSONObj& obj ) const;
- bool matchesNonTerm( const BSONObj& obj ) const {
- return !hasNegativeTerm( obj ) && phrasesMatch( obj );
- }
+ /**
+ * Returns whether 'obj' contains at least one negative term.
+ */
+ bool hasNegativeTerm( const BSONObj& obj ) const;
+
+ /**
+ * Returns whether 'obj' contains all positive phrases.
+ */
+ bool positivePhrasesMatch( const BSONObj& obj ) const;
+
+ /**
+ * Returns whether 'obj' contains zero negative phrases.
+ */
+ bool negativePhrasesMatch( const BSONObj& obj ) const;
private:
/**
- * @return true if raw has a negated term
+ * For matching, can we skip the positive term check? This is done as an optimization when
+ * we have a-priori knowledge that all documents being matched pass the positive term
+ * check.
+ */
+ bool canSkipPositiveTermCheck() const { return !_query.getCaseSensitive(); }
+
+ /**
+ * Returns whether the string 'raw' contains any positive terms from the query.
+ * 'language' specifies the language for 'raw'.
+ */
+ bool _hasPositiveTerm_string( const FTSLanguage* language,
+ const std::string& raw ) const;
+
+ /**
+ * Returns whether the string 'raw' contains any negative terms from the query.
+ * 'language' specifies the language for 'raw'.
*/
- bool _hasNegativeTerm_string( const FTSLanguage* language, const std::string& raw ) const;
+ bool _hasNegativeTerm_string( const FTSLanguage* language,
+ const std::string& raw ) const;
/**
- * @return true if raw has a phrase
+ * Returns whether 'obj' contains the exact string 'phrase' in any indexed fields.
*/
- bool _phraseMatches( const std::string& phrase, const std::string& raw ) const;
+ bool _phraseMatch( const std::string& phrase, const BSONObj& obj ) const;
- FTSQuery _query;
- FTSSpec _spec;
+ // TODO These should be unowned pointers instead of owned copies.
+ const FTSQuery _query;
+ const FTSSpec _spec;
};
}
diff --git a/src/mongo/db/fts/fts_matcher_test.cpp b/src/mongo/db/fts/fts_matcher_test.cpp
index 0201ed4ba09..9f8becb83cd 100644
--- a/src/mongo/db/fts/fts_matcher_test.cpp
+++ b/src/mongo/db/fts/fts_matcher_test.cpp
@@ -38,7 +38,7 @@ namespace mongo {
TEST( FTSMatcher, NegWild1 ) {
FTSQuery q;
- ASSERT_OK( q.parse( "foo -bar", "english", TEXT_INDEX_VERSION_2 ) );
+ ASSERT_OK( q.parse( "foo -bar", "english", false, TEXT_INDEX_VERSION_2 ) );
FTSMatcher m( q,
FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "$**" << "text" ) ) ) ) );
@@ -49,7 +49,7 @@ namespace mongo {
// Regression test for SERVER-11994.
TEST( FTSMatcher, NegWild2 ) {
FTSQuery q;
- ASSERT_OK( q.parse( "pizza -restaurant", "english", TEXT_INDEX_VERSION_2 ) );
+ ASSERT_OK( q.parse( "pizza -restaurant", "english", false, TEXT_INDEX_VERSION_2 ) );
FTSMatcher m( q,
FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "$**" << "text" ) ) ) ) );
@@ -59,34 +59,33 @@ namespace mongo {
TEST( FTSMatcher, Phrase1 ) {
FTSQuery q;
- ASSERT_OK( q.parse( "foo \"table top\"", "english", TEXT_INDEX_VERSION_2 ) );
+ ASSERT_OK( q.parse( "foo \"table top\"", "english", false, TEXT_INDEX_VERSION_2 ) );
FTSMatcher m( q,
FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "$**" << "text" ) ) ) ) );
- ASSERT( m.phraseMatch( "table top", BSON( "x" << "table top" ) ) );
- ASSERT( m.phraseMatch( "table top", BSON( "x" << " asd table top asd" ) ) );
- ASSERT( !m.phraseMatch( "table top", BSON( "x" << "tablz top" ) ) );
- ASSERT( !m.phraseMatch( "table top", BSON( "x" << " asd tablz top asd" ) ) );
+ ASSERT( m.positivePhrasesMatch( BSON( "x" << "table top" ) ) );
+ ASSERT( m.positivePhrasesMatch( BSON( "x" << " asd table top asd" ) ) );
+ ASSERT( !m.positivePhrasesMatch( BSON( "x" << "tablz top" ) ) );
+ ASSERT( !m.positivePhrasesMatch( BSON( "x" << " asd tablz top asd" ) ) );
- ASSERT( m.phrasesMatch( BSON( "x" << "table top" ) ) );
- ASSERT( !m.phrasesMatch( BSON( "x" << "table a top" ) ) );
+ ASSERT( m.positivePhrasesMatch( BSON( "x" << "table top" ) ) );
+ ASSERT( !m.positivePhrasesMatch( BSON( "x" << "table a top" ) ) );
}
TEST( FTSMatcher, Phrase2 ) {
FTSQuery q;
- ASSERT_OK( q.parse( "foo \"table top\"", "english", TEXT_INDEX_VERSION_2 ) );
+ ASSERT_OK( q.parse( "foo \"table top\"", "english", false, TEXT_INDEX_VERSION_2 ) );
FTSMatcher m( q,
FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "x" << "text" ) ) ) ) );
- ASSERT( m.phraseMatch( "table top",
- BSON( "x" << BSON_ARRAY( "table top" ) ) ) );
+ ASSERT( m.positivePhrasesMatch( BSON( "x" << BSON_ARRAY( "table top" ) ) ) );
}
// Test that the matcher parses the document with the document language, not the search
// language.
TEST( FTSMatcher, ParsesUsingDocLanguage ) {
FTSQuery q;
- ASSERT_OK( q.parse( "-glad", "none", TEXT_INDEX_VERSION_2 ) );
+ ASSERT_OK( q.parse( "-glad", "none", false, TEXT_INDEX_VERSION_2 ) );
FTSMatcher m( q,
FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "x" << "text" ) ) ) ) );
@@ -95,5 +94,107 @@ namespace mongo {
ASSERT( m.hasNegativeTerm( BSON( "x" << "gladly" ) ) );
}
+ // Returns whether a document indexed with text data 'doc' contains any positive terms from
+ // case-sensitive text query 'search'.
+ static bool docHasPositiveTermWithCase( const std::string& doc,
+ const std::string& search ) {
+ FTSQuery q;
+ ASSERT_OK( q.parse( search, "english", true, TEXT_INDEX_VERSION_2 ) );
+ FTSMatcher m( q,
+ FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "x" << "text" ) ) ) ) );
+
+ return m.hasPositiveTerm( BSON( "x" << doc ) );
+ }
+
+ TEST( FTSMatcher, HasPositiveTermCaseSensitive ) {
+ ASSERT_TRUE( docHasPositiveTermWithCase( "hello world", "hello" ) );
+ ASSERT_TRUE( docHasPositiveTermWithCase( "Hello World", "Hello" ) );
+ ASSERT_TRUE( docHasPositiveTermWithCase( "Hello World", "World Hello" ) );
+ ASSERT_TRUE( docHasPositiveTermWithCase( "Hello World", "World GoodBye" ) );
+ ASSERT_TRUE( docHasPositiveTermWithCase( "John Runs", "Runs" ) );
+ ASSERT_TRUE( docHasPositiveTermWithCase( "John Runs", "Running" ) );
+ ASSERT_TRUE( docHasPositiveTermWithCase( "John Runs", "Run" ) );
+
+ ASSERT_FALSE( docHasPositiveTermWithCase( "John Runs", "run" ) );
+ ASSERT_FALSE( docHasPositiveTermWithCase( "Hello World", "HELLO" ) );
+ ASSERT_FALSE( docHasPositiveTermWithCase( "hello world", "Hello" ) );
+ ASSERT_FALSE( docHasPositiveTermWithCase( "Hello World", "hello" ) );
+ }
+
+ // Returns whether a document indexed with text data 'doc' contains any negative terms from
+ // case-sensitive text query 'search'.
+ static bool docHasNegativeTermWithCase( const std::string& doc,
+ const std::string& search ) {
+ FTSQuery q;
+ ASSERT_OK( q.parse( search, "english", true, TEXT_INDEX_VERSION_2 ) );
+ FTSMatcher m( q,
+ FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "x" << "text" ) ) ) ) );
+
+ return m.hasNegativeTerm( BSON( "x" << doc ) );
+ }
+
+ TEST( FTSMatcher, HasNegativeTermCaseSensitive ) {
+ ASSERT_TRUE( docHasNegativeTermWithCase( "hello world", "hello -world" ) );
+ ASSERT_TRUE( docHasNegativeTermWithCase( "Hello World", "Hello -World" ) );
+ ASSERT_TRUE( docHasNegativeTermWithCase( "Hello World", "-World -Hello" ) );
+ ASSERT_TRUE( docHasNegativeTermWithCase( "Hello World", "-Goodbye -World" ) );
+ ASSERT_TRUE( docHasNegativeTermWithCase( "John Runs", "-Runs" ) );
+ ASSERT_TRUE( docHasNegativeTermWithCase( "John Runs", "-Running" ) );
+ ASSERT_TRUE( docHasNegativeTermWithCase( "John Runs", "-Run" ) );
+
+ ASSERT_FALSE( docHasNegativeTermWithCase( "John Runs", "-run" ) );
+ ASSERT_FALSE( docHasNegativeTermWithCase( "Hello World", "Hello -WORLD" ) );
+ ASSERT_FALSE( docHasNegativeTermWithCase( "hello world", "hello -World" ) );
+ ASSERT_FALSE( docHasNegativeTermWithCase( "Hello World", "Hello -world" ) );
+ }
+
+ // Returns whether a document indexed with text data 'doc' contains all positive phrases
+ // from case-sensitive text query 'search'.
+ static bool docPositivePhrasesMatchWithCase( const std::string& doc,
+ const std::string& search ) {
+ FTSQuery q;
+ ASSERT_OK( q.parse( search, "english", true, TEXT_INDEX_VERSION_2 ) );
+ FTSMatcher m( q,
+ FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "x" << "text" ) ) ) ) );
+
+ return m.positivePhrasesMatch( BSON( "x" << doc ) );
+ }
+
+ TEST( FTSMatcher, PositivePhrasesMatchWithCase ) {
+ ASSERT_TRUE( docPositivePhrasesMatchWithCase( "John Runs", "\"John Runs\"" ) );
+ ASSERT_TRUE( docPositivePhrasesMatchWithCase( "John Runs", "\"John Run\"" ) );
+ ASSERT_TRUE( docPositivePhrasesMatchWithCase( "John Runs", "\"John\" \"Run\"" ) );
+ ASSERT_TRUE( docPositivePhrasesMatchWithCase( "John Runs", "\"n R\"" ) );
+
+ ASSERT_FALSE( docPositivePhrasesMatchWithCase( "John Runs", "\"john runs\"" ) );
+ ASSERT_FALSE( docPositivePhrasesMatchWithCase( "john runs", "\"John Runs\"" ) );
+ ASSERT_FALSE( docPositivePhrasesMatchWithCase( "John Runs", "\"John\" \"Running\"" ) );
+ }
+
+ // Returns whether a document indexed with text data 'doc' contains zero negative phrases
+ // from case-sensitive text query 'search'.
+ static bool docNegativePhrasesMatchWithCase( const std::string& doc,
+ const std::string& search ) {
+ FTSQuery q;
+ ASSERT_OK( q.parse( search, "english", true, TEXT_INDEX_VERSION_2 ) );
+ FTSMatcher m( q,
+ FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "x" << "text" ) ) ) ) );
+
+ return m.negativePhrasesMatch( BSON( "x" << doc ) );
+ }
+
+ TEST( FTSMatcher, NegativePhrasesMatchWithCase ) {
+ ASSERT_TRUE( docNegativePhrasesMatchWithCase( "John Runs", "-\"john runs\"" ) );
+ ASSERT_TRUE( docNegativePhrasesMatchWithCase( "john runs", "-\"John Runs\"" ) );
+ ASSERT_TRUE( docNegativePhrasesMatchWithCase( "john runs", "-\"John\" -\"Runs\"" ) );
+
+ ASSERT_FALSE( docNegativePhrasesMatchWithCase( "John Runs", "-\"John Runs\"" ) );
+ ASSERT_FALSE( docNegativePhrasesMatchWithCase( "John Runs", "-\"John Run\"" ) );
+ ASSERT_FALSE( docNegativePhrasesMatchWithCase( "John Runs", "-\"John\" -\"Run\"" ) );
+ ASSERT_FALSE( docNegativePhrasesMatchWithCase( "John Runs", "-\"n R\"" ) );
+ ASSERT_FALSE( docNegativePhrasesMatchWithCase( "John Runs",
+ "-\"John\" -\"Running\"" ) );
+ }
+
}
}
diff --git a/src/mongo/db/fts/fts_query.cpp b/src/mongo/db/fts/fts_query.cpp
index e70732023eb..9088719d11e 100644
--- a/src/mongo/db/fts/fts_query.cpp
+++ b/src/mongo/db/fts/fts_query.cpp
@@ -48,15 +48,18 @@ namespace mongo {
using std::stringstream;
using std::vector;
- Status FTSQuery::parse(const string& query, StringData language,
+ const bool FTSQuery::caseSensitiveDefault = false;
+
+ Status FTSQuery::parse(const string& query, StringData language, bool caseSensitive,
TextIndexVersion textIndexVersion) {
StatusWithFTSLanguage swl = FTSLanguage::make( language, textIndexVersion );
if ( !swl.getStatus().isOK() ) {
return swl.getStatus();
}
_language = swl.getValue();
+ _caseSensitive = caseSensitive;
- const StopWords* stopWords = StopWords::getStopWords( *_language );
+ const StopWords& stopWords = *StopWords::getStopWords( *_language );
Stemmer stemmer( *_language );
bool inNegation = false;
@@ -98,9 +101,9 @@ namespace mongo {
StringData phrase = StringData( query ).substr( phraseStart,
phraseLength );
if ( inNegation )
- _negatedPhrases.push_back( tolowerString( phrase ) );
+ _negatedPhrases.push_back( normalizeString( phrase ) );
else
- _phrases.push_back( tolowerString( phrase ) );
+ _positivePhrases.push_back( normalizeString( phrase ) );
inNegation = false;
inPhrase = false;
}
@@ -112,22 +115,51 @@ namespace mongo {
}
}
else {
- abort();
+ invariant( false );
}
}
return Status::OK();
}
- void FTSQuery::_addTerm( const StopWords* sw, Stemmer& stemmer, const string& term, bool negated ) {
- string word = tolowerString( term );
- if ( sw->isStopWord( word ) )
+ void FTSQuery::_addTerm( const StopWords& sw,
+ const Stemmer& stemmer,
+ const string& token,
+ bool negated ) {
+ // Compute the string corresponding to 'token' that will be used for index bounds
+ // generation.
+ string boundsTerm = tolowerString( token );
+ if ( sw.isStopWord( boundsTerm ) ) {
return;
- word = stemmer.stem( word );
- if ( negated )
- _negatedTerms.insert( word );
- else
- _terms.push_back( word );
+ }
+ boundsTerm = stemmer.stem( boundsTerm );
+
+ // If the lowercased version of 'token' is not a stop word, 'token' itself should also
+ // not be.
+ dassert( !sw.isStopWord( token ) );
+ if ( !negated ) {
+ _termsForBounds.insert( boundsTerm );
+ }
+
+ // Compute the string corresponding to 'token' that will be used for the matcher. For
+ // case-insensitive queries, this is the same string as 'boundsTerm' computed above.
+ // However, for case-sensitive queries we need to re-stem the original token, since
+ // 'boundsTerm' is already lowercased but we need the original casing for an exact
+ // match.
+ const string& matcherTerm = _caseSensitive ? stemmer.stem( token ) : boundsTerm;
+ if ( negated ) {
+ _negatedTerms.insert( matcherTerm );
+ }
+ else {
+ _positiveTerms.insert( matcherTerm );
+ }
+ }
+
+ string FTSQuery::normalizeString( StringData str ) const {
+ if ( _caseSensitive ) {
+ return str.toString();
+ }
+ return tolowerString( str );
}
namespace {
@@ -154,7 +186,7 @@ namespace mongo {
ss << "FTSQuery\n";
ss << " terms: ";
- _debugHelp( ss, getTerms(), ", " );
+ _debugHelp( ss, getPositiveTerms(), ", " );
ss << "\n";
ss << " negated terms: ";
@@ -162,7 +194,7 @@ namespace mongo {
ss << "\n";
ss << " phrases: ";
- _debugHelp( ss, getPhr(), ", " );
+ _debugHelp( ss, getPositivePhr(), ", " );
ss << "\n";
ss << " negated phrases: ";
@@ -175,13 +207,13 @@ namespace mongo {
string FTSQuery::debugString() const {
stringstream ss;
- _debugHelp( ss, getTerms(), "|" );
+ _debugHelp( ss, getPositiveTerms(), "|" );
ss << "||";
_debugHelp( ss, getNegatedTerms(), "|" );
ss << "||";
- _debugHelp( ss, getPhr(), "|" );
+ _debugHelp( ss, getPositivePhr(), "|" );
ss << "||";
_debugHelp( ss, getNegatedPhr(), "|" );
@@ -191,9 +223,9 @@ namespace mongo {
BSONObj FTSQuery::toBSON() const {
BSONObjBuilder bob;
- bob.append( "terms", getTerms() );
+ bob.append( "terms", getPositiveTerms() );
bob.append( "negatedTerms", getNegatedTerms() );
- bob.append( "phrases", getPhr() );
+ bob.append( "phrases", getPositivePhr() );
bob.append( "negatedPhrases", getNegatedPhr() );
return bob.obj();
}
diff --git a/src/mongo/db/fts/fts_query.h b/src/mongo/db/fts/fts_query.h
index c436abb83a8..96317c926e5 100644
--- a/src/mongo/db/fts/fts_query.h
+++ b/src/mongo/db/fts/fts_query.h
@@ -50,26 +50,20 @@ namespace mongo {
// version 1 text index with a version 1 default language string needs to be parsed as
// version 1 (see fts_language.cpp for a list of language strings specific to version
// 1).
- Status parse(const std::string& query, StringData language,
+ Status parse(const std::string& query, StringData language, bool caseSensitive,
TextIndexVersion textIndexVersion);
- const std::vector<std::string>& getTerms() const { return _terms; }
+ const std::set<std::string>& getPositiveTerms() const { return _positiveTerms; }
const std::set<std::string>& getNegatedTerms() const { return _negatedTerms; }
-
- const std::vector<std::string>& getPhr() const { return _phrases; }
+ const std::vector<std::string>& getPositivePhr() const { return _positivePhrases; }
const std::vector<std::string>& getNegatedPhr() const { return _negatedPhrases; }
- /**
- * @return true if any negations or phrase + or -
- */
- bool hasNonTermPieces() const {
- return
- _negatedTerms.size() > 0 ||
- _phrases.size() > 0 ||
- _negatedPhrases.size() > 0;
+ const std::set<std::string>& getTermsForBounds() const {
+ return _termsForBounds;
}
const FTSLanguage& getLanguage() const { return *_language; }
+ bool getCaseSensitive() const { return _caseSensitive; }
std::string toString() const;
@@ -77,15 +71,36 @@ namespace mongo {
BSONObj toBSON() const;
- protected:
+ /**
+ * Returns a copy of "str" unchanged if _caseSensitive is set, else lowercases "str".
+ */
+ std::string normalizeString( StringData str ) const;
+
+ static const bool caseSensitiveDefault;
+
+ private:
+ void _addTerm( const StopWords& sw,
+ const Stemmer& stemmer,
+ const std::string& token,
+ bool negated );
+
const FTSLanguage* _language;
- std::vector<std::string> _terms;
+ bool _caseSensitive;
+
+ // Positive terms.
+ std::set<std::string> _positiveTerms;
+
+ // Negated terms.
std::set<std::string> _negatedTerms;
- std::vector<std::string> _phrases;
+
+ // Positive phrases.
+ std::vector<std::string> _positivePhrases;
+
+ // Negated phrases.
std::vector<std::string> _negatedPhrases;
- private:
- void _addTerm( const StopWords* sw, Stemmer& stemmer, const std::string& term, bool negated );
+ // Terms for bounds.
+ std::set<std::string> _termsForBounds;
};
}
diff --git a/src/mongo/db/fts/fts_query_test.cpp b/src/mongo/db/fts/fts_query_test.cpp
index 1e5318a1592..3763f3a5a76 100644
--- a/src/mongo/db/fts/fts_query_test.cpp
+++ b/src/mongo/db/fts/fts_query_test.cpp
@@ -37,78 +37,150 @@ namespace mongo {
TEST( FTSQuery, Basic1 ) {
FTSQuery q;
- ASSERT( q.parse( "this is fun", "english", TEXT_INDEX_VERSION_2 ).isOK() );
+ ASSERT( q.parse( "this is fun", "english", false, TEXT_INDEX_VERSION_2 ).isOK() );
- ASSERT_EQUALS( 1U, q.getTerms().size() );
- ASSERT_EQUALS( "fun", q.getTerms()[0] );
+ ASSERT_EQUALS( false, q.getCaseSensitive() );
+ ASSERT_EQUALS( 1U, q.getPositiveTerms().size() );
+ ASSERT_EQUALS( "fun", *q.getPositiveTerms().begin() );
ASSERT_EQUALS( 0U, q.getNegatedTerms().size() );
- ASSERT_EQUALS( 0U, q.getPhr().size() );
+ ASSERT_EQUALS( 0U, q.getPositivePhr().size() );
ASSERT_EQUALS( 0U, q.getNegatedPhr().size() );
+ ASSERT_TRUE( q.getTermsForBounds() == q.getPositiveTerms() );
}
TEST( FTSQuery, Neg1 ) {
FTSQuery q;
- ASSERT( q.parse( "this is -really fun", "english", TEXT_INDEX_VERSION_2 ).isOK() );
+ ASSERT( q.parse( "this is -really fun", "english", false, TEXT_INDEX_VERSION_2 ).isOK() );
- ASSERT_EQUALS( 1U, q.getTerms().size() );
- ASSERT_EQUALS( "fun", q.getTerms()[0] );
+ ASSERT_EQUALS( 1U, q.getPositiveTerms().size() );
+ ASSERT_EQUALS( "fun", *q.getPositiveTerms().begin() );
ASSERT_EQUALS( 1U, q.getNegatedTerms().size() );
ASSERT_EQUALS( "realli", *q.getNegatedTerms().begin() );
+ ASSERT_TRUE( q.getTermsForBounds() == q.getPositiveTerms() );
}
TEST( FTSQuery, Phrase1 ) {
FTSQuery q;
- ASSERT( q.parse( "doing a \"phrase test\" for fun", "english",
+ ASSERT( q.parse( "doing a \"phrase test\" for fun", "english", false,
TEXT_INDEX_VERSION_2 ).isOK() );
- ASSERT_EQUALS( 3U, q.getTerms().size() );
+ ASSERT_EQUALS( 3U, q.getPositiveTerms().size() );
ASSERT_EQUALS( 0U, q.getNegatedTerms().size() );
- ASSERT_EQUALS( 1U, q.getPhr().size() );
+ ASSERT_EQUALS( 1U, q.getPositivePhr().size() );
ASSERT_EQUALS( 0U, q.getNegatedPhr().size() );
+ ASSERT_TRUE( q.getTermsForBounds() == q.getPositiveTerms() );
- ASSERT_EQUALS( "phrase test", q.getPhr()[0] );
+ ASSERT_EQUALS( "phrase test", q.getPositivePhr()[0] );
ASSERT_EQUALS( "fun|phrase|test||||phrase test||", q.debugString() );
}
TEST( FTSQuery, Phrase2 ) {
FTSQuery q;
- ASSERT( q.parse( "doing a \"phrase-test\" for fun", "english",
+ ASSERT( q.parse( "doing a \"phrase-test\" for fun", "english", false,
TEXT_INDEX_VERSION_2 ).isOK() );
- ASSERT_EQUALS( 1U, q.getPhr().size() );
- ASSERT_EQUALS( "phrase-test", q.getPhr()[0] );
+ ASSERT_EQUALS( 1U, q.getPositivePhr().size() );
+ ASSERT_EQUALS( "phrase-test", q.getPositivePhr()[0] );
}
TEST( FTSQuery, NegPhrase1 ) {
FTSQuery q;
- ASSERT( q.parse( "doing a -\"phrase test\" for fun", "english",
+ ASSERT( q.parse( "doing a -\"phrase test\" for fun", "english", false,
TEXT_INDEX_VERSION_2 ).isOK() );
ASSERT_EQUALS( "fun||||||phrase test", q.debugString() );
}
+ TEST( FTSQuery, CaseSensitiveOption ) {
+ FTSQuery q;
+ ASSERT( q.parse( "this is fun", "english", true, TEXT_INDEX_VERSION_2 ).isOK() );
+ ASSERT_EQUALS( true, q.getCaseSensitive() );
+ }
+
+ TEST( FTSQuery, CaseSensitivePositiveTerms ) {
+ FTSQuery q;
+ ASSERT( q.parse( "This is Positively fun", "english", true,
+ TEXT_INDEX_VERSION_2 ).isOK() );
+
+ ASSERT_EQUALS( 2U, q.getTermsForBounds().size() );
+ ASSERT_EQUALS( 1, std::count( q.getTermsForBounds().begin(),
+ q.getTermsForBounds().end(),
+ "posit" ) );
+ ASSERT_EQUALS( 1, std::count( q.getTermsForBounds().begin(),
+ q.getTermsForBounds().end(),
+ "fun" ) );
+ ASSERT_EQUALS( 2U, q.getPositiveTerms().size() );
+ ASSERT_EQUALS( 1, std::count( q.getPositiveTerms().begin(),
+ q.getPositiveTerms().end(),
+ "Posit" ) );
+ ASSERT_EQUALS( 1, std::count( q.getPositiveTerms().begin(),
+ q.getPositiveTerms().end(),
+ "fun" ) );
+ ASSERT_EQUALS( 0U, q.getNegatedTerms().size() );
+ ASSERT_EQUALS( 0U, q.getPositivePhr().size() );
+ ASSERT_EQUALS( 0U, q.getNegatedPhr().size() );
+ }
+
+ TEST( FTSQuery, CaseSensitiveNegativeTerms ) {
+ FTSQuery q;
+ ASSERT( q.parse( "-This -is -Negatively -miserable", "english", true,
+ TEXT_INDEX_VERSION_2 ).isOK() );
+
+ ASSERT_EQUALS( 0U, q.getPositiveTerms().size() );
+ ASSERT_EQUALS( 0U, q.getTermsForBounds().size() );
+ ASSERT_EQUALS( 2U, q.getNegatedTerms().size() );
+ ASSERT_EQUALS( 1, std::count( q.getNegatedTerms().begin(),
+ q.getNegatedTerms().end(),
+ "Negat" ) );
+ ASSERT_EQUALS( 1, std::count( q.getNegatedTerms().begin(),
+ q.getNegatedTerms().end(),
+ "miser" ) );
+ ASSERT_EQUALS( 0U, q.getPositivePhr().size() );
+ ASSERT_EQUALS( 0U, q.getNegatedPhr().size() );
+ }
+
+ TEST( FTSQuery, CaseSensitivePositivePhrases ) {
+ FTSQuery q;
+ ASSERT( q.parse( "doing a \"Phrase Test\" for fun", "english", true,
+ TEXT_INDEX_VERSION_2 ).isOK() );
+
+ ASSERT_EQUALS( 1U, q.getPositivePhr().size() );
+ ASSERT_EQUALS( 0U, q.getNegatedPhr().size() );
+ ASSERT_EQUALS( "Phrase Test", q.getPositivePhr()[0] );
+ }
+
+ TEST( FTSQuery, CaseSensitiveNegativePhrases ) {
+ FTSQuery q;
+ ASSERT( q.parse( "doing a -\"Phrase Test\" for fun", "english", true,
+ TEXT_INDEX_VERSION_2 ).isOK() );
+
+ ASSERT_EQUALS( 0U, q.getPositivePhr().size() );
+ ASSERT_EQUALS( 1U, q.getNegatedPhr().size() );
+ ASSERT_EQUALS( "Phrase Test", q.getNegatedPhr()[0] );
+ }
+
TEST( FTSQuery, Mix1 ) {
FTSQuery q;
- ASSERT( q.parse( "\"industry\" -Melbourne -Physics", "english",
+ ASSERT( q.parse( "\"industry\" -Melbourne -Physics", "english", false,
TEXT_INDEX_VERSION_2 ).isOK() );
ASSERT_EQUALS( "industri||melbourn|physic||industry||", q.debugString() );
}
TEST( FTSQuery, NegPhrase2) {
FTSQuery q1, q2, q3;
- ASSERT( q1.parse( "foo \"bar\"", "english", TEXT_INDEX_VERSION_2 ).isOK() );
- ASSERT( q2.parse( "foo \"-bar\"", "english", TEXT_INDEX_VERSION_2 ).isOK() );
- ASSERT( q3.parse( "foo \" -bar\"", "english", TEXT_INDEX_VERSION_2 ).isOK() );
+ ASSERT( q1.parse( "foo \"bar\"", "english", false, TEXT_INDEX_VERSION_2 ).isOK() );
+ ASSERT( q2.parse( "foo \"-bar\"", "english", false, TEXT_INDEX_VERSION_2 ).isOK() );
+ ASSERT( q3.parse( "foo \" -bar\"", "english", false, TEXT_INDEX_VERSION_2 ).isOK() );
- ASSERT_EQUALS( 2U, q1.getTerms().size() );
- ASSERT_EQUALS( 2U, q2.getTerms().size() );
- ASSERT_EQUALS( 2U, q3.getTerms().size() );
+ ASSERT_EQUALS( 2U, q1.getPositiveTerms().size() );
+ ASSERT_EQUALS( 2U, q2.getPositiveTerms().size() );
+ ASSERT_EQUALS( 2U, q3.getPositiveTerms().size() );
ASSERT_EQUALS( 0U, q1.getNegatedTerms().size() );
ASSERT_EQUALS( 0U, q2.getNegatedTerms().size() );
ASSERT_EQUALS( 0U, q3.getNegatedTerms().size() );
- ASSERT_EQUALS( 1U, q1.getPhr().size() );
- ASSERT_EQUALS( 1U, q2.getPhr().size() );
- ASSERT_EQUALS( 1U, q3.getPhr().size() );
+ ASSERT_EQUALS( 1U, q1.getPositivePhr().size() );
+ ASSERT_EQUALS( 1U, q2.getPositivePhr().size() );
+ ASSERT_EQUALS( 1U, q3.getPositivePhr().size() );
ASSERT_EQUALS( 0U, q1.getNegatedPhr().size() );
ASSERT_EQUALS( 0U, q2.getNegatedPhr().size() );
@@ -117,21 +189,21 @@ namespace mongo {
TEST( FTSQuery, NegPhrase3) {
FTSQuery q1, q2, q3;
- ASSERT( q1.parse( "foo -\"bar\"", "english", TEXT_INDEX_VERSION_2 ).isOK() );
- ASSERT( q2.parse( "foo -\"-bar\"", "english", TEXT_INDEX_VERSION_2 ).isOK() );
- ASSERT( q3.parse( "foo -\" -bar\"", "english", TEXT_INDEX_VERSION_2 ).isOK() );
+ ASSERT( q1.parse( "foo -\"bar\"", "english", false, TEXT_INDEX_VERSION_2 ).isOK() );
+ ASSERT( q2.parse( "foo -\"-bar\"", "english", false, TEXT_INDEX_VERSION_2 ).isOK() );
+ ASSERT( q3.parse( "foo -\" -bar\"", "english", false, TEXT_INDEX_VERSION_2 ).isOK() );
- ASSERT_EQUALS( 1U, q1.getTerms().size() );
- ASSERT_EQUALS( 1U, q2.getTerms().size() );
- ASSERT_EQUALS( 1U, q3.getTerms().size() );
+ ASSERT_EQUALS( 1U, q1.getPositiveTerms().size() );
+ ASSERT_EQUALS( 1U, q2.getPositiveTerms().size() );
+ ASSERT_EQUALS( 1U, q3.getPositiveTerms().size() );
ASSERT_EQUALS( 0U, q1.getNegatedTerms().size() );
ASSERT_EQUALS( 0U, q2.getNegatedTerms().size() );
ASSERT_EQUALS( 0U, q3.getNegatedTerms().size() );
- ASSERT_EQUALS( 0U, q1.getPhr().size() );
- ASSERT_EQUALS( 0U, q2.getPhr().size() );
- ASSERT_EQUALS( 0U, q3.getPhr().size() );
+ ASSERT_EQUALS( 0U, q1.getPositivePhr().size() );
+ ASSERT_EQUALS( 0U, q2.getPositivePhr().size() );
+ ASSERT_EQUALS( 0U, q3.getPositivePhr().size() );
ASSERT_EQUALS( 1U, q1.getNegatedPhr().size() );
ASSERT_EQUALS( 1U, q2.getNegatedPhr().size() );
@@ -142,11 +214,11 @@ namespace mongo {
// stemmer and stopword list.
TEST( FTSQuery, TextIndexVersion1LanguageEnglish ) {
FTSQuery q;
- ASSERT( q.parse( "the running", "english", TEXT_INDEX_VERSION_1 ).isOK() );
- ASSERT_EQUALS( 1U, q.getTerms().size() );
- ASSERT_EQUALS( "run", q.getTerms()[0] );
+ ASSERT( q.parse( "the running", "english", false, TEXT_INDEX_VERSION_1 ).isOK() );
+ ASSERT_EQUALS( 1U, q.getPositiveTerms().size() );
+ ASSERT_EQUALS( "run", *q.getPositiveTerms().begin() );
ASSERT_EQUALS( 0U, q.getNegatedTerms().size() );
- ASSERT_EQUALS( 0U, q.getPhr().size() );
+ ASSERT_EQUALS( 0U, q.getPositivePhr().size() );
ASSERT_EQUALS( 0U, q.getNegatedPhr().size() );
}
@@ -154,12 +226,16 @@ namespace mongo {
// no stopword list.
TEST( FTSQuery, TextIndexVersion1LanguageEng ) {
FTSQuery q;
- ASSERT( q.parse( "the running", "eng", TEXT_INDEX_VERSION_1 ).isOK() );
- ASSERT_EQUALS( 2U, q.getTerms().size() );
- ASSERT_EQUALS( 1, std::count( q.getTerms().begin(), q.getTerms().end(), "the" ) );
- ASSERT_EQUALS( 1, std::count( q.getTerms().begin(), q.getTerms().end(), "run" ) );
+ ASSERT( q.parse( "the running", "eng", false, TEXT_INDEX_VERSION_1 ).isOK() );
+ ASSERT_EQUALS( 2U, q.getPositiveTerms().size() );
+ ASSERT_EQUALS( 1, std::count( q.getPositiveTerms().begin(),
+ q.getPositiveTerms().end(),
+ "the" ) );
+ ASSERT_EQUALS( 1, std::count( q.getPositiveTerms().begin(),
+ q.getPositiveTerms().end(),
+ "run" ) );
ASSERT_EQUALS( 0U, q.getNegatedTerms().size() );
- ASSERT_EQUALS( 0U, q.getPhr().size() );
+ ASSERT_EQUALS( 0U, q.getPositivePhr().size() );
ASSERT_EQUALS( 0U, q.getNegatedPhr().size() );
}
@@ -167,12 +243,16 @@ namespace mongo {
// and no stopword list will be used.
TEST( FTSQuery, TextIndexVersion1LanguageInvalid ) {
FTSQuery q;
- ASSERT( q.parse( "the running", "invalid", TEXT_INDEX_VERSION_1 ).isOK() );
- ASSERT_EQUALS( 2U, q.getTerms().size() );
- ASSERT_EQUALS( 1, std::count( q.getTerms().begin(), q.getTerms().end(), "the" ) );
- ASSERT_EQUALS( 1, std::count( q.getTerms().begin(), q.getTerms().end(), "running" ) );
+ ASSERT( q.parse( "the running", "invalid", false, TEXT_INDEX_VERSION_1 ).isOK() );
+ ASSERT_EQUALS( 2U, q.getPositiveTerms().size() );
+ ASSERT_EQUALS( 1, std::count( q.getPositiveTerms().begin(),
+ q.getPositiveTerms().end(),
+ "the" ) );
+ ASSERT_EQUALS( 1, std::count( q.getPositiveTerms().begin(),
+ q.getPositiveTerms().end(),
+ "running" ) );
ASSERT_EQUALS( 0U, q.getNegatedTerms().size() );
- ASSERT_EQUALS( 0U, q.getPhr().size() );
+ ASSERT_EQUALS( 0U, q.getPositivePhr().size() );
ASSERT_EQUALS( 0U, q.getNegatedPhr().size() );
}
diff --git a/src/mongo/db/fts/fts_spec.cpp b/src/mongo/db/fts/fts_spec.cpp
index 8e715bb4405..fdd9ecf7824 100644
--- a/src/mongo/db/fts/fts_spec.cpp
+++ b/src/mongo/db/fts/fts_spec.cpp
@@ -185,11 +185,11 @@ namespace mongo {
Tokenizer i( tools.language, raw );
while ( i.more() ) {
Token t = i.next();
- if ( t.type != Token::TEXT )
+ if ( t.type != Token::TEXT ) {
continue;
+ }
- string term = t.data.toString();
- makeLower( &term );
+ string term = tolowerString( t.data );
if ( tools.stopwords->isStopWord( term ) ) {
continue;
}
diff --git a/src/mongo/db/fts/fts_spec_legacy.cpp b/src/mongo/db/fts/fts_spec_legacy.cpp
index df7a82a076a..69721fe2ae0 100644
--- a/src/mongo/db/fts/fts_spec_legacy.cpp
+++ b/src/mongo/db/fts/fts_spec_legacy.cpp
@@ -29,6 +29,7 @@
#include "mongo/db/fts/fts_spec.h"
#include "mongo/util/mongoutils/str.h"
+#include "mongo/util/stringutils.h"
namespace mongo {
@@ -78,8 +79,7 @@ namespace mongo {
if ( t.type != Token::TEXT )
continue;
- string term = t.data.toString();
- makeLower( &term );
+ string term = tolowerString( t.data );
if ( tools.stopwords->isStopWord( term ) )
continue;
term = tools.stemmer->stem( term );
diff --git a/src/mongo/db/fts/fts_util.h b/src/mongo/db/fts/fts_util.h
index 88a613b5c88..7cde2bbe985 100644
--- a/src/mongo/db/fts/fts_util.h
+++ b/src/mongo/db/fts/fts_util.h
@@ -46,15 +46,6 @@ namespace mongo {
TEXT_INDEX_VERSION_2 = 2 // Current index format.
};
-
- /**
- * destructive!
- */
- inline void makeLower( std::string* s ) {
- std::string::size_type sz = s->size();
- for ( std::string::size_type i = 0; i < sz; i++ )
- (*s)[i] = (char)tolower( (int)(*s)[i] );
- }
}
}
diff --git a/src/mongo/db/fts/stop_words_turkish.txt b/src/mongo/db/fts/stop_words_turkish.txt
index 66dea7e2dec..e3994b29160 100644
--- a/src/mongo/db/fts/stop_words_turkish.txt
+++ b/src/mongo/db/fts/stop_words_turkish.txt
@@ -74,7 +74,6 @@ sizin
daha
niçin
þunda
-INSERmi
bunu
beni
ile
diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp
index 0cdd4e03459..eeabcca2bff 100644
--- a/src/mongo/db/query/stage_builder.cpp
+++ b/src/mongo/db/query/stage_builder.cpp
@@ -264,7 +264,9 @@ namespace mongo {
? fam->getSpec().defaultLanguage().str()
: node->language);
- Status parseStatus = params.query.parse(node->query, language,
+ Status parseStatus = params.query.parse(node->query,
+ language,
+ fts::FTSQuery::caseSensitiveDefault,
fam->getSpec().getTextIndexVersion());
if (!parseStatus.isOK()) {
warning() << "Can't parse text search query";