summary | refs | log | tree | commit | diff
path: root/src/mongo
diff options
context:
space:
mode:
author Jason Rassi <rassi@10gen.com> 2014-02-13 12:55:48 -0500
committer Jason Rassi <rassi@10gen.com> 2014-02-13 12:55:48 -0500
commit d8f7afc96a90ca3f158eebfe4ef3672a27788626 (patch)
tree 332bf0eb9866645299c60221f18720ad2868c83a /src/mongo
parent 82f354996edb1e6726de37aee0ca17947a55fe0b (diff)
download mongo-d8f7afc96a90ca3f158eebfe4ef3672a27788626.tar.gz
SERVER-12195 Text matcher should parse doc in doc language
This fixes an issue where the text matcher incorrectly parsed the document using the search language instead of the document's own language.
Diffstat (limited to 'src/mongo')
-rw-r--r-- src/mongo/db/fts/fts_matcher.cpp 13
-rw-r--r-- src/mongo/db/fts/fts_matcher.h 3
-rw-r--r-- src/mongo/db/fts/fts_matcher_test.cpp 21
3 files changed, 25 insertions, 12 deletions
diff --git a/src/mongo/db/fts/fts_matcher.cpp b/src/mongo/db/fts/fts_matcher.cpp
index 45b455c2601..c9fad05f77f 100644
--- a/src/mongo/db/fts/fts_matcher.cpp
+++ b/src/mongo/db/fts/fts_matcher.cpp
@@ -40,8 +40,7 @@ namespace mongo {
FTSMatcher::FTSMatcher( const FTSQuery& query, const FTSSpec& spec )
: _query( query ),
- _spec( spec ),
- _stemmer( query.getLanguage() ){
+ _spec( spec ) {
}
/*
@@ -61,7 +60,7 @@ namespace mongo {
while ( it.more() ) {
FTSIteratorValue val = it.next();
- if (_hasNegativeTerm_string( val._text )) {
+ if (_hasNegativeTerm_string( val._language, val._text )) {
return true;
}
}
@@ -73,14 +72,16 @@ namespace mongo {
* Checks if any of the negTerms is in the tokenized string
* @param raw, the raw string to be tokenized
*/
- bool FTSMatcher::_hasNegativeTerm_string( const string& raw ) const {
+ bool FTSMatcher::_hasNegativeTerm_string( const FTSLanguage* language,
+ const string& raw ) const {
- Tokenizer i( _query.getLanguage(), raw );
+ Tokenizer i( *language, raw );
+ Stemmer stemmer( *language );
while ( i.more() ) {
Token t = i.next();
if ( t.type != Token::TEXT )
continue;
- string word = _stemmer.stem( tolowerString( t.data ) );
+ string word = stemmer.stem( tolowerString( t.data ) );
if ( _query.getNegatedTerms().count( word ) > 0 )
return true;
}
diff --git a/src/mongo/db/fts/fts_matcher.h b/src/mongo/db/fts/fts_matcher.h
index ed49e536f5d..d32a92d11f9 100644
--- a/src/mongo/db/fts/fts_matcher.h
+++ b/src/mongo/db/fts/fts_matcher.h
@@ -62,7 +62,7 @@ namespace mongo {
/**
* @return true if raw has a negated term
*/
- bool _hasNegativeTerm_string( const string& raw ) const;
+ bool _hasNegativeTerm_string( const FTSLanguage* language, const string& raw ) const;
/**
* @return true if raw has a phrase
@@ -71,7 +71,6 @@ namespace mongo {
FTSQuery _query;
FTSSpec _spec;
- Stemmer _stemmer;
};
}
diff --git a/src/mongo/db/fts/fts_matcher_test.cpp b/src/mongo/db/fts/fts_matcher_test.cpp
index 049bc7e02ad..34017a015f7 100644
--- a/src/mongo/db/fts/fts_matcher_test.cpp
+++ b/src/mongo/db/fts/fts_matcher_test.cpp
@@ -38,7 +38,7 @@ namespace mongo {
TEST( FTSMatcher, NegWild1 ) {
FTSQuery q;
- q.parse( "foo -bar", "english" );
+ ASSERT_OK( q.parse( "foo -bar", "english" ) );
FTSMatcher m( q,
FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "$**" << "text" ) ) ) ) );
@@ -49,7 +49,7 @@ namespace mongo {
// Regression test for SERVER-11994.
TEST( FTSMatcher, NegWild2 ) {
FTSQuery q;
- q.parse( "pizza -restaurant", "english" );
+ ASSERT_OK( q.parse( "pizza -restaurant", "english" ) );
FTSMatcher m( q,
FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "$**" << "text" ) ) ) ) );
@@ -59,7 +59,7 @@ namespace mongo {
TEST( FTSMatcher, Phrase1 ) {
FTSQuery q;
- q.parse( "foo \"table top\"", "english" );
+ ASSERT_OK( q.parse( "foo \"table top\"", "english" ) );
FTSMatcher m( q,
FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "$**" << "text" ) ) ) ) );
@@ -75,12 +75,25 @@ namespace mongo {
TEST( FTSMatcher, Phrase2 ) {
FTSQuery q;
- q.parse( "foo \"table top\"", "english" );
+ ASSERT_OK( q.parse( "foo \"table top\"", "english" ) );
FTSMatcher m( q,
FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "x" << "text" ) ) ) ) );
ASSERT( m.phraseMatch( "table top",
BSON( "x" << BSON_ARRAY( "table top" ) ) ) );
}
+ // Test that the matcher parses the document with the document language, not the search
+ // language.
+ TEST( FTSMatcher, ParsesUsingDocLanguage ) {
+ FTSQuery q;
+ ASSERT_OK( q.parse( "-glad", "none" ) );
+ FTSMatcher m( q,
+ FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "x" << "text" ) ) ) ) );
+
+ // Even though the search language is "none", the document {x: "gladly"} should be
+ // parsed using the English stemmer, and as such should match the negated term "glad".
+ ASSERT( m.hasNegativeTerm( BSON( "x" << "gladly" ) ) );
+ }
+
}
}