diff options
author | Jason Rassi <rassi@10gen.com> | 2014-02-13 12:55:48 -0500 |
---|---|---|
committer | Jason Rassi <rassi@10gen.com> | 2014-02-13 12:55:48 -0500 |
commit | d8f7afc96a90ca3f158eebfe4ef3672a27788626 (patch) | |
tree | 332bf0eb9866645299c60221f18720ad2868c83a /src/mongo | |
parent | 82f354996edb1e6726de37aee0ca17947a55fe0b (diff) | |
download | mongo-d8f7afc96a90ca3f158eebfe4ef3672a27788626.tar.gz |
SERVER-12195 Text matcher should parse doc in doc language
This fixes an issue where the text matcher incorrectly tokenized and stemmed
the document using the search language instead of the document's own language.
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/fts/fts_matcher.cpp | 13 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_matcher.h | 3 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_matcher_test.cpp | 21 |
3 files changed, 25 insertions, 12 deletions
diff --git a/src/mongo/db/fts/fts_matcher.cpp b/src/mongo/db/fts/fts_matcher.cpp index 45b455c2601..c9fad05f77f 100644 --- a/src/mongo/db/fts/fts_matcher.cpp +++ b/src/mongo/db/fts/fts_matcher.cpp @@ -40,8 +40,7 @@ namespace mongo { FTSMatcher::FTSMatcher( const FTSQuery& query, const FTSSpec& spec ) : _query( query ), - _spec( spec ), - _stemmer( query.getLanguage() ){ + _spec( spec ) { } /* @@ -61,7 +60,7 @@ namespace mongo { while ( it.more() ) { FTSIteratorValue val = it.next(); - if (_hasNegativeTerm_string( val._text )) { + if (_hasNegativeTerm_string( val._language, val._text )) { return true; } } @@ -73,14 +72,16 @@ namespace mongo { * Checks if any of the negTerms is in the tokenized string * @param raw, the raw string to be tokenized */ - bool FTSMatcher::_hasNegativeTerm_string( const string& raw ) const { + bool FTSMatcher::_hasNegativeTerm_string( const FTSLanguage* language, + const string& raw ) const { - Tokenizer i( _query.getLanguage(), raw ); + Tokenizer i( *language, raw ); + Stemmer stemmer( *language ); while ( i.more() ) { Token t = i.next(); if ( t.type != Token::TEXT ) continue; - string word = _stemmer.stem( tolowerString( t.data ) ); + string word = stemmer.stem( tolowerString( t.data ) ); if ( _query.getNegatedTerms().count( word ) > 0 ) return true; } diff --git a/src/mongo/db/fts/fts_matcher.h b/src/mongo/db/fts/fts_matcher.h index ed49e536f5d..d32a92d11f9 100644 --- a/src/mongo/db/fts/fts_matcher.h +++ b/src/mongo/db/fts/fts_matcher.h @@ -62,7 +62,7 @@ namespace mongo { /** * @return true if raw has a negated term */ - bool _hasNegativeTerm_string( const string& raw ) const; + bool _hasNegativeTerm_string( const FTSLanguage* language, const string& raw ) const; /** * @return true if raw has a phrase @@ -71,7 +71,6 @@ namespace mongo { FTSQuery _query; FTSSpec _spec; - Stemmer _stemmer; }; } diff --git a/src/mongo/db/fts/fts_matcher_test.cpp b/src/mongo/db/fts/fts_matcher_test.cpp index 049bc7e02ad..34017a015f7 100644 --- 
a/src/mongo/db/fts/fts_matcher_test.cpp +++ b/src/mongo/db/fts/fts_matcher_test.cpp @@ -38,7 +38,7 @@ namespace mongo { TEST( FTSMatcher, NegWild1 ) { FTSQuery q; - q.parse( "foo -bar", "english" ); + ASSERT_OK( q.parse( "foo -bar", "english" ) ); FTSMatcher m( q, FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "$**" << "text" ) ) ) ) ); @@ -49,7 +49,7 @@ namespace mongo { // Regression test for SERVER-11994. TEST( FTSMatcher, NegWild2 ) { FTSQuery q; - q.parse( "pizza -restaurant", "english" ); + ASSERT_OK( q.parse( "pizza -restaurant", "english" ) ); FTSMatcher m( q, FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "$**" << "text" ) ) ) ) ); @@ -59,7 +59,7 @@ namespace mongo { TEST( FTSMatcher, Phrase1 ) { FTSQuery q; - q.parse( "foo \"table top\"", "english" ); + ASSERT_OK( q.parse( "foo \"table top\"", "english" ) ); FTSMatcher m( q, FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "$**" << "text" ) ) ) ) ); @@ -75,12 +75,25 @@ namespace mongo { TEST( FTSMatcher, Phrase2 ) { FTSQuery q; - q.parse( "foo \"table top\"", "english" ); + ASSERT_OK( q.parse( "foo \"table top\"", "english" ) ); FTSMatcher m( q, FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "x" << "text" ) ) ) ) ); ASSERT( m.phraseMatch( "table top", BSON( "x" << BSON_ARRAY( "table top" ) ) ) ); } + // Test that the matcher parses the document with the document language, not the search + // language. + TEST( FTSMatcher, ParsesUsingDocLanguage ) { + FTSQuery q; + ASSERT_OK( q.parse( "-glad", "none" ) ); + FTSMatcher m( q, + FTSSpec( FTSSpec::fixSpec( BSON( "key" << BSON( "x" << "text" ) ) ) ) ); + + // Even though the search language is "none", the document {x: "gladly"} should be + // parsed using the English stemmer, and as such should match the negated term "glad". + ASSERT( m.hasNegativeTerm( BSON( "x" << "gladly" ) ) ); + } + } } |