diff options
Diffstat (limited to 'src/mongo/db/fts')
-rw-r--r-- | src/mongo/db/fts/SConscript | 8 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_index_format.cpp | 16 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_iterator.cpp | 186 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_iterator.h | 173 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_iterator_test.cpp | 310 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_matcher.cpp | 143 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_matcher.h | 13 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_spec.cpp | 140 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_spec.h | 39 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_spec_test.cpp | 50 |
10 files changed, 761 insertions, 317 deletions
diff --git a/src/mongo/db/fts/SConscript b/src/mongo/db/fts/SConscript index afd6e0386a9..2ffd2371ccf 100644 --- a/src/mongo/db/fts/SConscript +++ b/src/mongo/db/fts/SConscript @@ -2,7 +2,7 @@ Import("env") -stop_word_lanages = [ +stop_word_languages = [ 'danish', 'dutch', 'english', @@ -21,7 +21,7 @@ stop_word_lanages = [ ] env.Command( [ "stop_words_list.h", "stop_words_list.cpp"], - [ "generate_stop_words.py"] + [ 'stop_words_%s.txt' % x for x in stop_word_lanages ], + [ "generate_stop_words.py"] + [ 'stop_words_%s.txt' % x for x in stop_word_languages ], "$PYTHON $SOURCES $TARGETS" ) env.Library('base', [ @@ -32,6 +32,7 @@ env.Library('base', [ 'fts_spec_legacy.cpp', 'fts_language.cpp', 'fts_util.cpp', + 'fts_iterator.cpp', 'stemmer.cpp', 'stop_words.cpp', 'stop_words_list.cpp', @@ -81,5 +82,8 @@ env.CppUnitTest( "fts_language_test", "fts_language_test.cpp", env.CppUnitTest( "fts_matcher_test", "fts_matcher_test.cpp", LIBDEPS=["base"] ) +env.CppUnitTest( "fts_iterator_test", "fts_iterator_test.cpp", + LIBDEPS=["base"] ) + env.CppUnitTest( "fts_util_test", "fts_util_test.cpp", LIBDEPS=["base","$BUILD_DIR/mongo/mongohasher"] ) diff --git a/src/mongo/db/fts/fts_index_format.cpp b/src/mongo/db/fts/fts_index_format.cpp index 62a2fbe699e..b3892a583af 100644 --- a/src/mongo/db/fts/fts_index_format.cpp +++ b/src/mongo/db/fts/fts_index_format.cpp @@ -79,7 +79,7 @@ namespace mongo { TermFrequencyMap term_freqs; - spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &term_freqs ); + spec.scoreDocument( obj, &term_freqs ); // create index keys from raw scores // only 1 per string @@ -92,9 +92,7 @@ namespace mongo { long long keyBSONSize = 0; const int MaxKeyBSONSizeMB = 4; - for ( TermFrequencyMap::const_iterator i = term_freqs.begin(); - i != term_freqs.end(); - ++i ) { + for ( TermFrequencyMap::const_iterator i = term_freqs.begin(); i != term_freqs.end(); ++i ) { const string& term = i->first; double weight = i->second; @@ -108,17 +106,18 @@ namespace mongo { 
extraSize; BSONObjBuilder b(guess); // builds a BSON object with guess length. - for ( unsigned k = 0; k < extrasBefore.size(); k++ ) + for ( unsigned k = 0; k < extrasBefore.size(); k++ ) { b.appendAs( extrasBefore[k], "" ); + } _appendIndexKey( b, weight, term ); - for ( unsigned k = 0; k < extrasAfter.size(); k++ ) + for ( unsigned k = 0; k < extrasAfter.size(); k++ ) { b.appendAs( extrasAfter[k], "" ); + } BSONObj res = b.obj(); verify( guess >= res.objsize() ); keys->insert( res ); - keyBSONSize += res.objsize(); uassert( 16733, @@ -136,8 +135,9 @@ namespace mongo { BSONObjBuilder b; BSONObjIterator i( indexPrefix ); - while ( i.more() ) + while ( i.more() ) { b.appendAs( i.next(), "" ); + } _appendIndexKey( b, weight, term ); return b.obj(); diff --git a/src/mongo/db/fts/fts_iterator.cpp b/src/mongo/db/fts/fts_iterator.cpp new file mode 100644 index 00000000000..c304404a1f0 --- /dev/null +++ b/src/mongo/db/fts/fts_iterator.cpp @@ -0,0 +1,186 @@ +// fts_iterator.cpp +/** +* Copyright (C) 2014 MongoDB Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. 
You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. +*/ + +#include "mongo/db/fts/fts_iterator.h" +#include "mongo/db/fts/fts_spec.h" +#include "mongo/db/fts/fts_util.h" +#include "mongo/util/mongoutils/str.h" +#include "mongo/util/stringutils.h" + +#include <stack> + +namespace mongo { + + namespace fts { + + extern const double DEFAULT_WEIGHT; + extern const double MAX_WEIGHT; + + std::ostream& operator<<( std::ostream& os, FTSElementIterator::FTSIteratorFrame& frame ) { + BSONObjIterator it = frame._it; + return os << "FTSIteratorFrame[" + " element=" << (*it).toString() << + ", _language=" << frame._language->str() << + ", _parentPath=" << frame._parentPath << + ", _isArray=" << frame._isArray << "]"; + } + + FTSElementIterator::FTSElementIterator( const FTSSpec& spec, const BSONObj& obj ) + : _frame( obj, spec, &spec.defaultLanguage(), "", false ), + _spec( spec ), + _currentValue( advance() ) + { } + + namespace { + /** Check for exact match or path prefix match. */ + inline bool _matchPrefix( const string& dottedName, const string& weight ) { + if ( weight == dottedName ) { + return true; + } + return mongoutils::str::startsWith( weight, dottedName + '.' 
); + } + } + + bool FTSElementIterator::more() { + //_currentValue = advance(); + return _currentValue.valid(); + } + + FTSIteratorValue FTSElementIterator::next() { + FTSIteratorValue result = _currentValue; + _currentValue = advance(); + return result; + } + + /** + * Helper method: + * if (current object iterator not exhausted) return true; + * while (frame stack not empty) { + * resume object iterator popped from stack; + * if (resumed iterator not exhausted) return true; + * } + * return false; + */ + bool FTSElementIterator::moreFrames() { + if (_frame._it.more()) return true; + while (!_frameStack.empty()) { + _frame = _frameStack.top(); + _frameStack.pop(); + if (_frame._it.more()) { + return true; + } + } + return false; + } + + FTSIteratorValue FTSElementIterator::advance() { + while ( moreFrames() ) { + + BSONElement elem = _frame._it.next(); + string fieldName = elem.fieldName(); + + // Skip "language" specifier fields if wildcard. + if ( _spec.wildcard() && _spec.languageOverrideField() == fieldName ) { + continue; + } + + // Compose the dotted name of the current field: + // 1. parent path empty (top level): use the current field name + // 2. parent path non-empty and obj is an array: use the parent path + // 3. parent path non-empty and obj is a sub-doc: append field name to parent path + string dottedName = ( _frame._parentPath.empty() ? fieldName + : _frame._isArray ? _frame._parentPath + : _frame._parentPath + '.' + fieldName ); + + // Find lower bound of dottedName in _weights. lower_bound leaves us at the first + // weight that could possibly match or be a prefix of dottedName. And if this + // element fails to match, then no subsequent weight can match, since the weights + // are lexicographically ordered. + Weights::const_iterator i = _spec.weights().lower_bound( elem.type() == Object + ? dottedName + '.' 
+ : dottedName ); + + // possibleWeightMatch is set if the weight map contains either a match or some item + // lexicographically larger than fieldName. This boolean acts as a guard on + // dereferences of iterator 'i'. + bool possibleWeightMatch = ( i != _spec.weights().end() ); + + // Optimize away two cases, when not wildcard: + // 1. lower_bound seeks to end(): no prefix match possible + // 2. lower_bound seeks to a name which is not a prefix + if ( !_spec.wildcard() ) { + if ( !possibleWeightMatch ) { + continue; + } + else if ( !_matchPrefix( dottedName, i->first ) ) { + continue; + } + } + + // Is the current field an exact match on a weight? + bool exactMatch = ( possibleWeightMatch && i->first == dottedName ); + double weight = ( possibleWeightMatch ? i->second : DEFAULT_WEIGHT ); + + switch ( elem.type() ) { + case String: + // Only index strings on exact match or wildcard. + if ( exactMatch || _spec.wildcard() ) { + return FTSIteratorValue( elem.valuestr(), _frame._language, weight ); + } + break; + + case Object: + // Only descend into a sub-document on proper prefix or wildcard. Note that + // !exactMatch is a sufficient test for proper prefix match, because of + // if ( !matchPrefix( dottedName, i->first ) ) continue; + // block above. + if ( !exactMatch || _spec.wildcard() ) { + _frameStack.push( _frame ); + _frame = FTSIteratorFrame( elem.Obj(), _spec, _frame._language, dottedName, false ); + } + break; + + case Array: + // Only descend into arrays from non-array parents or on wildcard. + if ( !_frame._isArray || _spec.wildcard() ) { + _frameStack.push( _frame ); + _frame = FTSIteratorFrame( elem.Obj(), _spec, _frame._language, dottedName, true ); + } + break; + + default: + // Skip over all other BSON types. 
+ break; + } + } + return FTSIteratorValue(); // valid()==false + } + + } // namespace fts +} // namespace mongo diff --git a/src/mongo/db/fts/fts_iterator.h b/src/mongo/db/fts/fts_iterator.h new file mode 100644 index 00000000000..f8f75d2839f --- /dev/null +++ b/src/mongo/db/fts/fts_iterator.h @@ -0,0 +1,173 @@ +// fts_iterator.h + +/** +* Copyright (C) 2014 MongoDB Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. 
+*/ + +#pragma once + +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonobjiterator.h" +#include "mongo/db/fts/fts_language.h" +#include "mongo/db/fts/fts_spec.h" + +#include <map> +#include <stack> +#include <string> + +namespace mongo { + + namespace fts { + + /** + * Encapsulates data fields returned by FTSElementIterator + */ + struct FTSIteratorValue { + + FTSIteratorValue( const char* text, + const FTSLanguage* language, + double weight ) + : _text(text), + _language(language), + _weight(weight), + _valid(true) + {} + + FTSIteratorValue() + : _text(NULL), + _language(), + _weight(0.0), + _valid(false) + {} + + bool valid() const { return _valid; } + + const char* _text; + const FTSLanguage* _language; + double _weight; + bool _valid; + }; + + /** + * Iterator pattern for walking through text-indexed fields of a + * BSON document. + * + * Example usage: + * FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); + * FTSElementIterator it( spec, obj ); + * while ( it.more() ) { + * FTSIteratorValue val = it.next(); + * std::cout << val._text << '[' << val._language->str() + * << ',' << val._weight << ']' << std::endl; + * } + * + */ + class FTSElementIterator { + public: + /** + * Iterator interface: returns false iff there are no further text-indexable fields. + */ + bool more(); + + /** + * Iterator interface: advances to the next text-indexable field. + */ + FTSIteratorValue next(); + + /** + * Iterator constructor + * + * Note: Caller must ensure that the constructed FTSElementIterator + * does >not< outlive either spec or obj. + * + * @arg spec text index specifier + * @arg obj document that the iterator will traverse + */ + FTSElementIterator( const FTSSpec& spec, const BSONObj& obj); + + /** + * Iterator frame needed for iterative implementation of + * recursive sub-documents. 
+ */ + struct FTSIteratorFrame { + FTSIteratorFrame( const BSONObj& obj, + const FTSSpec& spec, + const FTSLanguage* parentLanguage, + const std::string& parentPath, + bool isArray ) + : _it( obj ), + _language( spec._getLanguageToUseV2( obj, parentLanguage ) ), + _parentPath( parentPath ), + _isArray( isArray ) + {} + + friend std::ostream& operator<<(std::ostream&, FTSIteratorFrame&); + + BSONObjIterator _it; + const FTSLanguage* _language; + std::string _parentPath; + bool _isArray; + }; + + private: + /** + * Helper method: + * returns false iff all FTSIteratorFrames on _frameStack are exhausted. + */ + bool moreFrames(); + + /** + * Helper method: + * advances to the next text-indexable field, possibly pushing frames as + * needed for recursive sub-documents. + */ + FTSIteratorValue advance(); + + /** + * Stack used by iterative implementation of recursive sub-document traversal. + */ + std::stack<FTSIteratorFrame> _frameStack; + + /** + * Current frame, not yet pushed to stack. + */ + FTSIteratorFrame _frame; + + /** + * Constructor input parameter: text index specification. + */ + const FTSSpec& _spec; + + /** + * Current iterator return value, computed by 'advance()', tested by 'more()', returned by 'next()'. + */ + FTSIteratorValue _currentValue; + }; + + } // namespace fts +} // namespace mongo + diff --git a/src/mongo/db/fts/fts_iterator_test.cpp b/src/mongo/db/fts/fts_iterator_test.cpp new file mode 100644 index 00000000000..c1e50bc7682 --- /dev/null +++ b/src/mongo/db/fts/fts_iterator_test.cpp @@ -0,0 +1,310 @@ +// fts_iterator_test.cpp +/** +* Copyright (C) 2014 MongoDB Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. 
+*/ + +#include "mongo/pch.h" + +#include "mongo/db/fts/fts_iterator.h" +#include "mongo/db/json.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { + namespace fts { + + TEST( FTSElementIterator, Test1 ) { + + BSONObj obj = fromjson( + "{ b : \"walking\"," + " c : { e: \"walked\" }," + " d : \"walker\"" + " }" ); + + BSONObj indexSpec = fromjson( + "{ key : { a : \"text\" }, weights : { b : 10, d : 5 } }" ); + + FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); + Weights::const_iterator itt = spec.weights().begin(); + ASSERT( itt != spec.weights().end() ); + ASSERT_EQUALS( "a", itt->first ); + ASSERT_EQUALS( 1, itt->second ); + ++itt; + ASSERT( itt != spec.weights().end() ); + ASSERT_EQUALS( "b", itt->first ); + ASSERT_EQUALS( 10, itt->second ); + ++itt; + ASSERT( itt != spec.weights().end() ); + ASSERT_EQUALS( "d", itt->first ); + ASSERT_EQUALS( 5, itt->second ); + ++itt; + + FTSElementIterator it( spec, obj ); + + ASSERT( it.more() ); + FTSIteratorValue val = it.next(); + ASSERT_EQUALS( "walking", string(val._text) ); + ASSERT_EQUALS( "english", val._language->str() ); + ASSERT_EQUALS( 10, val._weight ); + + ASSERT( it.more() ); + val = it.next(); + ASSERT_EQUALS( "walker", string(val._text) ); + ASSERT_EQUALS( "english", val._language->str() ); + ASSERT_EQUALS( 5, val._weight ); + } + + // Multi-language : test + TEST( FTSElementIterator, Test2 ) { + + BSONObj obj = fromjson( + "{ a :" + " { b :" + " [ { c : \"walked\", language : \"english\" }," + " { c : \"camminato\", language : \"italian\" }," + " { c : \"ging\", language : \"german\" } ]" + " }," + " d : \"Feliz Año Nuevo!\"," + " language : \"spanish\"" + " }" ); + + BSONObj indexSpec = fromjson( + "{ key : { \"a.b.c\" : \"text\", d : \"text\" } }" ); + + FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); + + FTSElementIterator it( spec, obj ); + + ASSERT( it.more() ); + FTSIteratorValue val = it.next(); + ASSERT_EQUALS( "walked", string(val._text) ); + ASSERT_EQUALS( "english", val._language->str() ); 
+ ASSERT_EQUALS( 1, val._weight ); + + ASSERT( it.more() ); + val = it.next(); + ASSERT_EQUALS( "camminato", string(val._text) ); + ASSERT_EQUALS( "italian", val._language->str() ); + ASSERT_EQUALS( 1, val._weight ); + + ASSERT( it.more() ); + val = it.next(); + ASSERT_EQUALS( "ging", string(val._text) ); + ASSERT_EQUALS( "german", val._language->str() ); + ASSERT_EQUALS( 1, val._weight ); + + ASSERT( it.more() ); + val = it.next(); + ASSERT_EQUALS( "Feliz Año Nuevo!", string(val._text) ); + ASSERT_EQUALS( "spanish", val._language->str() ); + ASSERT_EQUALS( 1, val._weight ); + } + + // Multi-language : test nested stemming per sub-document + TEST( FTSElementIterator, Test3 ) { + + BSONObj obj = fromjson( + "{ language : \"english\"," + " a :" + " { language : \"danish\"," + " b :" + " [ { c : \"foredrag\" }," + " { c : \"foredragsholder\" }," + " { c : \"lector\" } ]" + " }" + "}" ); + + BSONObj indexSpec = fromjson( + "{ key : { a : \"text\", \"a.b.c\" : \"text\" }, weights : { \"a.b.c\" : 5 } }" ); + + FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); + Weights::const_iterator itt = spec.weights().begin(); + ASSERT( itt != spec.weights().end() ); + ASSERT_EQUALS( "a", itt->first ); + ASSERT_EQUALS( 1, itt->second ); + ++itt; + ASSERT( itt != spec.weights().end() ); + ASSERT_EQUALS( "a.b.c", itt->first ); + ASSERT_EQUALS( 5, itt->second ); + + FTSElementIterator it( spec, obj ); + + ASSERT( it.more() ); + FTSIteratorValue val = it.next(); + ASSERT_EQUALS( "foredrag", string(val._text) ); + ASSERT_EQUALS( "danish", val._language->str() ); + ASSERT_EQUALS( 5, val._weight ); + + ASSERT( it.more() ); + val = it.next(); + ASSERT_EQUALS( "foredragsholder", string(val._text) ); + ASSERT_EQUALS( "danish", val._language->str() ); + ASSERT_EQUALS( 5, val._weight ); + + ASSERT( it.more() ); + val = it.next(); + ASSERT_EQUALS( "lector", string(val._text) ); + ASSERT_EQUALS( "danish", val._language->str() ); + ASSERT_EQUALS( 5, val._weight ); + + } + + // Multi-language : test 
nested arrays + TEST( FTSElementIterator, Test4 ) { + + BSONObj obj = fromjson( + "{ language : \"english\"," + " a : [" + " { language : \"danish\"," + " b :" + " [ { c : [\"foredrag\"] }," + " { c : [\"foredragsholder\"] }," + " { c : [\"lector\"] } ]" + " } ]" + "}" ); + + BSONObj indexSpec = fromjson( + "{ key : { \"a.b.c\" : \"text\" }, weights : { \"a.b.c\" : 5 } }" ); + + FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); + FTSElementIterator it( spec, obj ); + + ASSERT( it.more() ); + FTSIteratorValue val = it.next(); + ASSERT_EQUALS( "foredrag", string(val._text) ); + ASSERT_EQUALS( "danish", val._language->str() ); + ASSERT_EQUALS( 5, val._weight ); + + ASSERT( it.more() ); + val = it.next(); + ASSERT_EQUALS( "foredragsholder", string(val._text) ); + ASSERT_EQUALS( "danish", val._language->str() ); + ASSERT_EQUALS( 5, val._weight ); + + ASSERT( it.more() ); + val = it.next(); + ASSERT_EQUALS( "lector", string(val._text) ); + ASSERT_EQUALS( "danish", val._language->str() ); + ASSERT_EQUALS( 5, val._weight ); + + } + + // Multi-language : test wildcard spec + TEST( FTSElementIterator, Test5 ) { + + BSONObj obj = fromjson( + "{ language : \"english\"," + " b : \"these boots were made for walking\"," + " c : { e: \"I walked half way to the market before seeing the sunrise\" }," + " d : " + " { language : \"danish\"," + " e :" + " [ { f : \"foredrag\", g : 12 }," + " { f : \"foredragsholder\", g : 13 }," + " { f : \"lector\", g : 14 } ]" + " }" + "}" ); + + BSONObj indexSpec = fromjson( + "{ key : { a : \"text\" }, weights : { b : 20, c : 10, \"d.e.f\" : 5 } }" ); + + FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); + FTSElementIterator it( spec, obj ); + + ASSERT( it.more() ); + FTSIteratorValue val = it.next(); + ASSERT_EQUALS( "these boots were made for walking", string(val._text) ); + ASSERT_EQUALS( "english", val._language->str() ); + ASSERT_EQUALS( 20, val._weight ); + + ASSERT( it.more() ); + val = it.next(); + ASSERT_EQUALS( "foredrag", string(val._text) ); 
+ ASSERT_EQUALS( "danish", val._language->str() ); + ASSERT_EQUALS( 5, val._weight ); + + ASSERT( it.more() ); + val = it.next(); + ASSERT_EQUALS( "foredragsholder", string(val._text) ); + ASSERT_EQUALS( "danish", val._language->str() ); + ASSERT_EQUALS( 5, val._weight ); + + ASSERT( it.more() ); + val = it.next(); + ASSERT_EQUALS( "lector", string(val._text) ); + ASSERT_EQUALS( "danish", val._language->str() ); + ASSERT_EQUALS( 5, val._weight ); + } + + // Multi-language : test wildcard spec + TEST( FTSElementIterator, Test6 ) { + + BSONObj obj = fromjson( + "{ language : \"english\"," + " b : \"these boots were made for walking\"," + " c : { e: \"I walked half way to the market before seeing the sunrise\" }," + " d : " + " { language : \"danish\"," + " e :" + " [ { f : \"foredrag\", g : 12 }," + " { f : \"foredragsholder\", g : 13 }," + " { f : \"lector\", g : 14 } ]" + " }" + "}" ); + + BSONObj indexSpec = fromjson( + "{ key : { a : \"text\" }, weights : { b : 20, c : 10, \"d.e.f\" : 5 } }" ); + + FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); + FTSElementIterator it( spec, obj ); + + ASSERT( it.more() ); + FTSIteratorValue val = it.next(); + ASSERT_EQUALS( "these boots were made for walking", string(val._text) ); + ASSERT_EQUALS( "english", val._language->str() ); + ASSERT_EQUALS( 20, val._weight ); + + ASSERT( it.more() ); + val = it.next(); + ASSERT_EQUALS( "foredrag", string(val._text) ); + ASSERT_EQUALS( "danish", val._language->str() ); + ASSERT_EQUALS( 5, val._weight ); + + ASSERT( it.more() ); + val = it.next(); + ASSERT_EQUALS( "foredragsholder", string(val._text) ); + ASSERT_EQUALS( "danish", val._language->str() ); + ASSERT_EQUALS( 5, val._weight ); + + ASSERT( it.more() ); + val = it.next(); + ASSERT_EQUALS( "lector", string(val._text) ); + ASSERT_EQUALS( "danish", val._language->str() ); + ASSERT_EQUALS( 5, val._weight ); + } + } +} + diff --git a/src/mongo/db/fts/fts_matcher.cpp b/src/mongo/db/fts/fts_matcher.cpp index cdf5ca37325..5523c389d2c 
100644 --- a/src/mongo/db/fts/fts_matcher.cpp +++ b/src/mongo/db/fts/fts_matcher.cpp @@ -31,13 +31,13 @@ #include "mongo/pch.h" #include "mongo/db/fts/fts_matcher.h" +#include "mongo/db/fts/fts_iterator.h" #include "mongo/platform/strcasestr.h" namespace mongo { namespace fts { - FTSMatcher::FTSMatcher( const FTSQuery& query, const FTSSpec& spec ) : _query( query ), _spec( spec ), @@ -53,67 +53,19 @@ namespace mongo { // flagged for exclusion, i.e. "hello -world" we want to remove all // results that include "world" - if ( _query.getNegatedTerms().size() == 0 ) + if ( _query.getNegatedTerms().size() == 0 ) { return false; - - if ( _spec.wildcard() ) { - return _hasNegativeTerm_recurse(obj); } - /* otherwise look at fields where weights are defined */ - for ( Weights::const_iterator i = _spec.weights().begin(); - i != _spec.weights().end(); - i++ ) { - const char * leftOverName = i->first.c_str(); - BSONElement e = obj.getFieldDottedOrArray(leftOverName); - - if ( e.type() == Array ) { - BSONObjIterator j( e.Obj() ); - while ( j.more() ) { - BSONElement x = j.next(); - if ( leftOverName[0] && x.isABSONObj() ) - x = x.Obj().getFieldDotted( leftOverName ); - if ( x.type() == String ) - if ( _hasNegativeTerm_string( x.String() ) ) - return true; - } - } - else if ( e.type() == String ) { - if ( _hasNegativeTerm_string( e.String() ) ) - return true; - } - } - return false; - } - - bool FTSMatcher::_hasNegativeTerm_recurse(const BSONObj& obj ) const { - BSONObjIterator j( obj ); - while ( j.more() ) { - BSONElement x = j.next(); + FTSElementIterator it( _spec, obj); - if ( _spec.languageOverrideField() == x.fieldName()) - continue; - - if (x.type() == String) { - if ( _hasNegativeTerm_string( x.String() ) ) - return true; - } - else if ( x.isABSONObj() ) { - BSONObjIterator k( x.Obj() ); - while ( k.more() ) { - // check if k.next() is a obj/array or not - BSONElement y = k.next(); - if ( y.type() == String ) { - if ( _hasNegativeTerm_string( y.String() ) ) - return 
true; - } - else if ( y.isABSONObj() ) { - if ( _hasNegativeTerm_recurse( y.Obj() ) ) - return true; - } - } + while ( it.more() ) { + FTSIteratorValue val = it.next(); + if (_hasNegativeTerm_string( val._text )) { + return true; } } + return false; } @@ -135,7 +87,6 @@ namespace mongo { return false; } - bool FTSMatcher::phrasesMatch( const BSONObj& obj ) const { for (unsigned i = 0; i < _query.getPhr().size(); i++ ) { if ( !phraseMatch( _query.getPhr()[i], obj ) ) { @@ -152,90 +103,24 @@ namespace mongo { return true; } - /** * Checks if phrase is exactly matched in obj, returns true if so, false otherwise * @param phrase, the string to be matched * @param obj, document in the collection to match against */ bool FTSMatcher::phraseMatch( const string& phrase, const BSONObj& obj ) const { + FTSElementIterator it( _spec, obj); - if ( _spec.wildcard() ) { - // case where everything is indexed (all fields) - return _phraseRecurse( phrase, obj ); - } - - for ( Weights::const_iterator i = _spec.weights().begin(); - i != _spec.weights().end(); - ++i ) { - - // figure out what the indexed field is.. ie. is it "field" or "field.subfield" etc. 
- const char * leftOverName = i->first.c_str(); - BSONElement e = obj.getFieldDottedOrArray(leftOverName); - - if ( e.type() == Array ) { - BSONObjIterator j( e.Obj() ); - while ( j.more() ) { - BSONElement x = j.next(); - - if ( leftOverName[0] && x.isABSONObj() ) - x = x.Obj().getFieldDotted( leftOverName ); - - if ( x.type() == String ) - if ( _phraseMatches( phrase, x.String() ) ) - return true; - } - } - else if ( e.type() == String ) { - if ( _phraseMatches( phrase, e.String() ) ) - return true; - } - } - return false; - } - - - /* - * Recurses over all fields in the obj to match against phrase - * @param phrase, string to be matched - * @param obj, object to matched against - */ - bool FTSMatcher::_phraseRecurse( const string& phrase, const BSONObj& obj ) const { - BSONObjIterator j( obj ); - while ( j.more() ) { - BSONElement x = j.next(); - - if ( _spec.languageOverrideField() == x.fieldName() ) - continue; - - if ( x.type() == String ) { - if ( _phraseMatches( phrase, x.String() ) ) - return true; - } - else if ( x.isABSONObj() ) { - BSONObjIterator k( x.Obj() ); - - while ( k.more() ) { - - BSONElement y = k.next(); - - if ( y.type() == mongo::String ) { - if ( _phraseMatches( phrase, y.String() ) ) - return true; - } - else if ( y.isABSONObj() ) { - if ( _phraseRecurse( phrase, y.Obj() ) ) - return true; - } - } - + while ( it.more() ) { + FTSIteratorValue val = it.next(); + if (_phraseMatches( phrase, val._text )) { + return true; } } return false; } - /* * Looks for phrase in a raw string * @param phrase, phrase to match @@ -244,7 +129,5 @@ namespace mongo { bool FTSMatcher::_phraseMatches( const string& phrase, const string& haystack ) const { return strcasestr( haystack.c_str(), phrase.c_str() ) > 0; } - - } } diff --git a/src/mongo/db/fts/fts_matcher.h b/src/mongo/db/fts/fts_matcher.h index e1d9c281947..ed49e536f5d 100644 --- a/src/mongo/db/fts/fts_matcher.h +++ b/src/mongo/db/fts/fts_matcher.h @@ -52,7 +52,6 @@ namespace mongo { * so all full 
phrases and no negated */ bool phrasesMatch( const BSONObj& obj ) const; - bool phraseMatch( const string& phrase, const BSONObj& obj ) const; bool matchesNonTerm( const BSONObj& obj ) const { @@ -60,19 +59,19 @@ namespace mongo { } private: - bool _hasNegativeTerm_recurse(const BSONObj& obj ) const; - /** * @return true if raw has a negated term */ bool _hasNegativeTerm_string( const string& raw ) const; - bool _phraseRecurse( const string& phrase, const BSONObj& obj ) const; - bool _phraseMatches( const string& phrase, const string& haystack ) const; + /** + * @return true if raw has a phrase + */ + bool _phraseMatches( const string& phrase, const string& raw ) const; FTSQuery _query; - FTSSpec _spec; - Stemmer _stemmer; + FTSSpec _spec; + Stemmer _stemmer; }; } diff --git a/src/mongo/db/fts/fts_spec.cpp b/src/mongo/db/fts/fts_spec.cpp index fc692b097d6..176e8a0bd84 100644 --- a/src/mongo/db/fts/fts_spec.cpp +++ b/src/mongo/db/fts/fts_spec.cpp @@ -1,5 +1,4 @@ // fts_spec.cpp - /** * Copyright (C) 2012 10gen Inc. * @@ -33,6 +32,7 @@ #include "mongo/db/fts/fts_spec.h" #include "mongo/db/field_ref.h" +#include "mongo/db/fts/fts_iterator.h" #include "mongo/db/fts/fts_util.h" #include "mongo/util/mongoutils/str.h" #include "mongo/util/stringutils.h" @@ -140,8 +140,8 @@ namespace mongo { } } - const FTSLanguage& FTSSpec::_getLanguageToUseV2( const BSONObj& userDoc, - const FTSLanguage& currentLanguage ) const { + const FTSLanguage* FTSSpec::_getLanguageToUseV2( const BSONObj& userDoc, + const FTSLanguage* currentLanguage ) const { BSONElement e = userDoc[_languageOverrideField]; if ( e.eoo() ) { return currentLanguage; @@ -153,114 +153,21 @@ namespace mongo { uassert( 17262, "language override unsupported: " + e.String(), swl.getStatus().isOK() ); - return *swl.getValue(); - } - - - - namespace { - /** - * Check for exact match or path prefix match. 
- */ - inline bool _matchPrefix( const string& dottedName, const string& weight ) { - if ( weight == dottedName ) { - return true; - } - return str::startsWith( weight, dottedName + '.' ); - } + return swl.getValue(); } - void FTSSpec::scoreDocument( const BSONObj& obj, - const FTSLanguage& parentLanguage, - const string& parentPath, - bool isArray, - TermFrequencyMap* term_freqs ) const { - + void FTSSpec::scoreDocument( const BSONObj& obj, TermFrequencyMap* term_freqs ) const { if ( _textIndexVersion == TEXT_INDEX_VERSION_1 ) { - dassert( parentPath == "" ); - dassert( !isArray ); return _scoreDocumentV1( obj, term_freqs ); } - const FTSLanguage& language = _getLanguageToUseV2( obj, parentLanguage ); - Stemmer stemmer( language ); - Tools tools( language, &stemmer, StopWords::getStopWords( language ) ); - - // Perform a depth-first traversal of obj, skipping fields not touched by this spec. - BSONObjIterator j( obj ); - while ( j.more() ) { - - BSONElement elem = j.next(); - string fieldName = elem.fieldName(); - - // Skip "language" specifier fields if wildcard. - if ( wildcard() && languageOverrideField() == fieldName ) { - continue; - } - - // Compose the dotted name of the current field: - // 1. parent path empty (top level): use the current field name - // 2. parent path non-empty and obj is an array: use the parent path - // 3. parent path non-empty and obj is a sub-doc: append field name to parent path - string dottedName = ( parentPath.empty() ? fieldName - : isArray ? parentPath - : parentPath + '.' + fieldName ); - - // Find lower bound of dottedName in _weights. lower_bound leaves us at the first - // weight that could possibly match or be a prefix of dottedName. And if this - // element fails to match, then no subsequent weight can match, since the weights - // are lexicographically ordered. - Weights::const_iterator i = _weights.lower_bound( elem.type() == Object - ? dottedName + '.' 
- : dottedName ); - - // possibleWeightMatch is set if the weight map contains either a match or some item - // lexicographically larger than fieldName. This boolean acts as a guard on - // dereferences of iterator 'i'. - bool possibleWeightMatch = ( i != _weights.end() ); - - // Optimize away two cases, when not wildcard: - // 1. lower_bound seeks to end(): no prefix match possible - // 2. lower_bound seeks to a name which is not a prefix - if ( !wildcard() ) { - if ( !possibleWeightMatch ) { - continue; - } - else if ( !_matchPrefix( dottedName, i->first ) ) { - continue; - } - } - - // Is the current field an exact match on a weight? - bool exactMatch = ( possibleWeightMatch && i->first == dottedName ); - - double weight = ( possibleWeightMatch ? i->second : DEFAULT_WEIGHT ); + FTSElementIterator it( *this, obj ); - switch ( elem.type() ) { - case String: - // Only index strings on exact match or wildcard. - if ( exactMatch || wildcard() ) { - _scoreStringV2( tools, elem.valuestr(), term_freqs, weight ); - } - break; - case Object: - // Only descend into a sub-document on proper prefix or wildcard. Note that - // !exactMatch is a sufficient test for proper prefix match, because of - // matchPrefix() continue block above. - if ( !exactMatch || wildcard() ) { - scoreDocument( elem.Obj(), language, dottedName, false, term_freqs ); - } - break; - case Array: - // Only descend into arrays from non-array parents or on wildcard. - if ( !isArray || wildcard() ) { - scoreDocument( elem.Obj(), language, dottedName, true, term_freqs ); - } - break; - default: - // Skip over all other BSON types. 
- break; - } + while ( it.more() ) { + FTSIteratorValue val = it.next(); + Stemmer stemmer( *val._language ); + Tools tools( *val._language, &stemmer, StopWords::getStopWords( *val._language ) ); + _scoreStringV2( tools, val._text, term_freqs, val._weight ); } } @@ -281,19 +188,21 @@ namespace mongo { string term = t.data.toString(); makeLower( &term ); - if ( tools.stopwords->isStopWord( term ) ) + if ( tools.stopwords->isStopWord( term ) ) { continue; + } term = tools.stemmer->stem( term ); ScoreHelperStruct& data = terms[term]; - if ( data.exp ) + if ( data.exp ) { data.exp *= 2; - else + } + else { data.exp = 1; + } data.count += 1; data.freq += ( 1 / data.exp ); - numTokens++; } @@ -554,20 +463,21 @@ namespace mongo { } } - if ( !weights.isEmpty() ) + if ( !weights.isEmpty() ) { b.append( "weights", weights ); - if ( !default_language.empty() ) + } + if ( !default_language.empty() ) { b.append( "default_language", default_language); - if ( !language_override.empty() ) + } + if ( !language_override.empty() ) { b.append( "language_override", language_override); - - if ( version >= 0 ) + } + if ( version >= 0 ) { b.append( "v", version ); - + } b.append( "textIndexVersion", textIndexVersion ); return b.obj(); - } } diff --git a/src/mongo/db/fts/fts_spec.h b/src/mongo/db/fts/fts_spec.h index 570303f181b..f41238009fc 100644 --- a/src/mongo/db/fts/fts_spec.h +++ b/src/mongo/db/fts/fts_spec.h @@ -47,9 +47,9 @@ namespace mongo { extern const double MAX_WEIGHT; extern const double MAX_WORD_WEIGHT; + extern const double DEFAULT_WEIGHT; typedef std::map<string,double> Weights; // TODO cool map - typedef unordered_map<string,double> TermFrequencyMap; struct ScoreHelperStruct { @@ -92,17 +92,10 @@ namespace mongo { /** * Calculates term/score pairs for a BSONObj as applied to this spec. 
- * - "obj": the BSONObj to traverse; can be a subdocument or array - * - "parentLanguage": nearest enclosing document "language" spec for obj - * - "parentPath": obj's dotted path in containing document - * - "isArray": true if obj is an array - * - "term_freqs": out-parameter to store results + * @arg obj document to traverse; can be a subdocument or array + * @arg term_freqs output parameter to store (term,score) results */ - void scoreDocument( const BSONObj& obj, - const FTSLanguage& parentLanguage, - const string& parentPath, - bool isArray, - TermFrequencyMap* term_freqs ) const; + void scoreDocument( const BSONObj& obj, TermFrequencyMap* term_freqs ) const; /** * given a query, pulls out the pieces (in order) that go in the index first @@ -110,7 +103,6 @@ namespace mongo { Status getIndexPrefix( const BSONObj& filter, BSONObj* out ) const; const Weights& weights() const { return _weights; } - static BSONObj fixSpec( const BSONObj& spec ); private: @@ -119,13 +111,6 @@ namespace mongo { // /** - * Get the language override for the given BSON doc. If no language override is - * specified, returns currentLanguage. - */ - const FTSLanguage& _getLanguageToUseV2( const BSONObj& userDoc, - const FTSLanguage& currentLanguage ) const; - - /** * Calculate the term scores for 'raw' and update 'term_freqs' with the result. Parses * 'raw' using 'tools', and weights term scores based on 'weight'. */ @@ -134,6 +119,15 @@ namespace mongo { TermFrequencyMap* term_freqs, double weight ) const; + public: + /** + * Get the language override for the given BSON doc. If no language override is + * specified, returns currentLanguage. + */ + const FTSLanguage* _getLanguageToUseV2( const BSONObj& userDoc, + const FTSLanguage* currentLanguage ) const; + + private: // // Deprecated helper methods. Invoked for TEXT_INDEX_VERSION_1 spec objects only. 
// @@ -165,12 +159,13 @@ namespace mongo { string _languageOverrideField; bool _wildcard; - // _weights stores a mapping between the fields and the value as a double - // basically, how much should an occurence of (query term) in (field) be worth + // mapping : fieldname -> weight Weights _weights; - // other fields to index + // Prefix compound key - used to partition search index std::vector<string> _extraBefore; + + // Suffix compound key - used for covering index behavior std::vector<string> _extraAfter; }; diff --git a/src/mongo/db/fts/fts_spec_test.cpp b/src/mongo/db/fts/fts_spec_test.cpp index 8323ecc0cea..66590aac53d 100644 --- a/src/mongo/db/fts/fts_spec_test.cpp +++ b/src/mongo/db/fts/fts_spec_test.cpp @@ -180,11 +180,7 @@ namespace mongo { FTSSpec spec( FTSSpec::fixSpec( user ) ); TermFrequencyMap m; - spec.scoreDocument( BSON( "title" << "cat sat run" ), - spec.defaultLanguage(), - "", - false, - &m ); + spec.scoreDocument( BSON( "title" << "cat sat run" ), &m ); ASSERT_EQUALS( 3U, m.size() ); ASSERT_EQUALS( m["cat"], m["sat"] ); ASSERT_EQUALS( m["cat"], m["run"] ); @@ -199,11 +195,7 @@ namespace mongo { FTSSpec spec( FTSSpec::fixSpec( user ) ); TermFrequencyMap m; - spec.scoreDocument( BSON( "title" << "cat sat run" << "text" << "cat book" ), - spec.defaultLanguage(), - "", - false, - &m ); + spec.scoreDocument( BSON( "title" << "cat sat run" << "text" << "cat book" ), &m ); ASSERT_EQUALS( 4U, m.size() ); ASSERT_EQUALS( m["sat"], m["run"] ); @@ -222,11 +214,7 @@ namespace mongo { FTSSpec spec( FTSSpec::fixSpec( user ) ); TermFrequencyMap m; - spec.scoreDocument( BSON( "a" << BSON( "b" << "term" ) ), - spec.defaultLanguage(), - "", - false, - &m ); + spec.scoreDocument( BSON( "a" << BSON( "b" << "term" ) ), &m ); ASSERT_EQUALS( 1U, m.size() ); } @@ -238,11 +226,7 @@ namespace mongo { FTSSpec spec( FTSSpec::fixSpec( user ) ); TermFrequencyMap m; - spec.scoreDocument( BSON( "title" << "cat sat sat run run run" ), - spec.defaultLanguage(), - "", - false, - 
&m ); + spec.scoreDocument( BSON( "title" << "cat sat sat run run run" ), &m ); ASSERT_EQUALS( 3U, m.size() ); ASSERT( m["cat"] > 0 ); ASSERT( m["sat"] > m["cat"] ); @@ -311,7 +295,7 @@ namespace mongo { // The following document matches {"a.b": {$type: 2}}, so "term" should be indexed. BSONObj obj = fromjson("{a: [{b: ['term']}]}"); // indirectly nested arrays TermFrequencyMap m; - spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &m ); + spec.scoreDocument( obj, &m ); ASSERT_EQUALS( 1U, m.size() ); } @@ -322,7 +306,7 @@ namespace mongo { // The wildcard spec implies a full recursive traversal, so "term" should be indexed. BSONObj obj = fromjson("{a: {b: [['term']]}}"); // directly nested arrays TermFrequencyMap m; - spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &m ); + spec.scoreDocument( obj, &m ); ASSERT_EQUALS( 1U, m.size() ); } @@ -334,7 +318,7 @@ namespace mongo { // indexed. BSONObj obj = fromjson("{a: {b: [['term']]}}"); // directly nested arrays TermFrequencyMap m; - spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &m ); + spec.scoreDocument( obj, &m ); ASSERT_EQUALS( 0U, m.size() ); } @@ -353,7 +337,7 @@ namespace mongo { " }" " }" ); - spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm ); + spec.scoreDocument( obj, &tfm ); set<string> hits; hits.insert("walk"); @@ -384,7 +368,7 @@ namespace mongo { " }" "}" ); - spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm ); + spec.scoreDocument( obj, &tfm ); set<string> hits; hits.insert("foredrag"); @@ -415,7 +399,7 @@ namespace mongo { " } ]" "}" ); - spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm ); + spec.scoreDocument( obj, &tfm ); set<string> hits; hits.insert("foredrag"); @@ -448,7 +432,7 @@ namespace mongo { " }" "}" ); - spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm ); + spec.scoreDocument( obj, &tfm ); set<string> hits; hits.insert("foredrag"); @@ -481,7 +465,7 @@ namespace mongo { " }" "}" ); - 
spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm ); + spec.scoreDocument( obj, &tfm ); set<string> hits; hits.insert("foredrag"); @@ -516,7 +500,7 @@ namespace mongo { " }" "}" ); - spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm ); + spec.scoreDocument( obj, &tfm ); set<string> hits; hits.insert("foredrag"); @@ -540,7 +524,7 @@ namespace mongo { BSONObj indexSpec = fromjson( "{key: {'a.b': 'text'}, textIndexVersion: 1}" ); FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); TermFrequencyMap tfm; - spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm ); + spec.scoreDocument( obj, &tfm ); ASSERT_EQUALS( tfm.size(), 0U ); } @@ -549,7 +533,7 @@ namespace mongo { BSONObj indexSpec = fromjson( "{key: {'a.b': 'text'}, textIndexVersion: 2}" ); FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); TermFrequencyMap tfm; - spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm ); + spec.scoreDocument( obj, &tfm ); ASSERT_EQUALS( tfm.size(), 1U ); } } @@ -564,7 +548,7 @@ namespace mongo { BSONObj indexSpec = fromjson( "{key: {'a': 'text'}, textIndexVersion: 1}" ); FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); TermFrequencyMap tfm; - spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm ); + spec.scoreDocument( obj, &tfm ); ASSERT_EQUALS( tfm.size(), 1U ); // "the" not recognized as stopword } @@ -573,7 +557,7 @@ namespace mongo { BSONObj indexSpec = fromjson( "{key: {'a': 'text'}, textIndexVersion: 2}" ); FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); TermFrequencyMap tfm; - spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm ); + spec.scoreDocument( obj, &tfm ); ASSERT_EQUALS( tfm.size(), 0U ); // "the" recognized as stopword } } |