summaryrefslogtreecommitdiff
path: root/src/mongo/db/fts
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/fts')
-rw-r--r--src/mongo/db/fts/SConscript8
-rw-r--r--src/mongo/db/fts/fts_index_format.cpp16
-rw-r--r--src/mongo/db/fts/fts_iterator.cpp186
-rw-r--r--src/mongo/db/fts/fts_iterator.h173
-rw-r--r--src/mongo/db/fts/fts_iterator_test.cpp310
-rw-r--r--src/mongo/db/fts/fts_matcher.cpp143
-rw-r--r--src/mongo/db/fts/fts_matcher.h13
-rw-r--r--src/mongo/db/fts/fts_spec.cpp140
-rw-r--r--src/mongo/db/fts/fts_spec.h39
-rw-r--r--src/mongo/db/fts/fts_spec_test.cpp50
10 files changed, 761 insertions, 317 deletions
diff --git a/src/mongo/db/fts/SConscript b/src/mongo/db/fts/SConscript
index afd6e0386a9..2ffd2371ccf 100644
--- a/src/mongo/db/fts/SConscript
+++ b/src/mongo/db/fts/SConscript
@@ -2,7 +2,7 @@
Import("env")
-stop_word_lanages = [
+stop_word_languages = [
'danish',
'dutch',
'english',
@@ -21,7 +21,7 @@ stop_word_lanages = [
]
env.Command( [ "stop_words_list.h", "stop_words_list.cpp"],
- [ "generate_stop_words.py"] + [ 'stop_words_%s.txt' % x for x in stop_word_lanages ],
+ [ "generate_stop_words.py"] + [ 'stop_words_%s.txt' % x for x in stop_word_languages ],
"$PYTHON $SOURCES $TARGETS" )
env.Library('base', [
@@ -32,6 +32,7 @@ env.Library('base', [
'fts_spec_legacy.cpp',
'fts_language.cpp',
'fts_util.cpp',
+ 'fts_iterator.cpp',
'stemmer.cpp',
'stop_words.cpp',
'stop_words_list.cpp',
@@ -81,5 +82,8 @@ env.CppUnitTest( "fts_language_test", "fts_language_test.cpp",
env.CppUnitTest( "fts_matcher_test", "fts_matcher_test.cpp",
LIBDEPS=["base"] )
+env.CppUnitTest( "fts_iterator_test", "fts_iterator_test.cpp",
+ LIBDEPS=["base"] )
+
env.CppUnitTest( "fts_util_test", "fts_util_test.cpp",
LIBDEPS=["base","$BUILD_DIR/mongo/mongohasher"] )
diff --git a/src/mongo/db/fts/fts_index_format.cpp b/src/mongo/db/fts/fts_index_format.cpp
index 62a2fbe699e..b3892a583af 100644
--- a/src/mongo/db/fts/fts_index_format.cpp
+++ b/src/mongo/db/fts/fts_index_format.cpp
@@ -79,7 +79,7 @@ namespace mongo {
TermFrequencyMap term_freqs;
- spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &term_freqs );
+ spec.scoreDocument( obj, &term_freqs );
// create index keys from raw scores
// only 1 per string
@@ -92,9 +92,7 @@ namespace mongo {
long long keyBSONSize = 0;
const int MaxKeyBSONSizeMB = 4;
- for ( TermFrequencyMap::const_iterator i = term_freqs.begin();
- i != term_freqs.end();
- ++i ) {
+ for ( TermFrequencyMap::const_iterator i = term_freqs.begin(); i != term_freqs.end(); ++i ) {
const string& term = i->first;
double weight = i->second;
@@ -108,17 +106,18 @@ namespace mongo {
extraSize;
BSONObjBuilder b(guess); // builds a BSON object with guess length.
- for ( unsigned k = 0; k < extrasBefore.size(); k++ )
+ for ( unsigned k = 0; k < extrasBefore.size(); k++ ) {
b.appendAs( extrasBefore[k], "" );
+ }
_appendIndexKey( b, weight, term );
- for ( unsigned k = 0; k < extrasAfter.size(); k++ )
+ for ( unsigned k = 0; k < extrasAfter.size(); k++ ) {
b.appendAs( extrasAfter[k], "" );
+ }
BSONObj res = b.obj();
verify( guess >= res.objsize() );
keys->insert( res );
-
keyBSONSize += res.objsize();
uassert( 16733,
@@ -136,8 +135,9 @@ namespace mongo {
BSONObjBuilder b;
BSONObjIterator i( indexPrefix );
- while ( i.more() )
+ while ( i.more() ) {
b.appendAs( i.next(), "" );
+ }
_appendIndexKey( b, weight, term );
return b.obj();
diff --git a/src/mongo/db/fts/fts_iterator.cpp b/src/mongo/db/fts/fts_iterator.cpp
new file mode 100644
index 00000000000..c304404a1f0
--- /dev/null
+++ b/src/mongo/db/fts/fts_iterator.cpp
@@ -0,0 +1,186 @@
+// fts_iterator.cpp
+/**
+* Copyright (C) 2014 MongoDB Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* As a special exception, the copyright holders give permission to link the
+* code of portions of this program with the OpenSSL library under certain
+* conditions as described in each individual source file and distribute
+* linked combinations including the program with the OpenSSL library. You
+* must comply with the GNU Affero General Public License in all respects for
+* all of the code used other than as permitted herein. If you modify file(s)
+* with this exception, you may extend this exception to your version of the
+* file(s), but you are not obligated to do so. If you do not wish to do so,
+* delete this exception statement from your version. If you delete this
+* exception statement from all source files in the program, then also delete
+* it in the license file.
+*/
+
+#include "mongo/db/fts/fts_iterator.h"
+#include "mongo/db/fts/fts_spec.h"
+#include "mongo/db/fts/fts_util.h"
+#include "mongo/util/mongoutils/str.h"
+#include "mongo/util/stringutils.h"
+
+#include <stack>
+
+namespace mongo {
+
+ namespace fts {
+
+ extern const double DEFAULT_WEIGHT;
+ extern const double MAX_WEIGHT;
+
+ /**
+  * Debugging aid: streams a one-line description of an iterator frame, i.e. the element
+  * its BSON iterator currently points at, its language, parent path and array flag.
+  */
+ std::ostream& operator<<( std::ostream& os, FTSElementIterator::FTSIteratorFrame& frame ) {
+     // Dereference a copy of the frame's iterator rather than the iterator itself.
+     BSONObjIterator cursor = frame._it;
+     os << "FTSIteratorFrame[" " element=" << (*cursor).toString();
+     os << ", _language=" << frame._language->str();
+     os << ", _parentPath=" << frame._parentPath;
+     os << ", _isArray=" << frame._isArray;
+     return os << "]";
+ }
+
+ /**
+  * Builds the root frame over 'obj' (empty parent path, not an array, starting from the
+  * spec's default language) and immediately pre-computes the first value via advance(),
+  * so that more() can answer without doing any work.
+  */
+ FTSElementIterator::FTSElementIterator( const FTSSpec& spec, const BSONObj& obj )
+ : _frame( obj, spec, &spec.defaultLanguage(), "", false ),
+ _spec( spec ),
+ // advance() reads _frame and _spec (and the frame stack), so those members must
+ // already be constructed by the time _currentValue is initialized.
+ _currentValue( advance() )
+ { }
+
+ namespace {
+     /**
+      * Returns true when 'weight' is exactly 'dottedName', or when 'weight' names a path
+      * nested below it (i.e. 'weight' begins with 'dottedName' followed by a dot).
+      */
+     inline bool _matchPrefix( const string& dottedName, const string& weight ) {
+         const bool isSamePath = ( weight == dottedName );
+         return isSamePath || mongoutils::str::startsWith( weight, dottedName + '.' );
+     }
+ }
+
+ /**
+  * Reports whether next() still has a value to hand out.
+  */
+ bool FTSElementIterator::more() {
+ // Deliberately does not call advance(): _currentValue was already computed by the
+ // constructor or by the previous next(), so calling more() repeatedly is harmless.
+ return _currentValue.valid();
+ }
+
+ /**
+  * Hands out the value computed by the previous advance() and pre-computes the one
+  * that the following more()/next() pair will see.
+  */
+ FTSIteratorValue FTSElementIterator::next() {
+     FTSIteratorValue pending( _currentValue );
+     _currentValue = advance();
+     return pending;
+ }
+
+ /**
+  * Helper method:
+  *   while (current object iterator is exhausted) {
+  *       if (frame stack is empty) return false;
+  *       resume the object iterator popped from the stack;
+  *   }
+  *   return true;
+  *
+  * On a true return, _frame is a frame whose iterator has at least one more element.
+  */
+ bool FTSElementIterator::moreFrames() {
+     while ( !_frame._it.more() ) {
+         if ( _frameStack.empty() ) {
+             // Nothing left to resume: the whole document has been traversed.
+             return false;
+         }
+         // Resume the enclosing frame where it left off.
+         _frame = _frameStack.top();
+         _frameStack.pop();
+     }
+     return true;
+ }
+
+ /**
+  * Core traversal step shared by the constructor and next().
+  *
+  * Walks the document depth-first (sub-documents and arrays are entered by pushing the
+  * current frame onto _frameStack) and stops at the first string element that this
+  * spec indexes.
+  *
+  * @return the next indexable string together with the language of its enclosing frame
+  *         and its weight, or a default-constructed FTSIteratorValue (valid()==false)
+  *         once every frame is exhausted.
+  */
+ FTSIteratorValue FTSElementIterator::advance() {
+     while ( moreFrames() ) {
+
+         BSONElement elem = _frame._it.next();
+         string fieldName = elem.fieldName();
+
+         // Skip "language" specifier fields if wildcard.
+         if ( _spec.wildcard() && _spec.languageOverrideField() == fieldName ) {
+             continue;
+         }
+
+         // Compose the dotted name of the current field:
+         // 1. parent path empty (top level): use the current field name
+         // 2. parent path non-empty and obj is an array: use the parent path
+         // 3. parent path non-empty and obj is a sub-doc: append field name to parent path
+         string dottedName = ( _frame._parentPath.empty() ? fieldName
+                             : _frame._isArray ? _frame._parentPath
+                             : _frame._parentPath + '.' + fieldName );
+
+         // Find lower bound of dottedName in _weights. lower_bound leaves us at the first
+         // weight that could possibly match or have dottedName as a path prefix. And if
+         // this element fails to match, then no subsequent weight can match, since the
+         // weights are lexicographically ordered. For sub-documents the search key is
+         // dottedName + '.', so only weights strictly below the sub-document qualify.
+         Weights::const_iterator i = _spec.weights().lower_bound( elem.type() == Object
+                                                                  ? dottedName + '.'
+                                                                  : dottedName );
+
+         // possibleWeightMatch is set if the weight map contains either a match or some item
+         // lexicographically larger than the search key. This boolean acts as a guard on
+         // dereferences of iterator 'i'.
+         bool possibleWeightMatch = ( i != _spec.weights().end() );
+
+         // Optimize away two cases, when not wildcard:
+         // 1. lower_bound seeks to end(): no prefix match possible
+         // 2. lower_bound seeks to a name which is not a prefix
+         if ( !_spec.wildcard() ) {
+             if ( !possibleWeightMatch || !_matchPrefix( dottedName, i->first ) ) {
+                 continue;
+             }
+         }
+
+         // Is the current field an exact match on a weight?
+         bool exactMatch = ( possibleWeightMatch && i->first == dottedName );
+
+         // Only an exact match may contribute its configured weight. Without an exact
+         // match 'i' merely points at the next larger key, which can be an unrelated
+         // field; a string returned through the wildcard path then gets DEFAULT_WEIGHT.
+         double weight = ( exactMatch ? i->second : DEFAULT_WEIGHT );
+
+         switch ( elem.type() ) {
+         case String:
+             // Only index strings on exact match or wildcard.
+             if ( exactMatch || _spec.wildcard() ) {
+                 return FTSIteratorValue( elem.valuestr(), _frame._language, weight );
+             }
+             break;
+
+         case Object:
+             // Only descend into a sub-document on proper prefix or wildcard. Note that
+             // !exactMatch is a sufficient test for proper prefix match, because of the
+             //     if ( ... !_matchPrefix( dottedName, i->first ) ) continue;
+             // block above.
+             if ( !exactMatch || _spec.wildcard() ) {
+                 _frameStack.push( _frame );
+                 _frame = FTSIteratorFrame( elem.Obj(), _spec, _frame._language, dottedName, false );
+             }
+             break;
+
+         case Array:
+             // Only descend into arrays from non-array parents or on wildcard.
+             if ( !_frame._isArray || _spec.wildcard() ) {
+                 _frameStack.push( _frame );
+                 _frame = FTSIteratorFrame( elem.Obj(), _spec, _frame._language, dottedName, true );
+             }
+             break;
+
+         default:
+             // Skip over all other BSON types.
+             break;
+         }
+     }
+     return FTSIteratorValue(); // valid()==false
+ }
+
+ } // namespace fts
+} // namespace mongo
diff --git a/src/mongo/db/fts/fts_iterator.h b/src/mongo/db/fts/fts_iterator.h
new file mode 100644
index 00000000000..f8f75d2839f
--- /dev/null
+++ b/src/mongo/db/fts/fts_iterator.h
@@ -0,0 +1,173 @@
+// fts_iterator.h
+
+/**
+* Copyright (C) 2014 MongoDB Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* As a special exception, the copyright holders give permission to link the
+* code of portions of this program with the OpenSSL library under certain
+* conditions as described in each individual source file and distribute
+* linked combinations including the program with the OpenSSL library. You
+* must comply with the GNU Affero General Public License in all respects for
+* all of the code used other than as permitted herein. If you modify file(s)
+* with this exception, you may extend this exception to your version of the
+* file(s), but you are not obligated to do so. If you do not wish to do so,
+* delete this exception statement from your version. If you delete this
+* exception statement from all source files in the program, then also delete
+* it in the license file.
+*/
+
+#pragma once
+
+#include "mongo/bson/bsonobj.h"
+#include "mongo/bson/bsonobjiterator.h"
+#include "mongo/db/fts/fts_language.h"
+#include "mongo/db/fts/fts_spec.h"
+
+#include <map>
+#include <stack>
+#include <string>
+
+namespace mongo {
+
+ namespace fts {
+
+ /**
+ * Encapsulates data fields returned by FTSElementIterator
+ */
+ struct FTSIteratorValue {
+
+     /** Builds a valid value describing one indexable string. */
+     FTSIteratorValue( const char* text, const FTSLanguage* language, double weight )
+         : _text(text), _language(language), _weight(weight), _valid(true) {}
+
+     /** Builds the "no value" sentinel: null text and language, zero weight, not valid. */
+     FTSIteratorValue()
+         : _text(NULL), _language(), _weight(0.0), _valid(false) {}
+
+     /** Returns the validity flag set by the constructors. */
+     bool valid() const { return _valid; }
+
+     // String contents; a raw pointer that this struct neither copies nor frees.
+     const char* _text;
+     // Language that applies to _text.
+     const FTSLanguage* _language;
+     // Weight assigned to _text.
+     double _weight;
+     // False only for the default-constructed sentinel.
+     bool _valid;
+ };
+
+ /**
+ * Iterator pattern for walking through text-indexed fields of a
+ * BSON document.
+ *
+ * Example usage:
+ * FTSSpec spec( FTSSpec::fixSpec( indexSpec ) );
+ * FTSElementIterator it( spec, obj );
+ * while ( it.more() ) {
+ * FTSIteratorValue val = it.next();
+ * std::cout << val._text << '[' << val._language->str()
+ * << ',' << val._weight << ']' << std::endl;
+ * }
+ *
+ */
+ class FTSElementIterator {
+ public:
+ /**
+ * Iterator interface: returns false iff there are no further text-indexable fields.
+ * Does not advance the iterator.
+ */
+ bool more();
+
+ /**
+ * Iterator interface: returns the current text-indexable field and advances to the
+ * next one. If more() is false, the returned value has valid() == false.
+ */
+ FTSIteratorValue next();
+
+ /**
+ * Iterator constructor
+ *
+ * Note: Caller must ensure that the constructed FTSElementIterator
+ * does >not< outlive either spec or obj.
+ *
+ * @arg spec text index specifier
+ * @arg obj document that the iterator will traverse
+ */
+ FTSElementIterator( const FTSSpec& spec, const BSONObj& obj);
+
+ /**
+ * Iterator frame needed for iterative implementation of
+ * recursive sub-documents.
+ *
+ * One frame describes one object or array being traversed: where the traversal
+ * currently is inside it, which language applies to it, and the dotted path that
+ * leads to it.
+ */
+ struct FTSIteratorFrame {
+ FTSIteratorFrame( const BSONObj& obj,
+ const FTSSpec& spec,
+ const FTSLanguage* parentLanguage,
+ const std::string& parentPath,
+ bool isArray )
+ : _it( obj ),
+ // The language is resolved from 'obj' by the spec, given the parent's language.
+ _language( spec._getLanguageToUseV2( obj, parentLanguage ) ),
+ _parentPath( parentPath ),
+ _isArray( isArray )
+ {}
+
+ friend std::ostream& operator<<(std::ostream&, FTSIteratorFrame&);
+
+ // Position within the object/array this frame traverses.
+ BSONObjIterator _it;
+ // Language in effect for strings found directly in this frame.
+ const FTSLanguage* _language;
+ // Dotted path of this frame's object; empty for the top-level document.
+ std::string _parentPath;
+ // True when this frame traverses the elements of an array.
+ bool _isArray;
+ };
+
+ private:
+ /**
+ * Helper method:
+ * returns false iff the current frame and all FTSIteratorFrames on _frameStack are
+ * exhausted. May replace the current frame with one popped from _frameStack.
+ */
+ bool moreFrames();
+
+ /**
+ * Helper method:
+ * advances to the next text-indexable field, possibly pushing frames as
+ * needed for recursive sub-documents.
+ */
+ FTSIteratorValue advance();
+
+ /**
+ * Stack used by iterative implementation of recursive sub-document traversal.
+ */
+ std::stack<FTSIteratorFrame> _frameStack;
+
+ /**
+ * Current frame, not yet pushed to stack.
+ */
+ FTSIteratorFrame _frame;
+
+ /**
+ * Constructor input parameter: text index specification.
+ */
+ const FTSSpec& _spec;
+
+ /**
+ * Current iterator return value: pre-computed by 'advance()' (called from the
+ * constructor and from each 'next()'), tested by 'more()', returned by 'next()'.
+ */
+ FTSIteratorValue _currentValue;
+ };
+
+ } // namespace fts
+} // namespace mongo
+
diff --git a/src/mongo/db/fts/fts_iterator_test.cpp b/src/mongo/db/fts/fts_iterator_test.cpp
new file mode 100644
index 00000000000..c1e50bc7682
--- /dev/null
+++ b/src/mongo/db/fts/fts_iterator_test.cpp
@@ -0,0 +1,310 @@
+// fts_iterator_test.cpp
+/**
+* Copyright (C) 2014 MongoDB Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* As a special exception, the copyright holders give permission to link the
+* code of portions of this program with the OpenSSL library under certain
+* conditions as described in each individual source file and distribute
+* linked combinations including the program with the OpenSSL library. You
+* must comply with the GNU Affero General Public License in all respects for
+* all of the code used other than as permitted herein. If you modify file(s)
+* with this exception, you may extend this exception to your version of the
+* file(s), but you are not obligated to do so. If you do not wish to do so,
+* delete this exception statement from your version. If you delete this
+* exception statement from all source files in the program, then also delete
+* it in the license file.
+*/
+
+#include "mongo/pch.h"
+
+#include "mongo/db/fts/fts_iterator.h"
+#include "mongo/db/json.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+ namespace fts {
+
+ // Flat document with explicit weights: checks the weight map built from the index spec,
+ // then checks that the first two values produced are the weighted strings "b" and "d".
+ TEST( FTSElementIterator, Test1 ) {
+
+ BSONObj obj = fromjson(
+ "{ b : \"walking\","
+ " c : { e: \"walked\" },"
+ " d : \"walker\""
+ " }" );
+
+ BSONObj indexSpec = fromjson(
+ "{ key : { a : \"text\" }, weights : { b : 10, d : 5 } }" );
+
+ FTSSpec spec( FTSSpec::fixSpec( indexSpec ) );
+ // The weight map is expected to hold the key field "a" (weight 1) plus the two
+ // explicitly weighted fields, in lexicographic order.
+ Weights::const_iterator itt = spec.weights().begin();
+ ASSERT( itt != spec.weights().end() );
+ ASSERT_EQUALS( "a", itt->first );
+ ASSERT_EQUALS( 1, itt->second );
+ ++itt;
+ ASSERT( itt != spec.weights().end() );
+ ASSERT_EQUALS( "b", itt->first );
+ ASSERT_EQUALS( 10, itt->second );
+ ++itt;
+ ASSERT( itt != spec.weights().end() );
+ ASSERT_EQUALS( "d", itt->first );
+ ASSERT_EQUALS( 5, itt->second );
+ ++itt;
+
+ FTSElementIterator it( spec, obj );
+
+ // First value: top-level field "b".
+ ASSERT( it.more() );
+ FTSIteratorValue val = it.next();
+ ASSERT_EQUALS( "walking", string(val._text) );
+ ASSERT_EQUALS( "english", val._language->str() );
+ ASSERT_EQUALS( 10, val._weight );
+
+ // Second value: top-level field "d"; the string under c.e is not expected in between.
+ ASSERT( it.more() );
+ val = it.next();
+ ASSERT_EQUALS( "walker", string(val._text) );
+ ASSERT_EQUALS( "english", val._language->str() );
+ ASSERT_EQUALS( 5, val._weight );
+ }
+
+ // Multi-language : test per-element "language" fields inside an array of sub-documents,
+ // combined with a document-level "language" field.
+ TEST( FTSElementIterator, Test2 ) {
+
+ BSONObj obj = fromjson(
+ "{ a :"
+ " { b :"
+ " [ { c : \"walked\", language : \"english\" },"
+ " { c : \"camminato\", language : \"italian\" },"
+ " { c : \"ging\", language : \"german\" } ]"
+ " },"
+ " d : \"Feliz Año Nuevo!\","
+ " language : \"spanish\""
+ " }" );
+
+ BSONObj indexSpec = fromjson(
+ "{ key : { \"a.b.c\" : \"text\", d : \"text\" } }" );
+
+ FTSSpec spec( FTSSpec::fixSpec( indexSpec ) );
+
+ FTSElementIterator it( spec, obj );
+
+ // Each array element is expected to report the language named in that element.
+ ASSERT( it.more() );
+ FTSIteratorValue val = it.next();
+ ASSERT_EQUALS( "walked", string(val._text) );
+ ASSERT_EQUALS( "english", val._language->str() );
+ ASSERT_EQUALS( 1, val._weight );
+
+ ASSERT( it.more() );
+ val = it.next();
+ ASSERT_EQUALS( "camminato", string(val._text) );
+ ASSERT_EQUALS( "italian", val._language->str() );
+ ASSERT_EQUALS( 1, val._weight );
+
+ ASSERT( it.more() );
+ val = it.next();
+ ASSERT_EQUALS( "ging", string(val._text) );
+ ASSERT_EQUALS( "german", val._language->str() );
+ ASSERT_EQUALS( 1, val._weight );
+
+ // The top-level field "d" is expected to report the document-level language.
+ ASSERT( it.more() );
+ val = it.next();
+ ASSERT_EQUALS( "Feliz Año Nuevo!", string(val._text) );
+ ASSERT_EQUALS( "spanish", val._language->str() );
+ ASSERT_EQUALS( 1, val._weight );
+ }
+
+ // Multi-language : test nested stemming per sub-document
+ // The "language" field of sub-document "a" is expected to apply to the strings nested
+ // below it, rather than the top-level "language" field.
+ TEST( FTSElementIterator, Test3 ) {
+
+ BSONObj obj = fromjson(
+ "{ language : \"english\","
+ " a :"
+ " { language : \"danish\","
+ " b :"
+ " [ { c : \"foredrag\" },"
+ " { c : \"foredragsholder\" },"
+ " { c : \"lector\" } ]"
+ " }"
+ "}" );
+
+ BSONObj indexSpec = fromjson(
+ "{ key : { a : \"text\", \"a.b.c\" : \"text\" }, weights : { \"a.b.c\" : 5 } }" );
+
+ FTSSpec spec( FTSSpec::fixSpec( indexSpec ) );
+ // Expected weight map: "a" with weight 1 and "a.b.c" with the explicit weight 5.
+ Weights::const_iterator itt = spec.weights().begin();
+ ASSERT( itt != spec.weights().end() );
+ ASSERT_EQUALS( "a", itt->first );
+ ASSERT_EQUALS( 1, itt->second );
+ ++itt;
+ ASSERT( itt != spec.weights().end() );
+ ASSERT_EQUALS( "a.b.c", itt->first );
+ ASSERT_EQUALS( 5, itt->second );
+
+ FTSElementIterator it( spec, obj );
+
+ // All three strings sit at path a.b.c, so each is expected with weight 5 and "danish".
+ ASSERT( it.more() );
+ FTSIteratorValue val = it.next();
+ ASSERT_EQUALS( "foredrag", string(val._text) );
+ ASSERT_EQUALS( "danish", val._language->str() );
+ ASSERT_EQUALS( 5, val._weight );
+
+ ASSERT( it.more() );
+ val = it.next();
+ ASSERT_EQUALS( "foredragsholder", string(val._text) );
+ ASSERT_EQUALS( "danish", val._language->str() );
+ ASSERT_EQUALS( 5, val._weight );
+
+ ASSERT( it.more() );
+ val = it.next();
+ ASSERT_EQUALS( "lector", string(val._text) );
+ ASSERT_EQUALS( "danish", val._language->str() );
+ ASSERT_EQUALS( 5, val._weight );
+
+ }
+
+ // Multi-language : test nested arrays
+ // Arrays appear at three levels here (a, a.b and a.b.c); the strings are still expected
+ // at dotted path "a.b.c", with the language taken from the enclosing sub-document.
+ TEST( FTSElementIterator, Test4 ) {
+
+ BSONObj obj = fromjson(
+ "{ language : \"english\","
+ " a : ["
+ " { language : \"danish\","
+ " b :"
+ " [ { c : [\"foredrag\"] },"
+ " { c : [\"foredragsholder\"] },"
+ " { c : [\"lector\"] } ]"
+ " } ]"
+ "}" );
+
+ BSONObj indexSpec = fromjson(
+ "{ key : { \"a.b.c\" : \"text\" }, weights : { \"a.b.c\" : 5 } }" );
+
+ FTSSpec spec( FTSSpec::fixSpec( indexSpec ) );
+ FTSElementIterator it( spec, obj );
+
+ // One value is expected per single-element "c" array, in document order.
+ ASSERT( it.more() );
+ FTSIteratorValue val = it.next();
+ ASSERT_EQUALS( "foredrag", string(val._text) );
+ ASSERT_EQUALS( "danish", val._language->str() );
+ ASSERT_EQUALS( 5, val._weight );
+
+ ASSERT( it.more() );
+ val = it.next();
+ ASSERT_EQUALS( "foredragsholder", string(val._text) );
+ ASSERT_EQUALS( "danish", val._language->str() );
+ ASSERT_EQUALS( 5, val._weight );
+
+ ASSERT( it.more() );
+ val = it.next();
+ ASSERT_EQUALS( "lector", string(val._text) );
+ ASSERT_EQUALS( "danish", val._language->str() );
+ ASSERT_EQUALS( 5, val._weight );
+
+ }
+
+ // Multi-language : test wildcard spec
+ // NOTE(review): the expectations below skip the string under c.e and the numeric "g"
+ // fields. A string such as c.e would be returned if the spec were a wildcard spec, so
+ // this looks like it exercises explicit weights rather than a wildcard - confirm and
+ // relabel if so.
+ TEST( FTSElementIterator, Test5 ) {
+
+ BSONObj obj = fromjson(
+ "{ language : \"english\","
+ " b : \"these boots were made for walking\","
+ " c : { e: \"I walked half way to the market before seeing the sunrise\" },"
+ " d : "
+ " { language : \"danish\","
+ " e :"
+ " [ { f : \"foredrag\", g : 12 },"
+ " { f : \"foredragsholder\", g : 13 },"
+ " { f : \"lector\", g : 14 } ]"
+ " }"
+ "}" );
+
+ BSONObj indexSpec = fromjson(
+ "{ key : { a : \"text\" }, weights : { b : 20, c : 10, \"d.e.f\" : 5 } }" );
+
+ FTSSpec spec( FTSSpec::fixSpec( indexSpec ) );
+ FTSElementIterator it( spec, obj );
+
+ // Top-level string "b" with its explicit weight and the document-level language.
+ ASSERT( it.more() );
+ FTSIteratorValue val = it.next();
+ ASSERT_EQUALS( "these boots were made for walking", string(val._text) );
+ ASSERT_EQUALS( "english", val._language->str() );
+ ASSERT_EQUALS( 20, val._weight );
+
+ // The three d.e.f strings, with the language set on sub-document "d".
+ ASSERT( it.more() );
+ val = it.next();
+ ASSERT_EQUALS( "foredrag", string(val._text) );
+ ASSERT_EQUALS( "danish", val._language->str() );
+ ASSERT_EQUALS( 5, val._weight );
+
+ ASSERT( it.more() );
+ val = it.next();
+ ASSERT_EQUALS( "foredragsholder", string(val._text) );
+ ASSERT_EQUALS( "danish", val._language->str() );
+ ASSERT_EQUALS( 5, val._weight );
+
+ ASSERT( it.more() );
+ val = it.next();
+ ASSERT_EQUALS( "lector", string(val._text) );
+ ASSERT_EQUALS( "danish", val._language->str() );
+ ASSERT_EQUALS( 5, val._weight );
+ }
+
+ // Multi-language : test wildcard spec
+ // NOTE(review): document, index spec and expectations here are the same as in Test5
+ // above; presumably one of the two was meant to use a different (wildcard) spec -
+ // confirm.
+ TEST( FTSElementIterator, Test6 ) {
+
+ BSONObj obj = fromjson(
+ "{ language : \"english\","
+ " b : \"these boots were made for walking\","
+ " c : { e: \"I walked half way to the market before seeing the sunrise\" },"
+ " d : "
+ " { language : \"danish\","
+ " e :"
+ " [ { f : \"foredrag\", g : 12 },"
+ " { f : \"foredragsholder\", g : 13 },"
+ " { f : \"lector\", g : 14 } ]"
+ " }"
+ "}" );
+
+ BSONObj indexSpec = fromjson(
+ "{ key : { a : \"text\" }, weights : { b : 20, c : 10, \"d.e.f\" : 5 } }" );
+
+ FTSSpec spec( FTSSpec::fixSpec( indexSpec ) );
+ FTSElementIterator it( spec, obj );
+
+ // Top-level string "b" with its explicit weight and the document-level language.
+ ASSERT( it.more() );
+ FTSIteratorValue val = it.next();
+ ASSERT_EQUALS( "these boots were made for walking", string(val._text) );
+ ASSERT_EQUALS( "english", val._language->str() );
+ ASSERT_EQUALS( 20, val._weight );
+
+ // The three d.e.f strings, with the language set on sub-document "d".
+ ASSERT( it.more() );
+ val = it.next();
+ ASSERT_EQUALS( "foredrag", string(val._text) );
+ ASSERT_EQUALS( "danish", val._language->str() );
+ ASSERT_EQUALS( 5, val._weight );
+
+ ASSERT( it.more() );
+ val = it.next();
+ ASSERT_EQUALS( "foredragsholder", string(val._text) );
+ ASSERT_EQUALS( "danish", val._language->str() );
+ ASSERT_EQUALS( 5, val._weight );
+
+ ASSERT( it.more() );
+ val = it.next();
+ ASSERT_EQUALS( "lector", string(val._text) );
+ ASSERT_EQUALS( "danish", val._language->str() );
+ ASSERT_EQUALS( 5, val._weight );
+ }
+ }
+}
+
diff --git a/src/mongo/db/fts/fts_matcher.cpp b/src/mongo/db/fts/fts_matcher.cpp
index cdf5ca37325..5523c389d2c 100644
--- a/src/mongo/db/fts/fts_matcher.cpp
+++ b/src/mongo/db/fts/fts_matcher.cpp
@@ -31,13 +31,13 @@
#include "mongo/pch.h"
#include "mongo/db/fts/fts_matcher.h"
+#include "mongo/db/fts/fts_iterator.h"
#include "mongo/platform/strcasestr.h"
namespace mongo {
namespace fts {
-
FTSMatcher::FTSMatcher( const FTSQuery& query, const FTSSpec& spec )
: _query( query ),
_spec( spec ),
@@ -53,67 +53,19 @@ namespace mongo {
// flagged for exclusion, i.e. "hello -world" we want to remove all
// results that include "world"
- if ( _query.getNegatedTerms().size() == 0 )
+ if ( _query.getNegatedTerms().size() == 0 ) {
return false;
-
- if ( _spec.wildcard() ) {
- return _hasNegativeTerm_recurse(obj);
}
- /* otherwise look at fields where weights are defined */
- for ( Weights::const_iterator i = _spec.weights().begin();
- i != _spec.weights().end();
- i++ ) {
- const char * leftOverName = i->first.c_str();
- BSONElement e = obj.getFieldDottedOrArray(leftOverName);
-
- if ( e.type() == Array ) {
- BSONObjIterator j( e.Obj() );
- while ( j.more() ) {
- BSONElement x = j.next();
- if ( leftOverName[0] && x.isABSONObj() )
- x = x.Obj().getFieldDotted( leftOverName );
- if ( x.type() == String )
- if ( _hasNegativeTerm_string( x.String() ) )
- return true;
- }
- }
- else if ( e.type() == String ) {
- if ( _hasNegativeTerm_string( e.String() ) )
- return true;
- }
- }
- return false;
- }
-
- bool FTSMatcher::_hasNegativeTerm_recurse(const BSONObj& obj ) const {
- BSONObjIterator j( obj );
- while ( j.more() ) {
- BSONElement x = j.next();
+ FTSElementIterator it( _spec, obj);
- if ( _spec.languageOverrideField() == x.fieldName())
- continue;
-
- if (x.type() == String) {
- if ( _hasNegativeTerm_string( x.String() ) )
- return true;
- }
- else if ( x.isABSONObj() ) {
- BSONObjIterator k( x.Obj() );
- while ( k.more() ) {
- // check if k.next() is a obj/array or not
- BSONElement y = k.next();
- if ( y.type() == String ) {
- if ( _hasNegativeTerm_string( y.String() ) )
- return true;
- }
- else if ( y.isABSONObj() ) {
- if ( _hasNegativeTerm_recurse( y.Obj() ) )
- return true;
- }
- }
+ while ( it.more() ) {
+ FTSIteratorValue val = it.next();
+ if (_hasNegativeTerm_string( val._text )) {
+ return true;
}
}
+
return false;
}
@@ -135,7 +87,6 @@ namespace mongo {
return false;
}
-
bool FTSMatcher::phrasesMatch( const BSONObj& obj ) const {
for (unsigned i = 0; i < _query.getPhr().size(); i++ ) {
if ( !phraseMatch( _query.getPhr()[i], obj ) ) {
@@ -152,90 +103,24 @@ namespace mongo {
return true;
}
-
/**
* Checks if phrase is exactly matched in obj, returns true if so, false otherwise
* @param phrase, the string to be matched
* @param obj, document in the collection to match against
*/
bool FTSMatcher::phraseMatch( const string& phrase, const BSONObj& obj ) const {
+ FTSElementIterator it( _spec, obj);
- if ( _spec.wildcard() ) {
- // case where everything is indexed (all fields)
- return _phraseRecurse( phrase, obj );
- }
-
- for ( Weights::const_iterator i = _spec.weights().begin();
- i != _spec.weights().end();
- ++i ) {
-
- // figure out what the indexed field is.. ie. is it "field" or "field.subfield" etc.
- const char * leftOverName = i->first.c_str();
- BSONElement e = obj.getFieldDottedOrArray(leftOverName);
-
- if ( e.type() == Array ) {
- BSONObjIterator j( e.Obj() );
- while ( j.more() ) {
- BSONElement x = j.next();
-
- if ( leftOverName[0] && x.isABSONObj() )
- x = x.Obj().getFieldDotted( leftOverName );
-
- if ( x.type() == String )
- if ( _phraseMatches( phrase, x.String() ) )
- return true;
- }
- }
- else if ( e.type() == String ) {
- if ( _phraseMatches( phrase, e.String() ) )
- return true;
- }
- }
- return false;
- }
-
-
- /*
- * Recurses over all fields in the obj to match against phrase
- * @param phrase, string to be matched
- * @param obj, object to matched against
- */
- bool FTSMatcher::_phraseRecurse( const string& phrase, const BSONObj& obj ) const {
- BSONObjIterator j( obj );
- while ( j.more() ) {
- BSONElement x = j.next();
-
- if ( _spec.languageOverrideField() == x.fieldName() )
- continue;
-
- if ( x.type() == String ) {
- if ( _phraseMatches( phrase, x.String() ) )
- return true;
- }
- else if ( x.isABSONObj() ) {
- BSONObjIterator k( x.Obj() );
-
- while ( k.more() ) {
-
- BSONElement y = k.next();
-
- if ( y.type() == mongo::String ) {
- if ( _phraseMatches( phrase, y.String() ) )
- return true;
- }
- else if ( y.isABSONObj() ) {
- if ( _phraseRecurse( phrase, y.Obj() ) )
- return true;
- }
- }
-
+ while ( it.more() ) {
+ FTSIteratorValue val = it.next();
+ if (_phraseMatches( phrase, val._text )) {
+ return true;
}
}
return false;
}
-
/*
* Looks for phrase in a raw string
* @param phrase, phrase to match
@@ -244,7 +129,5 @@ namespace mongo {
bool FTSMatcher::_phraseMatches( const string& phrase, const string& haystack ) const {
    // strcasestr() yields a pointer to the first case-insensitive occurrence of 'phrase'
    // in 'haystack', or NULL when there is none. Test against NULL explicitly: an
    // ordered comparison ('>') between a pointer and the literal 0 is not a meaningful
    // null test and is rejected by newer compilers.
    return strcasestr( haystack.c_str(), phrase.c_str() ) != NULL;
}
-
-
}
}
diff --git a/src/mongo/db/fts/fts_matcher.h b/src/mongo/db/fts/fts_matcher.h
index e1d9c281947..ed49e536f5d 100644
--- a/src/mongo/db/fts/fts_matcher.h
+++ b/src/mongo/db/fts/fts_matcher.h
@@ -52,7 +52,6 @@ namespace mongo {
* so all full phrases and no negated
*/
bool phrasesMatch( const BSONObj& obj ) const;
-
bool phraseMatch( const string& phrase, const BSONObj& obj ) const;
bool matchesNonTerm( const BSONObj& obj ) const {
@@ -60,19 +59,19 @@ namespace mongo {
}
private:
- bool _hasNegativeTerm_recurse(const BSONObj& obj ) const;
-
/**
* @return true if raw has a negated term
*/
bool _hasNegativeTerm_string( const string& raw ) const;
- bool _phraseRecurse( const string& phrase, const BSONObj& obj ) const;
- bool _phraseMatches( const string& phrase, const string& haystack ) const;
+ /**
+ * @return true if raw has a phrase
+ */
+ bool _phraseMatches( const string& phrase, const string& raw ) const;
FTSQuery _query;
- FTSSpec _spec;
- Stemmer _stemmer;
+ FTSSpec _spec;
+ Stemmer _stemmer;
};
}
diff --git a/src/mongo/db/fts/fts_spec.cpp b/src/mongo/db/fts/fts_spec.cpp
index fc692b097d6..176e8a0bd84 100644
--- a/src/mongo/db/fts/fts_spec.cpp
+++ b/src/mongo/db/fts/fts_spec.cpp
@@ -1,5 +1,4 @@
// fts_spec.cpp
-
/**
* Copyright (C) 2012 10gen Inc.
*
@@ -33,6 +32,7 @@
#include "mongo/db/fts/fts_spec.h"
#include "mongo/db/field_ref.h"
+#include "mongo/db/fts/fts_iterator.h"
#include "mongo/db/fts/fts_util.h"
#include "mongo/util/mongoutils/str.h"
#include "mongo/util/stringutils.h"
@@ -140,8 +140,8 @@ namespace mongo {
}
}
- const FTSLanguage& FTSSpec::_getLanguageToUseV2( const BSONObj& userDoc,
- const FTSLanguage& currentLanguage ) const {
+ const FTSLanguage* FTSSpec::_getLanguageToUseV2( const BSONObj& userDoc,
+ const FTSLanguage* currentLanguage ) const {
BSONElement e = userDoc[_languageOverrideField];
if ( e.eoo() ) {
return currentLanguage;
@@ -153,114 +153,21 @@ namespace mongo {
uassert( 17262,
"language override unsupported: " + e.String(),
swl.getStatus().isOK() );
- return *swl.getValue();
- }
-
-
-
- namespace {
- /**
- * Check for exact match or path prefix match.
- */
- inline bool _matchPrefix( const string& dottedName, const string& weight ) {
- if ( weight == dottedName ) {
- return true;
- }
- return str::startsWith( weight, dottedName + '.' );
- }
+ return swl.getValue();
}
- void FTSSpec::scoreDocument( const BSONObj& obj,
- const FTSLanguage& parentLanguage,
- const string& parentPath,
- bool isArray,
- TermFrequencyMap* term_freqs ) const {
-
+ void FTSSpec::scoreDocument( const BSONObj& obj, TermFrequencyMap* term_freqs ) const {
if ( _textIndexVersion == TEXT_INDEX_VERSION_1 ) {
- dassert( parentPath == "" );
- dassert( !isArray );
return _scoreDocumentV1( obj, term_freqs );
}
- const FTSLanguage& language = _getLanguageToUseV2( obj, parentLanguage );
- Stemmer stemmer( language );
- Tools tools( language, &stemmer, StopWords::getStopWords( language ) );
-
- // Perform a depth-first traversal of obj, skipping fields not touched by this spec.
- BSONObjIterator j( obj );
- while ( j.more() ) {
-
- BSONElement elem = j.next();
- string fieldName = elem.fieldName();
-
- // Skip "language" specifier fields if wildcard.
- if ( wildcard() && languageOverrideField() == fieldName ) {
- continue;
- }
-
- // Compose the dotted name of the current field:
- // 1. parent path empty (top level): use the current field name
- // 2. parent path non-empty and obj is an array: use the parent path
- // 3. parent path non-empty and obj is a sub-doc: append field name to parent path
- string dottedName = ( parentPath.empty() ? fieldName
- : isArray ? parentPath
- : parentPath + '.' + fieldName );
-
- // Find lower bound of dottedName in _weights. lower_bound leaves us at the first
- // weight that could possibly match or be a prefix of dottedName. And if this
- // element fails to match, then no subsequent weight can match, since the weights
- // are lexicographically ordered.
- Weights::const_iterator i = _weights.lower_bound( elem.type() == Object
- ? dottedName + '.'
- : dottedName );
-
- // possibleWeightMatch is set if the weight map contains either a match or some item
- // lexicographically larger than fieldName. This boolean acts as a guard on
- // dereferences of iterator 'i'.
- bool possibleWeightMatch = ( i != _weights.end() );
-
- // Optimize away two cases, when not wildcard:
- // 1. lower_bound seeks to end(): no prefix match possible
- // 2. lower_bound seeks to a name which is not a prefix
- if ( !wildcard() ) {
- if ( !possibleWeightMatch ) {
- continue;
- }
- else if ( !_matchPrefix( dottedName, i->first ) ) {
- continue;
- }
- }
-
- // Is the current field an exact match on a weight?
- bool exactMatch = ( possibleWeightMatch && i->first == dottedName );
-
- double weight = ( possibleWeightMatch ? i->second : DEFAULT_WEIGHT );
+ FTSElementIterator it( *this, obj );
- switch ( elem.type() ) {
- case String:
- // Only index strings on exact match or wildcard.
- if ( exactMatch || wildcard() ) {
- _scoreStringV2( tools, elem.valuestr(), term_freqs, weight );
- }
- break;
- case Object:
- // Only descend into a sub-document on proper prefix or wildcard. Note that
- // !exactMatch is a sufficient test for proper prefix match, because of
- // matchPrefix() continue block above.
- if ( !exactMatch || wildcard() ) {
- scoreDocument( elem.Obj(), language, dottedName, false, term_freqs );
- }
- break;
- case Array:
- // Only descend into arrays from non-array parents or on wildcard.
- if ( !isArray || wildcard() ) {
- scoreDocument( elem.Obj(), language, dottedName, true, term_freqs );
- }
- break;
- default:
- // Skip over all other BSON types.
- break;
- }
+ while ( it.more() ) {
+ FTSIteratorValue val = it.next();
+ Stemmer stemmer( *val._language );
+ Tools tools( *val._language, &stemmer, StopWords::getStopWords( *val._language ) );
+ _scoreStringV2( tools, val._text, term_freqs, val._weight );
}
}
@@ -281,19 +188,21 @@ namespace mongo {
string term = t.data.toString();
makeLower( &term );
- if ( tools.stopwords->isStopWord( term ) )
+ if ( tools.stopwords->isStopWord( term ) ) {
continue;
+ }
term = tools.stemmer->stem( term );
ScoreHelperStruct& data = terms[term];
- if ( data.exp )
+ if ( data.exp ) {
data.exp *= 2;
- else
+ }
+ else {
data.exp = 1;
+ }
data.count += 1;
data.freq += ( 1 / data.exp );
-
numTokens++;
}
@@ -554,20 +463,21 @@ namespace mongo {
}
}
- if ( !weights.isEmpty() )
+ if ( !weights.isEmpty() ) {
b.append( "weights", weights );
- if ( !default_language.empty() )
+ }
+ if ( !default_language.empty() ) {
b.append( "default_language", default_language);
- if ( !language_override.empty() )
+ }
+ if ( !language_override.empty() ) {
b.append( "language_override", language_override);
-
- if ( version >= 0 )
+ }
+ if ( version >= 0 ) {
b.append( "v", version );
-
+ }
b.append( "textIndexVersion", textIndexVersion );
return b.obj();
-
}
}
diff --git a/src/mongo/db/fts/fts_spec.h b/src/mongo/db/fts/fts_spec.h
index 570303f181b..f41238009fc 100644
--- a/src/mongo/db/fts/fts_spec.h
+++ b/src/mongo/db/fts/fts_spec.h
@@ -47,9 +47,9 @@ namespace mongo {
extern const double MAX_WEIGHT;
extern const double MAX_WORD_WEIGHT;
+ extern const double DEFAULT_WEIGHT;
typedef std::map<string,double> Weights; // TODO cool map
-
typedef unordered_map<string,double> TermFrequencyMap;
struct ScoreHelperStruct {
@@ -92,17 +92,10 @@ namespace mongo {
/**
* Calculates term/score pairs for a BSONObj as applied to this spec.
- * - "obj": the BSONObj to traverse; can be a subdocument or array
- * - "parentLanguage": nearest enclosing document "language" spec for obj
- * - "parentPath": obj's dotted path in containing document
- * - "isArray": true if obj is an array
- * - "term_freqs": out-parameter to store results
+ * @arg obj document to traverse; can be a subdocument or array
+ * @arg term_freqs output parameter to store (term,score) results
*/
- void scoreDocument( const BSONObj& obj,
- const FTSLanguage& parentLanguage,
- const string& parentPath,
- bool isArray,
- TermFrequencyMap* term_freqs ) const;
+ void scoreDocument( const BSONObj& obj, TermFrequencyMap* term_freqs ) const;
/**
* given a query, pulls out the pieces (in order) that go in the index first
@@ -110,7 +103,6 @@ namespace mongo {
Status getIndexPrefix( const BSONObj& filter, BSONObj* out ) const;
const Weights& weights() const { return _weights; }
-
static BSONObj fixSpec( const BSONObj& spec );
private:
@@ -119,13 +111,6 @@ namespace mongo {
//
/**
- * Get the language override for the given BSON doc. If no language override is
- * specified, returns currentLanguage.
- */
- const FTSLanguage& _getLanguageToUseV2( const BSONObj& userDoc,
- const FTSLanguage& currentLanguage ) const;
-
- /**
* Calculate the term scores for 'raw' and update 'term_freqs' with the result. Parses
* 'raw' using 'tools', and weights term scores based on 'weight'.
*/
@@ -134,6 +119,15 @@ namespace mongo {
TermFrequencyMap* term_freqs,
double weight ) const;
+ public:
+ /**
+ * Get the language override for the given BSON doc. If no language override is
+ * specified, returns currentLanguage.
+ */
+ const FTSLanguage* _getLanguageToUseV2( const BSONObj& userDoc,
+ const FTSLanguage* currentLanguage ) const;
+
+ private:
//
// Deprecated helper methods. Invoked for TEXT_INDEX_VERSION_1 spec objects only.
//
@@ -165,12 +159,13 @@ namespace mongo {
string _languageOverrideField;
bool _wildcard;
- // _weights stores a mapping between the fields and the value as a double
- // basically, how much should an occurence of (query term) in (field) be worth
+ // mapping : fieldname -> weight
Weights _weights;
- // other fields to index
+ // Prefix compound key - used to partition search index
std::vector<string> _extraBefore;
+
+ // Suffix compound key - used for covering index behavior
std::vector<string> _extraAfter;
};
diff --git a/src/mongo/db/fts/fts_spec_test.cpp b/src/mongo/db/fts/fts_spec_test.cpp
index 8323ecc0cea..66590aac53d 100644
--- a/src/mongo/db/fts/fts_spec_test.cpp
+++ b/src/mongo/db/fts/fts_spec_test.cpp
@@ -180,11 +180,7 @@ namespace mongo {
FTSSpec spec( FTSSpec::fixSpec( user ) );
TermFrequencyMap m;
- spec.scoreDocument( BSON( "title" << "cat sat run" ),
- spec.defaultLanguage(),
- "",
- false,
- &m );
+ spec.scoreDocument( BSON( "title" << "cat sat run" ), &m );
ASSERT_EQUALS( 3U, m.size() );
ASSERT_EQUALS( m["cat"], m["sat"] );
ASSERT_EQUALS( m["cat"], m["run"] );
@@ -199,11 +195,7 @@ namespace mongo {
FTSSpec spec( FTSSpec::fixSpec( user ) );
TermFrequencyMap m;
- spec.scoreDocument( BSON( "title" << "cat sat run" << "text" << "cat book" ),
- spec.defaultLanguage(),
- "",
- false,
- &m );
+ spec.scoreDocument( BSON( "title" << "cat sat run" << "text" << "cat book" ), &m );
ASSERT_EQUALS( 4U, m.size() );
ASSERT_EQUALS( m["sat"], m["run"] );
@@ -222,11 +214,7 @@ namespace mongo {
FTSSpec spec( FTSSpec::fixSpec( user ) );
TermFrequencyMap m;
- spec.scoreDocument( BSON( "a" << BSON( "b" << "term" ) ),
- spec.defaultLanguage(),
- "",
- false,
- &m );
+ spec.scoreDocument( BSON( "a" << BSON( "b" << "term" ) ), &m );
ASSERT_EQUALS( 1U, m.size() );
}
@@ -238,11 +226,7 @@ namespace mongo {
FTSSpec spec( FTSSpec::fixSpec( user ) );
TermFrequencyMap m;
- spec.scoreDocument( BSON( "title" << "cat sat sat run run run" ),
- spec.defaultLanguage(),
- "",
- false,
- &m );
+ spec.scoreDocument( BSON( "title" << "cat sat sat run run run" ), &m );
ASSERT_EQUALS( 3U, m.size() );
ASSERT( m["cat"] > 0 );
ASSERT( m["sat"] > m["cat"] );
@@ -311,7 +295,7 @@ namespace mongo {
// The following document matches {"a.b": {$type: 2}}, so "term" should be indexed.
BSONObj obj = fromjson("{a: [{b: ['term']}]}"); // indirectly nested arrays
TermFrequencyMap m;
- spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &m );
+ spec.scoreDocument( obj, &m );
ASSERT_EQUALS( 1U, m.size() );
}
@@ -322,7 +306,7 @@ namespace mongo {
// The wildcard spec implies a full recursive traversal, so "term" should be indexed.
BSONObj obj = fromjson("{a: {b: [['term']]}}"); // directly nested arrays
TermFrequencyMap m;
- spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &m );
+ spec.scoreDocument( obj, &m );
ASSERT_EQUALS( 1U, m.size() );
}
@@ -334,7 +318,7 @@ namespace mongo {
// indexed.
BSONObj obj = fromjson("{a: {b: [['term']]}}"); // directly nested arrays
TermFrequencyMap m;
- spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &m );
+ spec.scoreDocument( obj, &m );
ASSERT_EQUALS( 0U, m.size() );
}
@@ -353,7 +337,7 @@ namespace mongo {
" }"
" }" );
- spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm );
+ spec.scoreDocument( obj, &tfm );
set<string> hits;
hits.insert("walk");
@@ -384,7 +368,7 @@ namespace mongo {
" }"
"}" );
- spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm );
+ spec.scoreDocument( obj, &tfm );
set<string> hits;
hits.insert("foredrag");
@@ -415,7 +399,7 @@ namespace mongo {
" } ]"
"}" );
- spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm );
+ spec.scoreDocument( obj, &tfm );
set<string> hits;
hits.insert("foredrag");
@@ -448,7 +432,7 @@ namespace mongo {
" }"
"}" );
- spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm );
+ spec.scoreDocument( obj, &tfm );
set<string> hits;
hits.insert("foredrag");
@@ -481,7 +465,7 @@ namespace mongo {
" }"
"}" );
- spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm );
+ spec.scoreDocument( obj, &tfm );
set<string> hits;
hits.insert("foredrag");
@@ -516,7 +500,7 @@ namespace mongo {
" }"
"}" );
- spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm );
+ spec.scoreDocument( obj, &tfm );
set<string> hits;
hits.insert("foredrag");
@@ -540,7 +524,7 @@ namespace mongo {
BSONObj indexSpec = fromjson( "{key: {'a.b': 'text'}, textIndexVersion: 1}" );
FTSSpec spec( FTSSpec::fixSpec( indexSpec ) );
TermFrequencyMap tfm;
- spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm );
+ spec.scoreDocument( obj, &tfm );
ASSERT_EQUALS( tfm.size(), 0U );
}
@@ -549,7 +533,7 @@ namespace mongo {
BSONObj indexSpec = fromjson( "{key: {'a.b': 'text'}, textIndexVersion: 2}" );
FTSSpec spec( FTSSpec::fixSpec( indexSpec ) );
TermFrequencyMap tfm;
- spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm );
+ spec.scoreDocument( obj, &tfm );
ASSERT_EQUALS( tfm.size(), 1U );
}
}
@@ -564,7 +548,7 @@ namespace mongo {
BSONObj indexSpec = fromjson( "{key: {'a': 'text'}, textIndexVersion: 1}" );
FTSSpec spec( FTSSpec::fixSpec( indexSpec ) );
TermFrequencyMap tfm;
- spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm );
+ spec.scoreDocument( obj, &tfm );
ASSERT_EQUALS( tfm.size(), 1U ); // "the" not recognized as stopword
}
@@ -573,7 +557,7 @@ namespace mongo {
BSONObj indexSpec = fromjson( "{key: {'a': 'text'}, textIndexVersion: 2}" );
FTSSpec spec( FTSSpec::fixSpec( indexSpec ) );
TermFrequencyMap tfm;
- spec.scoreDocument( obj, spec.defaultLanguage(), "", false, &tfm );
+ spec.scoreDocument( obj, &tfm );
ASSERT_EQUALS( tfm.size(), 0U ); // "the" recognized as stopword
}
}