diff options
author | Jason Rassi <rassi@10gen.com> | 2014-01-29 18:44:16 -0500 |
---|---|---|
committer | Jason Rassi <rassi@10gen.com> | 2014-01-29 18:44:32 -0500 |
commit | fc2cbaf512630326868d8c948e2524cc8a4328ee (patch) | |
tree | fd338315422fa983b0fac408717c0eb68bf87465 /src/mongo/db/fts/fts_spec.cpp | |
parent | a195fdda02361abf7e74f8a99dc80550f46c2f84 (diff) | |
parent | 8afbec33f4de266a552f50c608621053523a7a15 (diff) | |
download | mongo-fc2cbaf512630326868d8c948e2524cc8a4328ee.tar.gz |
Merge branch 'paul/ftsiterator'
Conflicts:
src/mongo/db/fts/fts_spec.cpp
src/mongo/db/fts/fts_spec.h
src/mongo/db/fts/fts_spec_test.cpp
Diffstat (limited to 'src/mongo/db/fts/fts_spec.cpp')
-rw-r--r-- | src/mongo/db/fts/fts_spec.cpp | 140 |
1 files changed, 25 insertions, 115 deletions
diff --git a/src/mongo/db/fts/fts_spec.cpp b/src/mongo/db/fts/fts_spec.cpp index fc692b097d6..176e8a0bd84 100644 --- a/src/mongo/db/fts/fts_spec.cpp +++ b/src/mongo/db/fts/fts_spec.cpp @@ -1,5 +1,4 @@ // fts_spec.cpp - /** * Copyright (C) 2012 10gen Inc. * @@ -33,6 +32,7 @@ #include "mongo/db/fts/fts_spec.h" #include "mongo/db/field_ref.h" +#include "mongo/db/fts/fts_iterator.h" #include "mongo/db/fts/fts_util.h" #include "mongo/util/mongoutils/str.h" #include "mongo/util/stringutils.h" @@ -140,8 +140,8 @@ namespace mongo { } } - const FTSLanguage& FTSSpec::_getLanguageToUseV2( const BSONObj& userDoc, - const FTSLanguage& currentLanguage ) const { + const FTSLanguage* FTSSpec::_getLanguageToUseV2( const BSONObj& userDoc, + const FTSLanguage* currentLanguage ) const { BSONElement e = userDoc[_languageOverrideField]; if ( e.eoo() ) { return currentLanguage; @@ -153,114 +153,21 @@ namespace mongo { uassert( 17262, "language override unsupported: " + e.String(), swl.getStatus().isOK() ); - return *swl.getValue(); - } - - - - namespace { - /** - * Check for exact match or path prefix match. - */ - inline bool _matchPrefix( const string& dottedName, const string& weight ) { - if ( weight == dottedName ) { - return true; - } - return str::startsWith( weight, dottedName + '.' ); - } + return swl.getValue(); } - void FTSSpec::scoreDocument( const BSONObj& obj, - const FTSLanguage& parentLanguage, - const string& parentPath, - bool isArray, - TermFrequencyMap* term_freqs ) const { - + void FTSSpec::scoreDocument( const BSONObj& obj, TermFrequencyMap* term_freqs ) const { if ( _textIndexVersion == TEXT_INDEX_VERSION_1 ) { - dassert( parentPath == "" ); - dassert( !isArray ); return _scoreDocumentV1( obj, term_freqs ); } - const FTSLanguage& language = _getLanguageToUseV2( obj, parentLanguage ); - Stemmer stemmer( language ); - Tools tools( language, &stemmer, StopWords::getStopWords( language ) ); - - // Perform a depth-first traversal of obj, skipping fields not touched by this spec. - BSONObjIterator j( obj ); - while ( j.more() ) { - - BSONElement elem = j.next(); - string fieldName = elem.fieldName(); - - // Skip "language" specifier fields if wildcard. - if ( wildcard() && languageOverrideField() == fieldName ) { - continue; - } - - // Compose the dotted name of the current field: - // 1. parent path empty (top level): use the current field name - // 2. parent path non-empty and obj is an array: use the parent path - // 3. parent path non-empty and obj is a sub-doc: append field name to parent path - string dottedName = ( parentPath.empty() ? fieldName - : isArray ? parentPath - : parentPath + '.' + fieldName ); - - // Find lower bound of dottedName in _weights. lower_bound leaves us at the first - // weight that could possibly match or be a prefix of dottedName. And if this - // element fails to match, then no subsequent weight can match, since the weights - // are lexicographically ordered. - Weights::const_iterator i = _weights.lower_bound( elem.type() == Object - ? dottedName + '.' - : dottedName ); - - // possibleWeightMatch is set if the weight map contains either a match or some item - // lexicographically larger than fieldName. This boolean acts as a guard on - // dereferences of iterator 'i'. - bool possibleWeightMatch = ( i != _weights.end() ); - - // Optimize away two cases, when not wildcard: - // 1. lower_bound seeks to end(): no prefix match possible - // 2. lower_bound seeks to a name which is not a prefix - if ( !wildcard() ) { - if ( !possibleWeightMatch ) { - continue; - } - else if ( !_matchPrefix( dottedName, i->first ) ) { - continue; - } - } - - // Is the current field an exact match on a weight? - bool exactMatch = ( possibleWeightMatch && i->first == dottedName ); - - double weight = ( possibleWeightMatch ? i->second : DEFAULT_WEIGHT ); + FTSElementIterator it( *this, obj ); - switch ( elem.type() ) { - case String: - // Only index strings on exact match or wildcard. - if ( exactMatch || wildcard() ) { - _scoreStringV2( tools, elem.valuestr(), term_freqs, weight ); - } - break; - case Object: - // Only descend into a sub-document on proper prefix or wildcard. Note that - // !exactMatch is a sufficient test for proper prefix match, because of - // matchPrefix() continue block above. - if ( !exactMatch || wildcard() ) { - scoreDocument( elem.Obj(), language, dottedName, false, term_freqs ); - } - break; - case Array: - // Only descend into arrays from non-array parents or on wildcard. - if ( !isArray || wildcard() ) { - scoreDocument( elem.Obj(), language, dottedName, true, term_freqs ); - } - break; - default: - // Skip over all other BSON types. - break; - } + while ( it.more() ) { + FTSIteratorValue val = it.next(); + Stemmer stemmer( *val._language ); + Tools tools( *val._language, &stemmer, StopWords::getStopWords( *val._language ) ); + _scoreStringV2( tools, val._text, term_freqs, val._weight ); } } @@ -281,19 +188,21 @@ namespace mongo { string term = t.data.toString(); makeLower( &term ); - if ( tools.stopwords->isStopWord( term ) ) + if ( tools.stopwords->isStopWord( term ) ) { continue; + } term = tools.stemmer->stem( term ); ScoreHelperStruct& data = terms[term]; - if ( data.exp ) + if ( data.exp ) { data.exp *= 2; - else + } + else { data.exp = 1; + } data.count += 1; data.freq += ( 1 / data.exp ); - numTokens++; } @@ -554,20 +463,21 @@ namespace mongo { } } - if ( !weights.isEmpty() ) + if ( !weights.isEmpty() ) { b.append( "weights", weights ); - if ( !default_language.empty() ) + } + if ( !default_language.empty() ) { b.append( "default_language", default_language); - if ( !language_override.empty() ) + } + if ( !language_override.empty() ) { b.append( "language_override", language_override); - - if ( version >= 0 ) + } + if ( version >= 0 ) { b.append( "v", version ); - + } b.append( "textIndexVersion", textIndexVersion ); return b.obj(); - } } |