diff options
author | Geert Bosch <geert@mongodb.com> | 2015-03-30 15:43:54 -0400 |
---|---|---|
committer | Geert Bosch <geert@mongodb.com> | 2015-03-30 15:43:54 -0400 |
commit | edc67399aef9bded106c0196d4af843f23a8acc9 (patch) | |
tree | dbb5cebdf54af86e9ee28ced798377579f393722 /src/mongo/db/fts/fts_spec.cpp | |
parent | 465bb26c0fb0f4731f4dbb5e09e0a791177bbc64 (diff) | |
download | mongo-edc67399aef9bded106c0196d4af843f23a8acc9.tar.gz |
Revert "FTS Tokenizer"
This reverts commit 0bed4262dac849788e6571dc404d5d261b9e1c8c.
Diffstat (limited to 'src/mongo/db/fts/fts_spec.cpp')
-rw-r--r-- | src/mongo/db/fts/fts_spec.cpp | 22 |
1 files changed, 15 insertions, 7 deletions
diff --git a/src/mongo/db/fts/fts_spec.cpp b/src/mongo/db/fts/fts_spec.cpp index 9e68835e83b..fdd9ecf7824 100644 --- a/src/mongo/db/fts/fts_spec.cpp +++ b/src/mongo/db/fts/fts_spec.cpp @@ -33,7 +33,6 @@ #include "mongo/db/field_ref.h" #include "mongo/db/fts/fts_element_iterator.h" -#include "mongo/db/fts/fts_tokenizer.h" #include "mongo/db/fts/fts_util.h" #include "mongo/util/mongoutils/str.h" #include "mongo/util/stringutils.h" @@ -168,12 +167,13 @@ namespace mongo { while ( it.more() ) { FTSIteratorValue val = it.next(); - std::unique_ptr<FTSTokenizer> tokenizer(val._language->createTokenizer()); - _scoreStringV2( tokenizer.get(), val._text, term_freqs, val._weight ); + Stemmer stemmer( *val._language ); + Tools tools( *val._language, &stemmer, StopWords::getStopWords( *val._language ) ); + _scoreStringV2( tools, val._text, term_freqs, val._weight ); } } - void FTSSpec::_scoreStringV2( FTSTokenizer* tokenizer, + void FTSSpec::_scoreStringV2( const Tools& tools, StringData raw, TermFrequencyMap* docScores, double weight ) const { @@ -182,10 +182,18 @@ namespace mongo { unsigned numTokens = 0; - tokenizer->reset(raw.rawData(), false ); + Tokenizer i( tools.language, raw ); + while ( i.more() ) { + Token t = i.next(); + if ( t.type != Token::TEXT ) { + continue; + } - while (tokenizer->moveNext()) { - string term = tokenizer->get().toString(); + string term = tolowerString( t.data ); + if ( tools.stopwords->isStopWord( term ) ) { + continue; + } + term = tools.stemmer->stem( term ); ScoreHelperStruct& data = terms[term]; |