summaryrefslogtreecommitdiff
path: root/src/mongo/db/fts/fts_spec.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/fts/fts_spec.cpp')
-rw-r--r--src/mongo/db/fts/fts_spec.cpp22
1 files changed, 15 insertions, 7 deletions
diff --git a/src/mongo/db/fts/fts_spec.cpp b/src/mongo/db/fts/fts_spec.cpp
index 9e68835e83b..fdd9ecf7824 100644
--- a/src/mongo/db/fts/fts_spec.cpp
+++ b/src/mongo/db/fts/fts_spec.cpp
@@ -33,7 +33,6 @@
#include "mongo/db/field_ref.h"
#include "mongo/db/fts/fts_element_iterator.h"
-#include "mongo/db/fts/fts_tokenizer.h"
#include "mongo/db/fts/fts_util.h"
#include "mongo/util/mongoutils/str.h"
#include "mongo/util/stringutils.h"
@@ -168,12 +167,13 @@ namespace mongo {
while ( it.more() ) {
FTSIteratorValue val = it.next();
- std::unique_ptr<FTSTokenizer> tokenizer(val._language->createTokenizer());
- _scoreStringV2( tokenizer.get(), val._text, term_freqs, val._weight );
+ Stemmer stemmer( *val._language );
+ Tools tools( *val._language, &stemmer, StopWords::getStopWords( *val._language ) );
+ _scoreStringV2( tools, val._text, term_freqs, val._weight );
}
}
- void FTSSpec::_scoreStringV2( FTSTokenizer* tokenizer,
+ void FTSSpec::_scoreStringV2( const Tools& tools,
StringData raw,
TermFrequencyMap* docScores,
double weight ) const {
@@ -182,10 +182,18 @@ namespace mongo {
unsigned numTokens = 0;
- tokenizer->reset(raw.rawData(), false );
+ Tokenizer i( tools.language, raw );
+ while ( i.more() ) {
+ Token t = i.next();
+ if ( t.type != Token::TEXT ) {
+ continue;
+ }
- while (tokenizer->moveNext()) {
- string term = tokenizer->get().toString();
+ string term = tolowerString( t.data );
+ if ( tools.stopwords->isStopWord( term ) ) {
+ continue;
+ }
+ term = tools.stemmer->stem( term );
ScoreHelperStruct& data = terms[term];