summary | refs | log | tree | commit | diff
path: root/src/mongo/db/fts/fts_spec.cpp
diff options
context:
space:
mode:
author: Geert Bosch <geert@mongodb.com> 2015-03-30 15:43:54 -0400
committer: Geert Bosch <geert@mongodb.com> 2015-03-30 15:43:54 -0400
commit: edc67399aef9bded106c0196d4af843f23a8acc9 (patch)
tree: dbb5cebdf54af86e9ee28ced798377579f393722 /src/mongo/db/fts/fts_spec.cpp
parent: 465bb26c0fb0f4731f4dbb5e09e0a791177bbc64 (diff)
download: mongo-edc67399aef9bded106c0196d4af843f23a8acc9.tar.gz
Revert "FTS Tokenizer"
This reverts commit 0bed4262dac849788e6571dc404d5d261b9e1c8c.
Diffstat (limited to 'src/mongo/db/fts/fts_spec.cpp')
-rw-r--r-- src/mongo/db/fts/fts_spec.cpp | 22
1 files changed, 15 insertions, 7 deletions
diff --git a/src/mongo/db/fts/fts_spec.cpp b/src/mongo/db/fts/fts_spec.cpp
index 9e68835e83b..fdd9ecf7824 100644
--- a/src/mongo/db/fts/fts_spec.cpp
+++ b/src/mongo/db/fts/fts_spec.cpp
@@ -33,7 +33,6 @@
#include "mongo/db/field_ref.h"
#include "mongo/db/fts/fts_element_iterator.h"
-#include "mongo/db/fts/fts_tokenizer.h"
#include "mongo/db/fts/fts_util.h"
#include "mongo/util/mongoutils/str.h"
#include "mongo/util/stringutils.h"
@@ -168,12 +167,13 @@ namespace mongo {
while ( it.more() ) {
FTSIteratorValue val = it.next();
- std::unique_ptr<FTSTokenizer> tokenizer(val._language->createTokenizer());
- _scoreStringV2( tokenizer.get(), val._text, term_freqs, val._weight );
+ Stemmer stemmer( *val._language );
+ Tools tools( *val._language, &stemmer, StopWords::getStopWords( *val._language ) );
+ _scoreStringV2( tools, val._text, term_freqs, val._weight );
}
}
- void FTSSpec::_scoreStringV2( FTSTokenizer* tokenizer,
+ void FTSSpec::_scoreStringV2( const Tools& tools,
StringData raw,
TermFrequencyMap* docScores,
double weight ) const {
@@ -182,10 +182,18 @@ namespace mongo {
unsigned numTokens = 0;
- tokenizer->reset(raw.rawData(), false );
+ Tokenizer i( tools.language, raw );
+ while ( i.more() ) {
+ Token t = i.next();
+ if ( t.type != Token::TEXT ) {
+ continue;
+ }
- while (tokenizer->moveNext()) {
- string term = tokenizer->get().toString();
+ string term = tolowerString( t.data );
+ if ( tools.stopwords->isStopWord( term ) ) {
+ continue;
+ }
+ term = tools.stemmer->stem( term );
ScoreHelperStruct& data = terms[term];