summary | refs | log | tree | commit | diff
path: root/src/mongo/db/fts/fts_spec.cpp
diff options
context:
space:
mode:
author: Mark Benvenuto <mark.benvenuto@mongodb.com> 2015-04-01 14:34:39 -0400
committer: Mark Benvenuto <mark.benvenuto@mongodb.com> 2015-04-01 14:58:13 -0400
commit: 72598f750d732c08c98f5f578bf1335acd78e10e (patch)
tree: d80364b07b25210f5724ba6e6506650be657c74e /src/mongo/db/fts/fts_spec.cpp
parent: 3cf0c18aa2c56949fda47ab35570489d68965370 (diff)
download: mongo-72598f750d732c08c98f5f578bf1335acd78e10e.tar.gz
SERVER-17520: Add support for pluggable FTS tokenizers
Diffstat (limited to 'src/mongo/db/fts/fts_spec.cpp')
-rw-r--r-- src/mongo/db/fts/fts_spec.cpp 22
1 files changed, 7 insertions, 15 deletions
diff --git a/src/mongo/db/fts/fts_spec.cpp b/src/mongo/db/fts/fts_spec.cpp
index fdd9ecf7824..9e68835e83b 100644
--- a/src/mongo/db/fts/fts_spec.cpp
+++ b/src/mongo/db/fts/fts_spec.cpp
@@ -33,6 +33,7 @@
#include "mongo/db/field_ref.h"
#include "mongo/db/fts/fts_element_iterator.h"
+#include "mongo/db/fts/fts_tokenizer.h"
#include "mongo/db/fts/fts_util.h"
#include "mongo/util/mongoutils/str.h"
#include "mongo/util/stringutils.h"
@@ -167,13 +168,12 @@ namespace mongo {
while ( it.more() ) {
FTSIteratorValue val = it.next();
- Stemmer stemmer( *val._language );
- Tools tools( *val._language, &stemmer, StopWords::getStopWords( *val._language ) );
- _scoreStringV2( tools, val._text, term_freqs, val._weight );
+ std::unique_ptr<FTSTokenizer> tokenizer(val._language->createTokenizer());
+ _scoreStringV2( tokenizer.get(), val._text, term_freqs, val._weight );
}
}
- void FTSSpec::_scoreStringV2( const Tools& tools,
+ void FTSSpec::_scoreStringV2( FTSTokenizer* tokenizer,
StringData raw,
TermFrequencyMap* docScores,
double weight ) const {
@@ -182,18 +182,10 @@ namespace mongo {
unsigned numTokens = 0;
- Tokenizer i( tools.language, raw );
- while ( i.more() ) {
- Token t = i.next();
- if ( t.type != Token::TEXT ) {
- continue;
- }
+ tokenizer->reset(raw.rawData(), false );
- string term = tolowerString( t.data );
- if ( tools.stopwords->isStopWord( term ) ) {
- continue;
- }
- term = tools.stemmer->stem( term );
+ while (tokenizer->moveNext()) {
+ string term = tokenizer->get().toString();
ScoreHelperStruct& data = terms[term];