diff options
Diffstat (limited to 'src/mongo/db/fts/fts_basic_tokenizer.cpp')
-rw-r--r-- | src/mongo/db/fts/fts_basic_tokenizer.cpp | 84 |
1 files changed, 41 insertions, 43 deletions
diff --git a/src/mongo/db/fts/fts_basic_tokenizer.cpp b/src/mongo/db/fts/fts_basic_tokenizer.cpp index 2d5cc493123..9fc41923d40 100644 --- a/src/mongo/db/fts/fts_basic_tokenizer.cpp +++ b/src/mongo/db/fts/fts_basic_tokenizer.cpp @@ -42,56 +42,54 @@ namespace mongo { namespace fts { - using std::string; - - BasicFTSTokenizer::BasicFTSTokenizer(const FTSLanguage* language) - : _language(language), _stemmer(language), _stopWords(StopWords::getStopWords(language)) { - } - - void BasicFTSTokenizer::reset(StringData document, Options options) { - _options = options; - _document = document.toString(); - _tokenizer = stdx::make_unique<Tokenizer>(_language, _document); - } - - bool BasicFTSTokenizer::moveNext() { - while (true) { - bool hasMore = _tokenizer->more(); - if (!hasMore) { - _stem = ""; - return false; - } - - Token token = _tokenizer->next(); +using std::string; + +BasicFTSTokenizer::BasicFTSTokenizer(const FTSLanguage* language) + : _language(language), _stemmer(language), _stopWords(StopWords::getStopWords(language)) {} + +void BasicFTSTokenizer::reset(StringData document, Options options) { + _options = options; + _document = document.toString(); + _tokenizer = stdx::make_unique<Tokenizer>(_language, _document); +} + +bool BasicFTSTokenizer::moveNext() { + while (true) { + bool hasMore = _tokenizer->more(); + if (!hasMore) { + _stem = ""; + return false; + } - // Do not return delimiters - if (token.type != Token::TEXT) { - continue; - } + Token token = _tokenizer->next(); - string word = token.data.toString(); + // Do not return delimiters + if (token.type != Token::TEXT) { + continue; + } - word = tolowerString(token.data); + string word = token.data.toString(); - // Stop words are case-sensitive so we need them to be lower cased to check - // against the stop word list - if ((_options & FTSTokenizer::FilterStopWords) && - _stopWords->isStopWord(word)) { - continue; - } + word = tolowerString(token.data); - if (_options & FTSTokenizer::GenerateCaseSensitiveTokens) { - word = token.data.toString(); - } + // Stop words are case-sensitive so we need them to be lower cased to check + // against the stop word list + if ((_options & FTSTokenizer::FilterStopWords) && _stopWords->isStopWord(word)) { + continue; + } - _stem = _stemmer.stem(word); - return true; + if (_options & FTSTokenizer::GenerateCaseSensitiveTokens) { + word = token.data.toString(); } - } - StringData BasicFTSTokenizer::get() const { - return _stem; + _stem = _stemmer.stem(word); + return true; } +} + +StringData BasicFTSTokenizer::get() const { + return _stem; +} -} // namespace fts -} // namespace mongo +} // namespace fts +} // namespace mongo |