summary refs log tree commit diff
path: root/src/mongo/db/fts/fts_basic_tokenizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/fts/fts_basic_tokenizer.cpp')
-rw-r--r-- src/mongo/db/fts/fts_basic_tokenizer.cpp 84
1 files changed, 41 insertions, 43 deletions
diff --git a/src/mongo/db/fts/fts_basic_tokenizer.cpp b/src/mongo/db/fts/fts_basic_tokenizer.cpp
index 2d5cc493123..9fc41923d40 100644
--- a/src/mongo/db/fts/fts_basic_tokenizer.cpp
+++ b/src/mongo/db/fts/fts_basic_tokenizer.cpp
@@ -42,56 +42,54 @@
namespace mongo {
namespace fts {
- using std::string;
-
- BasicFTSTokenizer::BasicFTSTokenizer(const FTSLanguage* language)
- : _language(language), _stemmer(language), _stopWords(StopWords::getStopWords(language)) {
- }
-
- void BasicFTSTokenizer::reset(StringData document, Options options) {
- _options = options;
- _document = document.toString();
- _tokenizer = stdx::make_unique<Tokenizer>(_language, _document);
- }
-
- bool BasicFTSTokenizer::moveNext() {
- while (true) {
- bool hasMore = _tokenizer->more();
- if (!hasMore) {
- _stem = "";
- return false;
- }
-
- Token token = _tokenizer->next();
+using std::string;
+
+BasicFTSTokenizer::BasicFTSTokenizer(const FTSLanguage* language)
+ : _language(language), _stemmer(language), _stopWords(StopWords::getStopWords(language)) {}
+
+void BasicFTSTokenizer::reset(StringData document, Options options) {
+ _options = options;
+ _document = document.toString();
+ _tokenizer = stdx::make_unique<Tokenizer>(_language, _document);
+}
+
+bool BasicFTSTokenizer::moveNext() {
+ while (true) {
+ bool hasMore = _tokenizer->more();
+ if (!hasMore) {
+ _stem = "";
+ return false;
+ }
- // Do not return delimiters
- if (token.type != Token::TEXT) {
- continue;
- }
+ Token token = _tokenizer->next();
- string word = token.data.toString();
+ // Do not return delimiters
+ if (token.type != Token::TEXT) {
+ continue;
+ }
- word = tolowerString(token.data);
+ string word = token.data.toString();
- // Stop words are case-sensitive so we need them to be lower cased to check
- // against the stop word list
- if ((_options & FTSTokenizer::FilterStopWords) &&
- _stopWords->isStopWord(word)) {
- continue;
- }
+ word = tolowerString(token.data);
- if (_options & FTSTokenizer::GenerateCaseSensitiveTokens) {
- word = token.data.toString();
- }
+ // Stop words are case-sensitive so we need them to be lower cased to check
+ // against the stop word list
+ if ((_options & FTSTokenizer::FilterStopWords) && _stopWords->isStopWord(word)) {
+ continue;
+ }
- _stem = _stemmer.stem(word);
- return true;
+ if (_options & FTSTokenizer::GenerateCaseSensitiveTokens) {
+ word = token.data.toString();
}
- }
- StringData BasicFTSTokenizer::get() const {
- return _stem;
+ _stem = _stemmer.stem(word);
+ return true;
}
+}
+
+StringData BasicFTSTokenizer::get() const {
+ return _stem;
+}
-} // namespace fts
-} // namespace mongo
+} // namespace fts
+} // namespace mongo