summary | refs | log | tree | commit | diff
path: root/src/mongo/db/fts/tokenizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/fts/tokenizer.cpp')
-rw-r--r-- src/mongo/db/fts/tokenizer.cpp 194
1 files changed, 96 insertions, 98 deletions
diff --git a/src/mongo/db/fts/tokenizer.cpp b/src/mongo/db/fts/tokenizer.cpp
index 01037a85c8d..e1f595b9a4a 100644
--- a/src/mongo/db/fts/tokenizer.cpp
+++ b/src/mongo/db/fts/tokenizer.cpp
@@ -36,105 +36,103 @@
namespace mongo {
- namespace fts {
-
- Tokenizer::Tokenizer(const FTSLanguage* language, StringData str)
- : _pos(0), _raw( str ) {
- _english = ( language->str() == "english" );
- _skipWhitespace();
- }
-
- bool Tokenizer::more() const {
- return _pos < _raw.size();
- }
-
- Token Tokenizer::next() {
- if ( _pos >= _raw.size() )
- return Token(Token::INVALID, "", 0);
-
- unsigned start = _pos++;
- Token::Type type = _type( _raw[start] );
- if ( type == Token::WHITESPACE ) invariant( false );
-
- if ( type == Token::TEXT )
- while ( _pos < _raw.size() && _type( _raw[_pos] ) == type )
- _pos++;
-
- StringData ret = _raw.substr( start, _pos - start );
- _skipWhitespace();
- return Token( type, ret, start );
- }
-
-
- bool Tokenizer::_skipWhitespace() {
- unsigned start = _pos;
- while ( _pos < _raw.size() && _type( _raw[_pos] ) == Token::WHITESPACE )
- _pos++;
- return _pos > start;
- }
-
-
- Token::Type Tokenizer::_type( char c ) const {
- switch ( c ) {
- case ' ':
- case '\f':
- case '\v':
- case '\t':
- case '\r':
- case '\n':
- return Token::WHITESPACE;
- case '\'':
- if ( _english )
- return Token::TEXT;
- else
- return Token::WHITESPACE;
-
- case '~':
- case '`':
-
- case '!':
- case '@':
- case '#':
- case '$':
- case '%':
- case '^':
- case '&':
- case '*':
- case '(':
- case ')':
-
- case '-':
-
- case '=':
- case '+':
-
- case '[':
- case ']':
- case '{':
- case '}':
- case '|':
- case '\\':
-
- case ';':
- case ':':
-
- case '"':
-
- case '<':
- case '>':
-
- case ',':
- case '.':
-
- case '/':
- case '?':
-
- return Token::DELIMITER;
- default:
+namespace fts {
+
+Tokenizer::Tokenizer(const FTSLanguage* language, StringData str) : _pos(0), _raw(str) {
+ _english = (language->str() == "english");
+ _skipWhitespace();
+}
+
+bool Tokenizer::more() const {
+ return _pos < _raw.size();
+}
+
+Token Tokenizer::next() {
+ if (_pos >= _raw.size())
+ return Token(Token::INVALID, "", 0);
+
+ unsigned start = _pos++;
+ Token::Type type = _type(_raw[start]);
+ if (type == Token::WHITESPACE)
+ invariant(false);
+
+ if (type == Token::TEXT)
+ while (_pos < _raw.size() && _type(_raw[_pos]) == type)
+ _pos++;
+
+ StringData ret = _raw.substr(start, _pos - start);
+ _skipWhitespace();
+ return Token(type, ret, start);
+}
+
+
+bool Tokenizer::_skipWhitespace() {
+ unsigned start = _pos;
+ while (_pos < _raw.size() && _type(_raw[_pos]) == Token::WHITESPACE)
+ _pos++;
+ return _pos > start;
+}
+
+
+Token::Type Tokenizer::_type(char c) const {
+ switch (c) {
+ case ' ':
+ case '\f':
+ case '\v':
+ case '\t':
+ case '\r':
+ case '\n':
+ return Token::WHITESPACE;
+ case '\'':
+ if (_english)
return Token::TEXT;
- }
- }
+ else
+ return Token::WHITESPACE;
- }
+ case '~':
+ case '`':
+
+ case '!':
+ case '@':
+ case '#':
+ case '$':
+ case '%':
+ case '^':
+ case '&':
+ case '*':
+ case '(':
+ case ')':
+
+ case '-':
+
+ case '=':
+ case '+':
+
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case '|':
+ case '\\':
+
+ case ';':
+ case ':':
+ case '"':
+
+ case '<':
+ case '>':
+
+ case ',':
+ case '.':
+
+ case '/':
+ case '?':
+
+ return Token::DELIMITER;
+ default:
+ return Token::TEXT;
+ }
+}
+}
}