diff options
Diffstat (limited to 'qpid/cpp/src/qpid/broker/SelectorToken.cpp')
-rw-r--r-- | qpid/cpp/src/qpid/broker/SelectorToken.cpp | 173 |
1 files changed, 66 insertions, 107 deletions
diff --git a/qpid/cpp/src/qpid/broker/SelectorToken.cpp b/qpid/cpp/src/qpid/broker/SelectorToken.cpp index 3540ee158f..1e84834e18 100644 --- a/qpid/cpp/src/qpid/broker/SelectorToken.cpp +++ b/qpid/cpp/src/qpid/broker/SelectorToken.cpp @@ -52,38 +52,6 @@ void skipWS(std::string::const_iterator& s, std::string::const_iterator& e) } } -bool tokeniseEos(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok) -{ - if ( s!=e ) return false; - - tok = Token(T_EOS, ""); - return true; -} - -inline bool isIdentifierStart(char c) -{ - return std::isalpha(c) || c=='_' || c=='$'; -} - -inline bool isIdentifierPart(char c) -{ - return std::isalnum(c) || c=='_' || c=='$' || c=='.'; -} - -bool tokeniseIdentifier(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok) -{ - // Be sure that first char is alphanumeric or _ or $ - if ( s==e || !isIdentifierStart(*s) ) return false; - - std::string::const_iterator t = s; - - while ( ++s!=e && isIdentifierPart(*s) ); - - tok = Token(T_IDENTIFIER, t, s); - - return true; -} - // Lexically, reserved words are a subset of identifiers // so we parse an identifier first then check if it is a reserved word and // convert it if it is a reserved word @@ -145,27 +113,11 @@ bool tokeniseReservedWord(Token& tok) return true; } -// This is really only used for testing -bool tokeniseReservedWord(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok) -{ - std::string::const_iterator p = s; - bool r = tokeniseIdentifier(p, e, tok) && tokeniseReservedWord(tok); - if (r) s = p; - return r; -} - -bool tokeniseIdentifierOrReservedWord(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok) -{ - bool r = tokeniseIdentifier(s, e, tok); - if (r) (void) tokeniseReservedWord(tok); - return r; -} - // parsing strings is complicated by the need to allow "''" as an embedded single quote -bool tokeniseString(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok) +bool processString(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok) { - if ( s==e || *s != '\'' ) return false; - + // We only get here once the tokeniser recognises the initial quote for a string + // so we don't need to check for it again. std::string::const_iterator q = std::find(s+1, e, '\''); if ( q==e ) return false; @@ -185,42 +137,17 @@ bool tokeniseString(std::string::const_iterator& s, std::string::const_iterator& return true; } -bool tokeniseParens(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok) -{ - if ( s==e) return false; - if ( *s=='(' ) { - tok = Token (T_LPAREN, s, s+1); - ++s; - return true; - } - if ( *s==')' ) { - tok = Token (T_RPAREN, s, s+1); - ++s; - return true; - } - return false; -} - -inline bool isOperatorPart(char c) +inline bool isIdentifierStart(char c) { - return !std::isalnum(c) && !std::isspace(c) && c!='_' && c!='$' && c!='(' && c!=')' && c!= '\''; + return std::isalpha(c) || c=='_' || c=='$'; } -// These lexical tokens contain no alphanumerics - this is broader than actual operators but -// works. -bool tokeniseOperator(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok) +inline bool isIdentifierPart(char c) { - if ( s==e || !isOperatorPart(*s) ) return false; - - std::string::const_iterator t = s; - - while (++s!=e && isOperatorPart(*s)); - - tok = Token(T_OPERATOR, t, s); - return true; + return std::isalnum(c) || c=='_' || c=='$' || c=='.'; } -bool tokeniseNumeric(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok) +bool tokenise(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok) { std::string::const_iterator t = s; @@ -228,65 +155,102 @@ bool tokeniseNumeric(std::string::const_iterator& s, std::string::const_iterator enum { START, REJECT, + IDENTIFIER, DIGIT, DECIMAL_START, DECIMAL, EXPONENT_SIGN, EXPONENT_START, EXPONENT, - ACCEPT_EXACT, - ACCEPT_INEXACT + ACCEPT_IDENTIFIER, + ACCEPT_INC, + ACCEPT_NOINC } state = START; + TokenType tokType = T_EOS; while (true) switch (state) { case START: - if (t==e) {state = REJECT;} + if (t==e) {tok = Token(T_EOS, ""); return true;} + else switch (*t) { + case '(': tokType = T_LPAREN; state = ACCEPT_INC; continue; + case ')': tokType = T_RPAREN; state = ACCEPT_INC; continue; + case ',': tokType = T_COMMA; state = ACCEPT_INC; continue; + case '+': tokType = T_PLUS; state = ACCEPT_INC; continue; + case '-': tokType = T_MINUS; state = ACCEPT_INC; continue; + case '*': tokType = T_MULT; state = ACCEPT_INC; continue; + case '/': tokType = T_DIV; state = ACCEPT_INC; continue; + case '=': tokType = T_EQUAL; state = ACCEPT_INC; continue; + case '<': + ++t; + if (t==e || (*t!='>' && *t!='=')) + {tokType = T_LESS; state = ACCEPT_NOINC; continue; } + else + {tokType = (*t=='>') ? T_NEQ : T_LSEQ; state = ACCEPT_INC; continue; } + case '>': + ++t; + if (t==e || *t!='=') + {tokType = T_GRT; state = ACCEPT_NOINC; continue;} + else + {tokType = T_GREQ; state = ACCEPT_INC; continue;} + default: + break; + } + if (isIdentifierStart(*t)) {++t; state = IDENTIFIER;} + else if (*t=='\'') {return processString(s, e, tok);} else if (std::isdigit(*t)) {++t; state = DIGIT;} else if (*t=='.') {++t; state = DECIMAL_START;} else state = REJECT; - break; + continue; + case IDENTIFIER: + if (t==e) {state = ACCEPT_IDENTIFIER;} + else if (isIdentifierPart(*t)) {++t; state = IDENTIFIER;} + else state = ACCEPT_IDENTIFIER; + continue; case DECIMAL_START: if (t==e) {state = REJECT;} else if (std::isdigit(*t)) {++t; state = DECIMAL;} else state = REJECT; - break; + continue; case EXPONENT_SIGN: if (t==e) {state = REJECT;} else if (*t=='-' || *t=='+') {++t; state = EXPONENT_START;} else if (std::isdigit(*t)) {++t; state = EXPONENT;} else state = REJECT; - break; + continue; case EXPONENT_START: if (t==e) {state = REJECT;} else if (std::isdigit(*t)) {++t; state = EXPONENT;} else state = REJECT; - break; + continue; case DIGIT: - if (t==e) {state = ACCEPT_EXACT;} + if (t==e) {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;} else if (std::isdigit(*t)) {++t; state = DIGIT;} else if (*t=='.') {++t; state = DECIMAL;} else if (*t=='e' || *t=='E') {++t; state = EXPONENT_SIGN;} - else state = ACCEPT_EXACT; - break; + else {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;} + continue; case DECIMAL: - if (t==e) {state = ACCEPT_INEXACT;} + if (t==e) {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;} else if (std::isdigit(*t)) {++t; state = DECIMAL;} else if (*t=='e' || *t=='E') {++t; state = EXPONENT_SIGN;} - else state = ACCEPT_INEXACT; - break; + else {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;} + continue; case EXPONENT: - if (t==e) {state = ACCEPT_INEXACT;} + if (t==e) {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;} else if (std::isdigit(*t)) {++t; state = EXPONENT;} - else state = ACCEPT_INEXACT; - break; - case ACCEPT_EXACT: - tok = Token(T_NUMERIC_EXACT, s, t); + else {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;} + continue; + case ACCEPT_INC: + ++t; + case ACCEPT_NOINC: + tok = Token(tokType, s, t); s = t; return true; - case ACCEPT_INEXACT: - tok = Token(T_NUMERIC_APPROX, s, t); + case ACCEPT_IDENTIFIER: + tok = Token(T_IDENTIFIER, s, t); s = t; + tokeniseReservedWord(tok); return true; case REJECT: return false; @@ -319,12 +283,7 @@ const Token& Tokeniser::nextToken() tokens.push_back(Token()); Token& tok = tokens[tokp++]; - if (tokeniseEos(inp, inEnd, tok)) return tok; - if (tokeniseIdentifierOrReservedWord(inp, inEnd, tok)) return tok; - if (tokeniseNumeric(inp, inEnd, tok)) return tok; - if (tokeniseString(inp, inEnd, tok)) return tok; - if (tokeniseParens(inp, inEnd, tok)) return tok; - if (tokeniseOperator(inp, inEnd, tok)) return tok; + if (tokenise(inp, inEnd, tok)) return tok; throw TokenException("Found illegal character"); } |