summary | refs | log | tree | commit | diff
path: root/qpid/cpp/src/qpid/broker/SelectorToken.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'qpid/cpp/src/qpid/broker/SelectorToken.cpp')
-rw-r--r-- qpid/cpp/src/qpid/broker/SelectorToken.cpp | 173
1 files changed, 66 insertions, 107 deletions
diff --git a/qpid/cpp/src/qpid/broker/SelectorToken.cpp b/qpid/cpp/src/qpid/broker/SelectorToken.cpp
index 3540ee158f..1e84834e18 100644
--- a/qpid/cpp/src/qpid/broker/SelectorToken.cpp
+++ b/qpid/cpp/src/qpid/broker/SelectorToken.cpp
@@ -52,38 +52,6 @@ void skipWS(std::string::const_iterator& s, std::string::const_iterator& e)
}
}
-bool tokeniseEos(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok)
-{
- if ( s!=e ) return false;
-
- tok = Token(T_EOS, "");
- return true;
-}
-
-inline bool isIdentifierStart(char c)
-{
- return std::isalpha(c) || c=='_' || c=='$';
-}
-
-inline bool isIdentifierPart(char c)
-{
- return std::isalnum(c) || c=='_' || c=='$' || c=='.';
-}
-
-bool tokeniseIdentifier(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok)
-{
- // Be sure that first char is alphanumeric or _ or $
- if ( s==e || !isIdentifierStart(*s) ) return false;
-
- std::string::const_iterator t = s;
-
- while ( ++s!=e && isIdentifierPart(*s) );
-
- tok = Token(T_IDENTIFIER, t, s);
-
- return true;
-}
-
// Lexically, reserved words are a subset of identifiers
// so we parse an identifier first then check if it is a reserved word and
// convert it if it is a reserved word
@@ -145,27 +113,11 @@ bool tokeniseReservedWord(Token& tok)
return true;
}
-// This is really only used for testing
-bool tokeniseReservedWord(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok)
-{
- std::string::const_iterator p = s;
- bool r = tokeniseIdentifier(p, e, tok) && tokeniseReservedWord(tok);
- if (r) s = p;
- return r;
-}
-
-bool tokeniseIdentifierOrReservedWord(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok)
-{
- bool r = tokeniseIdentifier(s, e, tok);
- if (r) (void) tokeniseReservedWord(tok);
- return r;
-}
-
// parsing strings is complicated by the need to allow "''" as an embedded single quote
-bool tokeniseString(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok)
+bool processString(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok)
{
- if ( s==e || *s != '\'' ) return false;
-
+ // We only get here once the tokeniser recognises the initial quote for a string
+ // so we don't need to check for it again.
std::string::const_iterator q = std::find(s+1, e, '\'');
if ( q==e ) return false;
@@ -185,42 +137,17 @@ bool tokeniseString(std::string::const_iterator& s, std::string::const_iterator&
return true;
}
-bool tokeniseParens(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok)
-{
- if ( s==e) return false;
- if ( *s=='(' ) {
- tok = Token (T_LPAREN, s, s+1);
- ++s;
- return true;
- }
- if ( *s==')' ) {
- tok = Token (T_RPAREN, s, s+1);
- ++s;
- return true;
- }
- return false;
-}
-
-inline bool isOperatorPart(char c)
+inline bool isIdentifierStart(char c)
{
- return !std::isalnum(c) && !std::isspace(c) && c!='_' && c!='$' && c!='(' && c!=')' && c!= '\'';
+ return std::isalpha(c) || c=='_' || c=='$';
}
-// These lexical tokens contain no alphanumerics - this is broader than actual operators but
-// works.
-bool tokeniseOperator(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok)
+inline bool isIdentifierPart(char c)
{
- if ( s==e || !isOperatorPart(*s) ) return false;
-
- std::string::const_iterator t = s;
-
- while (++s!=e && isOperatorPart(*s));
-
- tok = Token(T_OPERATOR, t, s);
- return true;
+ return std::isalnum(c) || c=='_' || c=='$' || c=='.';
}
-bool tokeniseNumeric(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok)
+bool tokenise(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok)
{
std::string::const_iterator t = s;
@@ -228,65 +155,102 @@ bool tokeniseNumeric(std::string::const_iterator& s, std::string::const_iterator
enum {
START,
REJECT,
+ IDENTIFIER,
DIGIT,
DECIMAL_START,
DECIMAL,
EXPONENT_SIGN,
EXPONENT_START,
EXPONENT,
- ACCEPT_EXACT,
- ACCEPT_INEXACT
+ ACCEPT_IDENTIFIER,
+ ACCEPT_INC,
+ ACCEPT_NOINC
} state = START;
+ TokenType tokType = T_EOS;
while (true)
switch (state) {
case START:
- if (t==e) {state = REJECT;}
+ if (t==e) {tok = Token(T_EOS, ""); return true;}
+ else switch (*t) {
+ case '(': tokType = T_LPAREN; state = ACCEPT_INC; continue;
+ case ')': tokType = T_RPAREN; state = ACCEPT_INC; continue;
+ case ',': tokType = T_COMMA; state = ACCEPT_INC; continue;
+ case '+': tokType = T_PLUS; state = ACCEPT_INC; continue;
+ case '-': tokType = T_MINUS; state = ACCEPT_INC; continue;
+ case '*': tokType = T_MULT; state = ACCEPT_INC; continue;
+ case '/': tokType = T_DIV; state = ACCEPT_INC; continue;
+ case '=': tokType = T_EQUAL; state = ACCEPT_INC; continue;
+ case '<':
+ ++t;
+ if (t==e || (*t!='>' && *t!='='))
+ {tokType = T_LESS; state = ACCEPT_NOINC; continue; }
+ else
+ {tokType = (*t=='>') ? T_NEQ : T_LSEQ; state = ACCEPT_INC; continue; }
+ case '>':
+ ++t;
+ if (t==e || *t!='=')
+ {tokType = T_GRT; state = ACCEPT_NOINC; continue;}
+ else
+ {tokType = T_GREQ; state = ACCEPT_INC; continue;}
+ default:
+ break;
+ }
+ if (isIdentifierStart(*t)) {++t; state = IDENTIFIER;}
+ else if (*t=='\'') {return processString(s, e, tok);}
else if (std::isdigit(*t)) {++t; state = DIGIT;}
else if (*t=='.') {++t; state = DECIMAL_START;}
else state = REJECT;
- break;
+ continue;
+ case IDENTIFIER:
+ if (t==e) {state = ACCEPT_IDENTIFIER;}
+ else if (isIdentifierPart(*t)) {++t; state = IDENTIFIER;}
+ else state = ACCEPT_IDENTIFIER;
+ continue;
case DECIMAL_START:
if (t==e) {state = REJECT;}
else if (std::isdigit(*t)) {++t; state = DECIMAL;}
else state = REJECT;
- break;
+ continue;
case EXPONENT_SIGN:
if (t==e) {state = REJECT;}
else if (*t=='-' || *t=='+') {++t; state = EXPONENT_START;}
else if (std::isdigit(*t)) {++t; state = EXPONENT;}
else state = REJECT;
- break;
+ continue;
case EXPONENT_START:
if (t==e) {state = REJECT;}
else if (std::isdigit(*t)) {++t; state = EXPONENT;}
else state = REJECT;
- break;
+ continue;
case DIGIT:
- if (t==e) {state = ACCEPT_EXACT;}
+ if (t==e) {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;}
else if (std::isdigit(*t)) {++t; state = DIGIT;}
else if (*t=='.') {++t; state = DECIMAL;}
else if (*t=='e' || *t=='E') {++t; state = EXPONENT_SIGN;}
- else state = ACCEPT_EXACT;
- break;
+ else {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;}
+ continue;
case DECIMAL:
- if (t==e) {state = ACCEPT_INEXACT;}
+ if (t==e) {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;}
else if (std::isdigit(*t)) {++t; state = DECIMAL;}
else if (*t=='e' || *t=='E') {++t; state = EXPONENT_SIGN;}
- else state = ACCEPT_INEXACT;
- break;
+ else {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;}
+ continue;
case EXPONENT:
- if (t==e) {state = ACCEPT_INEXACT;}
+ if (t==e) {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;}
else if (std::isdigit(*t)) {++t; state = EXPONENT;}
- else state = ACCEPT_INEXACT;
- break;
- case ACCEPT_EXACT:
- tok = Token(T_NUMERIC_EXACT, s, t);
+ else {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;}
+ continue;
+ case ACCEPT_INC:
+ ++t;
+ case ACCEPT_NOINC:
+ tok = Token(tokType, s, t);
s = t;
return true;
- case ACCEPT_INEXACT:
- tok = Token(T_NUMERIC_APPROX, s, t);
+ case ACCEPT_IDENTIFIER:
+ tok = Token(T_IDENTIFIER, s, t);
s = t;
+ tokeniseReservedWord(tok);
return true;
case REJECT:
return false;
@@ -319,12 +283,7 @@ const Token& Tokeniser::nextToken()
tokens.push_back(Token());
Token& tok = tokens[tokp++];
- if (tokeniseEos(inp, inEnd, tok)) return tok;
- if (tokeniseIdentifierOrReservedWord(inp, inEnd, tok)) return tok;
- if (tokeniseNumeric(inp, inEnd, tok)) return tok;
- if (tokeniseString(inp, inEnd, tok)) return tok;
- if (tokeniseParens(inp, inEnd, tok)) return tok;
- if (tokeniseOperator(inp, inEnd, tok)) return tok;
+ if (tokenise(inp, inEnd, tok)) return tok;
throw TokenException("Found illegal character");
}