summaryrefslogtreecommitdiff
path: root/qpid/cpp/src/qpid/broker/SelectorToken.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'qpid/cpp/src/qpid/broker/SelectorToken.cpp')
-rw-r--r--qpid/cpp/src/qpid/broker/SelectorToken.cpp298
1 files changed, 298 insertions, 0 deletions
diff --git a/qpid/cpp/src/qpid/broker/SelectorToken.cpp b/qpid/cpp/src/qpid/broker/SelectorToken.cpp
new file mode 100644
index 0000000000..d69267b2e5
--- /dev/null
+++ b/qpid/cpp/src/qpid/broker/SelectorToken.cpp
@@ -0,0 +1,298 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "qpid/broker/SelectorToken.h"
+
+#include <string>
+#include <algorithm>
+#include <iostream>
+#include <cassert>
+#include <cctype>
+
+namespace qpid {
+namespace broker {
+
+// Tokeniserss always take string const_iterators to mark the beginning and end of the string being tokenised
+// if the tokenise is successful then the start iterator is advanced, if the tokenise fails then the start
+// iterator is unchanged.
+
+std::ostream& operator<<(std::ostream& os, const Token& t)
+{
+ os << "T<" << t.type << ", " << t.val << ">";
+ return os;
+}
+
+TokenException::TokenException(const std::string& msg) :
+ range_error(msg)
+{}
+
+// Lexically, reserved words are a subset of identifiers
+// so we parse an identifier first then check if it is a reserved word and
+// convert it if it is a reserved word
+namespace {
+
+struct RWEntry {
+ const char* word;
+ TokenType type;
+};
+
+inline bool caseless(const char* s1, const char* s2)
+{
+ do {
+ char ls1 = std::tolower(*s1);
+ char ls2 = std::tolower(*s2);
+ if (ls1<ls2)
+ return true;
+ else if (ls1>ls2)
+ return false;
+ } while ( *s1++ && *s2++ );
+ // Equal
+ return false;
+}
+
+inline bool operator<(const RWEntry& lhs, const RWEntry& rhs) {
+ return caseless(lhs.word, rhs.word);
+}
+
+}
+
+bool tokeniseReservedWord(Token& tok)
+{
+ // This must be sorted!!
+ static const RWEntry reserved[] = {
+ {"and", T_AND},
+ {"between", T_BETWEEN},
+ {"escape", T_ESCAPE},
+ {"false", T_FALSE},
+ {"in", T_IN},
+ {"is", T_IS},
+ {"like", T_LIKE},
+ {"not", T_NOT},
+ {"null", T_NULL},
+ {"or", T_OR},
+ {"true", T_TRUE}
+ };
+
+ const int reserved_size = sizeof(reserved)/sizeof(RWEntry);
+
+ if ( tok.type != T_IDENTIFIER ) return false;
+
+ RWEntry rw;
+ rw.word = tok.val.c_str();
+ std::pair<const RWEntry*, const RWEntry*> entry = std::equal_range(&reserved[0], &reserved[reserved_size], rw);
+
+ if ( entry.first==entry.second ) return false;
+
+ tok.type = entry.first->type;
+ return true;
+}
+
+// parsing strings is complicated by the need to allow embedded quotes by doubling the quote character
+bool processString(std::string::const_iterator& s, std::string::const_iterator& e, char quoteChar, TokenType type, Token& tok)
+{
+ // We only get here once the tokeniser recognises the initial quote for a string
+ // so we don't need to check for it again.
+ std::string::const_iterator q = std::find(s+1, e, quoteChar);
+ if ( q==e ) return false;
+
+ std::string content(s+1, q);
+ ++q;
+
+ while ( q!=e && *q==quoteChar ) {
+ std::string::const_iterator p = q;
+ q = std::find(p+1, e, quoteChar);
+ if ( q==e ) return false;
+ content += std::string(p, q);
+ ++q;
+ }
+
+ tok = Token(type, s, content);
+ s = q;
+ return true;
+}
+
+inline bool isIdentifierStart(char c)
+{
+ return std::isalpha(c) || c=='_' || c=='$';
+}
+
+inline bool isIdentifierPart(char c)
+{
+ return std::isalnum(c) || c=='_' || c=='$' || c=='.';
+}
+
+static const std::string END("<END>");
+bool tokenise(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok)
+{
+ std::string::const_iterator t = s;
+
+ // Hand constructed state machine recogniser
+ enum {
+ START,
+ REJECT,
+ IDENTIFIER,
+ DIGIT,
+ DECIMAL_START,
+ DECIMAL,
+ EXPONENT_SIGN,
+ EXPONENT_START,
+ EXPONENT,
+ ACCEPT_IDENTIFIER,
+ ACCEPT_INC,
+ ACCEPT_NOINC
+ } state = START;
+
+ TokenType tokType = T_EOS;
+ while (true)
+ switch (state) {
+ case START:
+ if (t==e) {tok = Token(T_EOS, s, END); return true;}
+ else if (std::isspace(*t)) {++t; ++s; continue;}
+ else switch (*t) {
+ case '(': tokType = T_LPAREN; state = ACCEPT_INC; continue;
+ case ')': tokType = T_RPAREN; state = ACCEPT_INC; continue;
+ case ',': tokType = T_COMMA; state = ACCEPT_INC; continue;
+ case '+': tokType = T_PLUS; state = ACCEPT_INC; continue;
+ case '-': tokType = T_MINUS; state = ACCEPT_INC; continue;
+ case '*': tokType = T_MULT; state = ACCEPT_INC; continue;
+ case '/': tokType = T_DIV; state = ACCEPT_INC; continue;
+ case '=': tokType = T_EQUAL; state = ACCEPT_INC; continue;
+ case '<':
+ ++t;
+ if (t==e || (*t!='>' && *t!='='))
+ {tokType = T_LESS; state = ACCEPT_NOINC; continue; }
+ else
+ {tokType = (*t=='>') ? T_NEQ : T_LSEQ; state = ACCEPT_INC; continue; }
+ case '>':
+ ++t;
+ if (t==e || *t!='=')
+ {tokType = T_GRT; state = ACCEPT_NOINC; continue;}
+ else
+ {tokType = T_GREQ; state = ACCEPT_INC; continue;}
+ default:
+ break;
+ }
+ if (isIdentifierStart(*t)) {++t; state = IDENTIFIER;}
+ else if (*t=='\'') {return processString(s, e, '\'', T_STRING, tok);}
+ else if (*t=='\"') {return processString(s, e, '\"', T_IDENTIFIER, tok);}
+ else if (std::isdigit(*t)) {++t; state = DIGIT;}
+ else if (*t=='.') {++t; state = DECIMAL_START;}
+ else state = REJECT;
+ continue;
+ case IDENTIFIER:
+ if (t==e) {state = ACCEPT_IDENTIFIER;}
+ else if (isIdentifierPart(*t)) {++t; state = IDENTIFIER;}
+ else state = ACCEPT_IDENTIFIER;
+ continue;
+ case DECIMAL_START:
+ if (t==e) {state = REJECT;}
+ else if (std::isdigit(*t)) {++t; state = DECIMAL;}
+ else state = REJECT;
+ continue;
+ case EXPONENT_SIGN:
+ if (t==e) {state = REJECT;}
+ else if (*t=='-' || *t=='+') {++t; state = EXPONENT_START;}
+ else if (std::isdigit(*t)) {++t; state = EXPONENT;}
+ else state = REJECT;
+ continue;
+ case EXPONENT_START:
+ if (t==e) {state = REJECT;}
+ else if (std::isdigit(*t)) {++t; state = EXPONENT;}
+ else state = REJECT;
+ continue;
+ case DIGIT:
+ if (t==e) {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;}
+ else if (std::isdigit(*t)) {++t; state = DIGIT;}
+ else if (*t=='.') {++t; state = DECIMAL;}
+ else if (*t=='e' || *t=='E') {++t; state = EXPONENT_SIGN;}
+ else {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;}
+ continue;
+ case DECIMAL:
+ if (t==e) {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;}
+ else if (std::isdigit(*t)) {++t; state = DECIMAL;}
+ else if (*t=='e' || *t=='E') {++t; state = EXPONENT_SIGN;}
+ else {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;}
+ continue;
+ case EXPONENT:
+ if (t==e) {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;}
+ else if (std::isdigit(*t)) {++t; state = EXPONENT;}
+ else {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;}
+ continue;
+ case ACCEPT_INC:
+ ++t;
+ case ACCEPT_NOINC:
+ tok = Token(tokType, s, t);
+ s = t;
+ return true;
+ case ACCEPT_IDENTIFIER:
+ tok = Token(T_IDENTIFIER, s, t);
+ s = t;
+ tokeniseReservedWord(tok);
+ return true;
+ case REJECT:
+ return false;
+ };
+}
+
+Tokeniser::Tokeniser(const std::string::const_iterator& s, const std::string::const_iterator& e) :
+ tokp(0),
+ inStart(s),
+ inp(s),
+ inEnd(e)
+{
+}
+
+/**
+ * Skip any whitespace then look for a token, throwing an exception if no valid token
+ * is found.
+ *
+ * Advance the string iterator past the parsed token on success. On failure the string iterator is
+ * in an undefined location.
+ */
+const Token& Tokeniser::nextToken()
+{
+ if ( tokens.size()>tokp ) return tokens[tokp++];
+
+ // Don't extend stream of tokens further than the end of stream;
+ if ( tokp>0 && tokens[tokp-1].type==T_EOS ) return tokens[tokp-1];
+
+ tokens.push_back(Token());
+ Token& tok = tokens[tokp++];
+
+ if (tokenise(inp, inEnd, tok)) return tok;
+
+ throw TokenException("Found illegal character");
+}
+
+void Tokeniser::returnTokens(unsigned int n)
+{
+ assert( n<=tokp );
+ tokp-=n;
+}
+
+std::string Tokeniser::remaining()
+{
+ Token& currentTok = tokens[tokp];
+ return std::string(currentTok.tokenStart, inEnd);
+}
+
+
+}}