summaryrefslogtreecommitdiff
path: root/qpid/python/qpid/lexer.py
diff options
context:
space:
mode:
Diffstat (limited to 'qpid/python/qpid/lexer.py')
-rw-r--r--qpid/python/qpid/lexer.py118
1 files changed, 118 insertions, 0 deletions
diff --git a/qpid/python/qpid/lexer.py b/qpid/python/qpid/lexer.py
new file mode 100644
index 0000000000..ec28bbb91a
--- /dev/null
+++ b/qpid/python/qpid/lexer.py
@@ -0,0 +1,118 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import re
+
+class Type:
+
+ def __init__(self, name, pattern=None):
+ self.name = name
+ self.pattern = pattern
+
+ def __repr__(self):
+ return self.name
+
+class Lexicon:
+
+ def __init__(self):
+ self.types = []
+ self._eof = None
+
+ def define(self, name, pattern):
+ t = Type(name, pattern)
+ self.types.append(t)
+ return t
+
+ def eof(self, name):
+ t = Type(name)
+ self._eof = t
+ return t
+
+ def compile(self):
+ types = self.types[:]
+ joined = "|".join(["(%s)" % t.pattern for t in types])
+ rexp = re.compile(joined)
+ return Lexer(types, self._eof, rexp)
+
+class Token:
+
+ def __init__(self, type, value, input, position):
+ self.type = type
+ self.value = value
+ self.input = input
+ self.position = position
+
+ def line_info(self):
+ return line_info(self.input, self.position)
+
+ def __repr__(self):
+ if self.value is None:
+ return repr(self.type)
+ else:
+ return "%s(%s)" % (self.type, self.value)
+
+
+class LexError(Exception):
+ pass
+
+def line_info(st, pos):
+ idx = 0
+ lineno = 1
+ column = 0
+ line_pos = 0
+ while idx < pos:
+ if st[idx] == "\n":
+ lineno += 1
+ column = 0
+ line_pos = idx
+ column += 1
+ idx += 1
+
+ end = st.find("\n", line_pos)
+ if end < 0:
+ end = len(st)
+ line = st[line_pos:end]
+
+ return line, lineno, column
+
+class Lexer:
+
+ def __init__(self, types, eof, rexp):
+ self.types = types
+ self.eof = eof
+ self.rexp = rexp
+ self.byname = {}
+ for t in self.types + [eof]:
+ self.byname[t.name] = t
+
+ def type(self, name):
+ return self.byname[name]
+
+ def lex(self, st):
+ pos = 0
+ while pos < len(st):
+ m = self.rexp.match(st, pos)
+ if m is None:
+ line, ln, col = line_info(st, pos)
+ raise LexError("unrecognized characters line:%s,%s: %s" % (ln, col, line))
+ else:
+ idx = m.lastindex
+ t = Token(self.types[idx - 1], m.group(idx), st, pos)
+ yield t
+ pos = m.end()
+ yield Token(self.eof, None, st, pos)