summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Rafael H. Schloming <rhs@apache.org>, 2009-12-09 18:08:14 +0000
committer: Rafael H. Schloming <rhs@apache.org>, 2009-12-09 18:08:14 +0000
commit: 1770fd804750bb39f693045901b74d1b20e8cca4 (patch)
tree: 94ae0e7cefaaabf0740a15dd13f1c311f1eed2d0
parent: 5720d502cfe7d570f3f6ac8f0f510709182e70e8 (diff)
download: qpid-python-1770fd804750bb39f693045901b74d1b20e8cca4.tar.gz
split out some of the generic parsing stuff in the address parser, and added a real mimetype parser in anticipation of a fix for QPID-2255
git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@888901 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- qpid/python/qpid/address.py 145
-rw-r--r-- qpid/python/qpid/lexer.py 112
-rw-r--r-- qpid/python/qpid/mimetype.py 106
-rw-r--r-- qpid/python/qpid/parser.py 68
-rw-r--r-- qpid/python/qpid/tests/__init__.py 2
-rw-r--r-- qpid/python/qpid/tests/address.py 28
-rw-r--r-- qpid/python/qpid/tests/mimetype.py 56
-rw-r--r-- qpid/python/qpid/tests/parser.py 37
8 files changed, 416 insertions, 138 deletions
diff --git a/qpid/python/qpid/address.py b/qpid/python/qpid/address.py
index 909a9e42e1..5c675b8782 100644
--- a/qpid/python/qpid/address.py
+++ b/qpid/python/qpid/address.py
@@ -17,90 +17,29 @@
# under the License.
#
import re
-
-TYPES = []
-
-class Type:
-
- def __init__(self, name, pattern=None):
- self.name = name
- self.pattern = pattern
- if self.pattern:
- TYPES.append(self)
-
- def __repr__(self):
- return self.name
-
-LBRACE = Type("LBRACE", r"\{")
-RBRACE = Type("RBRACE", r"\}")
-COLON = Type("COLON", r":")
-SEMI = Type("SEMI", r";")
-SLASH = Type("SLASH", r"/")
-COMMA = Type("COMMA", r",")
-NUMBER = Type("NUMBER", r'[+-]?[0-9]*\.?[0-9]+')
-ID = Type("ID", r'[a-zA-Z_](?:[a-zA-Z0-9_-]*[a-zA-Z0-9_])?')
-STRING = Type("STRING", r""""(?:[^\\"]|\\.)*"|'(?:[^\\']|\\.)*'""")
-ESC = Type("ESC", r"\\[^ux]|\\x[0-9a-fA-F][0-9a-fA-F]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]")
-SYM = Type("SYM", r"[.#*%@$^!+-]")
-WSPACE = Type("WSPACE", r"[ \n\r\t]+")
-EOF = Type("EOF")
-
-class Token:
-
- def __init__(self, type, value, input, position):
- self.type = type
- self.value = value
- self.input = input
- self.position = position
-
- def line_info(self):
- return line_info(self.input, self.position)
-
- def __repr__(self):
- if self.value is None:
- return repr(self.type)
- else:
- return "%s(%r)" % (self.type, self.value)
-
-joined = "|".join(["(%s)" % t.pattern for t in TYPES])
-LEXER = re.compile(joined)
-
-class LexError(Exception):
- pass
-
-def line_info(st, pos):
- idx = 0
- lineno = 1
- column = 0
- line_pos = 0
- while idx < pos:
- if st[idx] == "\n":
- lineno += 1
- column = 0
- line_pos = idx
- column += 1
- idx += 1
-
- end = st.find("\n", line_pos)
- if end < 0:
- end = len(st)
- line = st[line_pos:end]
-
- return line, lineno, column
+from lexer import Lexicon, LexError
+from parser import Parser, ParseError
+
+l = Lexicon()
+
+LBRACE = l.define("LBRACE", r"\{")
+RBRACE = l.define("RBRACE", r"\}")
+COLON = l.define("COLON", r":")
+SEMI = l.define("SEMI", r";")
+SLASH = l.define("SLASH", r"/")
+COMMA = l.define("COMMA", r",")
+NUMBER = l.define("NUMBER", r'[+-]?[0-9]*\.?[0-9]+')
+ID = l.define("ID", r'[a-zA-Z_](?:[a-zA-Z0-9_-]*[a-zA-Z0-9_])?')
+STRING = l.define("STRING", r""""(?:[^\\"]|\\.)*"|'(?:[^\\']|\\.)*'""")
+ESC = l.define("ESC", r"\\[^ux]|\\x[0-9a-fA-F][0-9a-fA-F]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]")
+SYM = l.define("SYM", r"[.#*%@$^!+-]")
+WSPACE = l.define("WSPACE", r"[ \n\r\t]+")
+EOF = l.eof("EOF")
+
+LEXER = l.compile()
def lex(st):
- pos = 0
- while pos < len(st):
- m = LEXER.match(st, pos)
- if m is None:
- line, ln, col = line_info(st, pos)
- raise LexError("unrecognized characters line:%s,%s: %s" % (ln, col, line))
- else:
- idx = m.lastindex
- t = Token(TYPES[idx - 1], m.group(idx), st, pos)
- yield t
- pos = m.end()
- yield Token(EOF, None, st, pos)
+ return LEXER.lex(st)
def tok2str(tok):
if tok.type is STRING:
@@ -127,46 +66,10 @@ def toks2str(toks):
else:
return None
-class ParseError(Exception):
-
- def __init__(self, token, *expected):
- line, ln, col = token.line_info()
- exp = ", ".join(map(str, expected))
- if len(expected) > 1:
- exp = "(%s)" % exp
- if expected:
- msg = "expecting %s, got %s line:%s,%s:%s" % (exp, token, ln, col, line)
- else:
- msg = "unexpected token %s line:%s,%s:%s" % (token, ln, col, line)
- Exception.__init__(self, msg)
- self.token = token
- self.expected = expected
-
-class Parser:
+class AddressParser(Parser):
def __init__(self, tokens):
- self.tokens = [t for t in tokens if t.type is not WSPACE]
- self.idx = 0
-
- def next(self):
- return self.tokens[self.idx]
-
- def matches(self, *types):
- return self.next().type in types
-
- def eat(self, *types):
- if types and not self.matches(*types):
- raise ParseError(self.next(), *types)
- else:
- t = self.next()
- self.idx += 1
- return t
-
- def eat_until(self, *types):
- result = []
- while not self.matches(*types):
- result.append(self.eat())
- return result
+ Parser.__init__(self, [t for t in tokens if t.type is not WSPACE])
def parse(self):
result = self.address()
@@ -229,6 +132,6 @@ class Parser:
raise ParseError(self.next(), NUMBER, STRING, ID, LBRACE)
def parse(addr):
- return Parser(lex(addr)).parse()
+ return AddressParser(lex(addr)).parse()
__all__ = ["parse", "ParseError"]
diff --git a/qpid/python/qpid/lexer.py b/qpid/python/qpid/lexer.py
new file mode 100644
index 0000000000..87845560eb
--- /dev/null
+++ b/qpid/python/qpid/lexer.py
@@ -0,0 +1,112 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import re
+
+class Type:
+  """A named token type, optionally paired with the regex that matches it."""
+
+  def __init__(self, name, pattern=None):
+    # pattern defaults to None for a type that is not matched by a regex.
+    self.pattern = pattern
+    self.name = name
+
+  def __repr__(self):
+    # A type prints as its bare name.
+    return self.name
+
+class Lexicon:
+  """Collects token type definitions and compiles them into a Lexer."""
+
+  def __init__(self):
+    self._eof = None
+    self.types = []
+
+  def define(self, name, pattern):
+    """Register and return a new Type matched by the regex pattern."""
+    defined = Type(name, pattern)
+    self.types.append(defined)
+    return defined
+
+  def eof(self, name):
+    """Create, remember and return the pattern-less end-of-input Type."""
+    self._eof = Type(name)
+    return self._eof
+
+  def compile(self):
+    """Build a Lexer from a snapshot of the types defined so far.
+
+    Each pattern becomes one parenthesised alternative of a single
+    regex, in definition order.
+    """
+    snapshot = list(self.types)
+    alternatives = []
+    for t in snapshot:
+      alternatives.append("(%s)" % t.pattern)
+    return Lexer(snapshot, self._eof, re.compile("|".join(alternatives)))
+
+class Token:
+  """One lexed item: its type, matched text, and where it came from."""
+
+  def __init__(self, type, value, input, position):
+    self.type, self.value = type, value
+    self.input, self.position = input, position
+
+  def line_info(self):
+    """Return the line_info() result for this token's position in its input."""
+    return line_info(self.input, self.position)
+
+  def __repr__(self):
+    # Tokens without a value print as their bare type.
+    if self.value is not None:
+      return "%s(%r)" % (self.type, self.value)
+    return repr(self.type)
+
+
+class LexError(Exception):
+  """Raised when the input contains characters no token pattern matches."""
+
+def line_info(st, pos):
+  """Locate offset pos within the string st.
+
+  Returns a (line, lineno, column) tuple: the text of the line that
+  contains pos (without its newline), the 1-based line number, and the
+  0-based column of pos within that line.
+  """
+  # The line starts just after the last newline before pos; rfind's -1
+  # for "no newline found" conveniently becomes offset 0.
+  line_pos = st.rfind("\n", 0, pos) + 1
+  lineno = st.count("\n", 0, pos) + 1
+
+  end = st.find("\n", line_pos)
+  if end < 0:
+    end = len(st)
+
+  return st[line_pos:end], lineno, pos - line_pos
+
+class Lexer:
+  """Tokenizes strings using one combined regex and its list of types."""
+
+  def __init__(self, types, eof, rexp):
+    self.rexp = rexp
+    self.eof = eof
+    self.types = types
+
+  def lex(self, st):
+    """Generate a Token per match in st, followed by one eof Token.
+
+    Raises LexError as soon as nothing matches at the current offset.
+    """
+    offset = 0
+    length = len(st)
+    while offset < length:
+      match = self.rexp.match(st, offset)
+      if match is None:
+        line, ln, col = line_info(st, offset)
+        raise LexError("unrecognized characters line:%s,%s: %s" % (ln, col, line))
+      # Group N of the combined regex is looked up as self.types[N - 1].
+      group = match.lastindex
+      yield Token(self.types[group - 1], match.group(group), st, offset)
+      offset = match.end()
+    yield Token(self.eof, None, st, offset)
diff --git a/qpid/python/qpid/mimetype.py b/qpid/python/qpid/mimetype.py
new file mode 100644
index 0000000000..f512996b9f
--- /dev/null
+++ b/qpid/python/qpid/mimetype.py
@@ -0,0 +1,106 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import re, rfc822
+from lexer import Lexicon, LexError
+from parser import Parser, ParseError
+
+l = Lexicon()
+
+LPAREN = l.define("LPAREN", r"\(")
+# The name given here is what appears in "expecting ..." error messages.
+RPAREN = l.define("RPAREN", r"\)")
+SLASH = l.define("SLASH", r"/")
+SEMI = l.define("SEMI", r";")
+EQUAL = l.define("EQUAL", r"=")
+# TOKEN is tried before WSPACE, so it has to exclude every whitespace
+# character itself; otherwise tabs and newlines are lexed as token text.
+TOKEN = l.define("TOKEN", r'[^()<>@,;:\\"/\[\]?= \n\r\t]+')
+STRING = l.define("STRING", r'"(?:[^\\"]|\\.)*"')
+WSPACE = l.define("WSPACE", r"[ \n\r\t]+")
+EOF = l.eof("EOF")
+
+LEXER = l.compile()
+
+def lex(st):
+  """Tokenize st with the module-level MIME type LEXER."""
+  tokens = LEXER.lex(st)
+  return tokens
+
+class MimeTypeParser(Parser):
+  """Parser for MIME types of the form type/subtype[;name=value]*.
+
+  Parenthesised comments, possibly nested, may appear between tokens and
+  are discarded before the type itself is parsed.
+  """
+
+  def __init__(self, tokens):
+    # Whitespace tokens are dropped up front; the grammar never looks at them.
+    Parser.__init__(self, [t for t in tokens if t.type is not WSPACE])
+
+  def parse(self):
+    """Parse the entire input and return (type, subtype, params)."""
+    result = self.mimetype()
+    self.eat(EOF)
+    return result
+
+  def mimetype(self):
+    """Parse type/subtype and any parameters.
+
+    Returns (type, subtype, params) where type and subtype are lowercased
+    and params is a list of (name, value) tuples in input order.
+    """
+    # First pass strips comments from the token list; then rewind and
+    # parse what is left.
+    self.remove_comments()
+    self.reset()
+
+    maintype = self.eat(TOKEN).value.lower()
+    self.eat(SLASH)
+    subtype = self.eat(TOKEN).value.lower()
+
+    params = []
+    while self.matches(SEMI):
+      params.append(self.parameter())
+
+    return maintype, subtype, params
+
+  def remove_comments(self):
+    """Delete every top-level comment, including its contents, from the tokens."""
+    while True:
+      self.eat_until(LPAREN, EOF)
+      if not self.matches(LPAREN):
+        break
+      self.remove(*self.comment())
+
+  def comment(self):
+    """Consume one comment and return its (LPAREN, RPAREN) tokens.
+
+    Raises ParseError if the input ends before the comment is closed.
+    """
+    start = self.eat(LPAREN)
+
+    while True:
+      # Stop at EOF as well: skipping past the final token would make
+      # next() fail with an IndexError instead of the ParseError below.
+      self.eat_until(LPAREN, RPAREN, EOF)
+      if not self.matches(LPAREN):
+        break
+      self.comment()
+
+    end = self.eat(RPAREN)
+    return start, end
+
+  def parameter(self):
+    """Parse ;name=value and return (name, value)."""
+    self.eat(SEMI)
+    name = self.eat(TOKEN).value
+    self.eat(EQUAL)
+    value = self.value()
+    return name, value
+
+  def value(self):
+    """Parse a parameter value: a bare token or a quoted string.
+
+    Quoted strings are passed through rfc822.unquote before being returned.
+    Raises ParseError for any other token.
+    """
+    if self.matches(TOKEN):
+      return self.eat().value
+    elif self.matches(STRING):
+      return rfc822.unquote(self.eat().value)
+    else:
+      raise ParseError(self.next(), TOKEN, STRING)
+
+def parse(addr):
+  """Lex and parse the MIME type string addr with MimeTypeParser."""
+  parser = MimeTypeParser(lex(addr))
+  return parser.parse()
+
+__all__ = ["parse", "ParseError"]
diff --git a/qpid/python/qpid/parser.py b/qpid/python/qpid/parser.py
new file mode 100644
index 0000000000..233f0a8469
--- /dev/null
+++ b/qpid/python/qpid/parser.py
@@ -0,0 +1,68 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+class ParseError(Exception):
+  """Raised when the parser meets a token it cannot accept.
+
+  The offending token and the tuple of acceptable types are kept on the
+  exception as .token and .expected.
+  """
+
+  def __init__(self, token, *expected):
+    line, ln, col = token.line_info()
+    if expected:
+      exp = ", ".join(map(str, expected))
+      # Several alternatives are shown as a parenthesised list.
+      if len(expected) > 1:
+        exp = "(%s)" % exp
+      msg = "expecting %s, got %s line:%s,%s:%s" % (exp, token, ln, col, line)
+    else:
+      msg = "unexpected token %s line:%s,%s:%s" % (token, ln, col, line)
+    Exception.__init__(self, msg)
+    self.token = token
+    self.expected = expected
+
+class Parser:
+  """Base class for recursive-descent parsers over a list of tokens.
+
+  self.idx is the cursor: the index in self.tokens of the next token to
+  be consumed.
+  """
+
+  def __init__(self, tokens):
+    # Copy into a private list: remove() edits it in place, and the copy
+    # also lets callers pass any iterable, such as a generator.
+    self.tokens = list(tokens)
+    self.idx = 0
+
+  def next(self):
+    """Return the token under the cursor without consuming it."""
+    return self.tokens[self.idx]
+
+  def matches(self, *types):
+    """Return True if the next token's type is one of types."""
+    return self.next().type in types
+
+  def eat(self, *types):
+    """Consume and return the next token.
+
+    If types are given, raises ParseError unless the token is one of them.
+    """
+    if types and not self.matches(*types):
+      raise ParseError(self.next(), *types)
+    t = self.next()
+    self.idx += 1
+    return t
+
+  def eat_until(self, *types):
+    """Consume tokens up to, but not including, the first one of types.
+
+    Returns the list of consumed tokens.
+    """
+    result = []
+    while not self.matches(*types):
+      result.append(self.eat())
+    return result
+
+  def remove(self, start, end):
+    """Delete the tokens from start through end (inclusive) from the list.
+
+    The cursor is adjusted so it keeps pointing at the same token; if it
+    was inside the removed range it moves to the token that followed end.
+    """
+    start_idx = self.tokens.index(start)
+    end_idx = self.tokens.index(end) + 1
+    del self.tokens[start_idx:end_idx]
+    if self.idx >= end_idx:
+      # Everything removed was before the cursor: shift it back in step.
+      self.idx -= end_idx - start_idx
+    elif self.idx > start_idx:
+      # The cursor was inside the removed range.
+      self.idx = start_idx
+
+  def reset(self):
+    """Move the cursor back to the first token."""
+    self.idx = 0
diff --git a/qpid/python/qpid/tests/__init__.py b/qpid/python/qpid/tests/__init__.py
index 6d227dd656..2f0fcfdf67 100644
--- a/qpid/python/qpid/tests/__init__.py
+++ b/qpid/python/qpid/tests/__init__.py
@@ -25,4 +25,4 @@ class Test:
def configure(self, config):
self.config = config
-import address, framing, messaging
+import address, framing, mimetype, messaging
diff --git a/qpid/python/qpid/tests/address.py b/qpid/python/qpid/tests/address.py
index 065e2ca8de..f772425e42 100644
--- a/qpid/python/qpid/tests/address.py
+++ b/qpid/python/qpid/tests/address.py
@@ -19,12 +19,20 @@
from qpid.tests import Test
from qpid.address import lex, parse, ParseError, EOF, ID, NUMBER, SYM, WSPACE
+from parser import ParserBase
-class AddressTests(Test):
+class AddressTests(ParserBase, Test):
- def lex(self, addr, *types):
- toks = [t.type for t in lex(addr) if t.type not in (WSPACE, EOF)]
- assert list(types) == toks, "expected %s, got %s" % (types, toks)
+ EXCLUDE = (WSPACE, EOF)
+
+ def do_lex(self, st):
+ return lex(st)
+
+ def do_parse(self, st):
+ return parse(st)
+
+ def valid(self, addr, name=None, subject=None, options=None):
+ ParserBase.valid(self, addr, (name, subject, options))
def testDashInId1(self):
self.lex("foo-bar", ID)
@@ -47,18 +55,6 @@ class AddressTests(Test):
def testNegativeNum(self):
self.lex("-3", NUMBER)
- def valid(self, addr, name=None, subject=None, options=None):
- expected = (name, subject, options)
- got = parse(addr)
- assert expected == got, "expected %s, got %s" % (expected, got)
-
- def invalid(self, addr, error=None):
- try:
- p = parse(addr)
- assert False, "invalid address parsed: %s" % p
- except ParseError, e:
- assert error == str(e), "expected %r, got %r" % (error, str(e))
-
def testHash(self):
self.valid("foo/bar.#", "foo", "bar.#")
diff --git a/qpid/python/qpid/tests/mimetype.py b/qpid/python/qpid/tests/mimetype.py
new file mode 100644
index 0000000000..22760316f0
--- /dev/null
+++ b/qpid/python/qpid/tests/mimetype.py
@@ -0,0 +1,56 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from qpid.tests import Test
+from qpid.mimetype import lex, parse, ParseError, EOF, WSPACE
+from parser import ParserBase
+
+class MimeTypeTests(ParserBase, Test):
+ # Exercises qpid.mimetype's lex() and parse() through the ParserBase helpers.
+
+ # Token types that ParserBase.lex() leaves out of its comparison.
+ EXCLUDE = (WSPACE, EOF)
+
+ # Hook used by ParserBase.lex().
+ def do_lex(self, st):
+ return lex(st)
+
+ # Hook used by ParserBase.valid() and ParserBase.invalid().
+ def do_parse(self, st):
+ return parse(st)
+
+ # Packs the expected pieces into the (type, subtype, parameters) tuple
+ # that is compared against the result of parse().
+ def valid(self, addr, type=None, subtype=None, parameters=None):
+ ParserBase.valid(self, addr, (type, subtype, parameters))
+
+ # A bare type without "/subtype" must be rejected.
+ def testTypeOnly(self):
+ self.invalid("type", "expecting SLASH, got EOF line:1,4:type")
+
+ # No parameters yields an empty parameter list.
+ def testTypeSubtype(self):
+ self.valid("type/subtype", "type", "subtype", [])
+
+ # Whitespace around ";" is ignored.
+ def testTypeSubtypeParam(self):
+ self.valid("type/subtype ; name=value",
+ "type", "subtype", [("name", "value")])
+
+ # A parenthesised comment inside a parameter is ignored.
+ def testTypeSubtypeParamComment(self):
+ self.valid("type/subtype ; name(This is a comment.)=value",
+ "type", "subtype", [("name", "value")])
+
+ # Parameters are returned in the order they appear.
+ def testMultipleParams(self):
+ self.valid("type/subtype ; name1=value1 ; name2=value2",
+ "type", "subtype", [("name1", "value1"), ("name2", "value2")])
+
+ # Type and subtype are lowercased by the parser.
+ def testCaseInsensitivity(self):
+ self.valid("Type/Subtype", "type", "subtype", [])
diff --git a/qpid/python/qpid/tests/parser.py b/qpid/python/qpid/tests/parser.py
new file mode 100644
index 0000000000..a4865cc9fe
--- /dev/null
+++ b/qpid/python/qpid/tests/parser.py
@@ -0,0 +1,37 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from qpid.parser import ParseError
+
+class ParserBase:
+  """Mixin with lex/valid/invalid assertion helpers for parser tests.
+
+  Subclasses provide do_lex(st), do_parse(st) and an EXCLUDE collection
+  of token types that lex() should ignore.
+  """
+
+  def lex(self, addr, *types):
+    """Assert that addr lexes to exactly types, ignoring EXCLUDE types."""
+    toks = [t.type for t in self.do_lex(addr) if t.type not in self.EXCLUDE]
+    assert list(types) == toks, "expected %s, got %s" % (types, toks)
+
+  def valid(self, addr, expected):
+    """Assert that parsing addr yields expected."""
+    got = self.do_parse(addr)
+    assert expected == got, "expected %s, got %s" % (expected, got)
+
+  def invalid(self, addr, error=None):
+    """Assert that parsing addr raises ParseError whose message is error."""
+    try:
+      p = self.do_parse(addr)
+    except ParseError, e:
+      assert error == str(e), "expected %r, got %r" % (error, str(e))
+    else:
+      # Wrap p in a 1-tuple: a parse result is itself a tuple, and
+      # "%s" % p would raise TypeError instead of this assertion.
+      assert False, "invalid address parsed: %s" % (p,)