diff options
author | Rafael H. Schloming <rhs@apache.org> | 2009-12-09 18:08:14 +0000 |
---|---|---|
committer | Rafael H. Schloming <rhs@apache.org> | 2009-12-09 18:08:14 +0000 |
commit | 1770fd804750bb39f693045901b74d1b20e8cca4 (patch) | |
tree | 94ae0e7cefaaabf0740a15dd13f1c311f1eed2d0 | |
parent | 5720d502cfe7d570f3f6ac8f0f510709182e70e8 (diff) | |
download | qpid-python-1770fd804750bb39f693045901b74d1b20e8cca4.tar.gz |
split out some of the generic parsing stuff in the address parser, and added a real mimetype parser in anticipation of a fix for QPID-2255
git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@888901 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- | qpid/python/qpid/address.py | 145 | ||||
-rw-r--r-- | qpid/python/qpid/lexer.py | 112 | ||||
-rw-r--r-- | qpid/python/qpid/mimetype.py | 106 | ||||
-rw-r--r-- | qpid/python/qpid/parser.py | 68 | ||||
-rw-r--r-- | qpid/python/qpid/tests/__init__.py | 2 | ||||
-rw-r--r-- | qpid/python/qpid/tests/address.py | 28 | ||||
-rw-r--r-- | qpid/python/qpid/tests/mimetype.py | 56 | ||||
-rw-r--r-- | qpid/python/qpid/tests/parser.py | 37 |
8 files changed, 416 insertions, 138 deletions
diff --git a/qpid/python/qpid/address.py b/qpid/python/qpid/address.py index 909a9e42e1..5c675b8782 100644 --- a/qpid/python/qpid/address.py +++ b/qpid/python/qpid/address.py @@ -17,90 +17,29 @@ # under the License. # import re - -TYPES = [] - -class Type: - - def __init__(self, name, pattern=None): - self.name = name - self.pattern = pattern - if self.pattern: - TYPES.append(self) - - def __repr__(self): - return self.name - -LBRACE = Type("LBRACE", r"\{") -RBRACE = Type("RBRACE", r"\}") -COLON = Type("COLON", r":") -SEMI = Type("SEMI", r";") -SLASH = Type("SLASH", r"/") -COMMA = Type("COMMA", r",") -NUMBER = Type("NUMBER", r'[+-]?[0-9]*\.?[0-9]+') -ID = Type("ID", r'[a-zA-Z_](?:[a-zA-Z0-9_-]*[a-zA-Z0-9_])?') -STRING = Type("STRING", r""""(?:[^\\"]|\\.)*"|'(?:[^\\']|\\.)*'""") -ESC = Type("ESC", r"\\[^ux]|\\x[0-9a-fA-F][0-9a-fA-F]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]") -SYM = Type("SYM", r"[.#*%@$^!+-]") -WSPACE = Type("WSPACE", r"[ \n\r\t]+") -EOF = Type("EOF") - -class Token: - - def __init__(self, type, value, input, position): - self.type = type - self.value = value - self.input = input - self.position = position - - def line_info(self): - return line_info(self.input, self.position) - - def __repr__(self): - if self.value is None: - return repr(self.type) - else: - return "%s(%r)" % (self.type, self.value) - -joined = "|".join(["(%s)" % t.pattern for t in TYPES]) -LEXER = re.compile(joined) - -class LexError(Exception): - pass - -def line_info(st, pos): - idx = 0 - lineno = 1 - column = 0 - line_pos = 0 - while idx < pos: - if st[idx] == "\n": - lineno += 1 - column = 0 - line_pos = idx - column += 1 - idx += 1 - - end = st.find("\n", line_pos) - if end < 0: - end = len(st) - line = st[line_pos:end] - - return line, lineno, column +from lexer import Lexicon, LexError +from parser import Parser, ParseError + +l = Lexicon() + +LBRACE = l.define("LBRACE", r"\{") +RBRACE = l.define("RBRACE", r"\}") +COLON = l.define("COLON", r":") +SEMI = l.define("SEMI", r";") +SLASH = l.define("SLASH", r"/") +COMMA = l.define("COMMA", r",") +NUMBER = l.define("NUMBER", r'[+-]?[0-9]*\.?[0-9]+') +ID = l.define("ID", r'[a-zA-Z_](?:[a-zA-Z0-9_-]*[a-zA-Z0-9_])?') +STRING = l.define("STRING", r""""(?:[^\\"]|\\.)*"|'(?:[^\\']|\\.)*'""") +ESC = l.define("ESC", r"\\[^ux]|\\x[0-9a-fA-F][0-9a-fA-F]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]") +SYM = l.define("SYM", r"[.#*%@$^!+-]") +WSPACE = l.define("WSPACE", r"[ \n\r\t]+") +EOF = l.eof("EOF") + +LEXER = l.compile() def lex(st): - pos = 0 - while pos < len(st): - m = LEXER.match(st, pos) - if m is None: - line, ln, col = line_info(st, pos) - raise LexError("unrecognized characters line:%s,%s: %s" % (ln, col, line)) - else: - idx = m.lastindex - t = Token(TYPES[idx - 1], m.group(idx), st, pos) - yield t - pos = m.end() - yield Token(EOF, None, st, pos) + return LEXER.lex(st) def tok2str(tok): if tok.type is STRING: @@ -127,46 +66,10 @@ def toks2str(toks): else: return None -class ParseError(Exception): - - def __init__(self, token, *expected): - line, ln, col = token.line_info() - exp = ", ".join(map(str, expected)) - if len(expected) > 1: - exp = "(%s)" % exp - if expected: - msg = "expecting %s, got %s line:%s,%s:%s" % (exp, token, ln, col, line) - else: - msg = "unexpected token %s line:%s,%s:%s" % (token, ln, col, line) - Exception.__init__(self, msg) - self.token = token - self.expected = expected - -class Parser: +class AddressParser(Parser): def __init__(self, tokens): - self.tokens = [t for t in tokens if t.type is not WSPACE] - self.idx = 0 - - def next(self): - return self.tokens[self.idx] - - def matches(self, *types): - return self.next().type in types - - def eat(self, *types): - if types and not self.matches(*types): - raise ParseError(self.next(), *types) - else: - t = self.next() - self.idx += 1 - return t - - def eat_until(self, *types): - result = [] - while not self.matches(*types): - result.append(self.eat()) - return result + Parser.__init__(self, [t for t in tokens if t.type is not WSPACE]) def parse(self): result = self.address() @@ -229,6 +132,6 @@ class Parser: raise ParseError(self.next(), NUMBER, STRING, ID, LBRACE) def parse(addr): - return Parser(lex(addr)).parse() + return AddressParser(lex(addr)).parse() __all__ = ["parse", "ParseError"] diff --git a/qpid/python/qpid/lexer.py b/qpid/python/qpid/lexer.py new file mode 100644 index 0000000000..87845560eb --- /dev/null +++ b/qpid/python/qpid/lexer.py @@ -0,0 +1,112 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +import re + +class Type: + + def __init__(self, name, pattern=None): + self.name = name + self.pattern = pattern + + def __repr__(self): + return self.name + +class Lexicon: + + def __init__(self): + self.types = [] + self._eof = None + + def define(self, name, pattern): + t = Type(name, pattern) + self.types.append(t) + return t + + def eof(self, name): + t = Type(name) + self._eof = t + return t + + def compile(self): + types = self.types[:] + joined = "|".join(["(%s)" % t.pattern for t in types]) + rexp = re.compile(joined) + return Lexer(types, self._eof, rexp) + +class Token: + + def __init__(self, type, value, input, position): + self.type = type + self.value = value + self.input = input + self.position = position + + def line_info(self): + return line_info(self.input, self.position) + + def __repr__(self): + if self.value is None: + return repr(self.type) + else: + return "%s(%r)" % (self.type, self.value) + + +class LexError(Exception): + pass + +def line_info(st, pos): + idx = 0 + lineno = 1 + column = 0 + line_pos = 0 + while idx < pos: + if st[idx] == "\n": + lineno += 1 + column = 0 + line_pos = idx + column += 1 + idx += 1 + + end = st.find("\n", line_pos) + if end < 0: + end = len(st) + line = st[line_pos:end] + + return line, lineno, column + +class Lexer: + + def __init__(self, types, eof, rexp): + self.types = types + self.eof = eof + self.rexp = rexp + + def lex(self, st): + pos = 0 + while pos < len(st): + m = self.rexp.match(st, pos) + if m is None: + line, ln, col = line_info(st, pos) + raise LexError("unrecognized characters line:%s,%s: %s" % (ln, col, line)) + else: + idx = m.lastindex + t = Token(self.types[idx - 1], m.group(idx), st, pos) + yield t + pos = m.end() + yield Token(self.eof, None, st, pos) diff --git a/qpid/python/qpid/mimetype.py b/qpid/python/qpid/mimetype.py new file mode 100644 index 0000000000..f512996b9f --- /dev/null +++ b/qpid/python/qpid/mimetype.py @@ -0,0 +1,106 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +import re, rfc822 +from lexer import Lexicon, LexError +from parser import Parser, ParseError + +l = Lexicon() + +LPAREN = l.define("LPAREN", r"\(") +RPAREN = l.define("LPAREN", r"\)") +SLASH = l.define("SLASH", r"/") +SEMI = l.define("SEMI", r";") +EQUAL = l.define("EQUAL", r"=") +TOKEN = l.define("TOKEN", r'[^()<>@,;:\\"/\[\]?= ]+') +STRING = l.define("STRING", r'"(?:[^\\"]|\\.)*"') +WSPACE = l.define("WSPACE", r"[ \n\r\t]+") +EOF = l.eof("EOF") + +LEXER = l.compile() + +def lex(st): + return LEXER.lex(st) + +class MimeTypeParser(Parser): + + def __init__(self, tokens): + Parser.__init__(self, [t for t in tokens if t.type is not WSPACE]) + + def parse(self): + result = self.mimetype() + self.eat(EOF) + return result + + def mimetype(self): + self.remove_comments() + self.reset() + + type = self.eat(TOKEN).value.lower() + self.eat(SLASH) + subtype = self.eat(TOKEN).value.lower() + + params = [] + while True: + if self.matches(SEMI): + params.append(self.parameter()) + else: + break + + return type, subtype, params + + def remove_comments(self): + while True: + self.eat_until(LPAREN, EOF) + if self.matches(LPAREN): + self.remove(*self.comment()) + else: + break + + def comment(self): + start = self.eat(LPAREN) + + while True: + self.eat_until(LPAREN, RPAREN) + if self.matches(LPAREN): + self.comment() + else: + break + + end = self.eat(RPAREN) + return start, end + + def parameter(self): + self.eat(SEMI) + name = self.eat(TOKEN).value + self.eat(EQUAL) + value = self.value() + return name, value + + def value(self): + if self.matches(TOKEN): + return self.eat().value + elif self.matches(STRING): + return rfc822.unquote(self.eat().value) + else: + raise ParseError(self.next(), TOKEN, STRING) + +def parse(addr): + return MimeTypeParser(lex(addr)).parse() + +__all__ = ["parse", "ParseError"] diff --git a/qpid/python/qpid/parser.py b/qpid/python/qpid/parser.py new file mode 100644 index 0000000000..233f0a8469 --- /dev/null +++ b/qpid/python/qpid/parser.py @@ -0,0 +1,68 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +class ParseError(Exception): + + def __init__(self, token, *expected): + line, ln, col = token.line_info() + exp = ", ".join(map(str, expected)) + if len(expected) > 1: + exp = "(%s)" % exp + if expected: + msg = "expecting %s, got %s line:%s,%s:%s" % (exp, token, ln, col, line) + else: + msg = "unexpected token %s line:%s,%s:%s" % (token, ln, col, line) + Exception.__init__(self, msg) + self.token = token + self.expected = expected + +class Parser: + + def __init__(self, tokens): + self.tokens = tokens + self.idx = 0 + + def next(self): + return self.tokens[self.idx] + + def matches(self, *types): + return self.next().type in types + + def eat(self, *types): + if types and not self.matches(*types): + raise ParseError(self.next(), *types) + else: + t = self.next() + self.idx += 1 + return t + + def eat_until(self, *types): + result = [] + while not self.matches(*types): + result.append(self.eat()) + return result + + def remove(self, start, end): + start_idx = self.tokens.index(start) + end_idx = self.tokens.index(end) + 1 + del self.tokens[start_idx:end_idx] + self.idx -= end_idx - start_idx + + def reset(self): + self.idx = 0 diff --git a/qpid/python/qpid/tests/__init__.py b/qpid/python/qpid/tests/__init__.py index 6d227dd656..2f0fcfdf67 100644 --- a/qpid/python/qpid/tests/__init__.py +++ b/qpid/python/qpid/tests/__init__.py @@ -25,4 +25,4 @@ class Test: def configure(self, config): self.config = config -import address, framing, messaging +import address, framing, mimetype, messaging diff --git a/qpid/python/qpid/tests/address.py b/qpid/python/qpid/tests/address.py index 065e2ca8de..f772425e42 100644 --- a/qpid/python/qpid/tests/address.py +++ b/qpid/python/qpid/tests/address.py @@ -19,12 +19,20 @@ from qpid.tests import Test from qpid.address import lex, parse, ParseError, EOF, ID, NUMBER, SYM, WSPACE +from parser import ParserBase -class AddressTests(Test): +class AddressTests(ParserBase, Test): - def lex(self, addr, *types): - toks = [t.type for t in lex(addr) if t.type not in (WSPACE, EOF)] - assert list(types) == toks, "expected %s, got %s" % (types, toks) + EXCLUDE = (WSPACE, EOF) + + def do_lex(self, st): + return lex(st) + + def do_parse(self, st): + return parse(st) + + def valid(self, addr, name=None, subject=None, options=None): + ParserBase.valid(self, addr, (name, subject, options)) def testDashInId1(self): self.lex("foo-bar", ID) @@ -47,18 +55,6 @@ class AddressTests(Test): def testNegativeNum(self): self.lex("-3", NUMBER) - def valid(self, addr, name=None, subject=None, options=None): - expected = (name, subject, options) - got = parse(addr) - assert expected == got, "expected %s, got %s" % (expected, got) - - def invalid(self, addr, error=None): - try: - p = parse(addr) - assert False, "invalid address parsed: %s" % p - except ParseError, e: - assert error == str(e), "expected %r, got %r" % (error, str(e)) - def testHash(self): self.valid("foo/bar.#", "foo", "bar.#") diff --git a/qpid/python/qpid/tests/mimetype.py b/qpid/python/qpid/tests/mimetype.py new file mode 100644 index 0000000000..22760316f0 --- /dev/null +++ b/qpid/python/qpid/tests/mimetype.py @@ -0,0 +1,56 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +from qpid.tests import Test +from qpid.mimetype import lex, parse, ParseError, EOF, WSPACE +from parser import ParserBase + +class MimeTypeTests(ParserBase, Test): + + EXCLUDE = (WSPACE, EOF) + + def do_lex(self, st): + return lex(st) + + def do_parse(self, st): + return parse(st) + + def valid(self, addr, type=None, subtype=None, parameters=None): + ParserBase.valid(self, addr, (type, subtype, parameters)) + + def testTypeOnly(self): + self.invalid("type", "expecting SLASH, got EOF line:1,4:type") + + def testTypeSubtype(self): + self.valid("type/subtype", "type", "subtype", []) + + def testTypeSubtypeParam(self): + self.valid("type/subtype ; name=value", + "type", "subtype", [("name", "value")]) + + def testTypeSubtypeParamComment(self): + self.valid("type/subtype ; name(This is a comment.)=value", + "type", "subtype", [("name", "value")]) + + def testMultipleParams(self): + self.valid("type/subtype ; name1=value1 ; name2=value2", + "type", "subtype", [("name1", "value1"), ("name2", "value2")]) + + def testCaseInsensitivity(self): + self.valid("Type/Subtype", "type", "subtype", []) diff --git a/qpid/python/qpid/tests/parser.py b/qpid/python/qpid/tests/parser.py new file mode 100644 index 0000000000..a4865cc9fe --- /dev/null +++ b/qpid/python/qpid/tests/parser.py @@ -0,0 +1,37 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +from qpid.parser import ParseError + +class ParserBase: + + def lex(self, addr, *types): + toks = [t.type for t in self.do_lex(addr) if t.type not in self.EXCLUDE] + assert list(types) == toks, "expected %s, got %s" % (types, toks) + + def valid(self, addr, expected): + got = self.do_parse(addr) + assert expected == got, "expected %s, got %s" % (expected, got) + + def invalid(self, addr, error=None): + try: + p = self.do_parse(addr) + assert False, "invalid address parsed: %s" % p + except ParseError, e: + assert error == str(e), "expected %r, got %r" % (error, str(e)) |