diff options
Diffstat (limited to 'qpid/python/mllib')
-rw-r--r-- | qpid/python/mllib/__init__.py | 60 | ||||
-rw-r--r-- | qpid/python/mllib/dom.py | 250 | ||||
-rw-r--r-- | qpid/python/mllib/parsers.py | 139 | ||||
-rw-r--r-- | qpid/python/mllib/transforms.py | 164 |
4 files changed, 613 insertions, 0 deletions
# ---------------------------------------------------------------------------
# File: qpid/python/mllib/__init__.py
# (new file, mode 100644, index 0000000000..44b78126fb)
# ---------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

"""
This module provides document parsing and transformation utilities for
both SGML and XML.
"""

import dom, transforms, parsers
import xml.sax, types
from cStringIO import StringIO


def transform(node, *args):
    """Apply each transform in ``args`` to ``node`` in order.

    Each transform is dispatched on the result of the previous one.  A
    transform may be passed as an instance or as a class; a class (old-style
    or new-style) is instantiated with no arguments before use.

    Returns the value produced by the last dispatch, or ``node`` itself when
    no transforms are given.
    """
    result = node
    for t in args:
        # Accept new-style classes as well as old-style ones.
        if isinstance(t, (types.ClassType, type)):
            t = t()
        result = result.dispatch(t)
    return result


def sgml_parse(source):
    """Parse SGML from ``source`` and return the resulting dom tree.

    ``source`` is either a string of markup or an iterable of lines (such as
    an open file).  Input is fed to the parser one line at a time so that
    every node created while handling a line is stamped with that line
    number.  The file name recorded on each node is ``"<string>"`` for string
    input, ``source.name`` when the object has one, and ``"<unknown>"``
    otherwise.
    """
    if isinstance(source, basestring):
        source = StringIO(source)
        fname = "<string>"
    else:
        # File-like objects without a ``name`` (e.g. StringIO) used to leave
        # fname unbound and fail on the first line.
        fname = getattr(source, "name", "<unknown>")
    p = parsers.SGMLParser()
    num = 1
    for line in source:
        p.feed(line)
        p.parser.line(fname, num, None)
        num += 1
    p.close()
    return p.parser.tree


def xml_parse(source):
    """Parse XML from ``source`` with ``xml.sax.parse`` and return the dom tree."""
    p = parsers.XMLParser()
    xml.sax.parse(source, p)
    return p.parser.tree


def sexp(node):
    """Return the s-expression style dump of ``node`` built by transforms.Sexp."""
    s = transforms.Sexp()
    node.dispatch(s)
    return s.out


# ---------------------------------------------------------------------------
# File: qpid/python/mllib/dom.py
# (new file, mode 100644, index 0000000000..9b1740055b)
# ---------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

"""
Simple DOM for both SGML and XML documents.
"""

from transforms import Text


class Container:
    """Mixin for objects that own an ordered list of children."""

    def __init__(self):
        self.children = []

    def add(self, child):
        """Append ``child`` and make this container its parent."""
        child.parent = self
        self.children.append(child)

    def extend(self, children):
        """Append every item of ``children``, re-parenting each one."""
        for child in children:
            child.parent = self
            self.children.append(child)


class Component:
    """Mixin for objects that can be placed inside a Container."""

    def __init__(self):
        self.parent = None

    def index(self):
        """Return this object's position in its parent's children, or 0 if unattached."""
        if self.parent is not None:
            return self.parent.children.index(self)
        else:
            return 0

    def _line(self, file, line, column):
        """Record the source position this object was parsed from."""
        self.file = file
        self.line = line
        self.column = column


class DispatchError(Exception):
    """Error carrying the scope and the handler involved in a failed dispatch."""

    def __init__(self, scope, f):
        # The message used to be assigned to a local and discarded, leaving
        # the exception without any text.
        Exception.__init__(self, "no such attribute")
        self.scope = scope
        self.f = f


class Dispatcher:
    """Mixin implementing name-based double dispatch.

    Classes using it define a string ``type`` and a ``base`` class attribute
    (``None`` at the root); together these form the chain of handler names
    that is searched on a transform object.
    """

    def is_type(self, type):
        """Return True if ``type`` names this object's type or any base type."""
        cls = self
        while cls is not None:
            if cls.type == type:
                return True
            cls = cls.base
        return False

    def dispatch(self, f):
        """Call the most specific handler ``f`` provides for this object.

        The ``type``/``base`` chain is walked from most to least specific and
        the first attribute of ``f`` whose name equals a type name is called
        with this object.  Raises AttributeError listing every name that was
        tried when ``f`` has none of them.
        """
        cls = self
        while cls is not None:
            if hasattr(f, cls.type):
                return getattr(f, cls.type)(self)
            cls = cls.base

        # Nothing matched: build "'a', 'b', or 'c'" from the type chain.
        cls = self
        attrs = ""
        while cls is not None:
            if attrs:
                sep = ", "
                if cls.base is None:
                    sep += "or "
            else:
                sep = ""
            attrs += "%s'%s'" % (sep, cls.type)
            cls = cls.base

        raise AttributeError("'%s' object has no attribute %s" %
                             (f.__class__.__name__, attrs))


class Node(Container, Component, Dispatcher):
    """Base class for dom objects that can contain other dom objects."""

    type = "node"
    base = None

    def __init__(self):
        Container.__init__(self)
        Component.__init__(self)
        self.query = Query([self])

    def __getitem__(self, name):
        """Return the first descendant matched by query ``name``, or None."""
        for nd in self.query[name]:
            return nd

    def text(self):
        """Return the result of dispatching a Text transform on this node."""
        return self.dispatch(Text())

    def tag(self, name, *attrs, **kwargs):
        """Create a Tag, add it as a child and return it."""
        t = Tag(name, *attrs, **kwargs)
        self.add(t)
        return t

    def data(self, s):
        """Create a Data leaf holding ``s``, add it as a child and return it."""
        d = Data(s)
        self.add(d)
        return d

    def entity(self, s):
        """Create an Entity leaf holding ``s``, add it as a child and return it."""
        e = Entity(s)
        self.add(e)
        return e


class Tree(Node):
    """Root container of a parsed document."""

    type = "tree"
    base = Node


class Tag(Node):
    """A named element carrying a list of ``(name, value)`` attribute pairs."""

    type = "tag"
    base = Node

    def __init__(self, _name, *attrs, **kwargs):
        Node.__init__(self)
        self.name = _name
        self.attrs = list(attrs)
        self.attrs.extend(kwargs.items())
        # Set by the parser when the tag turned out to have no end tag.
        self.singleton = False

    def get_attr(self, name):
        """Return the value of the first attribute called ``name``, or None."""
        for k, v in self.attrs:
            if name == k:
                return v

    def __getitem__(self, name):
        """Look up an attribute or a descendant.

        ``"@x"`` returns attribute ``x``.  Anything else is run as a query and
        the first match is returned; when the query matches nothing the
        attribute called ``name`` is returned instead (None if absent).
        """
        # Only strings can carry the "@" prefix; callable predicates go
        # straight to the query instead of being indexed.
        if isinstance(name, basestring) and name[:1] == "@":
            return self.get_attr(name[1:])
        else:
            for nd in self.query[name]:
                return nd
            return self.get_attr(name)

    def dispatch(self, f):
        """Prefer a ``do_<tagname>`` handler on ``f``, else dispatch by type."""
        try:
            method = getattr(f, "do_" + self.name)
        except AttributeError:
            return Dispatcher.dispatch(self, f)
        return method(self)


class Leaf(Component, Dispatcher):
    """Base class for dom objects that hold a string and have no children."""

    type = "leaf"
    base = None

    def __init__(self, data):
        assert isinstance(data, basestring)
        # Initialise ``parent`` so index() works before the leaf is attached.
        Component.__init__(self)
        self.data = data


class Data(Leaf):
    """Character data."""
    type = "data"
    base = Leaf


class Entity(Leaf):
    """An entity reference; ``data`` holds the entity name."""
    type = "entity"
    base = Leaf


class Character(Leaf):
    """A character reference; ``data`` holds the reference text."""
    type = "character"
    base = Leaf


class Comment(Leaf):
    """A comment; ``data`` holds the comment text."""
    type = "comment"
    base = Leaf

###################
## Query Classes ##
###########################################################################


class View:
    """Base class for lazily evaluated iterables wrapping a source iterable."""

    def __init__(self, source):
        self.source = source


class Filter(View):
    """Yield the items of the source that satisfy a predicate.

    The predicate is a callable, a string ``"#<type>"`` matching objects for
    which ``is_type(<type>)`` is true, or any other string, which matches
    Tags with that name.
    """

    def __init__(self, predicate, source):
        View.__init__(self, source)
        if callable(predicate):
            self.predicate = predicate
        elif predicate[:1] == "#":
            # Slice rather than index so an empty predicate is not an error.
            type_name = predicate[1:]
            self.predicate = lambda x: x.is_type(type_name)
        else:
            self.predicate = lambda x: isinstance(x, Tag) and x.name == predicate

    def __iter__(self):
        for nd in self.source:
            if self.predicate(nd):
                yield nd


class Flatten(View):
    """Yield the items of the source, replacing each Tree by its children."""

    def __iter__(self):
        sources = [iter(self.source)]
        while sources:
            try:
                nd = sources[-1].next()
            except StopIteration:
                sources.pop()
                continue
            if isinstance(nd, Tree):
                sources.append(iter(nd.children))
            else:
                yield nd


class Children(View):
    """Yield every child of every item of the source."""

    def __iter__(self):
        for nd in self.source:
            for child in nd.children:
                yield child


class Query(View):
    """An iterable of nodes that can be narrowed with ``query[predicate]``."""

    def __iter__(self):
        for nd in self.source:
            yield nd

    def __getitem__(self, predicate):
        """Return a Query over the matches of ``predicate``.

        A string predicate is split on ``"/"`` into a path whose steps are
        applied one after another; each step filters the flattened children
        of the previous step's results.  A non-string predicate is used as a
        single step.
        """
        if isinstance(predicate, basestring):
            path = predicate.split("/")
        else:
            path = [predicate]

        query = self.source
        for p in path:
            query = Query(Filter(p, Flatten(Children(query))))

        return query


# ---------------------------------------------------------------------------
# File: qpid/python/mllib/parsers.py
# (new file, mode 100644, index 0000000000..3e7cc10dc2)
# ---------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

"""
Parsers for SGML and XML to dom.
"""

import sgmllib, xml.sax.handler
from dom import *


class Parser:
    """Builds a dom Tree from start/end/data events.

    ``tree`` is the root, ``node`` the element currently open, and ``nodes``
    the objects added since line() was last called.
    """

    def __init__(self):
        self.tree = Tree()
        self.node = self.tree
        self.nodes = []

    def line(self, id, lineno, colno):
        """Stamp every object added since the previous call with this position."""
        while self.nodes:
            n = self.nodes.pop()
            n._line(id, lineno, colno)

    def add(self, node):
        """Attach ``node`` to the open element and queue it for line()."""
        self.node.add(node)
        self.nodes.append(node)

    def start(self, name, attrs):
        """Open a new Tag with the given ``(name, value)`` attribute pairs."""
        tag = Tag(name, *attrs)
        self.add(tag)
        self.node = tag

    def end(self, name):
        """Close the nearest open tag called ``name``.

        Open tags nested inside it are unwound by balance().  An end tag that
        matches no open element is ignored: unwinding for it used to walk past
        the root and leave ``node`` set to None, so the next event failed.
        """
        node = self.node
        while node is not self.tree and node.name != name:
            node = node.parent
        if node is self.tree:
            return
        self.balance(name)
        self.node = self.node.parent

    def data(self, data):
        """Add character data, merging it into a directly preceding Data leaf."""
        children = self.node.children
        if children and isinstance(children[-1], Data):
            children[-1].data += data
        else:
            self.add(Data(data))

    def comment(self, comment):
        """Add a Comment leaf."""
        self.add(Comment(comment))

    def entity(self, ref):
        """Add an Entity leaf for entity reference ``ref``."""
        self.add(Entity(ref))

    def character(self, ref):
        """Add a Character leaf for character reference ``ref``."""
        self.add(Character(ref))

    def balance(self, name = None):
        """Unwind open tags until the open element is called ``name`` or is the root.

        Each unwound tag is treated as one that never had an end tag: its
        children are moved up to its parent and it is marked ``singleton``.
        """
        while self.node is not self.tree and name != self.node.name:
            self.node.parent.extend(self.node.children)
            del self.node.children[:]
            self.node.singleton = True
            self.node = self.node.parent


class SGMLParser(sgmllib.SGMLParser):
    """sgmllib parser that forwards its events to a Parser.

    ``entitydefs`` replaces sgmllib's entity table; it defaults to an empty
    dict.
    """

    def __init__(self, entitydefs = None):
        sgmllib.SGMLParser.__init__(self)
        if entitydefs is None:
            self.entitydefs = {}
        else:
            self.entitydefs = entitydefs
        self.parser = Parser()

    def unknown_starttag(self, name, attrs):
        self.parser.start(name, attrs)

    def handle_data(self, data):
        self.parser.data(data)

    def handle_comment(self, comment):
        self.parser.comment(comment)

    def unknown_entityref(self, ref):
        self.parser.entity(ref)

    def unknown_charref(self, ref):
        self.parser.character(ref)

    def unknown_endtag(self, name):
        self.parser.end(name)

    def close(self):
        """Finish parsing and unwind any tags that are still open."""
        sgmllib.SGMLParser.close(self)
        self.parser.balance()
        assert self.parser.node == self.parser.tree


class XMLParser(xml.sax.handler.ContentHandler):
    """SAX content handler that forwards its events to a Parser."""

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.parser = Parser()
        self.locator = None

    def line(self):
        """Stamp newly added objects with the locator's position, if one was set."""
        if self.locator is not None:
            self.parser.line(self.locator.getSystemId(),
                             self.locator.getLineNumber(),
                             self.locator.getColumnNumber())

    def setDocumentLocator(self, locator):
        self.locator = locator

    def startElement(self, name, attrs):
        self.parser.start(name, attrs.items())
        self.line()

    def endElement(self, name):
        self.parser.end(name)
        self.line()

    def characters(self, content):
        self.parser.data(content)
        self.line()

    def skippedEntity(self, name):
        self.parser.entity(name)
        self.line()


# ---------------------------------------------------------------------------
# File: qpid/python/mllib/transforms.py
# (new file, mode 100644, index 0000000000..bb79dcf192)
# ---------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

"""
Useful transforms for dom objects.
"""

from cStringIO import StringIO
# dom imports this module on its first statement, so dom is still empty when
# this line runs.  A star-import here would therefore bind no names at all;
# import the module and resolve Tag/Tree at call time instead.
import dom


class Visitor:
    """Transform that walks a tree and does nothing at the leaves."""

    def descend(self, node):
        for child in node.children:
            child.dispatch(self)

    def node(self, node):
        self.descend(node)

    def leaf(self, leaf):
        pass


class Identity:
    """Transform that builds a copy of the tree it is dispatched on."""

    def descend(self, node):
        """Return the list of transformed children of ``node``."""
        result = []
        for child in node.children:
            result.append(child.dispatch(self))
        return result

    def default(self, tag):
        """Return a copy of ``tag`` with transformed children."""
        result = dom.Tag(tag.name, *tag.attrs)
        # Carry the flag over so unclosed tags stay unclosed on output.
        result.singleton = tag.singleton
        result.extend(self.descend(tag))
        return result

    def tree(self, tree):
        result = dom.Tree()
        result.extend(self.descend(tree))
        return result

    def tag(self, tag):
        return self.default(tag)

    def leaf(self, leaf):
        return leaf.__class__(leaf.data)


class Sexp(Identity):
    """Transform that renders a tree as an indented s-expression in ``out``."""

    def __init__(self):
        self.stack = []
        self.level = 0
        self.out = ""

    def open(self, s):
        """Emit ``(`` plus ``s`` and indent following lines past it."""
        self.out += "(%s" % s
        self.level += len(s) + 1
        self.stack.append(s)

    def line(self, s = ""):
        """Start a new line at the current indent, followed by ``s``."""
        self.out = self.out.rstrip()
        self.out += "\n" + " "*self.level + s

    def close(self):
        """Emit ``)`` and restore the indent of the matching open()."""
        s = self.stack.pop()
        self.level -= len(s) + 1
        self.out = self.out.rstrip()
        self.out += ")"

    def tree(self, tree):
        self.open("+ ")
        for child in tree.children:
            self.line()
            child.dispatch(self)
        self.close()

    def tag(self, tag):
        self.open("Node(%s) " % tag.name)
        for child in tag.children:
            self.line()
            child.dispatch(self)
        self.close()

    def leaf(self, leaf):
        self.line("%s(%s)" % (leaf.__class__.__name__, leaf.data))


class Output:
    """Transform that serialises a tree back to markup text."""

    def descend(self, node):
        """Return the concatenated output of the children of ``node``."""
        out = StringIO()
        for child in node.children:
            out.write(child.dispatch(self))
        return out.getvalue()

    def default(self, tag):
        """Return the markup for ``tag``; singleton tags get no end tag."""
        out = StringIO()
        out.write("<%s" % tag.name)
        # NOTE(review): attribute values are written without any escaping --
        # confirm callers never pass values containing a double quote.
        for k, v in tag.attrs:
            out.write(' %s="%s"' % (k, v))
        out.write(">")
        out.write(self.descend(tag))
        if not tag.singleton:
            out.write("</%s>" % tag.name)
        return out.getvalue()

    def tree(self, tree):
        return self.descend(tree)

    def tag(self, tag):
        return self.default(tag)

    def data(self, leaf):
        return leaf.data

    def entity(self, leaf):
        return "&%s;" % leaf.data

    def character(self, leaf):
        # NOTE(review): still a placeholder.  Text.character emits
        # "&#%s;" % leaf.data; presumably the same form is wanted here --
        # confirm before implementing.
        raise Exception("TODO")

    def comment(self, leaf):
        return "<!-- %s -->" % leaf.data


class Empty(Output):
    """Output variant that drops all markup and every leaf."""

    def tag(self, tag):
        return self.descend(tag)

    def data(self, leaf):
        return ""

    def entity(self, leaf):
        return ""

    def character(self, leaf):
        return ""

    def comment(self, leaf):
        return ""


class Text(Empty):
    """Output variant that keeps data and references but drops tags and comments."""

    def data(self, leaf):
        return leaf.data

    def entity(self, leaf):
        return "&%s;" % leaf.data

    def character(self, leaf):
        # XXX: is this right?
        return "&#%s;" % leaf.data