summary | refs | log | tree | commit | diff
path: root/qpid/python/mllib
diff options
context:
space:
mode:
Diffstat (limited to 'qpid/python/mllib')
-rw-r--r-- qpid/python/mllib/__init__.py 60
-rw-r--r-- qpid/python/mllib/dom.py 250
-rw-r--r-- qpid/python/mllib/parsers.py 139
-rw-r--r-- qpid/python/mllib/transforms.py 164
4 files changed, 613 insertions, 0 deletions
diff --git a/qpid/python/mllib/__init__.py b/qpid/python/mllib/__init__.py
new file mode 100644
index 0000000000..44b78126fb
--- /dev/null
+++ b/qpid/python/mllib/__init__.py
@@ -0,0 +1,60 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+"""
+This module provides document parsing and transformation utilities for
+both SGML and XML.
+"""
+
+import dom, transforms, parsers
+import xml.sax, types
+from cStringIO import StringIO
+
+def transform(node, *args):
+  """
+  Apply a chain of transforms to node, feeding each result into the next.
+
+  Each argument is either a transform instance or a transform class; a
+  class (old-style or new-style) is instantiated with no arguments before
+  it is used. Returns the result of the last dispatch, or node itself when
+  no transforms are given.
+  """
+  result = node
+  for t in args:
+    # types.ClassType matches only old-style classes; accept type as well so
+    # that new-style transform classes are instantiated too.
+    if isinstance(t, (types.ClassType, type)):
+      t = t()
+    result = result.dispatch(t)
+  return result
+
+def sgml_parse(source):
+  """
+  Parse SGML from source and return the resulting dom tree.
+
+  source is either a string holding the document or an iterable of lines
+  (for example an open file). Every node created while a line is fed is
+  stamped with that line's number and the source name.
+  """
+  if isinstance(source, basestring):
+    source = StringIO(source)
+    fname = "<string>"
+  else:
+    # Not every file-like object carries a name (e.g. a StringIO), so fall
+    # back to a placeholder rather than leaving fname unbound.
+    fname = getattr(source, "name", "<unknown>")
+  p = parsers.SGMLParser()
+  num = 1
+  for line in source:
+    p.feed(line)
+    # Column information is not available when feeding line by line.
+    p.parser.line(fname, num, None)
+    num += 1
+  p.close()
+  return p.parser.tree
+
+def xml_parse(source):
+  """
+  Parse XML from source and return the resulting dom tree.
+
+  source is passed unchanged to xml.sax.parse.
+  """
+  handler = parsers.XMLParser()
+  xml.sax.parse(source, handler)
+  tree = handler.parser.tree
+  return tree
+
+def sexp(node):
+  """
+  Return the s-expression rendering of node as a string.
+  """
+  printer = transforms.Sexp()
+  node.dispatch(printer)
+  rendered = printer.out
+  return rendered
diff --git a/qpid/python/mllib/dom.py b/qpid/python/mllib/dom.py
new file mode 100644
index 0000000000..9b1740055b
--- /dev/null
+++ b/qpid/python/mllib/dom.py
@@ -0,0 +1,250 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+"""
+Simple DOM for both SGML and XML documents.
+"""
+
+from transforms import Text
+
+class Container:
+  """
+  Mixin for objects that own an ordered list of children.
+  """
+
+  def __init__(self):
+    self.children = []
+
+  def add(self, child):
+    """Make this container the parent of child and append it."""
+    child.parent = self
+    self.children.append(child)
+
+  def extend(self, children):
+    """Adopt each item of children, in order, exactly as add() does."""
+    for item in children:
+      Container.add(self, item)
+
+class Component:
+  """
+  Mixin for objects that can be placed inside a Container.
+
+  Tracks the owning parent and, once _line() has been called, the source
+  position the component came from.
+  """
+
+  def __init__(self):
+    self.parent = None
+
+  def index(self):
+    """
+    Return this component's position among its parent's children, or 0
+    when it has no parent.
+    """
+    # Subclasses that never call Component.__init__ have no parent attribute
+    # until they are added to a container; treat that the same as detached.
+    parent = getattr(self, "parent", None)
+    if parent is None:
+      return 0
+    return parent.children.index(self)
+
+  def _line(self, file, line, column):
+    """Record the source file, line and column of this component."""
+    self.file = file
+    self.line = line
+    self.column = column
+
+class DispatchError(Exception):
+  """
+  Error describing a dispatch for which no handler attribute exists.
+
+  scope and f are kept on the instance so a handler can inspect them.
+  """
+
+  def __init__(self, scope, f):
+    # Pass the message to Exception so str(error) is meaningful.
+    msg = "no such attribute"
+    Exception.__init__(self, msg)
+    self.scope = scope
+    self.f = f
+
+class Dispatcher:
+  """
+  Mixin that routes an object to a handler method named after its type.
+
+  Users of the mixin define a string attribute type and an attribute base
+  that names the next class to fall back to (None at the end of the
+  chain).
+  """
+
+  def is_type(self, type):
+    """Return True if type names this object's type or a fallback type."""
+    level = self
+    while level is not None:
+      if level.type == type:
+        return True
+      level = level.base
+    return False
+
+  def dispatch(self, f):
+    """
+    Call the first method of f named after this object's type chain,
+    passing self, and return its result.
+
+    Raises AttributeError listing every name tried when f has none of
+    them.
+    """
+    tried = []
+    level = self
+    while level is not None:
+      if hasattr(f, level.type):
+        return getattr(f, level.type)(self)
+      tried.append("'%s'" % level.type)
+      level = level.base
+
+    # Nothing matched: render the names as "'a', 'b', or 'c'".
+    if len(tried) > 1:
+      tried[-1] = "or " + tried[-1]
+    raise AttributeError("'%s' object has no attribute %s" %
+                         (f.__class__.__name__, ", ".join(tried)))
+
+class Node(Container, Component, Dispatcher):
+  """
+  A dom node that can hold children and be queried by path.
+  """
+
+  type = "node"
+  base = None
+
+  def __init__(self):
+    Container.__init__(self)
+    Component.__init__(self)
+    # Queries start from a one-element source holding this node.
+    self.query = Query([self])
+
+  def __getitem__(self, name):
+    """Return the first node matched by the query name, or None."""
+    for match in self.query[name]:
+      return match
+    return None
+
+  def text(self):
+    """Return the result of dispatching this node to a Text transform."""
+    extractor = Text()
+    return self.dispatch(extractor)
+
+  def tag(self, name, *attrs, **kwargs):
+    """Create a Tag from the arguments, append it and return it."""
+    child = Tag(name, *attrs, **kwargs)
+    self.add(child)
+    return child
+
+  def data(self, s):
+    """Append a Data leaf holding s and return it."""
+    child = Data(s)
+    self.add(child)
+    return child
+
+  def entity(self, s):
+    """Append an Entity leaf holding s and return it."""
+    child = Entity(s)
+    self.add(child)
+    return child
+
+class Tree(Node):
+  """
+  Node subtype that dispatches as "tree" and falls back to Node.
+  """
+
+  type = "tree"
+  base = Node
+
+class Tag(Node):
+  """
+  A named element with an ordered list of (name, value) attribute pairs.
+  """
+
+  type = "tag"
+  base = Node
+
+  def __init__(self, _name, *attrs, **kwargs):
+    Node.__init__(self)
+    self.name = _name
+    # Positional pairs first, then keyword attributes.
+    self.attrs = list(attrs)
+    self.attrs.extend(kwargs.items())
+    self.singleton = False
+
+  def get_attr(self, name):
+    """Return the value of the first attribute called name, or None."""
+    for key, value in self.attrs:
+      if name == key:
+        return value
+    return None
+
+  def __getitem__(self, name):
+    """
+    Look up "@attr" as an attribute; otherwise return the first node
+    matched by the query name, falling back to an attribute of that name.
+    """
+    if name and name[0] == "@":
+      return self.get_attr(name[1:])
+    for match in self.query[name]:
+      return match
+    return self.get_attr(name)
+
+  def dispatch(self, f):
+    """
+    Call f.do_<tag name>(self) when f defines it; otherwise dispatch by
+    type as Dispatcher does.
+    """
+    try:
+      handler = getattr(f, "do_" + self.name)
+    except AttributeError:
+      return Dispatcher.dispatch(self, f)
+    else:
+      return handler(self)
+
+class Leaf(Component, Dispatcher):
+  """
+  A childless dom component carrying a string payload in data.
+  """
+
+  type = "leaf"
+  base = None
+
+  def __init__(self, data):
+    assert isinstance(data, basestring)
+    # Initialise parent so a leaf not yet added to a container still
+    # supports index() and other parent lookups.
+    Component.__init__(self)
+    self.data = data
+
+class Data(Leaf):
+  """Leaf subtype dispatched as "data", falling back to Leaf."""
+  type = "data"
+  base = Leaf
+
+class Entity(Leaf):
+  """Leaf subtype dispatched as "entity", falling back to Leaf."""
+  type = "entity"
+  base = Leaf
+
+class Character(Leaf):
+  """Leaf subtype dispatched as "character", falling back to Leaf."""
+  type = "character"
+  base = Leaf
+
+class Comment(Leaf):
+  """Leaf subtype dispatched as "comment", falling back to Leaf."""
+  type = "comment"
+  base = Leaf
+
+###################
+## Query Classes ##
+###########################################################################
+
+class View:
+  """
+  Base class for query views; stores the source that subclasses iterate.
+  """
+
+  def __init__(self, source):
+    self.source = source
+
+class Filter(View):
+  """
+  View yielding only the nodes of source accepted by a predicate.
+
+  predicate may be a callable taking a node, a string "#<type>" matching
+  nodes for which is_type(<type>) is true, or any other string, which
+  matches Tag nodes with that name.
+  """
+
+  def __init__(self, predicate, source):
+    View.__init__(self, source)
+    if callable(predicate):
+      self.predicate = predicate
+    elif predicate[:1] == "#":
+      # Slicing instead of indexing keeps an empty string (e.g. from a path
+      # such as "a//b") from raising IndexError; it then matches no tag.
+      type_name = predicate[1:]
+      self.predicate = lambda x: x.is_type(type_name)
+    else:
+      self.predicate = lambda x: isinstance(x, Tag) and x.name == predicate
+
+  def __iter__(self):
+    for nd in self.source:
+      if self.predicate(nd):
+        yield nd
+
+class Flatten(View):
+  """
+  View that replaces every Tree found in source by that tree's children,
+  recursively, and yields all other nodes unchanged.
+  """
+
+  def __iter__(self):
+    # Stack of active iterators; the innermost tree is on top.
+    pending = [iter(self.source)]
+    while pending:
+      try:
+        item = pending[-1].next()
+      except StopIteration:
+        pending.pop()
+        continue
+      if isinstance(item, Tree):
+        pending.append(iter(item.children))
+      else:
+        yield item
+
+class Children(View):
+  """
+  View yielding the children of every node in source, in order.
+  """
+
+  def __iter__(self):
+    for parent in self.source:
+      for kid in parent.children:
+        yield kid
+
+class Query(View):
+  """
+  Iterable set of nodes that can be narrowed with query[path].
+  """
+
+  def __iter__(self):
+    for item in self.source:
+      yield item
+
+  def __getitem__(self, predicate):
+    """
+    Return a new Query selecting descendants step by step.
+
+    A string predicate is split on "/" into steps; anything else is a
+    single step. Each step filters the flattened children of the nodes
+    selected by the previous step.
+    """
+    if isinstance(predicate, basestring):
+      steps = predicate.split("/")
+    else:
+      steps = [predicate]
+
+    selected = self.source
+    for step in steps:
+      selected = Query(Filter(step, Flatten(Children(selected))))
+    return selected
diff --git a/qpid/python/mllib/parsers.py b/qpid/python/mllib/parsers.py
new file mode 100644
index 0000000000..3e7cc10dc2
--- /dev/null
+++ b/qpid/python/mllib/parsers.py
@@ -0,0 +1,139 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+"""
+Parsers for SGML and XML to dom.
+"""
+
+import sgmllib, xml.sax.handler
+from dom import *
+
+class Parser:
+  """
+  Builds a dom Tree from start/end/data style parse events.
+
+  tree is the root, node is the element currently being filled, and nodes
+  holds the nodes added since the last call to line(), which still need a
+  source position.
+  """
+
+  def __init__(self):
+    self.tree = Tree()
+    self.node = self.tree
+    self.nodes = []
+
+  def line(self, id, lineno, colno):
+    """Stamp every pending node with the given position and forget it."""
+    while self.nodes:
+      n = self.nodes.pop()
+      n._line(id, lineno, colno)
+
+  def add(self, node):
+    """Append node to the current element and queue it for line()."""
+    self.node.add(node)
+    self.nodes.append(node)
+
+  def start(self, name, attrs):
+    """Open a new Tag with the given attribute pairs and descend into it."""
+    tag = Tag(name, *attrs)
+    self.add(tag)
+    self.node = tag
+
+  def end(self, name):
+    """Close the innermost open element called name."""
+    self.balance(name)
+    # An end tag with no matching start tag leaves balance() at the root;
+    # stepping to the root's parent would set node to None and make every
+    # later event fail, so stay at the root in that case.
+    if self.node is not self.tree:
+      self.node = self.node.parent
+
+  def data(self, data):
+    """Add character data, merging it into a directly preceding Data leaf."""
+    children = self.node.children
+    if children and isinstance(children[-1], Data):
+      children[-1].data += data
+    else:
+      self.add(Data(data))
+
+  def comment(self, comment):
+    self.add(Comment(comment))
+
+  def entity(self, ref):
+    self.add(Entity(ref))
+
+  def character(self, ref):
+    self.add(Character(ref))
+
+  def balance(self, name = None):
+    """
+    Unwind open elements until one called name is current or the root is
+    reached.
+
+    Each element unwound on the way is marked as a singleton and its
+    children are moved up to its parent.
+    """
+    while self.node is not self.tree and name != self.node.name:
+      self.node.parent.extend(self.node.children)
+      del self.node.children[:]
+      self.node.singleton = True
+      self.node = self.node.parent
+
+
+class SGMLParser(sgmllib.SGMLParser):
+  """
+  sgmllib parser that forwards its callbacks to a dom-building Parser,
+  available as self.parser.
+  """
+
+  def __init__(self, entitydefs = None):
+    sgmllib.SGMLParser.__init__(self)
+    # With no table supplied, use an empty one.
+    if entitydefs is None:
+      entitydefs = {}
+    self.entitydefs = entitydefs
+    self.parser = Parser()
+
+  def unknown_starttag(self, name, attrs):
+    self.parser.start(name, attrs)
+
+  def unknown_endtag(self, name):
+    self.parser.end(name)
+
+  def handle_data(self, data):
+    self.parser.data(data)
+
+  def handle_comment(self, comment):
+    self.parser.comment(comment)
+
+  def unknown_entityref(self, ref):
+    self.parser.entity(ref)
+
+  def unknown_charref(self, ref):
+    self.parser.character(ref)
+
+  def close(self):
+    """Finish parsing and unwind any elements that are still open."""
+    sgmllib.SGMLParser.close(self)
+    builder = self.parser
+    builder.balance()
+    assert builder.node == builder.tree
+
+class XMLParser(xml.sax.handler.ContentHandler):
+  """
+  SAX content handler that forwards events to a dom-building Parser,
+  available as self.parser, stamping new nodes with the locator position.
+  """
+
+  def __init__(self):
+    self.parser = Parser()
+    self.locator = None
+
+  def setDocumentLocator(self, locator):
+    self.locator = locator
+
+  def line(self):
+    """Stamp pending nodes with the current position, if a locator is set."""
+    where = self.locator
+    if where is None:
+      return
+    self.parser.line(where.getSystemId(),
+                     where.getLineNumber(),
+                     where.getColumnNumber())
+
+  def startElement(self, name, attrs):
+    self.parser.start(name, attrs.items())
+    self.line()
+
+  def endElement(self, name):
+    self.parser.end(name)
+    self.line()
+
+  def characters(self, content):
+    self.parser.data(content)
+    self.line()
+
+  def skippedEntity(self, name):
+    self.parser.entity(name)
+    self.line()
+
diff --git a/qpid/python/mllib/transforms.py b/qpid/python/mllib/transforms.py
new file mode 100644
index 0000000000..bb79dcf192
--- /dev/null
+++ b/qpid/python/mllib/transforms.py
@@ -0,0 +1,164 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+"""
+Useful transforms for dom objects.
+"""
+
+from cStringIO import StringIO
+from dom import *
+
+class Visitor:
+  """
+  Base transform that walks a dom without producing a result.
+  """
+
+  def descend(self, node):
+    """Dispatch every child of node to this visitor."""
+    for kid in node.children:
+      kid.dispatch(self)
+
+  def node(self, node):
+    """Default handling for nodes: visit the children."""
+    self.descend(node)
+
+  def leaf(self, leaf):
+    """Default handling for leaves: do nothing."""
+    return None
+
+class Identity:
+  """
+  Transform that produces a copy of the dom it is applied to.
+  """
+
+  def descend(self, node):
+    """Return the list of transformed children of node."""
+    return [child.dispatch(self) for child in node.children]
+
+  def default(self, tag):
+    """Return a copy of tag holding transformed copies of its children."""
+    result = Tag(tag.name, *tag.attrs)
+    # Keep the singleton flag; without it a copied singleton would be
+    # written out with a closing tag.
+    result.singleton = tag.singleton
+    result.extend(self.descend(tag))
+    return result
+
+  def tree(self, tree):
+    result = Tree()
+    result.extend(self.descend(tree))
+    return result
+
+  def tag(self, tag):
+    return self.default(tag)
+
+  def leaf(self, leaf):
+    """Return a new leaf of the same class with the same data."""
+    return leaf.__class__(leaf.data)
+
+class Sexp(Identity):
+  """
+  Transform that renders a dom as an indented s-expression, accumulated
+  in self.out.
+  """
+
+  def __init__(self):
+    self.stack = []
+    self.level = 0
+    self.out = ""
+
+  def open(self, s):
+    """Start a group headed by s and indent what follows past it."""
+    self.out += "(%s" % s
+    self.level += len(s) + 1
+    self.stack.append(s)
+
+  def line(self, s = ""):
+    """Start a new line at the current indentation, followed by s."""
+    self.out = self.out.rstrip()
+    self.out += "\n" + " "*self.level + s
+
+  def close(self):
+    """End the innermost group and undo its indentation."""
+    head = self.stack.pop()
+    self.level -= len(head) + 1
+    self.out = self.out.rstrip()
+    self.out += ")"
+
+  def _group(self, head, node):
+    # Shared rendering for trees and tags: one line per child.
+    self.open(head)
+    for kid in node.children:
+      self.line()
+      kid.dispatch(self)
+    self.close()
+
+  def tree(self, tree):
+    self._group("+ ", tree)
+
+  def tag(self, tag):
+    self._group("Node(%s) " % tag.name, tag)
+
+  def leaf(self, leaf):
+    self.line("%s(%s)" % (leaf.__class__.__name__, leaf.data))
+
+class Output:
+  """
+  Transform that serialises a dom back to markup and returns it as a
+  string.
+  """
+
+  def descend(self, node):
+    """Return the concatenated output of every child of node."""
+    out = StringIO()
+    for child in node.children:
+      out.write(child.dispatch(self))
+    return out.getvalue()
+
+  def default(self, tag):
+    """
+    Return the start tag with attributes, the children's output and,
+    unless the tag is a singleton, the end tag.
+    """
+    out = StringIO()
+    out.write("<%s" % tag.name)
+    # NOTE(review): attribute values are written without escaping, so a
+    # value containing a double quote produces malformed markup.
+    for k, v in tag.attrs:
+      out.write(' %s="%s"' % (k, v))
+    out.write(">")
+    out.write(self.descend(tag))
+    if not tag.singleton:
+      out.write("</%s>" % tag.name)
+    return out.getvalue()
+
+  def tree(self, tree):
+    return self.descend(tree)
+
+  def tag(self, tag):
+    return self.default(tag)
+
+  def data(self, leaf):
+    return leaf.data
+
+  def entity(self, leaf):
+    return "&%s;" % leaf.data
+
+  def character(self, leaf):
+    # Write the reference back in "&#...;" form instead of raising, so a
+    # tree containing character references can be serialised.
+    return "&#%s;" % leaf.data
+
+  def comment(self, leaf):
+    return "<!-- %s -->" % leaf.data
+
+class Empty(Output):
+  """
+  Output variant that emits no markup: a tag contributes only what its
+  children produce and every kind of leaf yields an empty string.
+  """
+
+  def tag(self, tag):
+    # Skip the tag's own markup and return only the children's output.
+    return self.descend(tag)
+
+  def data(self, leaf):
+    return ""
+
+  def entity(self, leaf):
+    return ""
+
+  def character(self, leaf):
+    return ""
+
+  def comment(self, leaf):
+    return ""
+
+class Text(Empty):
+  """
+  Empty variant that keeps textual content: data is returned as is, and
+  entity and character leaves are written back as references.
+  """
+
+  def data(self, leaf):
+    return leaf.data
+
+  def entity(self, leaf):
+    return "&%s;" % leaf.data
+
+  def character(self, leaf):
+    # XXX: is this right?
+    # NOTE(review): presumably leaf.data is the text between "&#" and ";"
+    # of the original reference -- confirm against the parser callbacks.
+    return "&#%s;" % leaf.data