summaryrefslogtreecommitdiff
path: root/examples/elementtree/adjacency_list.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/elementtree/adjacency_list.py')
-rw-r--r--examples/elementtree/adjacency_list.py215
1 files changed, 215 insertions, 0 deletions
diff --git a/examples/elementtree/adjacency_list.py b/examples/elementtree/adjacency_list.py
new file mode 100644
index 000000000..204662f56
--- /dev/null
+++ b/examples/elementtree/adjacency_list.py
@@ -0,0 +1,215 @@
+"""illustrates an explicit way to persist an XML document expressed using ElementTree.
+
+This example explicitly marshals/unmarshals the ElementTree document into
+mapped entities which have their own tables. Compare to pickle.py which
+uses pickle to accomplish the same task. Note that the usage of both
+styles of persistence are identical, as is the structure of the main Document class.
+"""
+
+################################# PART I - Imports/Coniguration ###########################################
+from sqlalchemy import *
+from sqlalchemy.orm import *
+
+import sys, os, StringIO, re
+
+import logging
+logging.basicConfig()
+
+# uncomment to show SQL statements
+#logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
+
+# uncomment to show SQL statements and result sets
+#logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG)
+
+
+from elementtree import ElementTree
+from elementtree.ElementTree import Element, SubElement
+
+meta = MetaData()
+meta.engine = 'sqlite://'
+
+################################# PART II - Table Metadata ###########################################
+
+# stores a top level record of an XML document.
+documents = Table('documents', meta,
+ Column('document_id', Integer, primary_key=True),
+ Column('filename', String(30), unique=True),
+ Column('element_id', Integer, ForeignKey('elements.element_id'))
+)
+
+# stores XML nodes in an adjacency list model. This corresponds to
+# Element and SubElement objects.
+elements = Table('elements', meta,
+ Column('element_id', Integer, primary_key=True),
+ Column('parent_id', Integer, ForeignKey('elements.element_id')),
+ Column('tag', Unicode(30), nullable=False),
+ Column('text', Unicode),
+ Column('tail', Unicode)
+ )
+
+# stores attributes. This corresponds to the dictionary of attributes
+# stored by an Element or SubElement.
+attributes = Table('attributes', meta,
+ Column('element_id', Integer, ForeignKey('elements.element_id'), primary_key=True),
+ Column('name', Unicode(100), nullable=False, primary_key=True),
+ Column('value', Unicode(255)))
+
+meta.create_all()
+
+#################################### PART III - Model #############################################
+
+# our document class. contains a string name,
+# and the ElementTree root element.
+class Document(object):
+ def __init__(self, name, element):
+ self.filename = name
+ self.element = element
+
+ def __str__(self):
+ buf = StringIO.StringIO()
+ self.element.write(buf)
+ return buf.getvalue()
+
+#################################### PART IV - Persistence Mapping ###################################
+
+# Node class. a non-public class which will represent
+# the DB-persisted Element/SubElement object. We cannot create mappers for
+# ElementTree elements directly because they are at the very least not new-style
+# classes, and also may be backed by native implementations.
+# so here we construct an adapter.
+class _Node(object):
+ pass
+
+# Attribute class. also internal, this will represent the key/value attributes stored for
+# a particular Node.
+class _Attribute(object):
+ def __init__(self, name, value):
+ self.name = name
+ self.value = value
+
+# setup mappers. Document will eagerly load a list of _Node objects.
+mapper(Document, documents, properties={
+ '_root':relation(_Node, lazy=False, cascade="all")
+})
+
+mapper(_Node, elements, properties={
+ 'children':relation(_Node, cascade="all"),
+ 'attributes':relation(_Attribute, lazy=False, cascade="all, delete-orphan"), # eagerly load attributes
+})
+
+mapper(_Attribute, attributes)
+
+# define marshalling functions that convert from _Node/_Attribute to/from ElementTree objects.
+# this will set the ElementTree element as "document._element", and append the root _Node
+# object to the "_root" mapped collection.
+class ElementTreeMarshal(object):
+ def __get__(self, document, owner):
+ if document is None:
+ return self
+
+ if hasattr(document, '_element'):
+ return document._element
+
+ def traverse(node, parent=None):
+ if parent is not None:
+ elem = ElementTree.SubElement(parent, node.tag)
+ else:
+ elem = ElementTree.Element(node.tag)
+ elem.text = node.text
+ elem.tail = node.tail
+ for attr in node.attributes:
+ elem.attrib[attr.name] = attr.value
+ for child in node.children:
+ traverse(child, parent=elem)
+ return elem
+
+ document._element = ElementTree.ElementTree(traverse(document._root))
+ return document._element
+
+ def __set__(self, document, element):
+ def traverse(node):
+ n = _Node()
+ n.tag = node.tag
+ n.text = node.text
+ n.tail = node.tail
+ n.children = [traverse(n2) for n2 in node]
+ n.attributes = [_Attribute(k, v) for k, v in node.attrib.iteritems()]
+ return n
+
+ document._root = traverse(element.getroot())
+ document._element = element
+
+ def __delete__(self, document):
+ del document._element
+ document._root = []
+
+# override Document's "element" attribute with the marshaller.
+Document.element = ElementTreeMarshal()
+
+########################################### PART V - Basic Persistence Example ############################
+
+line = "\n--------------------------------------------------------"
+
+# save to DB
+session = create_session()
+
+# get ElementTree documents
+for file in ('test.xml', 'test2.xml', 'test3.xml'):
+ filename = os.path.join(os.path.dirname(sys.argv[0]), file)
+ doc = ElementTree.parse(filename)
+ session.save(Document(file, doc))
+
+print "\nSaving three documents...", line
+session.flush()
+print "Done."
+
+# clear session (to illustrate a full load), restore
+session.clear()
+
+print "\nFull text of document 'text.xml':", line
+document = session.query(Document).filter_by(filename="test.xml").first()
+
+print document
+
+############################################ PART VI - Searching for Paths #######################################
+
+# manually search for a document which contains "/somefile/header/field1:hi"
+print "\nManual search for /somefile/header/field1=='hi':", line
+n1 = elements.alias('n1')
+n2 = elements.alias('n2')
+n3 = elements.alias('n3')
+j = documents.join(n1).join(n2, n1.c.element_id==n2.c.parent_id).join(n3, n2.c.element_id==n3.c.parent_id)
+d = session.query(Document).select_from(j).filter(n1.c.tag=='somefile').filter(n2.c.tag=='header').filter(and_(n3.c.tag=='field1', n3.c.text=='hi')).one()
+print d
+
+# generalize the above approach into an extremely impoverished xpath function:
+def find_document(path, compareto):
+ j = documents
+ prev_elements = None
+ query = session.query(Document)
+ for i, match in enumerate(re.finditer(r'/([\w_]+)(?:\[@([\w_]+)(?:=(.*))?\])?', path)):
+ (token, attrname, attrvalue) = match.group(1, 2, 3)
+ a = elements.alias("n%d" % i)
+ query = query.filter(a.c.tag==token)
+ if attrname:
+ attr_alias = attributes.alias('a%d' % i)
+ if attrvalue:
+ query = query.filter(and_(a.c.element_id==attr_alias.c.element_id, attr_alias.c.name==attrname, attr_alias.c.value==attrvalue))
+ else:
+ query = query.filter(and_(a.c.element_id==attr_alias.c.element_id, attr_alias.c.name==attrname))
+ if prev_elements is not None:
+ j = j.join(a, prev_elements.c.element_id==a.c.parent_id)
+ else:
+ j = j.join(a)
+ prev_elements = a
+ return query.options(lazyload('_root')).select_from(j).filter(prev_elements.c.text==compareto).all()
+
+for path, compareto in (
+ ('/somefile/header/field1', 'hi'),
+ ('/somefile/field1', 'hi'),
+ ('/somefile/header/field2', 'there'),
+ ('/somefile/header/field2[@attr=foo]', 'there')
+ ):
+ print "\nDocuments containing '%s=%s':" % (path, compareto), line
+ print [d.filename for d in find_document(path, compareto)]
+