- flush() refactor merged from uow_nontree branch r3871-r3885

- topological.py cleaned up, presents three public facing functions which return list/tuple based structures, without exposing any internals. only the third function returns the "hierarchical" structure. when results include "cycles" or "child" items, 2- or 3- tuples are used to represent results. - unitofwork uses InstanceState almost exclusively now. new and deleted lists are now dicts which ref the actual object to provide a strong ref for the duration that they're in those lists. IdentitySet is only used for the public facing versions of "new" and "deleted". - unitofwork topological sort no longer uses the "hierarchical" version of the sort for the base sort, only for the "per-object" secondary sort where it still helps to group non-dependent operations together and provides expected insert order. the default sort deals with UOWTasks in a straight list and is greatly simplified. Tests all pass but need to see if svilen's stuff still works, one block of code in _sort_cyclical_dependencies() seems to not be needed anywhere but i definitely put it there for a reason at some point; if not hopefully we can derive more test coverage from that. - the UOWEventHandler is only applied to object-storing attributes, not scalar (i.e. column-based) ones. cuts out a ton of overhead when setting non-object based attributes. - InstanceState also used throughout the flush process, i.e. dependency.py, mapper.save_obj()/delete_obj(), sync.execute() all expect InstanceState objects in most cases now. - mapper/property cascade_iterator() takes InstanceState as its argument, but still returns lists of object instances so that they are not dereferenced. - a few tricks needed when dealing with InstanceState, i.e. when loading a list of items that are possibly fresh from the DB, you *have* to get the actual objects into a strong-referencing datastructure else they fall out of scope immediately. dependency.py caches lists of dependent objects which it loads now (i.e. history collections). - AttributeHistory is gone, replaced by a function that returns a 3-tuple of added, unchanged, deleted. these collections still reference the object instances directly for the strong-referencing reasons mentiontioned, but it uses less IdentitySet logic to generate.
author: Mike Bayer <mike_mp@zzzcomputing.com> 2007-12-08 18:58:03 +0000
committer: Mike Bayer <mike_mp@zzzcomputing.com> 2007-12-08 18:58:03 +0000
commit: 8693d4b2876e9239cf48bbc42a7ffaa11c01b506 (patch)
tree: 6438a7f4a319c4e1f25376fb004de6fb73bfd7c0 /lib/sqlalchemy/topological.py
parent: 78bb82a44b7f382c6cfeab0cfc7f932e68c4de86 (diff)
download: sqlalchemy-8693d4b2876e9239cf48bbc42a7ffaa11c01b506.tar.gz
1 files changed, 178 insertions, 168 deletions
diff --git a/lib/sqlalchemy/topological.py b/lib/sqlalchemy/topological.py
index 258c3bf74..209d3455c 100644
--- a/lib/sqlalchemy/topological.py
+++ b/lib/sqlalchemy/topological.py
@@ -21,14 +21,47 @@ conditions.
 from sqlalchemy import util
 from sqlalchemy.exceptions import CircularDependencyError
 
-class _Node(object):
-    """Represent each item in the sort.
+__all__ = ['sort', 'sort_with_cycles', 'sort_as_tree']
 
-    While the topological sort produces a straight ordered list of
-    items, ``_Node`` ultimately stores a tree-structure of those items
-    which are organized so that non-dependent nodes are siblings.
+def sort(tuples, allitems):
+    """sort the given list of items by dependency.
+    
+    'tuples' is a list of tuples representing a partial ordering.
+    """
+    
+    return [n.item for n in _sort(tuples, allitems, allow_cycles=False, ignore_self_cycles=True)]
+
+def sort_with_cycles(tuples, allitems):
+    """sort the given list of items by dependency, cutting out cycles.
+    
+    returns results as an iterable of 2-tuples, containing the item,
+    and a list containing items involved in a cycle with this item, if any.
+    
+    'tuples' is a list of tuples representing a partial ordering.
+    """
+    
+    return [(n.item, [n.item for n in n.cycles or []]) for n in _sort(tuples, allitems, allow_cycles=True)]
+    
+def sort_as_tree(tuples, allitems, with_cycles=False):
+    """sort the given list of items by dependency, and return results
+    as a hierarchical tree structure.
+    
+    returns results as an iterable of 3-tuples, containing the item,
+    and a list containing items involved in a cycle with this item, if any,
+    and a list of child tuples.  
+    
+    if with_cycles is False, the returned structure is of the same form
+    but the second element of each tuple, i.e. the 'cycles', is an empty list.
+    
+    'tuples' is a list of tuples representing a partial ordering.
     """
 
+    return _organize_as_tree(_sort(tuples, allitems, allow_cycles=with_cycles))
+
+
+class _Node(object):
+    """Represent each item in the sort."""
+
     def __init__(self, item):
         self.item = item
         self.dependencies = util.Set()
@@ -37,7 +70,7 @@ class _Node(object):
 
     def __str__(self):
         return self.safestr()
-
+    
     def safestr(self, indent=0):
         return (' ' * indent * 2) + \
             str(self.item) + \
@@ -130,168 +163,145 @@ class _EdgeCollection(object):
     def __repr__(self):
         return repr(list(self))
 
-class QueueDependencySorter(object):
-    """Topological sort adapted from wikipedia's article on the subject.
-
-    It creates a straight-line list of elements, then a second pass
-    batches non-dependent elements as siblings in a tree structure.  Future
-    versions of this algorithm may separate the "convert to a tree" 
-    step.
+def _sort(tuples, allitems, allow_cycles=False, ignore_self_cycles=False):
+    nodes = {}
+    edges = _EdgeCollection()
+    for item in list(allitems) + [t[0] for t in tuples] + [t[1] for t in tuples]:
+        if id(item) not in nodes:
+            node = _Node(item)
+            nodes[item] = node
+
+    for t in tuples:
+        if t[0] is t[1]:
+            if allow_cycles:
+                n = nodes[t[0]]
+                n.cycles = util.Set([n])
+            elif not ignore_self_cycles:
+                raise CircularDependencyError("Self-referential dependency detected " + repr(t))
+            continue
+        childnode = nodes[t[1]]
+        parentnode = nodes[t[0]]
+        edges.add((parentnode, childnode))
+
+    queue = []
+    for n in nodes.values():
+        if not edges.has_parents(n):
+            queue.append(n)
+
+    output = []
+    while nodes:
+        if not queue:
+            # edges remain but no edgeless nodes to remove; this indicates
+            # a cycle
+            if allow_cycles:
+                for cycle in _find_cycles(edges):
+                    lead = cycle[0][0]
+                    lead.cycles = util.Set()
+                    for edge in cycle:
+                        n = edges.remove(edge)
+                        lead.cycles.add(edge[0])
+                        lead.cycles.add(edge[1])
+                        if n is not None:
+                            queue.append(n)
+                    for n in lead.cycles:
+                        if n is not lead:
+                            n._cyclical = True
+                            for (n,k) in list(edges.edges_by_parent(n)):
+                                edges.add((lead, k))
+                                edges.remove((n,k))
+                continue
+            else:
+                # long cycles not allowed
+                raise CircularDependencyError("Circular dependency detected " + repr(edges) + repr(queue))
+        node = queue.pop()
+        if not hasattr(node, '_cyclical'):
+            output.append(node)
+        del nodes[node.item]
+        for childnode in edges.pop_node(node):
+            queue.append(childnode)
+    return output
+
+def _organize_as_tree(nodes):
+    """Given a list of nodes from a topological sort, organize the
+    nodes into a tree structure, with as many non-dependent nodes
+    set as siblings to each other as possible.
+    
+    returns nodes as 3-tuples (item, cycles, children).
     """
 
-    def __init__(self, tuples, allitems):
-        self.tuples = tuples
-        self.allitems = allitems
-
-    def sort(self, allow_cycles=False, ignore_self_cycles=False, create_tree=True):
-        (tuples, allitems) = (self.tuples, self.allitems)
-        #print "\n---------------------------------\n"
-        #print repr([t for t in tuples])
-        #print repr([a for a in allitems])
-        #print "\n---------------------------------\n"
-
-        nodes = {}
-        edges = _EdgeCollection()
-        for item in list(allitems) + [t[0] for t in tuples] + [t[1] for t in tuples]:
-            if id(item) not in nodes:
-                node = _Node(item)
-                nodes[id(item)] = node
-
-        for t in tuples:
-            if t[0] is t[1]:
-                if allow_cycles:
-                    n = nodes[id(t[0])]
-                    n.cycles = util.Set([n])
-                elif not ignore_self_cycles:
-                    raise CircularDependencyError("Self-referential dependency detected " + repr(t))
-                continue
-            childnode = nodes[id(t[1])]
-            parentnode = nodes[id(t[0])]
-            edges.add((parentnode, childnode))
-
-        queue = []
-        for n in nodes.values():
-            if not edges.has_parents(n):
-                queue.append(n)
-
-        output = []
-        while nodes:
-            if not queue:
-                # edges remain but no edgeless nodes to remove; this indicates
-                # a cycle
-                if allow_cycles:
-                    for cycle in self._find_cycles(edges):
-                        lead = cycle[0][0]
-                        lead.cycles = util.Set()
-                        for edge in cycle:
-                            n = edges.remove(edge)
-                            lead.cycles.add(edge[0])
-                            lead.cycles.add(edge[1])
-                            if n is not None:
-                                queue.append(n)
-                        for n in lead.cycles:
-                            if n is not lead:
-                                n._cyclical = True
-                                for (n,k) in list(edges.edges_by_parent(n)):
-                                    edges.add((lead, k))
-                                    edges.remove((n,k))
-                    continue
-                else:
-                    # long cycles not allowed
-                    raise CircularDependencyError("Circular dependency detected " + repr(edges) + repr(queue))
-            node = queue.pop()
-            if not hasattr(node, '_cyclical'):
-                output.append(node)
-            del nodes[id(node.item)]
-            for childnode in edges.pop_node(node):
-                queue.append(childnode)
-        if create_tree:
-            return self._create_batched_tree(output)
-        elif allow_cycles:
-            return output
+    if not nodes:
+        return None
+    # a list of all currently independent subtrees as a tuple of
+    # (root_node, set_of_all_tree_nodes, set_of_all_cycle_nodes_in_tree)
+    # order of the list has no semantics for the algorithmic
+    independents = []
+    # in reverse topological order
+    for node in util.reversed(nodes):
+        # nodes subtree and cycles contain the node itself
+        subtree = util.Set([node])
+        if node.cycles is not None:
+            cycles = util.Set(node.cycles)
         else:
-            return [n.item for n in output]
-
-
-    def _create_batched_tree(self, nodes):
-        """Given a list of nodes from a topological sort, organize the
-        nodes into a tree structure, with as many non-dependent nodes
-        set as siblings to each other as possible.
-        """
-
-        if not nodes:
-            return None
-        # a list of all currently independent subtrees as a tuple of
-        # (root_node, set_of_all_tree_nodes, set_of_all_cycle_nodes_in_tree)
-        # order of the list has no semantics for the algorithmic
-        independents = []
-        # in reverse topological order
-        for node in util.reversed(nodes):
-            # nodes subtree and cycles contain the node itself
-            subtree = util.Set([node])
-            if node.cycles is not None:
-                cycles = util.Set(node.cycles)
-            else:
-                cycles = util.Set()
-            # get a set of dependent nodes of node and its cycles
-            nodealldeps = node.all_deps()
-            if nodealldeps:
-                # iterate over independent node indexes in reverse order so we can efficiently remove them
-                for index in xrange(len(independents)-1,-1,-1):
-                    child, childsubtree, childcycles = independents[index]
-                    # if there is a dependency between this node and an independent node
-                    if (childsubtree.intersection(nodealldeps) or childcycles.intersection(node.dependencies)):
-                        # prepend child to nodes children
-                        # (append should be fine, but previous implemetation used prepend)
-                        node.children[0:0] = (child,)
-                        # merge childs subtree and cycles
-                        subtree.update(childsubtree)
-                        cycles.update(childcycles)
-                        # remove the child from list of independent subtrees
-                        independents[index:index+1] = []
-            # add node as a new independent subtree
-            independents.append((node,subtree,cycles))
-        # choose an arbitrary node from list of all independent subtrees
-        head = independents.pop()[0]
-        # add all other independent subtrees as a child of the chosen root
-        # used prepend [0:0] instead of extend to maintain exact behaviour of previous implementation
-        head.children[0:0] = [i[0] for i in independents]
-        return head
-
-    def _find_cycles(self, edges):
-        involved_in_cycles = util.Set()
-        cycles = {}
-        def traverse(node, goal=None, cycle=None):
-            if goal is None:
-                goal = node
-                cycle = []
-            elif node is goal:
-                return True
-
-            for (n, key) in edges.edges_by_parent(node):
-                if key in cycle:
-                    continue
-                cycle.append(key)
-                if traverse(key, goal, cycle):
-                    cycset = util.Set(cycle)
-                    for x in cycle:
-                        involved_in_cycles.add(x)
-                        if x in cycles:
-                            existing_set = cycles[x]
-                            [existing_set.add(y) for y in cycset]
-                            for y in existing_set:
-                                cycles[y] = existing_set
-                            cycset = existing_set
-                        else:
-                            cycles[x] = cycset
-                cycle.pop()
-
-        for parent in edges.get_parents():
-            traverse(parent)
-
-        # sets are not hashable, so uniquify with id
-        unique_cycles = dict([(id(s), s) for s in cycles.values()]).values()
-        for cycle in unique_cycles:
-            edgecollection = [edge for edge in edges
-                              if edge[0] in cycle and edge[1] in cycle]
-            yield edgecollection
+            cycles = util.Set()
+        # get a set of dependent nodes of node and its cycles
+        nodealldeps = node.all_deps()
+        if nodealldeps:
+            # iterate over independent node indexes in reverse order so we can efficiently remove them
+            for index in xrange(len(independents)-1,-1,-1):
+                child, childsubtree, childcycles = independents[index]
+                # if there is a dependency between this node and an independent node
+                if (childsubtree.intersection(nodealldeps) or childcycles.intersection(node.dependencies)):
+                    # prepend child to nodes children
+                    # (append should be fine, but previous implemetation used prepend)
+                    node.children[0:0] = [(child.item, [child.item for n in child.cycles or []], child.children)]
+                    # merge childs subtree and cycles
+                    subtree.update(childsubtree)
+                    cycles.update(childcycles)
+                    # remove the child from list of independent subtrees
+                    independents[index:index+1] = []
+        # add node as a new independent subtree
+        independents.append((node,subtree,cycles))
+    # choose an arbitrary node from list of all independent subtrees
+    head = independents.pop()[0]
+    # add all other independent subtrees as a child of the chosen root
+    # used prepend [0:0] instead of extend to maintain exact behaviour of previous implementation
+    head.children[0:0] = [(i[0].item, [n.item for n in i[0].cycles or []], i[0].children) for i in independents]
+    return (head.item, [n.item for n in head.cycles or []], head.children)
+
+def _find_cycles(edges):
+    involved_in_cycles = util.Set()
+    cycles = {}
+    def traverse(node, goal=None, cycle=None):
+        if goal is None:
+            goal = node
+            cycle = []
+        elif node is goal:
+            return True
+
+        for (n, key) in edges.edges_by_parent(node):
+            if key in cycle:
+                continue
+            cycle.append(key)
+            if traverse(key, goal, cycle):
+                cycset = util.Set(cycle)
+                for x in cycle:
+                    involved_in_cycles.add(x)
+                    if x in cycles:
+                        existing_set = cycles[x]
+                        [existing_set.add(y) for y in cycset]
+                        for y in existing_set:
+                            cycles[y] = existing_set
+                        cycset = existing_set
+                    else:
+                        cycles[x] = cycset
+            cycle.pop()
+
+    for parent in edges.get_parents():
+        traverse(parent)
+
+    # sets are not hashable, so uniquify with id
+    unique_cycles = dict([(id(s), s) for s in cycles.values()]).values()
+    for cycle in unique_cycles:
+        edgecollection = [edge for edge in edges
+                          if edge[0] in cycle and edge[1] in cycle]
+        yield edgecollection
author	Mike Bayer <mike_mp@zzzcomputing.com>	2007-12-08 18:58:03 +0000
committer	Mike Bayer <mike_mp@zzzcomputing.com>	2007-12-08 18:58:03 +0000
commit	8693d4b2876e9239cf48bbc42a7ffaa11c01b506 (patch)
tree	6438a7f4a319c4e1f25376fb004de6fb73bfd7c0 /lib/sqlalchemy/topological.py
parent	78bb82a44b7f382c6cfeab0cfc7f932e68c4de86 (diff)
download	sqlalchemy-8693d4b2876e9239cf48bbc42a7ffaa11c01b506.tar.gz