diff options
author | Mike Bayer <mike_mp@zzzcomputing.com> | 2007-12-08 18:58:03 +0000 |
---|---|---|
committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2007-12-08 18:58:03 +0000 |
commit | 8693d4b2876e9239cf48bbc42a7ffaa11c01b506 (patch) | |
tree | 6438a7f4a319c4e1f25376fb004de6fb73bfd7c0 /lib/sqlalchemy/topological.py | |
parent | 78bb82a44b7f382c6cfeab0cfc7f932e68c4de86 (diff) | |
download | sqlalchemy-8693d4b2876e9239cf48bbc42a7ffaa11c01b506.tar.gz |
- flush() refactor merged from uow_nontree branch r3871-r3885
- topological.py cleaned up, presents three public facing functions which
return list/tuple based structures, without exposing any internals. only
the third function returns the "hierarchical" structure. when results
include "cycles" or "child" items, 2- or 3- tuples are used to represent
results.
- unitofwork uses InstanceState almost exclusively now. new and deleted lists
are now dicts which ref the actual object to provide a strong ref for the
duration that they're in those lists. IdentitySet is only used for the public
facing versions of "new" and "deleted".
- unitofwork topological sort no longer uses the "hierarchical" version of the sort
for the base sort, only for the "per-object" secondary sort where it still
helps to group non-dependent operations together and provides expected insert
order. the default sort deals with UOWTasks in a straight list and is greatly
simplified. Tests all pass but need to see if svilen's stuff still works,
one block of code in _sort_cyclical_dependencies() seems to not be needed anywhere
but i definitely put it there for a reason at some point; if not hopefully we
can derive more test coverage from that.
- the UOWEventHandler is only applied to object-storing attributes, not
scalar (i.e. column-based) ones. cuts out a ton of overhead when setting
non-object based attributes.
- InstanceState also used throughout the flush process, i.e. dependency.py,
mapper.save_obj()/delete_obj(), sync.execute() all expect InstanceState objects
in most cases now.
- mapper/property cascade_iterator() takes InstanceState as its argument,
but still returns lists of object instances so that they are not dereferenced.
- a few tricks needed when dealing with InstanceState, i.e. when loading a list
of items that are possibly fresh from the DB, you *have* to get the actual objects
into a strong-referencing datastructure else they fall out of scope immediately.
dependency.py caches lists of dependent objects which it loads now (i.e. history
collections).
- AttributeHistory is gone, replaced by a function that returns a 3-tuple of
added, unchanged, deleted. these collections still reference the object
instances directly for the strong-referencing reasons mentiontioned, but
it uses less IdentitySet logic to generate.
Diffstat (limited to 'lib/sqlalchemy/topological.py')
-rw-r--r-- | lib/sqlalchemy/topological.py | 346 |
1 files changed, 178 insertions, 168 deletions
diff --git a/lib/sqlalchemy/topological.py b/lib/sqlalchemy/topological.py index 258c3bf74..209d3455c 100644 --- a/lib/sqlalchemy/topological.py +++ b/lib/sqlalchemy/topological.py @@ -21,14 +21,47 @@ conditions. from sqlalchemy import util from sqlalchemy.exceptions import CircularDependencyError -class _Node(object): - """Represent each item in the sort. +__all__ = ['sort', 'sort_with_cycles', 'sort_as_tree'] - While the topological sort produces a straight ordered list of - items, ``_Node`` ultimately stores a tree-structure of those items - which are organized so that non-dependent nodes are siblings. +def sort(tuples, allitems): + """sort the given list of items by dependency. + + 'tuples' is a list of tuples representing a partial ordering. + """ + + return [n.item for n in _sort(tuples, allitems, allow_cycles=False, ignore_self_cycles=True)] + +def sort_with_cycles(tuples, allitems): + """sort the given list of items by dependency, cutting out cycles. + + returns results as an iterable of 2-tuples, containing the item, + and a list containing items involved in a cycle with this item, if any. + + 'tuples' is a list of tuples representing a partial ordering. + """ + + return [(n.item, [n.item for n in n.cycles or []]) for n in _sort(tuples, allitems, allow_cycles=True)] + +def sort_as_tree(tuples, allitems, with_cycles=False): + """sort the given list of items by dependency, and return results + as a hierarchical tree structure. + + returns results as an iterable of 3-tuples, containing the item, + and a list containing items involved in a cycle with this item, if any, + and a list of child tuples. + + if with_cycles is False, the returned structure is of the same form + but the second element of each tuple, i.e. the 'cycles', is an empty list. + + 'tuples' is a list of tuples representing a partial ordering. """ + return _organize_as_tree(_sort(tuples, allitems, allow_cycles=with_cycles)) + + +class _Node(object): + """Represent each item in the sort.""" + def __init__(self, item): self.item = item self.dependencies = util.Set() @@ -37,7 +70,7 @@ class _Node(object): def __str__(self): return self.safestr() - + def safestr(self, indent=0): return (' ' * indent * 2) + \ str(self.item) + \ @@ -130,168 +163,145 @@ class _EdgeCollection(object): def __repr__(self): return repr(list(self)) -class QueueDependencySorter(object): - """Topological sort adapted from wikipedia's article on the subject. - - It creates a straight-line list of elements, then a second pass - batches non-dependent elements as siblings in a tree structure. Future - versions of this algorithm may separate the "convert to a tree" - step. +def _sort(tuples, allitems, allow_cycles=False, ignore_self_cycles=False): + nodes = {} + edges = _EdgeCollection() + for item in list(allitems) + [t[0] for t in tuples] + [t[1] for t in tuples]: + if id(item) not in nodes: + node = _Node(item) + nodes[item] = node + + for t in tuples: + if t[0] is t[1]: + if allow_cycles: + n = nodes[t[0]] + n.cycles = util.Set([n]) + elif not ignore_self_cycles: + raise CircularDependencyError("Self-referential dependency detected " + repr(t)) + continue + childnode = nodes[t[1]] + parentnode = nodes[t[0]] + edges.add((parentnode, childnode)) + + queue = [] + for n in nodes.values(): + if not edges.has_parents(n): + queue.append(n) + + output = [] + while nodes: + if not queue: + # edges remain but no edgeless nodes to remove; this indicates + # a cycle + if allow_cycles: + for cycle in _find_cycles(edges): + lead = cycle[0][0] + lead.cycles = util.Set() + for edge in cycle: + n = edges.remove(edge) + lead.cycles.add(edge[0]) + lead.cycles.add(edge[1]) + if n is not None: + queue.append(n) + for n in lead.cycles: + if n is not lead: + n._cyclical = True + for (n,k) in list(edges.edges_by_parent(n)): + edges.add((lead, k)) + edges.remove((n,k)) + continue + else: + # long cycles not allowed + raise CircularDependencyError("Circular dependency detected " + repr(edges) + repr(queue)) + node = queue.pop() + if not hasattr(node, '_cyclical'): + output.append(node) + del nodes[node.item] + for childnode in edges.pop_node(node): + queue.append(childnode) + return output + +def _organize_as_tree(nodes): + """Given a list of nodes from a topological sort, organize the + nodes into a tree structure, with as many non-dependent nodes + set as siblings to each other as possible. + + returns nodes as 3-tuples (item, cycles, children). """ - def __init__(self, tuples, allitems): - self.tuples = tuples - self.allitems = allitems - - def sort(self, allow_cycles=False, ignore_self_cycles=False, create_tree=True): - (tuples, allitems) = (self.tuples, self.allitems) - #print "\n---------------------------------\n" - #print repr([t for t in tuples]) - #print repr([a for a in allitems]) - #print "\n---------------------------------\n" - - nodes = {} - edges = _EdgeCollection() - for item in list(allitems) + [t[0] for t in tuples] + [t[1] for t in tuples]: - if id(item) not in nodes: - node = _Node(item) - nodes[id(item)] = node - - for t in tuples: - if t[0] is t[1]: - if allow_cycles: - n = nodes[id(t[0])] - n.cycles = util.Set([n]) - elif not ignore_self_cycles: - raise CircularDependencyError("Self-referential dependency detected " + repr(t)) - continue - childnode = nodes[id(t[1])] - parentnode = nodes[id(t[0])] - edges.add((parentnode, childnode)) - - queue = [] - for n in nodes.values(): - if not edges.has_parents(n): - queue.append(n) - - output = [] - while nodes: - if not queue: - # edges remain but no edgeless nodes to remove; this indicates - # a cycle - if allow_cycles: - for cycle in self._find_cycles(edges): - lead = cycle[0][0] - lead.cycles = util.Set() - for edge in cycle: - n = edges.remove(edge) - lead.cycles.add(edge[0]) - lead.cycles.add(edge[1]) - if n is not None: - queue.append(n) - for n in lead.cycles: - if n is not lead: - n._cyclical = True - for (n,k) in list(edges.edges_by_parent(n)): - edges.add((lead, k)) - edges.remove((n,k)) - continue - else: - # long cycles not allowed - raise CircularDependencyError("Circular dependency detected " + repr(edges) + repr(queue)) - node = queue.pop() - if not hasattr(node, '_cyclical'): - output.append(node) - del nodes[id(node.item)] - for childnode in edges.pop_node(node): - queue.append(childnode) - if create_tree: - return self._create_batched_tree(output) - elif allow_cycles: - return output + if not nodes: + return None + # a list of all currently independent subtrees as a tuple of + # (root_node, set_of_all_tree_nodes, set_of_all_cycle_nodes_in_tree) + # order of the list has no semantics for the algorithmic + independents = [] + # in reverse topological order + for node in util.reversed(nodes): + # nodes subtree and cycles contain the node itself + subtree = util.Set([node]) + if node.cycles is not None: + cycles = util.Set(node.cycles) else: - return [n.item for n in output] - - - def _create_batched_tree(self, nodes): - """Given a list of nodes from a topological sort, organize the - nodes into a tree structure, with as many non-dependent nodes - set as siblings to each other as possible. - """ - - if not nodes: - return None - # a list of all currently independent subtrees as a tuple of - # (root_node, set_of_all_tree_nodes, set_of_all_cycle_nodes_in_tree) - # order of the list has no semantics for the algorithmic - independents = [] - # in reverse topological order - for node in util.reversed(nodes): - # nodes subtree and cycles contain the node itself - subtree = util.Set([node]) - if node.cycles is not None: - cycles = util.Set(node.cycles) - else: - cycles = util.Set() - # get a set of dependent nodes of node and its cycles - nodealldeps = node.all_deps() - if nodealldeps: - # iterate over independent node indexes in reverse order so we can efficiently remove them - for index in xrange(len(independents)-1,-1,-1): - child, childsubtree, childcycles = independents[index] - # if there is a dependency between this node and an independent node - if (childsubtree.intersection(nodealldeps) or childcycles.intersection(node.dependencies)): - # prepend child to nodes children - # (append should be fine, but previous implemetation used prepend) - node.children[0:0] = (child,) - # merge childs subtree and cycles - subtree.update(childsubtree) - cycles.update(childcycles) - # remove the child from list of independent subtrees - independents[index:index+1] = [] - # add node as a new independent subtree - independents.append((node,subtree,cycles)) - # choose an arbitrary node from list of all independent subtrees - head = independents.pop()[0] - # add all other independent subtrees as a child of the chosen root - # used prepend [0:0] instead of extend to maintain exact behaviour of previous implementation - head.children[0:0] = [i[0] for i in independents] - return head - - def _find_cycles(self, edges): - involved_in_cycles = util.Set() - cycles = {} - def traverse(node, goal=None, cycle=None): - if goal is None: - goal = node - cycle = [] - elif node is goal: - return True - - for (n, key) in edges.edges_by_parent(node): - if key in cycle: - continue - cycle.append(key) - if traverse(key, goal, cycle): - cycset = util.Set(cycle) - for x in cycle: - involved_in_cycles.add(x) - if x in cycles: - existing_set = cycles[x] - [existing_set.add(y) for y in cycset] - for y in existing_set: - cycles[y] = existing_set - cycset = existing_set - else: - cycles[x] = cycset - cycle.pop() - - for parent in edges.get_parents(): - traverse(parent) - - # sets are not hashable, so uniquify with id - unique_cycles = dict([(id(s), s) for s in cycles.values()]).values() - for cycle in unique_cycles: - edgecollection = [edge for edge in edges - if edge[0] in cycle and edge[1] in cycle] - yield edgecollection + cycles = util.Set() + # get a set of dependent nodes of node and its cycles + nodealldeps = node.all_deps() + if nodealldeps: + # iterate over independent node indexes in reverse order so we can efficiently remove them + for index in xrange(len(independents)-1,-1,-1): + child, childsubtree, childcycles = independents[index] + # if there is a dependency between this node and an independent node + if (childsubtree.intersection(nodealldeps) or childcycles.intersection(node.dependencies)): + # prepend child to nodes children + # (append should be fine, but previous implemetation used prepend) + node.children[0:0] = [(child.item, [child.item for n in child.cycles or []], child.children)] + # merge childs subtree and cycles + subtree.update(childsubtree) + cycles.update(childcycles) + # remove the child from list of independent subtrees + independents[index:index+1] = [] + # add node as a new independent subtree + independents.append((node,subtree,cycles)) + # choose an arbitrary node from list of all independent subtrees + head = independents.pop()[0] + # add all other independent subtrees as a child of the chosen root + # used prepend [0:0] instead of extend to maintain exact behaviour of previous implementation + head.children[0:0] = [(i[0].item, [n.item for n in i[0].cycles or []], i[0].children) for i in independents] + return (head.item, [n.item for n in head.cycles or []], head.children) + +def _find_cycles(edges): + involved_in_cycles = util.Set() + cycles = {} + def traverse(node, goal=None, cycle=None): + if goal is None: + goal = node + cycle = [] + elif node is goal: + return True + + for (n, key) in edges.edges_by_parent(node): + if key in cycle: + continue + cycle.append(key) + if traverse(key, goal, cycle): + cycset = util.Set(cycle) + for x in cycle: + involved_in_cycles.add(x) + if x in cycles: + existing_set = cycles[x] + [existing_set.add(y) for y in cycset] + for y in existing_set: + cycles[y] = existing_set + cycset = existing_set + else: + cycles[x] = cycset + cycle.pop() + + for parent in edges.get_parents(): + traverse(parent) + + # sets are not hashable, so uniquify with id + unique_cycles = dict([(id(s), s) for s in cycles.values()]).values() + for cycle in unique_cycles: + edgecollection = [edge for edge in edges + if edge[0] in cycle and edge[1] in cycle] + yield edgecollection |