summaryrefslogtreecommitdiff
path: root/bzrlib/_known_graph_pyx.pyx
diff options
context:
space:
mode:
authorLorry <lorry@roadtrain.codethink.co.uk>2012-08-22 15:47:16 +0100
committerLorry <lorry@roadtrain.codethink.co.uk>2012-08-22 15:47:16 +0100
commit25335618bf8755ce6b116ee14f47f5a1f2c821e9 (patch)
treed889d7ab3f9f985d0c54c534cb8052bd2e6d7163 /bzrlib/_known_graph_pyx.pyx
downloadbzr-tarball-25335618bf8755ce6b116ee14f47f5a1f2c821e9.tar.gz
Tarball conversion
Diffstat (limited to 'bzrlib/_known_graph_pyx.pyx')
-rw-r--r--bzrlib/_known_graph_pyx.pyx955
1 files changed, 955 insertions, 0 deletions
diff --git a/bzrlib/_known_graph_pyx.pyx b/bzrlib/_known_graph_pyx.pyx
new file mode 100644
index 0000000..a9e6d13
--- /dev/null
+++ b/bzrlib/_known_graph_pyx.pyx
@@ -0,0 +1,955 @@
+# Copyright (C) 2009, 2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Implementation of Graph algorithms when we have already loaded everything.
+"""
+
+cdef extern from "python-compat.h":
+ pass
+
+cdef extern from "Python.h":
+ ctypedef int Py_ssize_t
+ ctypedef struct PyObject:
+ pass
+
+ int PyString_CheckExact(object)
+
+ int PyObject_RichCompareBool(object, object, int)
+ int Py_LT
+
+ int PyTuple_CheckExact(object)
+ object PyTuple_New(Py_ssize_t n)
+ Py_ssize_t PyTuple_GET_SIZE(object t)
+ PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
+ void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
+
+ int PyList_CheckExact(object)
+ Py_ssize_t PyList_GET_SIZE(object l)
+ PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
+ int PyList_SetItem(object l, Py_ssize_t o, object l) except -1
+ int PyList_Append(object l, object v) except -1
+
+ int PyDict_CheckExact(object d)
+ Py_ssize_t PyDict_Size(object d) except -1
+ PyObject * PyDict_GetItem(object d, object k)
+ int PyDict_SetItem(object d, object k, object v) except -1
+ int PyDict_DelItem(object d, object k) except -1
+ int PyDict_Next(object d, Py_ssize_t *pos, PyObject **k, PyObject **v)
+
+ void Py_INCREF(object)
+
+from collections import deque
+import gc
+
+from bzrlib import errors, revision
+
+cdef object NULL_REVISION
+NULL_REVISION = revision.NULL_REVISION
+
+
+cdef class _KnownGraphNode:
+ """Represents a single object in the known graph."""
+
+ cdef object key
+ cdef object parents
+ cdef object children
+ cdef public long gdfo
+ cdef int seen
+ cdef object extra
+
+ def __init__(self, key):
+ self.key = key
+ self.parents = None
+
+ self.children = []
+ # Greatest distance from origin
+ self.gdfo = -1
+ self.seen = 0
+ self.extra = None
+
+ property child_keys:
+ def __get__(self):
+ cdef _KnownGraphNode child
+
+ keys = []
+ for child in self.children:
+ PyList_Append(keys, child.key)
+ return keys
+
+ property parent_keys:
+ def __get__(self):
+ if self.parents is None:
+ return None
+
+ cdef _KnownGraphNode parent
+
+ keys = []
+ for parent in self.parents:
+ PyList_Append(keys, parent.key)
+ return keys
+
+ cdef clear_references(self):
+ self.parents = None
+ self.children = None
+
+ def __repr__(self):
+ cdef _KnownGraphNode node
+
+ parent_keys = []
+ if self.parents is not None:
+ for node in self.parents:
+ parent_keys.append(node.key)
+ child_keys = []
+ if self.children is not None:
+ for node in self.children:
+ child_keys.append(node.key)
+ return '%s(%s gdfo:%s par:%s child:%s)' % (
+ self.__class__.__name__, self.key, self.gdfo,
+ parent_keys, child_keys)
+
+
+cdef _KnownGraphNode _get_list_node(lst, Py_ssize_t pos):
+ cdef PyObject *temp_node
+
+ temp_node = PyList_GET_ITEM(lst, pos)
+ return <_KnownGraphNode>temp_node
+
+
+cdef _KnownGraphNode _get_tuple_node(tpl, Py_ssize_t pos):
+ cdef PyObject *temp_node
+
+ temp_node = PyTuple_GET_ITEM(tpl, pos)
+ return <_KnownGraphNode>temp_node
+
+
+def get_key(node):
+ cdef _KnownGraphNode real_node
+ real_node = node
+ return real_node.key
+
+
+cdef object _sort_list_nodes(object lst_or_tpl, int reverse):
+ """Sort a list of _KnownGraphNode objects.
+
+ If lst_or_tpl is a list, it is allowed to mutate in place. It may also
+ just return the input list if everything is already sorted.
+ """
+ cdef _KnownGraphNode node1, node2
+ cdef int do_swap, is_tuple
+ cdef Py_ssize_t length
+
+ is_tuple = PyTuple_CheckExact(lst_or_tpl)
+ if not (is_tuple or PyList_CheckExact(lst_or_tpl)):
+ raise TypeError('lst_or_tpl must be a list or tuple.')
+ length = len(lst_or_tpl)
+ if length == 0 or length == 1:
+ return lst_or_tpl
+ if length == 2:
+ if is_tuple:
+ node1 = _get_tuple_node(lst_or_tpl, 0)
+ node2 = _get_tuple_node(lst_or_tpl, 1)
+ else:
+ node1 = _get_list_node(lst_or_tpl, 0)
+ node2 = _get_list_node(lst_or_tpl, 1)
+ if reverse:
+ do_swap = PyObject_RichCompareBool(node1.key, node2.key, Py_LT)
+ else:
+ do_swap = PyObject_RichCompareBool(node2.key, node1.key, Py_LT)
+ if not do_swap:
+ return lst_or_tpl
+ if is_tuple:
+ return (node2, node1)
+ else:
+ # Swap 'in-place', since lists are mutable
+ Py_INCREF(node1)
+ PyList_SetItem(lst_or_tpl, 1, node1)
+ Py_INCREF(node2)
+ PyList_SetItem(lst_or_tpl, 0, node2)
+ return lst_or_tpl
+ # For all other sizes, we just use 'sorted()'
+ if is_tuple:
+ # Note that sorted() is just list(iterable).sort()
+ lst_or_tpl = list(lst_or_tpl)
+ lst_or_tpl.sort(key=get_key, reverse=reverse)
+ return lst_or_tpl
+
+
+cdef class _MergeSorter
+
+cdef class KnownGraph:
+ """This is a class which assumes we already know the full graph."""
+
+ cdef public object _nodes
+ cdef public object _known_heads
+ cdef public int do_cache
+
+ def __init__(self, parent_map, do_cache=True):
+ """Create a new KnownGraph instance.
+
+ :param parent_map: A dictionary mapping key => parent_keys
+ """
+ # tests at pre-allocating the node dict actually slowed things down
+ self._nodes = {}
+ # Maps {sorted(revision_id, revision_id): heads}
+ self._known_heads = {}
+ self.do_cache = int(do_cache)
+ # TODO: consider disabling gc since we are allocating a lot of nodes
+ # that won't be collectable anyway. real world testing has not
+ # shown a specific impact, yet.
+ self._initialize_nodes(parent_map)
+ self._find_gdfo()
+
+ def __dealloc__(self):
+ cdef _KnownGraphNode child
+ cdef Py_ssize_t pos
+ cdef PyObject *temp_node
+
+ while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
+ child = <_KnownGraphNode>temp_node
+ child.clear_references()
+
+ cdef _KnownGraphNode _get_or_create_node(self, key):
+ cdef PyObject *temp_node
+ cdef _KnownGraphNode node
+
+ temp_node = PyDict_GetItem(self._nodes, key)
+ if temp_node == NULL:
+ node = _KnownGraphNode(key)
+ PyDict_SetItem(self._nodes, key, node)
+ else:
+ node = <_KnownGraphNode>temp_node
+ return node
+
+ cdef _populate_parents(self, _KnownGraphNode node, parent_keys):
+ cdef Py_ssize_t num_parent_keys, pos
+ cdef _KnownGraphNode parent_node
+
+ num_parent_keys = len(parent_keys)
+ # We know how many parents, so we pre allocate the tuple
+ parent_nodes = PyTuple_New(num_parent_keys)
+ for pos from 0 <= pos < num_parent_keys:
+ # Note: it costs us 10ms out of 40ms to lookup all of these
+ # parents, it doesn't seem to be an allocation overhead,
+ # but rather a lookup overhead. There doesn't seem to be
+ # a way around it, and that is one reason why
+ # KnownGraphNode maintains a direct pointer to the parent
+ # node.
+ # We use [] because parent_keys may be a tuple or list
+ parent_node = self._get_or_create_node(parent_keys[pos])
+ # PyTuple_SET_ITEM will steal a reference, so INCREF first
+ Py_INCREF(parent_node)
+ PyTuple_SET_ITEM(parent_nodes, pos, parent_node)
+ PyList_Append(parent_node.children, node)
+ node.parents = parent_nodes
+
+ def _initialize_nodes(self, parent_map):
+ """Populate self._nodes.
+
+ After this has finished:
+ - self._nodes will have an entry for every entry in parent_map.
+ - ghosts will have a parent_keys = None,
+ - all nodes found will also have child_keys populated with all known
+ child keys,
+ """
+ cdef PyObject *temp_key, *temp_parent_keys, *temp_node
+ cdef Py_ssize_t pos
+ cdef _KnownGraphNode node
+ cdef _KnownGraphNode parent_node
+
+ if not PyDict_CheckExact(parent_map):
+ raise TypeError('parent_map should be a dict of {key:parent_keys}')
+ # for key, parent_keys in parent_map.iteritems():
+ pos = 0
+ while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
+ key = <object>temp_key
+ parent_keys = <object>temp_parent_keys
+ node = self._get_or_create_node(key)
+ self._populate_parents(node, parent_keys)
+
+ def _find_tails(self):
+ cdef PyObject *temp_node
+ cdef _KnownGraphNode node
+ cdef Py_ssize_t pos
+
+ tails = []
+ pos = 0
+ while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
+ node = <_KnownGraphNode>temp_node
+ if node.parents is None or PyTuple_GET_SIZE(node.parents) == 0:
+ node.gdfo = 1
+ PyList_Append(tails, node)
+ return tails
+
+ def _find_tips(self):
+ cdef PyObject *temp_node
+ cdef _KnownGraphNode node
+ cdef Py_ssize_t pos
+
+ tips = []
+ pos = 0
+ while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
+ node = <_KnownGraphNode>temp_node
+ if PyList_GET_SIZE(node.children) == 0:
+ PyList_Append(tips, node)
+ return tips
+
+ def _find_gdfo(self):
+ cdef _KnownGraphNode node
+ cdef _KnownGraphNode child
+ cdef PyObject *temp
+ cdef Py_ssize_t pos
+ cdef int replace
+ cdef Py_ssize_t last_item
+ cdef long next_gdfo
+
+ pending = self._find_tails()
+
+ last_item = PyList_GET_SIZE(pending) - 1
+ while last_item >= 0:
+ # Avoid pop followed by push, instead, peek, and replace
+ # timing shows this is 930ms => 770ms for OOo
+ node = _get_list_node(pending, last_item)
+ last_item = last_item - 1
+ next_gdfo = node.gdfo + 1
+ for pos from 0 <= pos < PyList_GET_SIZE(node.children):
+ child = _get_list_node(node.children, pos)
+ if next_gdfo > child.gdfo:
+ child.gdfo = next_gdfo
+ child.seen = child.seen + 1
+ if child.seen == PyTuple_GET_SIZE(child.parents):
+ # This child is populated, queue it to be walked
+ last_item = last_item + 1
+ if last_item < PyList_GET_SIZE(pending):
+ Py_INCREF(child) # SetItem steals a ref
+ PyList_SetItem(pending, last_item, child)
+ else:
+ PyList_Append(pending, child)
+ # We have queued this node, we don't need to track it
+ # anymore
+ child.seen = 0
+
+ def add_node(self, key, parent_keys):
+ """Add a new node to the graph.
+
+ If this fills in a ghost, then the gdfos of all children will be
+ updated accordingly.
+
+ :param key: The node being added. If this is a duplicate, this is a
+ no-op.
+ :param parent_keys: The parents of the given node.
+ :return: None (should we return if this was a ghost, etc?)
+ """
+ cdef PyObject *maybe_node
+ cdef _KnownGraphNode node, parent_node, child_node
+ cdef long parent_gdfo, next_gdfo
+
+ maybe_node = PyDict_GetItem(self._nodes, key)
+ if maybe_node != NULL:
+ node = <_KnownGraphNode>maybe_node
+ if node.parents is None:
+ # We are filling in a ghost
+ self._populate_parents(node, parent_keys)
+ # We can't trust cached heads anymore
+ self._known_heads.clear()
+ else: # Ensure that the parent_key list matches
+ existing_parent_keys = []
+ for parent_node in node.parents:
+ existing_parent_keys.append(parent_node.key)
+ # Make sure we use a list for the comparison, in case it was a
+ # tuple, etc
+ parent_keys = list(parent_keys)
+ if existing_parent_keys == parent_keys:
+ # Exact match, nothing more to do
+ return
+ else:
+ raise ValueError('Parent key mismatch, existing node %s'
+ ' has parents of %s not %s'
+ % (key, existing_parent_keys, parent_keys))
+ else:
+ node = _KnownGraphNode(key)
+ PyDict_SetItem(self._nodes, key, node)
+ self._populate_parents(node, parent_keys)
+ parent_gdfo = 0
+ for parent_node in node.parents:
+ if parent_node.gdfo == -1:
+ # This is a newly introduced ghost, so it gets gdfo of 1
+ parent_node.gdfo = 1
+ if parent_gdfo < parent_node.gdfo:
+ parent_gdfo = parent_node.gdfo
+ node.gdfo = parent_gdfo + 1
+ # Now fill the gdfo to all children
+ # Note that this loop is slightly inefficient, in that we may visit the
+ # same child (and its decendents) more than once, however, it is
+ # 'efficient' in that we only walk to nodes that would be updated,
+ # rather than all nodes
+ # We use a deque rather than a simple list stack, to go for BFD rather
+ # than DFD. So that if a longer path is possible, we walk it before we
+ # get to the final child
+ pending = deque([node])
+ pending_popleft = pending.popleft
+ pending_append = pending.append
+ while pending:
+ node = pending_popleft()
+ next_gdfo = node.gdfo + 1
+ for child_node in node.children:
+ if child_node.gdfo < next_gdfo:
+ # This child is being updated, we need to check its
+ # children
+ child_node.gdfo = next_gdfo
+ pending_append(child_node)
+
+ def heads(self, keys):
+ """Return the heads from amongst keys.
+
+ This is done by searching the ancestries of each key. Any key that is
+ reachable from another key is not returned; all the others are.
+
+ This operation scales with the relative depth between any two keys. It
+ uses gdfo to avoid walking all ancestry.
+
+ :param keys: An iterable of keys.
+ :return: A set of the heads. Note that as a set there is no ordering
+ information. Callers will need to filter their input to create
+ order if they need it.
+ """
+ cdef PyObject *maybe_node
+ cdef PyObject *maybe_heads
+ cdef PyObject *temp_node
+ cdef _KnownGraphNode node
+ cdef Py_ssize_t pos, last_item
+ cdef long min_gdfo
+
+ heads_key = frozenset(keys)
+ maybe_heads = PyDict_GetItem(self._known_heads, heads_key)
+ if maybe_heads != NULL:
+ return <object>maybe_heads
+ # Not cached, compute it ourselves
+ candidate_nodes = {}
+ for key in keys:
+ maybe_node = PyDict_GetItem(self._nodes, key)
+ if maybe_node == NULL:
+ raise KeyError('key %s not in nodes' % (key,))
+ PyDict_SetItem(candidate_nodes, key, <object>maybe_node)
+ maybe_node = PyDict_GetItem(candidate_nodes, NULL_REVISION)
+ if maybe_node != NULL:
+ # NULL_REVISION is only a head if it is the only entry
+ candidate_nodes.pop(NULL_REVISION)
+ if not candidate_nodes:
+ return frozenset([NULL_REVISION])
+ # The keys changed, so recalculate heads_key
+ heads_key = frozenset(candidate_nodes)
+ if PyDict_Size(candidate_nodes) < 2:
+ return heads_key
+
+ cleanup = []
+ pending = []
+ # we know a gdfo cannot be longer than a linear chain of all nodes
+ min_gdfo = PyDict_Size(self._nodes) + 1
+ # Build up nodes that need to be walked, note that starting nodes are
+ # not added to seen()
+ pos = 0
+ while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
+ node = <_KnownGraphNode>temp_node
+ if node.parents is not None:
+ pending.extend(node.parents)
+ if node.gdfo < min_gdfo:
+ min_gdfo = node.gdfo
+
+ # Now do all the real work
+ last_item = PyList_GET_SIZE(pending) - 1
+ while last_item >= 0:
+ node = _get_list_node(pending, last_item)
+ last_item = last_item - 1
+ if node.seen:
+ # node already appears in some ancestry
+ continue
+ PyList_Append(cleanup, node)
+ node.seen = 1
+ if node.gdfo <= min_gdfo:
+ continue
+ if node.parents is not None and PyTuple_GET_SIZE(node.parents) > 0:
+ for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
+ parent_node = _get_tuple_node(node.parents, pos)
+ last_item = last_item + 1
+ if last_item < PyList_GET_SIZE(pending):
+ Py_INCREF(parent_node) # SetItem steals a ref
+ PyList_SetItem(pending, last_item, parent_node)
+ else:
+ PyList_Append(pending, parent_node)
+ heads = []
+ pos = 0
+ while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
+ node = <_KnownGraphNode>temp_node
+ if not node.seen:
+ PyList_Append(heads, node.key)
+ heads = frozenset(heads)
+ for pos from 0 <= pos < PyList_GET_SIZE(cleanup):
+ node = _get_list_node(cleanup, pos)
+ node.seen = 0
+ if self.do_cache:
+ PyDict_SetItem(self._known_heads, heads_key, heads)
+ return heads
+
+ def topo_sort(self):
+ """Return the nodes in topological order.
+
+ All parents must occur before all children.
+ """
+ # This is, for the most part, the same iteration order that we used for
+ # _find_gdfo, consider finding a way to remove the duplication
+ # In general, we find the 'tails' (nodes with no parents), and then
+ # walk to the children. For children that have all of their parents
+ # yielded, we queue up the child to be yielded as well.
+ cdef _KnownGraphNode node
+ cdef _KnownGraphNode child
+ cdef PyObject *temp
+ cdef Py_ssize_t pos
+ cdef int replace
+ cdef Py_ssize_t last_item
+
+ pending = self._find_tails()
+ if PyList_GET_SIZE(pending) == 0 and len(self._nodes) > 0:
+ raise errors.GraphCycleError(self._nodes)
+
+ topo_order = []
+
+ last_item = PyList_GET_SIZE(pending) - 1
+ while last_item >= 0:
+ # Avoid pop followed by push, instead, peek, and replace
+ # timing shows this is 930ms => 770ms for OOo
+ node = _get_list_node(pending, last_item)
+ last_item = last_item - 1
+ if node.parents is not None:
+ # We don't include ghost parents
+ PyList_Append(topo_order, node.key)
+ for pos from 0 <= pos < PyList_GET_SIZE(node.children):
+ child = _get_list_node(node.children, pos)
+ if child.gdfo == -1:
+ # We know we have a graph cycle because a node has a parent
+ # which we couldn't find
+ raise errors.GraphCycleError(self._nodes)
+ child.seen = child.seen + 1
+ if child.seen == PyTuple_GET_SIZE(child.parents):
+ # All parents of this child have been yielded, queue this
+ # one to be yielded as well
+ last_item = last_item + 1
+ if last_item < PyList_GET_SIZE(pending):
+ Py_INCREF(child) # SetItem steals a ref
+ PyList_SetItem(pending, last_item, child)
+ else:
+ PyList_Append(pending, child)
+ # We have queued this node, we don't need to track it
+ # anymore
+ child.seen = 0
+ # We started from the parents, so we don't need to do anymore work
+ return topo_order
+
+ def gc_sort(self):
+ """Return a reverse topological ordering which is 'stable'.
+
+ There are a few constraints:
+ 1) Reverse topological (all children before all parents)
+ 2) Grouped by prefix
+ 3) 'stable' sorting, so that we get the same result, independent of
+ machine, or extra data.
+ To do this, we use the same basic algorithm as topo_sort, but when we
+ aren't sure what node to access next, we sort them lexicographically.
+ """
+ cdef PyObject *temp
+ cdef Py_ssize_t pos, last_item
+ cdef _KnownGraphNode node, node2, parent_node
+
+ tips = self._find_tips()
+ # Split the tips based on prefix
+ prefix_tips = {}
+ for pos from 0 <= pos < PyList_GET_SIZE(tips):
+ node = _get_list_node(tips, pos)
+ if PyString_CheckExact(node.key) or len(node.key) == 1:
+ prefix = ''
+ else:
+ prefix = node.key[0]
+ temp = PyDict_GetItem(prefix_tips, prefix)
+ if temp == NULL:
+ prefix_tips[prefix] = [node]
+ else:
+ tip_nodes = <object>temp
+ PyList_Append(tip_nodes, node)
+
+ result = []
+ for prefix in sorted(prefix_tips):
+ temp = PyDict_GetItem(prefix_tips, prefix)
+ assert temp != NULL
+ tip_nodes = <object>temp
+ pending = _sort_list_nodes(tip_nodes, 1)
+ last_item = PyList_GET_SIZE(pending) - 1
+ while last_item >= 0:
+ node = _get_list_node(pending, last_item)
+ last_item = last_item - 1
+ if node.parents is None:
+ # Ghost
+ continue
+ PyList_Append(result, node.key)
+ # Sorting the parent keys isn't strictly necessary for stable
+ # sorting of a given graph. But it does help minimize the
+ # differences between graphs
+ # For bzr.dev ancestry:
+ # 4.73ms no sort
+ # 7.73ms RichCompareBool sort
+ parents = _sort_list_nodes(node.parents, 1)
+ for pos from 0 <= pos < len(parents):
+ if PyTuple_CheckExact(parents):
+ parent_node = _get_tuple_node(parents, pos)
+ else:
+ parent_node = _get_list_node(parents, pos)
+ # TODO: GraphCycle detection
+ parent_node.seen = parent_node.seen + 1
+ if (parent_node.seen
+ == PyList_GET_SIZE(parent_node.children)):
+ # All children have been processed, queue up this
+ # parent
+ last_item = last_item + 1
+ if last_item < PyList_GET_SIZE(pending):
+ Py_INCREF(parent_node) # SetItem steals a ref
+ PyList_SetItem(pending, last_item, parent_node)
+ else:
+ PyList_Append(pending, parent_node)
+ parent_node.seen = 0
+ return result
+
+ def merge_sort(self, tip_key):
+ """Compute the merge sorted graph output."""
+ cdef _MergeSorter sorter
+
+ # TODO: consider disabling gc since we are allocating a lot of nodes
+ # that won't be collectable anyway. real world testing has not
+ # shown a specific impact, yet.
+ sorter = _MergeSorter(self, tip_key)
+ return sorter.topo_order()
+
+ def get_parent_keys(self, key):
+ """Get the parents for a key
+
+ Returns a list containg the parents keys. If the key is a ghost,
+ None is returned. A KeyError will be raised if the key is not in
+ the graph.
+
+ :param keys: Key to check (eg revision_id)
+ :return: A list of parents
+ """
+ return self._nodes[key].parent_keys
+
+ def get_child_keys(self, key):
+ """Get the children for a key
+
+ Returns a list containg the children keys. A KeyError will be raised
+ if the key is not in the graph.
+
+ :param keys: Key to check (eg revision_id)
+ :return: A list of children
+ """
+ return self._nodes[key].child_keys
+
+
+cdef class _MergeSortNode:
+ """Tracks information about a node during the merge_sort operation."""
+
+ # Public api
+ cdef public object key
+ cdef public long merge_depth
+ cdef public object end_of_merge # True/False Is this the end of the current merge
+
+ # Private api, used while computing the information
+ cdef _KnownGraphNode left_parent
+ cdef _KnownGraphNode left_pending_parent
+ cdef object pending_parents # list of _KnownGraphNode for non-left parents
+ cdef long _revno_first
+ cdef long _revno_second
+ cdef long _revno_last
+ # TODO: turn these into flag/bit fields rather than individual members
+ cdef int is_first_child # Is this the first child?
+ cdef int seen_by_child # A child node has seen this parent
+ cdef int completed # Fully Processed
+
+ def __init__(self, key):
+ self.key = key
+ self.merge_depth = -1
+ self.left_parent = None
+ self.left_pending_parent = None
+ self.pending_parents = None
+ self._revno_first = -1
+ self._revno_second = -1
+ self._revno_last = -1
+ self.is_first_child = 0
+ self.seen_by_child = 0
+ self.completed = 0
+
+ def __repr__(self):
+ return '%s(%s depth:%s rev:%s,%s,%s first:%s seen:%s)' % (
+ self.__class__.__name__, self.key,
+ self.merge_depth,
+ self._revno_first, self._revno_second, self._revno_last,
+ self.is_first_child, self.seen_by_child)
+
+ cdef int has_pending_parents(self): # cannot_raise
+ if self.left_pending_parent is not None or self.pending_parents:
+ return 1
+ return 0
+
+ cdef object _revno(self):
+ if self._revno_first == -1:
+ if self._revno_second != -1:
+ raise RuntimeError('Something wrong with: %s' % (self,))
+ return (self._revno_last,)
+ else:
+ return (self._revno_first, self._revno_second, self._revno_last)
+
+ property revno:
+ def __get__(self):
+ return self._revno()
+
+
+cdef class _MergeSorter:
+ """This class does the work of computing the merge_sort ordering.
+
+ We have some small advantages, in that we get all the extra information
+ that KnownGraph knows, like knowing the child lists, etc.
+ """
+
+ # Current performance numbers for merge_sort(bzr_dev_parent_map):
+ # 302ms tsort.merge_sort()
+ # 91ms graph.KnownGraph().merge_sort()
+ # 40ms kg.merge_sort()
+
+ cdef KnownGraph graph
+ cdef object _depth_first_stack # list
+ cdef Py_ssize_t _last_stack_item # offset to last item on stack
+ # cdef object _ms_nodes # dict of key => _MergeSortNode
+ cdef object _revno_to_branch_count # {revno => num child branches}
+ cdef object _scheduled_nodes # List of nodes ready to be yielded
+
+ def __init__(self, known_graph, tip_key):
+ cdef _KnownGraphNode node
+
+ self.graph = known_graph
+ # self._ms_nodes = {}
+ self._revno_to_branch_count = {}
+ self._depth_first_stack = []
+ self._last_stack_item = -1
+ self._scheduled_nodes = []
+ if (tip_key is not None and tip_key != NULL_REVISION
+ and tip_key != (NULL_REVISION,)):
+ node = self.graph._nodes[tip_key]
+ self._push_node(node, 0)
+
+ cdef _MergeSortNode _get_ms_node(self, _KnownGraphNode node):
+ cdef PyObject *temp_node
+ cdef _MergeSortNode ms_node
+
+ if node.extra is None:
+ ms_node = _MergeSortNode(node.key)
+ node.extra = ms_node
+ else:
+ ms_node = <_MergeSortNode>node.extra
+ return ms_node
+
+ cdef _push_node(self, _KnownGraphNode node, long merge_depth):
+ cdef _KnownGraphNode parent_node
+ cdef _MergeSortNode ms_node, ms_parent_node
+ cdef Py_ssize_t pos
+
+ ms_node = self._get_ms_node(node)
+ ms_node.merge_depth = merge_depth
+ if node.parents is None:
+ raise RuntimeError('ghost nodes should not be pushed'
+ ' onto the stack: %s' % (node,))
+ if PyTuple_GET_SIZE(node.parents) > 0:
+ parent_node = _get_tuple_node(node.parents, 0)
+ ms_node.left_parent = parent_node
+ if parent_node.parents is None: # left-hand ghost
+ ms_node.left_pending_parent = None
+ ms_node.left_parent = None
+ else:
+ ms_node.left_pending_parent = parent_node
+ if PyTuple_GET_SIZE(node.parents) > 1:
+ ms_node.pending_parents = []
+ for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
+ parent_node = _get_tuple_node(node.parents, pos)
+ if parent_node.parents is None: # ghost
+ continue
+ PyList_Append(ms_node.pending_parents, parent_node)
+
+ ms_node.is_first_child = 1
+ if ms_node.left_parent is not None:
+ ms_parent_node = self._get_ms_node(ms_node.left_parent)
+ if ms_parent_node.seen_by_child:
+ ms_node.is_first_child = 0
+ ms_parent_node.seen_by_child = 1
+ self._last_stack_item = self._last_stack_item + 1
+ if self._last_stack_item < PyList_GET_SIZE(self._depth_first_stack):
+ Py_INCREF(node) # SetItem steals a ref
+ PyList_SetItem(self._depth_first_stack, self._last_stack_item,
+ node)
+ else:
+ PyList_Append(self._depth_first_stack, node)
+
+ cdef _pop_node(self):
+ cdef PyObject *temp
+ cdef _MergeSortNode ms_node, ms_parent_node, ms_prev_node
+ cdef _KnownGraphNode node, parent_node, prev_node
+
+ node = _get_list_node(self._depth_first_stack, self._last_stack_item)
+ ms_node = <_MergeSortNode>node.extra
+ self._last_stack_item = self._last_stack_item - 1
+ if ms_node.left_parent is not None:
+ # Assign the revision number from the left-hand parent
+ ms_parent_node = <_MergeSortNode>ms_node.left_parent.extra
+ if ms_node.is_first_child:
+ # First child just increments the final digit
+ ms_node._revno_first = ms_parent_node._revno_first
+ ms_node._revno_second = ms_parent_node._revno_second
+ ms_node._revno_last = ms_parent_node._revno_last + 1
+ else:
+ # Not the first child, make a new branch
+ # (mainline_revno, branch_count, 1)
+ if ms_parent_node._revno_first == -1:
+ # Mainline ancestor, the increment is on the last digit
+ base_revno = ms_parent_node._revno_last
+ else:
+ base_revno = ms_parent_node._revno_first
+ temp = PyDict_GetItem(self._revno_to_branch_count,
+ base_revno)
+ if temp == NULL:
+ branch_count = 1
+ else:
+ branch_count = (<object>temp) + 1
+ PyDict_SetItem(self._revno_to_branch_count, base_revno,
+ branch_count)
+ ms_node._revno_first = base_revno
+ ms_node._revno_second = branch_count
+ ms_node._revno_last = 1
+ else:
+ temp = PyDict_GetItem(self._revno_to_branch_count, 0)
+ if temp == NULL:
+ # The first root node doesn't have a 3-digit revno
+ root_count = 0
+ ms_node._revno_first = -1
+ ms_node._revno_second = -1
+ ms_node._revno_last = 1
+ else:
+ root_count = (<object>temp) + 1
+ ms_node._revno_first = 0
+ ms_node._revno_second = root_count
+ ms_node._revno_last = 1
+ PyDict_SetItem(self._revno_to_branch_count, 0, root_count)
+ ms_node.completed = 1
+ if PyList_GET_SIZE(self._scheduled_nodes) == 0:
+ # The first scheduled node is always the end of merge
+ ms_node.end_of_merge = True
+ else:
+ prev_node = _get_list_node(self._scheduled_nodes,
+ PyList_GET_SIZE(self._scheduled_nodes) - 1)
+ ms_prev_node = <_MergeSortNode>prev_node.extra
+ if ms_prev_node.merge_depth < ms_node.merge_depth:
+ # The previously pushed node is to our left, so this is the end
+ # of this right-hand chain
+ ms_node.end_of_merge = True
+ elif (ms_prev_node.merge_depth == ms_node.merge_depth
+ and prev_node not in node.parents):
+ # The next node is not a direct parent of this node
+ ms_node.end_of_merge = True
+ else:
+ ms_node.end_of_merge = False
+ PyList_Append(self._scheduled_nodes, node)
+
+ cdef _schedule_stack(self):
+ cdef _KnownGraphNode last_node, next_node
+ cdef _MergeSortNode ms_node, ms_last_node, ms_next_node
+ cdef long next_merge_depth
+ ordered = []
+ while self._last_stack_item >= 0:
+ # Peek at the last item on the stack
+ last_node = _get_list_node(self._depth_first_stack,
+ self._last_stack_item)
+ if last_node.gdfo == -1:
+ # if _find_gdfo skipped a node, that means there is a graph
+ # cycle, error out now
+ raise errors.GraphCycleError(self.graph._nodes)
+ ms_last_node = <_MergeSortNode>last_node.extra
+ if not ms_last_node.has_pending_parents():
+ # Processed all parents, pop this node
+ self._pop_node()
+ continue
+ while ms_last_node.has_pending_parents():
+ if ms_last_node.left_pending_parent is not None:
+ # recurse depth first into the primary parent
+ next_node = ms_last_node.left_pending_parent
+ ms_last_node.left_pending_parent = None
+ else:
+ # place any merges in right-to-left order for scheduling
+ # which gives us left-to-right order after we reverse
+ # the scheduled queue.
+ # Note: This has the effect of allocating common-new
+ # revisions to the right-most subtree rather than the
+ # left most, which will display nicely (you get
+ # smaller trees at the top of the combined merge).
+ next_node = ms_last_node.pending_parents.pop()
+ ms_next_node = self._get_ms_node(next_node)
+ if ms_next_node.completed:
+ # this parent was completed by a child on the
+ # call stack. skip it.
+ continue
+ # otherwise transfer it from the source graph into the
+ # top of the current depth first search stack.
+
+ if next_node is ms_last_node.left_parent:
+ next_merge_depth = ms_last_node.merge_depth
+ else:
+ next_merge_depth = ms_last_node.merge_depth + 1
+ self._push_node(next_node, next_merge_depth)
+ # and do not continue processing parents until this 'call'
+ # has recursed.
+ break
+
+ cdef topo_order(self):
+ cdef _MergeSortNode ms_node
+ cdef _KnownGraphNode node
+ cdef Py_ssize_t pos
+ cdef PyObject *temp_key, *temp_node
+
+ # Note: allocating a _MergeSortNode and deallocating it for all nodes
+ # costs approx 8.52ms (21%) of the total runtime
+ # We might consider moving the attributes into the base
+ # KnownGraph object.
+ self._schedule_stack()
+
+ # We've set up the basic schedule, now we can continue processing the
+ # output.
+ # Note: This final loop costs us 40.0ms => 28.8ms (11ms, 25%) on
+ # bzr.dev, to convert the internal Object representation into a
+ # Tuple representation...
+ # 2ms is walking the data and computing revno tuples
+ # 7ms is computing the return tuple
+ # 4ms is PyList_Append()
+ ordered = []
+ # output the result in reverse order, and separate the generated info
+ for pos from PyList_GET_SIZE(self._scheduled_nodes) > pos >= 0:
+ node = _get_list_node(self._scheduled_nodes, pos)
+ ms_node = <_MergeSortNode>node.extra
+ PyList_Append(ordered, ms_node)
+ node.extra = None
+ # Clear out the scheduled nodes now that we're done
+ self._scheduled_nodes = []
+ return ordered