1 files changed, 1715 insertions, 0 deletions
diff --git a/bzrlib/graph.py b/bzrlib/graph.py
new file mode 100644
index 0000000..e1c2522
--- /dev/null
+++ b/bzrlib/graph.py
@@ -0,0 +1,1715 @@
+# Copyright (C) 2007-2011 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+from __future__ import absolute_import
+
+import time
+
+from bzrlib import (
+    debug,
+    errors,
+    osutils,
+    revision,
+    trace,
+    )
+
+STEP_UNIQUE_SEARCHER_EVERY = 5
+
+# DIAGRAM of terminology
+#       A
+#       /\
+#      B  C
+#      |  |\
+#      D  E F
+#      |\/| |
+#      |/\|/
+#      G  H
+#
+# In this diagram, relative to G and H:
+# A, B, C, D, E are common ancestors.
+# C, D and E are border ancestors, because each has a non-common descendant.
+# D and E are least common ancestors because none of their descendants are
+# common ancestors.
+# C is not a least common ancestor because its descendant, E, is a common
+# ancestor.
+#
+# The find_unique_lca algorithm will pick A in two steps:
+# 1. find_lca('G', 'H') => ['D', 'E']
+# 2. Since len(['D', 'E']) > 1, find_lca('D', 'E') => ['A']
+
+
+class DictParentsProvider(object):
+    """A parents provider for Graph objects."""
+
+    def __init__(self, ancestry):
+        self.ancestry = ancestry
+
+    def __repr__(self):
+        return 'DictParentsProvider(%r)' % self.ancestry
+
+    # Note: DictParentsProvider does not implement get_cached_parent_map
+    #       Arguably, the data is clearly cached in memory. However, this class
+    #       is mostly used for testing, and it keeps the tests clean to not
+    #       change it.
+
+    def get_parent_map(self, keys):
+        """See StackedParentsProvider.get_parent_map"""
+        ancestry = self.ancestry
+        return dict([(k, ancestry[k]) for k in keys if k in ancestry])
+
+
+class StackedParentsProvider(object):
+    """A parents provider which stacks (or unions) multiple providers.
+
+    The providers are queries in the order of the provided parent_providers.
+    """
+
+    def __init__(self, parent_providers):
+        self._parent_providers = parent_providers
+
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self._parent_providers)
+
+    def get_parent_map(self, keys):
+        """Get a mapping of keys => parents
+
+        A dictionary is returned with an entry for each key present in this
+        source. If this source doesn't have information about a key, it should
+        not include an entry.
+
+        [NULL_REVISION] is used as the parent of the first user-committed
+        revision.  Its parent list is empty.
+
+        :param keys: An iterable returning keys to check (eg revision_ids)
+        :return: A dictionary mapping each key to its parents
+        """
+        found = {}
+        remaining = set(keys)
+        # This adds getattr() overhead to each get_parent_map call. However,
+        # this is StackedParentsProvider, which means we're dealing with I/O
+        # (either local indexes, or remote RPCs), so CPU overhead should be
+        # minimal.
+        for parents_provider in self._parent_providers:
+            get_cached = getattr(parents_provider, 'get_cached_parent_map',
+                                 None)
+            if get_cached is None:
+                continue
+            new_found = get_cached(remaining)
+            found.update(new_found)
+            remaining.difference_update(new_found)
+            if not remaining:
+                break
+        if not remaining:
+            return found
+        for parents_provider in self._parent_providers:
+            new_found = parents_provider.get_parent_map(remaining)
+            found.update(new_found)
+            remaining.difference_update(new_found)
+            if not remaining:
+                break
+        return found
+
+
+class CachingParentsProvider(object):
+    """A parents provider which will cache the revision => parents as a dict.
+
+    This is useful for providers which have an expensive look up.
+
+    Either a ParentsProvider or a get_parent_map-like callback may be
+    supplied.  If it provides extra un-asked-for parents, they will be cached,
+    but filtered out of get_parent_map.
+
+    The cache is enabled by default, but may be disabled and re-enabled.
+    """
+    def __init__(self, parent_provider=None, get_parent_map=None):
+        """Constructor.
+
+        :param parent_provider: The ParentProvider to use.  It or
+            get_parent_map must be supplied.
+        :param get_parent_map: The get_parent_map callback to use.  It or
+            parent_provider must be supplied.
+        """
+        self._real_provider = parent_provider
+        if get_parent_map is None:
+            self._get_parent_map = self._real_provider.get_parent_map
+        else:
+            self._get_parent_map = get_parent_map
+        self._cache = None
+        self.enable_cache(True)
+
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self._real_provider)
+
+    def enable_cache(self, cache_misses=True):
+        """Enable cache."""
+        if self._cache is not None:
+            raise AssertionError('Cache enabled when already enabled.')
+        self._cache = {}
+        self._cache_misses = cache_misses
+        self.missing_keys = set()
+
+    def disable_cache(self):
+        """Disable and clear the cache."""
+        self._cache = None
+        self._cache_misses = None
+        self.missing_keys = set()
+
+    def get_cached_map(self):
+        """Return any cached get_parent_map values."""
+        if self._cache is None:
+            return None
+        return dict(self._cache)
+
+    def get_cached_parent_map(self, keys):
+        """Return items from the cache.
+
+        This returns the same info as get_parent_map, but explicitly does not
+        invoke the supplied ParentsProvider to search for uncached values.
+        """
+        cache = self._cache
+        if cache is None:
+            return {}
+        return dict([(key, cache[key]) for key in keys if key in cache])
+
+    def get_parent_map(self, keys):
+        """See StackedParentsProvider.get_parent_map."""
+        cache = self._cache
+        if cache is None:
+            cache = self._get_parent_map(keys)
+        else:
+            needed_revisions = set(key for key in keys if key not in cache)
+            # Do not ask for negatively cached keys
+            needed_revisions.difference_update(self.missing_keys)
+            if needed_revisions:
+                parent_map = self._get_parent_map(needed_revisions)
+                cache.update(parent_map)
+                if self._cache_misses:
+                    for key in needed_revisions:
+                        if key not in parent_map:
+                            self.note_missing_key(key)
+        result = {}
+        for key in keys:
+            value = cache.get(key)
+            if value is not None:
+                result[key] = value
+        return result
+
+    def note_missing_key(self, key):
+        """Note that key is a missing key."""
+        if self._cache_misses:
+            self.missing_keys.add(key)
+
+
+class CallableToParentsProviderAdapter(object):
+    """A parents provider that adapts any callable to the parents provider API.
+
+    i.e. it accepts calls to self.get_parent_map and relays them to the
+    callable it was constructed with.
+    """
+
+    def __init__(self, a_callable):
+        self.callable = a_callable
+
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self.callable)
+
+    def get_parent_map(self, keys):
+        return self.callable(keys)
+
+
+class Graph(object):
+    """Provide incremental access to revision graphs.
+
+    This is the generic implementation; it is intended to be subclassed to
+    specialize it for other repository types.
+    """
+
+    def __init__(self, parents_provider):
+        """Construct a Graph that uses several graphs as its input
+
+        This should not normally be invoked directly, because there may be
+        specialized implementations for particular repository types.  See
+        Repository.get_graph().
+
+        :param parents_provider: An object providing a get_parent_map call
+            conforming to the behavior of
+            StackedParentsProvider.get_parent_map.
+        """
+        if getattr(parents_provider, 'get_parents', None) is not None:
+            self.get_parents = parents_provider.get_parents
+        if getattr(parents_provider, 'get_parent_map', None) is not None:
+            self.get_parent_map = parents_provider.get_parent_map
+        self._parents_provider = parents_provider
+
+    def __repr__(self):
+        return 'Graph(%r)' % self._parents_provider
+
+    def find_lca(self, *revisions):
+        """Determine the lowest common ancestors of the provided revisions
+
+        A lowest common ancestor is a common ancestor none of whose
+        descendants are common ancestors.  In graphs, unlike trees, there may
+        be multiple lowest common ancestors.
+
+        This algorithm has two phases.  Phase 1 identifies border ancestors,
+        and phase 2 filters border ancestors to determine lowest common
+        ancestors.
+
+        In phase 1, border ancestors are identified, using a breadth-first
+        search starting at the bottom of the graph.  Searches are stopped
+        whenever a node or one of its descendants is determined to be common
+
+        In phase 2, the border ancestors are filtered to find the least
+        common ancestors.  This is done by searching the ancestries of each
+        border ancestor.
+
+        Phase 2 is perfomed on the principle that a border ancestor that is
+        not an ancestor of any other border ancestor is a least common
+        ancestor.
+
+        Searches are stopped when they find a node that is determined to be a
+        common ancestor of all border ancestors, because this shows that it
+        cannot be a descendant of any border ancestor.
+
+        The scaling of this operation should be proportional to:
+
+        1. The number of uncommon ancestors
+        2. The number of border ancestors
+        3. The length of the shortest path between a border ancestor and an
+           ancestor of all border ancestors.
+        """
+        border_common, common, sides = self._find_border_ancestors(revisions)
+        # We may have common ancestors that can be reached from each other.
+        # - ask for the heads of them to filter it down to only ones that
+        # cannot be reached from each other - phase 2.
+        return self.heads(border_common)
+
+    def find_difference(self, left_revision, right_revision):
+        """Determine the graph difference between two revisions"""
+        border, common, searchers = self._find_border_ancestors(
+            [left_revision, right_revision])
+        self._search_for_extra_common(common, searchers)
+        left = searchers[0].seen
+        right = searchers[1].seen
+        return (left.difference(right), right.difference(left))
+
+    def find_descendants(self, old_key, new_key):
+        """Find descendants of old_key that are ancestors of new_key."""
+        child_map = self.get_child_map(self._find_descendant_ancestors(
+            old_key, new_key))
+        graph = Graph(DictParentsProvider(child_map))
+        searcher = graph._make_breadth_first_searcher([old_key])
+        list(searcher)
+        return searcher.seen
+
+    def _find_descendant_ancestors(self, old_key, new_key):
+        """Find ancestors of new_key that may be descendants of old_key."""
+        stop = self._make_breadth_first_searcher([old_key])
+        descendants = self._make_breadth_first_searcher([new_key])
+        for revisions in descendants:
+            old_stop = stop.seen.intersection(revisions)
+            descendants.stop_searching_any(old_stop)
+            seen_stop = descendants.find_seen_ancestors(stop.step())
+            descendants.stop_searching_any(seen_stop)
+        return descendants.seen.difference(stop.seen)
+
+    def get_child_map(self, keys):
+        """Get a mapping from parents to children of the specified keys.
+
+        This is simply the inversion of get_parent_map.  Only supplied keys
+        will be discovered as children.
+        :return: a dict of key:child_list for keys.
+        """
+        parent_map = self._parents_provider.get_parent_map(keys)
+        parent_child = {}
+        for child, parents in sorted(parent_map.items()):
+            for parent in parents:
+                parent_child.setdefault(parent, []).append(child)
+        return parent_child
+
+    def find_distance_to_null(self, target_revision_id, known_revision_ids):
+        """Find the left-hand distance to the NULL_REVISION.
+
+        (This can also be considered the revno of a branch at
+        target_revision_id.)
+
+        :param target_revision_id: A revision_id which we would like to know
+            the revno for.
+        :param known_revision_ids: [(revision_id, revno)] A list of known
+            revno, revision_id tuples. We'll use this to seed the search.
+        """
+        # Map from revision_ids to a known value for their revno
+        known_revnos = dict(known_revision_ids)
+        cur_tip = target_revision_id
+        num_steps = 0
+        NULL_REVISION = revision.NULL_REVISION
+        known_revnos[NULL_REVISION] = 0
+
+        searching_known_tips = list(known_revnos.keys())
+
+        unknown_searched = {}
+
+        while cur_tip not in known_revnos:
+            unknown_searched[cur_tip] = num_steps
+            num_steps += 1
+            to_search = set([cur_tip])
+            to_search.update(searching_known_tips)
+            parent_map = self.get_parent_map(to_search)
+            parents = parent_map.get(cur_tip, None)
+            if not parents: # An empty list or None is a ghost
+                raise errors.GhostRevisionsHaveNoRevno(target_revision_id,
+                                                       cur_tip)
+            cur_tip = parents[0]
+            next_known_tips = []
+            for revision_id in searching_known_tips:
+                parents = parent_map.get(revision_id, None)
+                if not parents:
+                    continue
+                next = parents[0]
+                next_revno = known_revnos[revision_id] - 1
+                if next in unknown_searched:
+                    # We have enough information to return a value right now
+                    return next_revno + unknown_searched[next]
+                if next in known_revnos:
+                    continue
+                known_revnos[next] = next_revno
+                next_known_tips.append(next)
+            searching_known_tips = next_known_tips
+
+        # We reached a known revision, so just add in how many steps it took to
+        # get there.
+        return known_revnos[cur_tip] + num_steps
+
+    def find_lefthand_distances(self, keys):
+        """Find the distance to null for all the keys in keys.
+
+        :param keys: keys to lookup.
+        :return: A dict key->distance for all of keys.
+        """
+        # Optimisable by concurrent searching, but a random spread should get
+        # some sort of hit rate.
+        result = {}
+        known_revnos = []
+        ghosts = []
+        for key in keys:
+            try:
+                known_revnos.append(
+                    (key, self.find_distance_to_null(key, known_revnos)))
+            except errors.GhostRevisionsHaveNoRevno:
+                ghosts.append(key)
+        for key in ghosts:
+            known_revnos.append((key, -1))
+        return dict(known_revnos)
+
+    def find_unique_ancestors(self, unique_revision, common_revisions):
+        """Find the unique ancestors for a revision versus others.
+
+        This returns the ancestry of unique_revision, excluding all revisions
+        in the ancestry of common_revisions. If unique_revision is in the
+        ancestry, then the empty set will be returned.
+
+        :param unique_revision: The revision_id whose ancestry we are
+            interested in.
+            (XXX: Would this API be better if we allowed multiple revisions on
+            to be searched here?)
+        :param common_revisions: Revision_ids of ancestries to exclude.
+        :return: A set of revisions in the ancestry of unique_revision
+        """
+        if unique_revision in common_revisions:
+            return set()
+
+        # Algorithm description
+        # 1) Walk backwards from the unique node and all common nodes.
+        # 2) When a node is seen by both sides, stop searching it in the unique
+        #    walker, include it in the common walker.
+        # 3) Stop searching when there are no nodes left for the unique walker.
+        #    At this point, you have a maximal set of unique nodes. Some of
+        #    them may actually be common, and you haven't reached them yet.
+        # 4) Start new searchers for the unique nodes, seeded with the
+        #    information you have so far.
+        # 5) Continue searching, stopping the common searches when the search
+        #    tip is an ancestor of all unique nodes.
+        # 6) Aggregate together unique searchers when they are searching the
+        #    same tips. When all unique searchers are searching the same node,
+        #    stop move it to a single 'all_unique_searcher'.
+        # 7) The 'all_unique_searcher' represents the very 'tip' of searching.
+        #    Most of the time this produces very little important information.
+        #    So don't step it as quickly as the other searchers.
+        # 8) Search is done when all common searchers have completed.
+
+        unique_searcher, common_searcher = self._find_initial_unique_nodes(
+            [unique_revision], common_revisions)
+
+        unique_nodes = unique_searcher.seen.difference(common_searcher.seen)
+        if not unique_nodes:
+            return unique_nodes
+
+        (all_unique_searcher,
+         unique_tip_searchers) = self._make_unique_searchers(unique_nodes,
+                                    unique_searcher, common_searcher)
+
+        self._refine_unique_nodes(unique_searcher, all_unique_searcher,
+                                  unique_tip_searchers, common_searcher)
+        true_unique_nodes = unique_nodes.difference(common_searcher.seen)
+        if 'graph' in debug.debug_flags:
+            trace.mutter('Found %d truly unique nodes out of %d',
+                         len(true_unique_nodes), len(unique_nodes))
+        return true_unique_nodes
+
+    def _find_initial_unique_nodes(self, unique_revisions, common_revisions):
+        """Steps 1-3 of find_unique_ancestors.
+
+        Find the maximal set of unique nodes. Some of these might actually
+        still be common, but we are sure that there are no other unique nodes.
+
+        :return: (unique_searcher, common_searcher)
+        """
+
+        unique_searcher = self._make_breadth_first_searcher(unique_revisions)
+        # we know that unique_revisions aren't in common_revisions, so skip
+        # past them.
+        unique_searcher.next()
+        common_searcher = self._make_breadth_first_searcher(common_revisions)
+
+        # As long as we are still finding unique nodes, keep searching
+        while unique_searcher._next_query:
+            next_unique_nodes = set(unique_searcher.step())
+            next_common_nodes = set(common_searcher.step())
+
+            # Check if either searcher encounters new nodes seen by the other
+            # side.
+            unique_are_common_nodes = next_unique_nodes.intersection(
+                common_searcher.seen)
+            unique_are_common_nodes.update(
+                next_common_nodes.intersection(unique_searcher.seen))
+            if unique_are_common_nodes:
+                ancestors = unique_searcher.find_seen_ancestors(
+                                unique_are_common_nodes)
+                # TODO: This is a bit overboard, we only really care about
+                #       the ancestors of the tips because the rest we
+                #       already know. This is *correct* but causes us to
+                #       search too much ancestry.
+                ancestors.update(common_searcher.find_seen_ancestors(ancestors))
+                unique_searcher.stop_searching_any(ancestors)
+                common_searcher.start_searching(ancestors)
+
+        return unique_searcher, common_searcher
+
+    def _make_unique_searchers(self, unique_nodes, unique_searcher,
+                               common_searcher):
+        """Create a searcher for all the unique search tips (step 4).
+
+        As a side effect, the common_searcher will stop searching any nodes
+        that are ancestors of the unique searcher tips.
+
+        :return: (all_unique_searcher, unique_tip_searchers)
+        """
+        unique_tips = self._remove_simple_descendants(unique_nodes,
+                        self.get_parent_map(unique_nodes))
+
+        if len(unique_tips) == 1:
+            unique_tip_searchers = []
+            ancestor_all_unique = unique_searcher.find_seen_ancestors(unique_tips)
+        else:
+            unique_tip_searchers = []
+            for tip in unique_tips:
+                revs_to_search = unique_searcher.find_seen_ancestors([tip])
+                revs_to_search.update(
+                    common_searcher.find_seen_ancestors(revs_to_search))
+                searcher = self._make_breadth_first_searcher(revs_to_search)
+                # We don't care about the starting nodes.
+                searcher._label = tip
+                searcher.step()
+                unique_tip_searchers.append(searcher)
+
+            ancestor_all_unique = None
+            for searcher in unique_tip_searchers:
+                if ancestor_all_unique is None:
+                    ancestor_all_unique = set(searcher.seen)
+                else:
+                    ancestor_all_unique = ancestor_all_unique.intersection(
+                                                searcher.seen)
+        # Collapse all the common nodes into a single searcher
+        all_unique_searcher = self._make_breadth_first_searcher(
+                                ancestor_all_unique)
+        if ancestor_all_unique:
+            # We've seen these nodes in all the searchers, so we'll just go to
+            # the next
+            all_unique_searcher.step()
+
+            # Stop any search tips that are already known as ancestors of the
+            # unique nodes
+            stopped_common = common_searcher.stop_searching_any(
+                common_searcher.find_seen_ancestors(ancestor_all_unique))
+
+            total_stopped = 0
+            for searcher in unique_tip_searchers:
+                total_stopped += len(searcher.stop_searching_any(
+                    searcher.find_seen_ancestors(ancestor_all_unique)))
+        if 'graph' in debug.debug_flags:
+            trace.mutter('For %d unique nodes, created %d + 1 unique searchers'
+                         ' (%d stopped search tips, %d common ancestors'
+                         ' (%d stopped common)',
+                         len(unique_nodes), len(unique_tip_searchers),
+                         total_stopped, len(ancestor_all_unique),
+                         len(stopped_common))
+        return all_unique_searcher, unique_tip_searchers
+
+    def _step_unique_and_common_searchers(self, common_searcher,
+                                          unique_tip_searchers,
+                                          unique_searcher):
+        """Step all the searchers"""
+        newly_seen_common = set(common_searcher.step())
+        newly_seen_unique = set()
+        for searcher in unique_tip_searchers:
+            next = set(searcher.step())
+            next.update(unique_searcher.find_seen_ancestors(next))
+            next.update(common_searcher.find_seen_ancestors(next))
+            for alt_searcher in unique_tip_searchers:
+                if alt_searcher is searcher:
+                    continue
+                next.update(alt_searcher.find_seen_ancestors(next))
+            searcher.start_searching(next)
+            newly_seen_unique.update(next)
+        return newly_seen_common, newly_seen_unique
+
+    def _find_nodes_common_to_all_unique(self, unique_tip_searchers,
+                                         all_unique_searcher,
+                                         newly_seen_unique, step_all_unique):
+        """Find nodes that are common to all unique_tip_searchers.
+
+        If it is time, step the all_unique_searcher, and add its nodes to the
+        result.
+        """
+        common_to_all_unique_nodes = newly_seen_unique.copy()
+        for searcher in unique_tip_searchers:
+            common_to_all_unique_nodes.intersection_update(searcher.seen)
+        common_to_all_unique_nodes.intersection_update(
+                                    all_unique_searcher.seen)
+        # Step all-unique less frequently than the other searchers.
+        # In the common case, we don't need to spider out far here, so
+        # avoid doing extra work.
+        if step_all_unique:
+            tstart = time.clock()
+            nodes = all_unique_searcher.step()
+            common_to_all_unique_nodes.update(nodes)
+            if 'graph' in debug.debug_flags:
+                tdelta = time.clock() - tstart
+                trace.mutter('all_unique_searcher step() took %.3fs'
+                             'for %d nodes (%d total), iteration: %s',
+                             tdelta, len(nodes), len(all_unique_searcher.seen),
+                             all_unique_searcher._iterations)
+        return common_to_all_unique_nodes
+
+    def _collapse_unique_searchers(self, unique_tip_searchers,
+                                   common_to_all_unique_nodes):
+        """Combine searchers that are searching the same tips.
+
+        When two searchers are searching the same tips, we can stop one of the
+        searchers. We also know that the maximal set of common ancestors is the
+        intersection of the two original searchers.
+
+        :return: A list of searchers that are searching unique nodes.
+        """
+        # Filter out searchers that don't actually search different
+        # nodes. We already have the ancestry intersection for them
+        unique_search_tips = {}
+        for searcher in unique_tip_searchers:
+            stopped = searcher.stop_searching_any(common_to_all_unique_nodes)
+            will_search_set = frozenset(searcher._next_query)
+            if not will_search_set:
+                if 'graph' in debug.debug_flags:
+                    trace.mutter('Unique searcher %s was stopped.'
+                                 ' (%s iterations) %d nodes stopped',
+                                 searcher._label,
+                                 searcher._iterations,
+                                 len(stopped))
+            elif will_search_set not in unique_search_tips:
+                # This searcher is searching a unique set of nodes, let it
+                unique_search_tips[will_search_set] = [searcher]
+            else:
+                unique_search_tips[will_search_set].append(searcher)
+        # TODO: it might be possible to collapse searchers faster when they
+        #       only have *some* search tips in common.
+        next_unique_searchers = []
+        for searchers in unique_search_tips.itervalues():
+            if len(searchers) == 1:
+                # Searching unique tips, go for it
+                next_unique_searchers.append(searchers[0])
+            else:
+                # These searchers have started searching the same tips, we
+                # don't need them to cover the same ground. The
+                # intersection of their ancestry won't change, so create a
+                # new searcher, combining their histories.
+                next_searcher = searchers[0]
+                for searcher in searchers[1:]:
+                    next_searcher.seen.intersection_update(searcher.seen)
+                if 'graph' in debug.debug_flags:
+                    trace.mutter('Combining %d searchers into a single'
+                                 ' searcher searching %d nodes with'
+                                 ' %d ancestry',
+                                 len(searchers),
+                                 len(next_searcher._next_query),
+                                 len(next_searcher.seen))
+                next_unique_searchers.append(next_searcher)
+        return next_unique_searchers
+
+    def _refine_unique_nodes(self, unique_searcher, all_unique_searcher,
+                             unique_tip_searchers, common_searcher):
+        """Steps 5-8 of find_unique_ancestors.
+
+        This function returns when common_searcher has stopped searching for
+        more nodes.
+        """
+        # We step the ancestor_all_unique searcher only every
+        # STEP_UNIQUE_SEARCHER_EVERY steps.
+        step_all_unique_counter = 0
+        # While we still have common nodes to search
+        while common_searcher._next_query:
+            (newly_seen_common,
+             newly_seen_unique) = self._step_unique_and_common_searchers(
+                common_searcher, unique_tip_searchers, unique_searcher)
+            # These nodes are common ancestors of all unique nodes
+            common_to_all_unique_nodes = self._find_nodes_common_to_all_unique(
+                unique_tip_searchers, all_unique_searcher, newly_seen_unique,
+                step_all_unique_counter==0)
+            step_all_unique_counter = ((step_all_unique_counter + 1)
+                                       % STEP_UNIQUE_SEARCHER_EVERY)
+
+            if newly_seen_common:
+                # If a 'common' node is an ancestor of all unique searchers, we
+                # can stop searching it.
+                common_searcher.stop_searching_any(
+                    all_unique_searcher.seen.intersection(newly_seen_common))
+            if common_to_all_unique_nodes:
+                common_to_all_unique_nodes.update(
+                    common_searcher.find_seen_ancestors(
+                        common_to_all_unique_nodes))
+                # The all_unique searcher can start searching the common nodes
+                # but everyone else can stop.
+                # This is the sort of thing where we would like to not have it
+                # start_searching all of the nodes, but only mark all of them
+                # as seen, and have it search only the actual tips. Otherwise
+                # it is another get_parent_map() traversal for it to figure out
+                # what we already should know.
+                all_unique_searcher.start_searching(common_to_all_unique_nodes)
+                common_searcher.stop_searching_any(common_to_all_unique_nodes)
+
+            next_unique_searchers = self._collapse_unique_searchers(
+                unique_tip_searchers, common_to_all_unique_nodes)
+            if len(unique_tip_searchers) != len(next_unique_searchers):
+                if 'graph' in debug.debug_flags:
+                    trace.mutter('Collapsed %d unique searchers => %d'
+                                 ' at %s iterations',
+                                 len(unique_tip_searchers),
+                                 len(next_unique_searchers),
+                                 all_unique_searcher._iterations)
+            unique_tip_searchers = next_unique_searchers
+
+    def get_parent_map(self, revisions):
+        """Get a map of key:parent_list for revisions.
+
+        This implementation delegates to get_parents, for old parent_providers
+        that do not supply get_parent_map.
+        """
+        result = {}
+        for rev, parents in self.get_parents(revisions):
+            if parents is not None:
+                result[rev] = parents
+        return result
+
+    def _make_breadth_first_searcher(self, revisions):
+        return _BreadthFirstSearcher(revisions, self)
+
+    def _find_border_ancestors(self, revisions):
+        """Find common ancestors with at least one uncommon descendant.
+
+        Border ancestors are identified using a breadth-first
+        search starting at the bottom of the graph.  Searches are stopped
+        whenever a node or one of its descendants is determined to be common.
+
+        This will scale with the number of uncommon ancestors.
+
+        As well as the border ancestors, a set of seen common ancestors and a
+        list of sets of seen ancestors for each input revision is returned.
+        This allows calculation of graph difference from the results of this
+        operation.
+        """
+        if None in revisions:
+            raise errors.InvalidRevisionId(None, self)
+        common_ancestors = set()
+        searchers = [self._make_breadth_first_searcher([r])
+                     for r in revisions]
+        active_searchers = searchers[:]
+        border_ancestors = set()
+
+        while True:
+            newly_seen = set()
+            for searcher in searchers:
+                new_ancestors = searcher.step()
+                if new_ancestors:
+                    newly_seen.update(new_ancestors)
+            new_common = set()
+            for revision in newly_seen:
+                if revision in common_ancestors:
+                    # Not a border ancestor because it was seen as common
+                    # already
+                    new_common.add(revision)
+                    continue
+                for searcher in searchers:
+                    if revision not in searcher.seen:
+                        break
+                else:
+                    # This is a border because it is a first common that we see
+                    # after walking for a while.
+                    border_ancestors.add(revision)
+                    new_common.add(revision)
+            if new_common:
+                for searcher in searchers:
+                    new_common.update(searcher.find_seen_ancestors(new_common))
+                for searcher in searchers:
+                    searcher.start_searching(new_common)
+                common_ancestors.update(new_common)
+
+            # Figure out what the searchers will be searching next, and if
+            # there is only 1 set being searched, then we are done searching,
+            # since all searchers would have to be searching the same data,
+            # thus it *must* be in common.
+            unique_search_sets = set()
+            for searcher in searchers:
+                will_search_set = frozenset(searcher._next_query)
+                if will_search_set not in unique_search_sets:
+                    # This searcher is searching a unique set of nodes, let it
+                    unique_search_sets.add(will_search_set)
+
+            if len(unique_search_sets) == 1:
+                nodes = unique_search_sets.pop()
+                uncommon_nodes = nodes.difference(common_ancestors)
+                if uncommon_nodes:
+                    raise AssertionError("Somehow we ended up converging"
+                                         " without actually marking them as"
+                                         " in common."
+                                         "\nStart_nodes: %s"
+                                         "\nuncommon_nodes: %s"
+                                         % (revisions, uncommon_nodes))
+                break
+        return border_ancestors, common_ancestors, searchers
+
+    def heads(self, keys):
+        """Return the heads from amongst keys.
+
+        This is done by searching the ancestries of each key.  Any key that is
+        reachable from another key is not returned; all the others are.
+
+        This operation scales with the relative depth between any two keys. If
+        any two keys are completely disconnected all ancestry of both sides
+        will be retrieved.
+
+        :param keys: An iterable of keys.
+        :return: A set of the heads. Note that as a set there is no ordering
+            information. Callers will need to filter their input to create
+            order if they need it.
+        """
+        candidate_heads = set(keys)
+        if revision.NULL_REVISION in candidate_heads:
+            # NULL_REVISION is only a head if it is the only entry
+            candidate_heads.remove(revision.NULL_REVISION)
+            if not candidate_heads:
+                return set([revision.NULL_REVISION])
+        if len(candidate_heads) < 2:
+            return candidate_heads
+        searchers = dict((c, self._make_breadth_first_searcher([c]))
+                          for c in candidate_heads)
+        active_searchers = dict(searchers)
+        # skip over the actual candidate for each searcher
+        for searcher in active_searchers.itervalues():
+            searcher.next()
+        # The common walker finds nodes that are common to two or more of the
+        # input keys, so that we don't access all history when a currently
+        # uncommon search point actually meets up with something behind a
+        # common search point. Common search points do not keep searches
+        # active; they just allow us to make searches inactive without
+        # accessing all history.
+        common_walker = self._make_breadth_first_searcher([])
+        while len(active_searchers) > 0:
+            ancestors = set()
+            # advance searches
+            try:
+                common_walker.next()
+            except StopIteration:
+                # No common points being searched at this time.
+                pass
+            for candidate in active_searchers.keys():
+                try:
+                    searcher = active_searchers[candidate]
+                except KeyError:
+                    # rare case: we deleted candidate in a previous iteration
+                    # through this for loop, because it was determined to be
+                    # a descendant of another candidate.
+                    continue
+                try:
+                    ancestors.update(searcher.next())
+                except StopIteration:
+                    del active_searchers[candidate]
+                    continue
+            # process found nodes
+            new_common = set()
+            for ancestor in ancestors:
+                if ancestor in candidate_heads:
+                    candidate_heads.remove(ancestor)
+                    del searchers[ancestor]
+                    if ancestor in active_searchers:
+                        del active_searchers[ancestor]
+                # it may meet up with a known common node
+                if ancestor in common_walker.seen:
+                    # some searcher has encountered our known common nodes:
+                    # just stop it
+                    ancestor_set = set([ancestor])
+                    for searcher in searchers.itervalues():
+                        searcher.stop_searching_any(ancestor_set)
+                else:
+                    # or it may have been just reached by all the searchers:
+                    for searcher in searchers.itervalues():
+                        if ancestor not in searcher.seen:
+                            break
+                    else:
+                        # The final active searcher has just reached this node,
+                        # making it be known as a descendant of all candidates,
+                        # so we can stop searching it, and any seen ancestors
+                        new_common.add(ancestor)
+                        for searcher in searchers.itervalues():
+                            seen_ancestors =\
+                                searcher.find_seen_ancestors([ancestor])
+                            searcher.stop_searching_any(seen_ancestors)
+            common_walker.start_searching(new_common)
+        return candidate_heads
+
+    def find_merge_order(self, tip_revision_id, lca_revision_ids):
+        """Find the order that each revision was merged into tip.
+
+        This basically just walks backwards with a stack, and walks left-first
+        until it finds a node to stop.
+        """
+        if len(lca_revision_ids) == 1:
+            return list(lca_revision_ids)
+        looking_for = set(lca_revision_ids)
+        # TODO: Is there a way we could do this "faster" by batching up the
+        # get_parent_map requests?
+        # TODO: Should we also be culling the ancestry search right away? We
+        # could add looking_for to the "stop" list, and walk their
+        # ancestry in batched mode. The flip side is it might mean we walk a
+        # lot of "stop" nodes, rather than only the minimum.
+        # Then again, without it we may trace back into ancestry we could have
+        # stopped early.
+        stack = [tip_revision_id]
+        found = []
+        stop = set()
+        while stack and looking_for:
+            next = stack.pop()
+            stop.add(next)
+            if next in looking_for:
+                found.append(next)
+                looking_for.remove(next)
+                if len(looking_for) == 1:
+                    found.append(looking_for.pop())
+                    break
+                continue
+            parent_ids = self.get_parent_map([next]).get(next, None)
+            if not parent_ids: # Ghost, nothing to search here
+                continue
+            for parent_id in reversed(parent_ids):
+                # TODO: (performance) We see the parent at this point, but we
+                #       wait to mark it until later to make sure we get left
+                #       parents before right parents. However, instead of
+                #       waiting until we have traversed enough parents, we
+                #       could instead note that we've found it, and once all
+                #       parents are in the stack, just reverse iterate the
+                #       stack for them.
+                if parent_id not in stop:
+                    # this will need to be searched
+                    stack.append(parent_id)
+                stop.add(parent_id)
+        return found
+
+    def find_lefthand_merger(self, merged_key, tip_key):
+        """Find the first lefthand ancestor of tip_key that merged merged_key.
+
+        We do this by first finding the descendants of merged_key, then
+        walking through the lefthand ancestry of tip_key until we find a key
+        that doesn't descend from merged_key.  Its child is the key that
+        merged merged_key.
+
+        :return: The first lefthand ancestor of tip_key to merge merged_key.
+            merged_key if it is a lefthand ancestor of tip_key.
+            None if no ancestor of tip_key merged merged_key.
+        """
+        descendants = self.find_descendants(merged_key, tip_key)
+        candidate_iterator = self.iter_lefthand_ancestry(tip_key)
+        last_candidate = None
+        for candidate in candidate_iterator:
+            if candidate not in descendants:
+                return last_candidate
+            last_candidate = candidate
+
+    def find_unique_lca(self, left_revision, right_revision,
+                        count_steps=False):
+        """Find a unique LCA.
+
+        Find lowest common ancestors.  If there is no unique  common
+        ancestor, find the lowest common ancestors of those ancestors.
+
+        Iteration stops when a unique lowest common ancestor is found.
+        The graph origin is necessarily a unique lowest common ancestor.
+
+        Note that None is not an acceptable substitute for NULL_REVISION.
+        in the input for this method.
+
+        :param count_steps: If True, the return value will be a tuple of
+            (unique_lca, steps) where steps is the number of times that
+            find_lca was run.  If False, only unique_lca is returned.
+        """
+        revisions = [left_revision, right_revision]
+        steps = 0
+        while True:
+            steps += 1
+            lca = self.find_lca(*revisions)
+            if len(lca) == 1:
+                result = lca.pop()
+                if count_steps:
+                    return result, steps
+                else:
+                    return result
+            if len(lca) == 0:
+                raise errors.NoCommonAncestor(left_revision, right_revision)
+            revisions = lca
+
+    def iter_ancestry(self, revision_ids):
+        """Iterate the ancestry of this revision.
+
+        :param revision_ids: Nodes to start the search
+        :return: Yield tuples mapping a revision_id to its parents for the
+            ancestry of revision_id.
+            Ghosts will be returned with None as their parents, and nodes
+            with no parents will have NULL_REVISION as their only parent. (As
+            defined by get_parent_map.)
+            There will also be a node for (NULL_REVISION, ())
+        """
+        pending = set(revision_ids)
+        processed = set()
+        while pending:
+            processed.update(pending)
+            next_map = self.get_parent_map(pending)
+            next_pending = set()
+            for item in next_map.iteritems():
+                yield item
+                next_pending.update(p for p in item[1] if p not in processed)
+            ghosts = pending.difference(next_map)
+            for ghost in ghosts:
+                yield (ghost, None)
+            pending = next_pending
+
+    def iter_lefthand_ancestry(self, start_key, stop_keys=None):
+        if stop_keys is None:
+            stop_keys = ()
+        next_key = start_key
+        def get_parents(key):
+            try:
+                return self._parents_provider.get_parent_map([key])[key]
+            except KeyError:
+                raise errors.RevisionNotPresent(next_key, self)
+        while True:
+            if next_key in stop_keys:
+                return
+            parents = get_parents(next_key)
+            yield next_key
+            if len(parents) == 0:
+                return
+            else:
+                next_key = parents[0]
+
+    def iter_topo_order(self, revisions):
+        """Iterate through the input revisions in topological order.
+
+        This sorting only ensures that parents come before their children.
+        An ancestor may sort after a descendant if the relationship is not
+        visible in the supplied list of revisions.
+        """
+        from bzrlib import tsort
+        sorter = tsort.TopoSorter(self.get_parent_map(revisions))
+        return sorter.iter_topo_order()
+
+    def is_ancestor(self, candidate_ancestor, candidate_descendant):
+        """Determine whether a revision is an ancestor of another.
+
+        We answer this using heads() as heads() has the logic to perform the
+        smallest number of parent lookups to determine the ancestral
+        relationship between N revisions.
+        """
+        return set([candidate_descendant]) == self.heads(
+            [candidate_ancestor, candidate_descendant])
+
+    def is_between(self, revid, lower_bound_revid, upper_bound_revid):
+        """Determine whether a revision is between two others.
+
+        returns true if and only if:
+        lower_bound_revid <= revid <= upper_bound_revid
+        """
+        return ((upper_bound_revid is None or
+                    self.is_ancestor(revid, upper_bound_revid)) and
+               (lower_bound_revid is None or
+                    self.is_ancestor(lower_bound_revid, revid)))
+
+    def _search_for_extra_common(self, common, searchers):
+        """Make sure that unique nodes are genuinely unique.
+
+        After _find_border_ancestors, all nodes marked "common" are indeed
+        common. Some of the nodes considered unique are not, due to history
+        shortcuts stopping the searches early.
+
+        We know that we have searched enough when all common search tips are
+        descended from all unique (uncommon) nodes because we know that a node
+        cannot be an ancestor of its own ancestor.
+
+        :param common: A set of common nodes
+        :param searchers: The searchers returned from _find_border_ancestors
+        :return: None
+        """
+        # Basic algorithm...
+        #   A) The passed in searchers should all be on the same tips, thus
+        #      they should be considered the "common" searchers.
+        #   B) We find the difference between the searchers, these are the
+        #      "unique" nodes for each side.
+        #   C) We do a quick culling so that we only start searching from the
+        #      more interesting unique nodes. (A unique ancestor is more
+        #      interesting than any of its children.)
+        #   D) We start searching for ancestors common to all unique nodes.
+        #   E) We have the common searchers stop searching any ancestors of
+        #      nodes found by (D)
+        #   F) When there are no more common search tips, we stop
+
+        # TODO: We need a way to remove unique_searchers when they overlap with
+        #       other unique searchers.
+        if len(searchers) != 2:
+            raise NotImplementedError(
+                "Algorithm not yet implemented for > 2 searchers")
+        common_searchers = searchers
+        left_searcher = searchers[0]
+        right_searcher = searchers[1]
+        unique = left_searcher.seen.symmetric_difference(right_searcher.seen)
+        if not unique: # No unique nodes, nothing to do
+            return
+        total_unique = len(unique)
+        unique = self._remove_simple_descendants(unique,
+                    self.get_parent_map(unique))
+        simple_unique = len(unique)
+
+        unique_searchers = []
+        for revision_id in unique:
+            if revision_id in left_searcher.seen:
+                parent_searcher = left_searcher
+            else:
+                parent_searcher = right_searcher
+            revs_to_search = parent_searcher.find_seen_ancestors([revision_id])
+            if not revs_to_search: # XXX: This shouldn't be possible
+                revs_to_search = [revision_id]
+            searcher = self._make_breadth_first_searcher(revs_to_search)
+            # We don't care about the starting nodes.
+            searcher.step()
+            unique_searchers.append(searcher)
+
+        # possible todo: aggregate the common searchers into a single common
+        #   searcher, just make sure that we include the nodes into the .seen
+        #   properties of the original searchers
+
+        ancestor_all_unique = None
+        for searcher in unique_searchers:
+            if ancestor_all_unique is None:
+                ancestor_all_unique = set(searcher.seen)
+            else:
+                ancestor_all_unique = ancestor_all_unique.intersection(
+                                            searcher.seen)
+
+        trace.mutter('Started %s unique searchers for %s unique revisions',
+                     simple_unique, total_unique)
+
+        while True: # If we have no more nodes we have nothing to do
+            newly_seen_common = set()
+            for searcher in common_searchers:
+                newly_seen_common.update(searcher.step())
+            newly_seen_unique = set()
+            for searcher in unique_searchers:
+                newly_seen_unique.update(searcher.step())
+            new_common_unique = set()
+            for revision in newly_seen_unique:
+                for searcher in unique_searchers:
+                    if revision not in searcher.seen:
+                        break
+                else:
+                    # This is a border because it is a first common that we see
+                    # after walking for a while.
+                    new_common_unique.add(revision)
+            if newly_seen_common:
+                # These are nodes descended from one of the 'common' searchers.
+                # Make sure all searchers are on the same page
+                for searcher in common_searchers:
+                    newly_seen_common.update(
+                        searcher.find_seen_ancestors(newly_seen_common))
+                # We start searching the whole ancestry. It is a bit wasteful,
+                # though. We really just want to mark all of these nodes as
+                # 'seen' and then start just the tips. However, it requires a
+                # get_parent_map() call to figure out the tips anyway, and all
+                # redundant requests should be fairly fast.
+                for searcher in common_searchers:
+                    searcher.start_searching(newly_seen_common)
+
+                # If a 'common' node is an ancestor of all unique searchers, we
+                # can stop searching it.
+                stop_searching_common = ancestor_all_unique.intersection(
+                                            newly_seen_common)
+                if stop_searching_common:
+                    for searcher in common_searchers:
+                        searcher.stop_searching_any(stop_searching_common)
+            if new_common_unique:
+                # We found some ancestors that are common
+                for searcher in unique_searchers:
+                    new_common_unique.update(
+                        searcher.find_seen_ancestors(new_common_unique))
+                # Since these are common, we can grab another set of ancestors
+                # that we have seen
+                for searcher in common_searchers:
+                    new_common_unique.update(
+                        searcher.find_seen_ancestors(new_common_unique))
+
+                # We can tell all of the unique searchers to start at these
+                # nodes, and tell all of the common searchers to *stop*
+                # searching these nodes
+                for searcher in unique_searchers:
+                    searcher.start_searching(new_common_unique)
+                for searcher in common_searchers:
+                    searcher.stop_searching_any(new_common_unique)
+                ancestor_all_unique.update(new_common_unique)
+
+                # Filter out searchers that don't actually search different
+                # nodes. We already have the ancestry intersection for them
+                next_unique_searchers = []
+                unique_search_sets = set()
+                for searcher in unique_searchers:
+                    will_search_set = frozenset(searcher._next_query)
+                    if will_search_set not in unique_search_sets:
+                        # This searcher is searching a unique set of nodes, let it
+                        unique_search_sets.add(will_search_set)
+                        next_unique_searchers.append(searcher)
+                unique_searchers = next_unique_searchers
+            for searcher in common_searchers:
+                if searcher._next_query:
+                    break
+            else:
+                # All common searcher have stopped searching
+                return
+
+    def _remove_simple_descendants(self, revisions, parent_map):
+        """remove revisions which are children of other ones in the set
+
+        This doesn't do any graph searching, it just checks the immediate
+        parent_map to find if there are any children which can be removed.
+
+        :param revisions: A set of revision_ids
+        :return: A set of revision_ids with the children removed
+        """
+        simple_ancestors = revisions.copy()
+        # TODO: jam 20071214 we *could* restrict it to searching only the
+        #       parent_map of revisions already present in 'revisions', but
+        #       considering the general use case, I think this is actually
+        #       better.
+
+        # This is the same as the following loop. I don't know that it is any
+        # faster.
+        ## simple_ancestors.difference_update(r for r, p_ids in parent_map.iteritems()
+        ##     if p_ids is not None and revisions.intersection(p_ids))
+        ## return simple_ancestors
+
+        # Yet Another Way, invert the parent map (which can be cached)
+        ## descendants = {}
+        ## for revision_id, parent_ids in parent_map.iteritems():
+        ##   for p_id in parent_ids:
+        ##       descendants.setdefault(p_id, []).append(revision_id)
+        ## for revision in revisions.intersection(descendants):
+        ##   simple_ancestors.difference_update(descendants[revision])
+        ## return simple_ancestors
+        for revision, parent_ids in parent_map.iteritems():
+            if parent_ids is None:
+                continue
+            for parent_id in parent_ids:
+                if parent_id in revisions:
+                    # This node has a parent present in the set, so we can
+                    # remove it
+                    simple_ancestors.discard(revision)
+                    break
+        return simple_ancestors
+
+
+class HeadsCache(object):
+    """A cache of results for graph heads calls."""
+
+    def __init__(self, graph):
+        self.graph = graph
+        self._heads = {}
+
+    def heads(self, keys):
+        """Return the heads of keys.
+
+        This matches the API of Graph.heads(), specifically the return value is
+        a set which can be mutated, and ordering of the input is not preserved
+        in the output.
+
+        :see also: Graph.heads.
+        :param keys: The keys to calculate heads for.
+        :return: A set containing the heads, which may be mutated without
+            affecting future lookups.
+        """
+        keys = frozenset(keys)
+        try:
+            return set(self._heads[keys])
+        except KeyError:
+            heads = self.graph.heads(keys)
+            self._heads[keys] = heads
+            return set(heads)
+
+
+class FrozenHeadsCache(object):
+    """Cache heads() calls, assuming the caller won't modify them."""
+
+    def __init__(self, graph):
+        self.graph = graph
+        self._heads = {}
+
+    def heads(self, keys):
+        """Return the heads of keys.
+
+        Similar to Graph.heads(). The main difference is that the return value
+        is a frozen set which cannot be mutated.
+
+        :see also: Graph.heads.
+        :param keys: The keys to calculate heads for.
+        :return: A frozenset containing the heads.
+        """
+        keys = frozenset(keys)
+        try:
+            return self._heads[keys]
+        except KeyError:
+            heads = frozenset(self.graph.heads(keys))
+            self._heads[keys] = heads
+            return heads
+
+    def cache(self, keys, heads):
+        """Store a known value."""
+        self._heads[frozenset(keys)] = frozenset(heads)
+
+
+class _BreadthFirstSearcher(object):
+    """Parallel search breadth-first the ancestry of revisions.
+
+    This class implements the iterator protocol, but additionally
+    1. provides a set of seen ancestors, and
+    2. allows some ancestries to be unsearched, via stop_searching_any
+    """
+
+    def __init__(self, revisions, parents_provider):
+        self._iterations = 0
+        self._next_query = set(revisions)
+        self.seen = set()
+        self._started_keys = set(self._next_query)
+        self._stopped_keys = set()
+        self._parents_provider = parents_provider
+        self._returning = 'next_with_ghosts'
+        self._current_present = set()
+        self._current_ghosts = set()
+        self._current_parents = {}
+
+    def __repr__(self):
+        if self._iterations:
+            prefix = "searching"
+        else:
+            prefix = "starting"
+        search = '%s=%r' % (prefix, list(self._next_query))
+        return ('_BreadthFirstSearcher(iterations=%d, %s,'
+                ' seen=%r)' % (self._iterations, search, list(self.seen)))
+
+    def get_state(self):
+        """Get the current state of this searcher.
+
+        :return: Tuple with started keys, excludes and included keys
+        """
+        if self._returning == 'next':
+            # We have to know the current nodes children to be able to list the
+            # exclude keys for them. However, while we could have a second
+            # look-ahead result buffer and shuffle things around, this method
+            # is typically only called once per search - when memoising the
+            # results of the search.
+            found, ghosts, next, parents = self._do_query(self._next_query)
+            # pretend we didn't query: perhaps we should tweak _do_query to be
+            # entirely stateless?
+            self.seen.difference_update(next)
+            next_query = next.union(ghosts)
+        else:
+            next_query = self._next_query
+        excludes = self._stopped_keys.union(next_query)
+        included_keys = self.seen.difference(excludes)
+        return self._started_keys, excludes, included_keys
+
+    def _get_result(self):
+        """Get a SearchResult for the current state of this searcher.
+
+        :return: A SearchResult for this search so far. The SearchResult is
+            static - the search can be advanced and the search result will not
+            be invalidated or altered.
+        """
+        from bzrlib.vf_search import SearchResult
+        (started_keys, excludes, included_keys) = self.get_state()
+        return SearchResult(started_keys, excludes, len(included_keys),
+            included_keys)
+
+    def step(self):
+        try:
+            return self.next()
+        except StopIteration:
+            return ()
+
+    def next(self):
+        """Return the next ancestors of this revision.
+
+        Ancestors are returned in the order they are seen in a breadth-first
+        traversal.  No ancestor will be returned more than once. Ancestors are
+        returned before their parentage is queried, so ghosts and missing
+        revisions (including the start revisions) are included in the result.
+        This can save a round trip in LCA style calculation by allowing
+        convergence to be detected without reading the data for the revision
+        the convergence occurs on.
+
+        :return: A set of revision_ids.
+        """
+        if self._returning != 'next':
+            # switch to returning the query, not the results.
+            self._returning = 'next'
+            self._iterations += 1
+        else:
+            self._advance()
+        if len(self._next_query) == 0:
+            raise StopIteration()
+        # We have seen what we're querying at this point as we are returning
+        # the query, not the results.
+        self.seen.update(self._next_query)
+        return self._next_query
+
+    def next_with_ghosts(self):
+        """Return the next found ancestors, with ghosts split out.
+
+        Ancestors are returned in the order they are seen in a breadth-first
+        traversal.  No ancestor will be returned more than once. Ancestors are
+        returned only after asking for their parents, which allows us to detect
+        which revisions are ghosts and which are not.
+
+        :return: A tuple with (present ancestors, ghost ancestors) sets.
+        """
+        if self._returning != 'next_with_ghosts':
+            # switch to returning the results, not the current query.
+            self._returning = 'next_with_ghosts'
+            self._advance()
+        if len(self._next_query) == 0:
+            raise StopIteration()
+        self._advance()
+        return self._current_present, self._current_ghosts
+
+    def _advance(self):
+        """Advance the search.
+
+        Updates self.seen, self._next_query, self._current_present,
+        self._current_ghosts, self._current_parents and self._iterations.
+        """
+        self._iterations += 1
+        found, ghosts, next, parents = self._do_query(self._next_query)
+        self._current_present = found
+        self._current_ghosts = ghosts
+        self._next_query = next
+        self._current_parents = parents
+        # ghosts are implicit stop points, otherwise the search cannot be
+        # repeated when ghosts are filled.
+        self._stopped_keys.update(ghosts)
+
+    def _do_query(self, revisions):
+        """Query for revisions.
+
+        Adds revisions to the seen set.
+
+        :param revisions: Revisions to query.
+        :return: A tuple: (set(found_revisions), set(ghost_revisions),
+           set(parents_of_found_revisions), dict(found_revisions:parents)).
+        """
+        found_revisions = set()
+        parents_of_found = set()
+        # revisions may contain nodes that point to other nodes in revisions:
+        # we want to filter them out.
+        seen = self.seen
+        seen.update(revisions)
+        parent_map = self._parents_provider.get_parent_map(revisions)
+        found_revisions.update(parent_map)
+        for rev_id, parents in parent_map.iteritems():
+            if parents is None:
+                continue
+            new_found_parents = [p for p in parents if p not in seen]
+            if new_found_parents:
+                # Calling set.update() with an empty generator is actually
+                # rather expensive.
+                parents_of_found.update(new_found_parents)
+        ghost_revisions = revisions - found_revisions
+        return found_revisions, ghost_revisions, parents_of_found, parent_map
+
+    def __iter__(self):
+        return self
+
+    def find_seen_ancestors(self, revisions):
+        """Find ancestors of these revisions that have already been seen.
+
+        This function generally makes the assumption that querying for the
+        parents of a node that has already been queried is reasonably cheap.
+        (eg, not a round trip to a remote host).
+        """
+        # TODO: Often we might ask one searcher for its seen ancestors, and
+        #       then ask another searcher the same question. This can result in
+        #       searching the same revisions repeatedly if the two searchers
+        #       have a lot of overlap.
+        all_seen = self.seen
+        pending = set(revisions).intersection(all_seen)
+        seen_ancestors = set(pending)
+
+        if self._returning == 'next':
+            # self.seen contains what nodes have been returned, not what nodes
+            # have been queried. We don't want to probe for nodes that haven't
+            # been searched yet.
+            not_searched_yet = self._next_query
+        else:
+            not_searched_yet = ()
+        pending.difference_update(not_searched_yet)
+        get_parent_map = self._parents_provider.get_parent_map
+        while pending:
+            parent_map = get_parent_map(pending)
+            all_parents = []
+            # We don't care if it is a ghost, since it can't be seen if it is
+            # a ghost
+            for parent_ids in parent_map.itervalues():
+                all_parents.extend(parent_ids)
+            next_pending = all_seen.intersection(all_parents).difference(seen_ancestors)
+            seen_ancestors.update(next_pending)
+            next_pending.difference_update(not_searched_yet)
+            pending = next_pending
+
+        return seen_ancestors
+
+    def stop_searching_any(self, revisions):
+        """
+        Remove any of the specified revisions from the search list.
+
+        None of the specified revisions are required to be present in the
+        search list.
+
+        It is okay to call stop_searching_any() for revisions which were seen
+        in previous iterations. It is the callers responsibility to call
+        find_seen_ancestors() to make sure that current search tips that are
+        ancestors of those revisions are also stopped.  All explicitly stopped
+        revisions will be excluded from the search result's get_keys(), though.
+        """
+        # TODO: does this help performance?
+        # if not revisions:
+        #     return set()
+        revisions = frozenset(revisions)
+        if self._returning == 'next':
+            stopped = self._next_query.intersection(revisions)
+            self._next_query = self._next_query.difference(revisions)
+        else:
+            stopped_present = self._current_present.intersection(revisions)
+            stopped = stopped_present.union(
+                self._current_ghosts.intersection(revisions))
+            self._current_present.difference_update(stopped)
+            self._current_ghosts.difference_update(stopped)
+            # stopping 'x' should stop returning parents of 'x', but
+            # not if 'y' always references those same parents
+            stop_rev_references = {}
+            for rev in stopped_present:
+                for parent_id in self._current_parents[rev]:
+                    if parent_id not in stop_rev_references:
+                        stop_rev_references[parent_id] = 0
+                    stop_rev_references[parent_id] += 1
+            # if only the stopped revisions reference it, the ref count will be
+            # 0 after this loop
+            for parents in self._current_parents.itervalues():
+                for parent_id in parents:
+                    try:
+                        stop_rev_references[parent_id] -= 1
+                    except KeyError:
+                        pass
+            stop_parents = set()
+            for rev_id, refs in stop_rev_references.iteritems():
+                if refs == 0:
+                    stop_parents.add(rev_id)
+            self._next_query.difference_update(stop_parents)
+        self._stopped_keys.update(stopped)
+        self._stopped_keys.update(revisions)
+        return stopped
+
+    def start_searching(self, revisions):
+        """Add revisions to the search.
+
+        The parents of revisions will be returned from the next call to next()
+        or next_with_ghosts(). If next_with_ghosts was the most recently used
+        next* call then the return value is the result of looking up the
+        ghost/not ghost status of revisions. (A tuple (present, ghosted)).
+        """
+        revisions = frozenset(revisions)
+        self._started_keys.update(revisions)
+        new_revisions = revisions.difference(self.seen)
+        if self._returning == 'next':
+            self._next_query.update(new_revisions)
+            self.seen.update(new_revisions)
+        else:
+            # perform a query on revisions
+            revs, ghosts, query, parents = self._do_query(revisions)
+            self._stopped_keys.update(ghosts)
+            self._current_present.update(revs)
+            self._current_ghosts.update(ghosts)
+            self._next_query.update(query)
+            self._current_parents.update(parents)
+            return revs, ghosts
+
+
+def invert_parent_map(parent_map):
+    """Given a map from child => parents, create a map of parent=>children"""
+    child_map = {}
+    for child, parents in parent_map.iteritems():
+        for p in parents:
+            # Any given parent is likely to have only a small handful
+            # of children, many will have only one. So we avoid mem overhead of
+            # a list, in exchange for extra copying of tuples
+            if p not in child_map:
+                child_map[p] = (child,)
+            else:
+                child_map[p] = child_map[p] + (child,)
+    return child_map
+
+
+def collapse_linear_regions(parent_map):
+    """Collapse regions of the graph that are 'linear'.
+
+    For example::
+
+      A:[B], B:[C]
+
+    can be collapsed by removing B and getting::
+
+      A:[C]
+
+    :param parent_map: A dictionary mapping children to their parents
+    :return: Another dictionary with 'linear' chains collapsed
+    """
+    # Note: this isn't a strictly minimal collapse. For example:
+    #   A
+    #  / \
+    # B   C
+    #  \ /
+    #   D
+    #   |
+    #   E
+    # Will not have 'D' removed, even though 'E' could fit. Also:
+    #   A
+    #   |    A
+    #   B => |
+    #   |    C
+    #   C
+    # A and C are both kept because they are edges of the graph. We *could* get
+    # rid of A if we wanted.
+    #   A
+    #  / \
+    # B   C
+    # |   |
+    # D   E
+    #  \ /
+    #   F
+    # Will not have any nodes removed, even though you do have an
+    # 'uninteresting' linear D->B and E->C
+    children = {}
+    for child, parents in parent_map.iteritems():
+        children.setdefault(child, [])
+        for p in parents:
+            children.setdefault(p, []).append(child)
+
+    orig_children = dict(children)
+    removed = set()
+    result = dict(parent_map)
+    for node in parent_map:
+        parents = result[node]
+        if len(parents) == 1:
+            parent_children = children[parents[0]]
+            if len(parent_children) != 1:
+                # This is not the only child
+                continue
+            node_children = children[node]
+            if len(node_children) != 1:
+                continue
+            child_parents = result.get(node_children[0], None)
+            if len(child_parents) != 1:
+                # This is not its only parent
+                continue
+            # The child of this node only points at it, and the parent only has
+            # this as a child. remove this node, and join the others together
+            result[node_children[0]] = parents
+            children[parents[0]] = node_children
+            del result[node]
+            del children[node]
+            removed.add(node)
+
+    return result
+
+
+class GraphThunkIdsToKeys(object):
+    """Forwards calls about 'ids' to be about keys internally."""
+
+    def __init__(self, graph):
+        self._graph = graph
+
+    def topo_sort(self):
+        return [r for (r,) in self._graph.topo_sort()]
+
+    def heads(self, ids):
+        """See Graph.heads()"""
+        as_keys = [(i,) for i in ids]
+        head_keys = self._graph.heads(as_keys)
+        return set([h[0] for h in head_keys])
+
+    def merge_sort(self, tip_revision):
+        nodes = self._graph.merge_sort((tip_revision,))
+        for node in nodes:
+            node.key = node.key[0]
+        return nodes
+
+    def add_node(self, revision, parents):
+        self._graph.add_node((revision,), [(p,) for p in parents])
+
+
+_counters = [0,0,0,0,0,0,0]
+try:
+    from bzrlib._known_graph_pyx import KnownGraph
+except ImportError, e:
+    osutils.failed_to_load_extension(e)
+    from bzrlib._known_graph_py import KnownGraph