diff options
author | Lorry <lorry@roadtrain.codethink.co.uk> | 2012-08-22 15:47:16 +0100 |
---|---|---|
committer | Lorry <lorry@roadtrain.codethink.co.uk> | 2012-08-22 15:47:16 +0100 |
commit | 25335618bf8755ce6b116ee14f47f5a1f2c821e9 (patch) | |
tree | d889d7ab3f9f985d0c54c534cb8052bd2e6d7163 /bzrlib/tree.py | |
download | bzr-tarball-25335618bf8755ce6b116ee14f47f5a1f2c821e9.tar.gz |
Tarball conversion
Diffstat (limited to 'bzrlib/tree.py')
-rw-r--r-- | bzrlib/tree.py | 1635 |
1 files changed, 1635 insertions, 0 deletions
diff --git a/bzrlib/tree.py b/bzrlib/tree.py new file mode 100644 index 0000000..82d52e1 --- /dev/null +++ b/bzrlib/tree.py @@ -0,0 +1,1635 @@ +# Copyright (C) 2005-2011 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +"""Tree classes, representing directory at point in time. +""" + +from __future__ import absolute_import + +import os + +from bzrlib.lazy_import import lazy_import +lazy_import(globals(), """ +import collections + +from bzrlib import ( + conflicts as _mod_conflicts, + debug, + delta, + errors, + filters, + inventory, + osutils, + revision as _mod_revision, + rules, + trace, + ) +from bzrlib.i18n import gettext +""") + +from bzrlib.decorators import needs_read_lock +from bzrlib.inter import InterObject +from bzrlib.symbol_versioning import ( + deprecated_in, + deprecated_method, + ) + + +class Tree(object): + """Abstract file tree. + + There are several subclasses: + + * `WorkingTree` exists as files on disk editable by the user. + + * `RevisionTree` is a tree as recorded at some point in the past. + + Trees can be compared, etc, regardless of whether they are working + trees or versioned trees. + """ + + def has_versioned_directories(self): + """Whether this tree can contain explicitly versioned directories. + + This defaults to True, but some implementations may want to override + it. + """ + return True + + def changes_from(self, other, want_unchanged=False, specific_files=None, + extra_trees=None, require_versioned=False, include_root=False, + want_unversioned=False): + """Return a TreeDelta of the changes from other to this tree. + + :param other: A tree to compare with. + :param specific_files: An optional list of file paths to restrict the + comparison to. When mapping filenames to ids, all matches in all + trees (including optional extra_trees) are used, and all children of + matched directories are included. + :param want_unchanged: An optional boolean requesting the inclusion of + unchanged entries in the result. + :param extra_trees: An optional list of additional trees to use when + mapping the contents of specific_files (paths) to file_ids. + :param require_versioned: An optional boolean (defaults to False). When + supplied and True all the 'specific_files' must be versioned, or + a PathsNotVersionedError will be thrown. + :param want_unversioned: Scan for unversioned paths. + + The comparison will be performed by an InterTree object looked up on + self and other. + """ + # Martin observes that Tree.changes_from returns a TreeDelta and this + # may confuse people, because the class name of the returned object is + # a synonym of the object referenced in the method name. + return InterTree.get(other, self).compare( + want_unchanged=want_unchanged, + specific_files=specific_files, + extra_trees=extra_trees, + require_versioned=require_versioned, + include_root=include_root, + want_unversioned=want_unversioned, + ) + + def iter_changes(self, from_tree, include_unchanged=False, + specific_files=None, pb=None, extra_trees=None, + require_versioned=True, want_unversioned=False): + """See InterTree.iter_changes""" + intertree = InterTree.get(from_tree, self) + return intertree.iter_changes(include_unchanged, specific_files, pb, + extra_trees, require_versioned, want_unversioned=want_unversioned) + + def conflicts(self): + """Get a list of the conflicts in the tree. + + Each conflict is an instance of bzrlib.conflicts.Conflict. + """ + return _mod_conflicts.ConflictList() + + def extras(self): + """For trees that can have unversioned files, return all such paths.""" + return [] + + def get_parent_ids(self): + """Get the parent ids for this tree. + + :return: a list of parent ids. [] is returned to indicate + a tree with no parents. + :raises: BzrError if the parents are not known. + """ + raise NotImplementedError(self.get_parent_ids) + + def has_filename(self, filename): + """True if the tree has given filename.""" + raise NotImplementedError(self.has_filename) + + def has_id(self, file_id): + raise NotImplementedError(self.has_id) + + @deprecated_method(deprecated_in((2, 4, 0))) + def __contains__(self, file_id): + return self.has_id(file_id) + + def has_or_had_id(self, file_id): + raise NotImplementedError(self.has_or_had_id) + + def is_ignored(self, filename): + """Check whether the filename is ignored by this tree. + + :param filename: The relative filename within the tree. + :return: True if the filename is ignored. + """ + return False + + def all_file_ids(self): + """Iterate through all file ids, including ids for missing files.""" + raise NotImplementedError(self.all_file_ids) + + def id2path(self, file_id): + """Return the path for a file id. + + :raises NoSuchId: + """ + raise NotImplementedError(self.id2path) + + def iter_entries_by_dir(self, specific_file_ids=None, yield_parents=False): + """Walk the tree in 'by_dir' order. + + This will yield each entry in the tree as a (path, entry) tuple. + The order that they are yielded is: + + Directories are walked in a depth-first lexicographical order, + however, whenever a directory is reached, all of its direct child + nodes are yielded in lexicographical order before yielding the + grandchildren. + + For example, in the tree:: + + a/ + b/ + c + d/ + e + f/ + g + + The yield order (ignoring root) would be:: + + a, f, a/b, a/d, a/b/c, a/d/e, f/g + + :param yield_parents: If True, yield the parents from the root leading + down to specific_file_ids that have been requested. This has no + impact if specific_file_ids is None. + """ + raise NotImplementedError(self.iter_entries_by_dir) + + def iter_child_entries(self, file_id, path=None): + """Iterate over the children of a directory or tree reference. + + :param file_id: File id of the directory/tree-reference + :param path: Optional path of the directory + :raise NoSuchId: When the file_id does not exist + :return: Iterator over entries in the directory + """ + raise NotImplementedError(self.iter_child_entries) + + def list_files(self, include_root=False, from_dir=None, recursive=True): + """List all files in this tree. + + :param include_root: Whether to include the entry for the tree root + :param from_dir: Directory under which to list files + :param recursive: Whether to list files recursively + :return: iterator over tuples of (path, versioned, kind, file_id, + inventory entry) + """ + raise NotImplementedError(self.list_files) + + def iter_references(self): + if self.supports_tree_reference(): + for path, entry in self.iter_entries_by_dir(): + if entry.kind == 'tree-reference': + yield path, entry.file_id + + def kind(self, file_id): + raise NotImplementedError("Tree subclass %s must implement kind" + % self.__class__.__name__) + + def stored_kind(self, file_id): + """File kind stored for this file_id. + + May not match kind on disk for working trees. Always available + for versioned files, even when the file itself is missing. + """ + return self.kind(file_id) + + def path_content_summary(self, path): + """Get a summary of the information about path. + + All the attributes returned are for the canonical form, not the + convenient form (if content filters are in use.) + + :param path: A relative path within the tree. + :return: A tuple containing kind, size, exec, sha1-or-link. + Kind is always present (see tree.kind()). + size is present if kind is file and the size of the + canonical form can be cheaply determined, None otherwise. + exec is None unless kind is file and the platform supports the 'x' + bit. + sha1-or-link is the link target if kind is symlink, or the sha1 if + it can be obtained without reading the file. + """ + raise NotImplementedError(self.path_content_summary) + + def get_reference_revision(self, file_id, path=None): + raise NotImplementedError("Tree subclass %s must implement " + "get_reference_revision" + % self.__class__.__name__) + + def _comparison_data(self, entry, path): + """Return a tuple of kind, executable, stat_value for a file. + + entry may be None if there is no inventory entry for the file, but + path must always be supplied. + + kind is None if there is no file present (even if an inventory id is + present). executable is False for non-file entries. + """ + raise NotImplementedError(self._comparison_data) + + def _file_size(self, entry, stat_value): + raise NotImplementedError(self._file_size) + + def get_file(self, file_id, path=None): + """Return a file object for the file file_id in the tree. + + If both file_id and path are defined, it is implementation defined as + to which one is used. + """ + raise NotImplementedError(self.get_file) + + def get_file_with_stat(self, file_id, path=None): + """Get a file handle and stat object for file_id. + + The default implementation returns (self.get_file, None) for backwards + compatibility. + + :param file_id: The file id to read. + :param path: The path of the file, if it is known. + :return: A tuple (file_handle, stat_value_or_None). If the tree has + no stat facility, or need for a stat cache feedback during commit, + it may return None for the second element of the tuple. + """ + return (self.get_file(file_id, path), None) + + def get_file_text(self, file_id, path=None): + """Return the byte content of a file. + + :param file_id: The file_id of the file. + :param path: The path of the file. + + If both file_id and path are supplied, an implementation may use + either one. + + :returns: A single byte string for the whole file. + """ + my_file = self.get_file(file_id, path) + try: + return my_file.read() + finally: + my_file.close() + + def get_file_lines(self, file_id, path=None): + """Return the content of a file, as lines. + + :param file_id: The file_id of the file. + :param path: The path of the file. + + If both file_id and path are supplied, an implementation may use + either one. + """ + return osutils.split_lines(self.get_file_text(file_id, path)) + + def get_file_verifier(self, file_id, path=None, stat_value=None): + """Return a verifier for a file. + + The default implementation returns a sha1. + + :param file_id: The handle for this file. + :param path: The path that this file can be found at. + These must point to the same object. + :param stat_value: Optional stat value for the object + :return: Tuple with verifier name and verifier data + """ + return ("SHA1", self.get_file_sha1(file_id, path=path, + stat_value=stat_value)) + + def get_file_sha1(self, file_id, path=None, stat_value=None): + """Return the SHA1 file for a file. + + :note: callers should use get_file_verifier instead + where possible, as the underlying repository implementation may + have quicker access to a non-sha1 verifier. + + :param file_id: The handle for this file. + :param path: The path that this file can be found at. + These must point to the same object. + :param stat_value: Optional stat value for the object + """ + raise NotImplementedError(self.get_file_sha1) + + def get_file_mtime(self, file_id, path=None): + """Return the modification time for a file. + + :param file_id: The handle for this file. + :param path: The path that this file can be found at. + These must point to the same object. + """ + raise NotImplementedError(self.get_file_mtime) + + def get_file_size(self, file_id): + """Return the size of a file in bytes. + + This applies only to regular files. If invoked on directories or + symlinks, it will return None. + :param file_id: The file-id of the file + """ + raise NotImplementedError(self.get_file_size) + + def is_executable(self, file_id, path=None): + """Check if a file is executable. + + :param file_id: The handle for this file. + :param path: The path that this file can be found at. + These must point to the same object. + """ + raise NotImplementedError(self.is_executable) + + def iter_files_bytes(self, desired_files): + """Iterate through file contents. + + Files will not necessarily be returned in the order they occur in + desired_files. No specific order is guaranteed. + + Yields pairs of identifier, bytes_iterator. identifier is an opaque + value supplied by the caller as part of desired_files. It should + uniquely identify the file version in the caller's context. (Examples: + an index number or a TreeTransform trans_id.) + + bytes_iterator is an iterable of bytestrings for the file. The + kind of iterable and length of the bytestrings are unspecified, but for + this implementation, it is a tuple containing a single bytestring with + the complete text of the file. + + :param desired_files: a list of (file_id, identifier) pairs + """ + for file_id, identifier in desired_files: + # We wrap the string in a tuple so that we can return an iterable + # of bytestrings. (Technically, a bytestring is also an iterable + # of bytestrings, but iterating through each character is not + # performant.) + cur_file = (self.get_file_text(file_id),) + yield identifier, cur_file + + def get_symlink_target(self, file_id, path=None): + """Get the target for a given file_id. + + It is assumed that the caller already knows that file_id is referencing + a symlink. + :param file_id: Handle for the symlink entry. + :param path: The path of the file. + If both file_id and path are supplied, an implementation may use + either one. + :return: The path the symlink points to. + """ + raise NotImplementedError(self.get_symlink_target) + + def get_root_id(self): + """Return the file_id for the root of this tree.""" + raise NotImplementedError(self.get_root_id) + + def annotate_iter(self, file_id, + default_revision=_mod_revision.CURRENT_REVISION): + """Return an iterator of revision_id, line tuples. + + For working trees (and mutable trees in general), the special + revision_id 'current:' will be used for lines that are new in this + tree, e.g. uncommitted changes. + :param file_id: The file to produce an annotated version from + :param default_revision: For lines that don't match a basis, mark them + with this revision id. Not all implementations will make use of + this value. + """ + raise NotImplementedError(self.annotate_iter) + + def _get_plan_merge_data(self, file_id, other, base): + from bzrlib import versionedfile + vf = versionedfile._PlanMergeVersionedFile(file_id) + last_revision_a = self._get_file_revision(file_id, vf, 'this:') + last_revision_b = other._get_file_revision(file_id, vf, 'other:') + if base is None: + last_revision_base = None + else: + last_revision_base = base._get_file_revision(file_id, vf, 'base:') + return vf, last_revision_a, last_revision_b, last_revision_base + + def plan_file_merge(self, file_id, other, base=None): + """Generate a merge plan based on annotations. + + If the file contains uncommitted changes in this tree, they will be + attributed to the 'current:' pseudo-revision. If the file contains + uncommitted changes in the other tree, they will be assigned to the + 'other:' pseudo-revision. + """ + data = self._get_plan_merge_data(file_id, other, base) + vf, last_revision_a, last_revision_b, last_revision_base = data + return vf.plan_merge(last_revision_a, last_revision_b, + last_revision_base) + + def plan_file_lca_merge(self, file_id, other, base=None): + """Generate a merge plan based lca-newness. + + If the file contains uncommitted changes in this tree, they will be + attributed to the 'current:' pseudo-revision. If the file contains + uncommitted changes in the other tree, they will be assigned to the + 'other:' pseudo-revision. + """ + data = self._get_plan_merge_data(file_id, other, base) + vf, last_revision_a, last_revision_b, last_revision_base = data + return vf.plan_lca_merge(last_revision_a, last_revision_b, + last_revision_base) + + def _iter_parent_trees(self): + """Iterate through parent trees, defaulting to Tree.revision_tree.""" + for revision_id in self.get_parent_ids(): + try: + yield self.revision_tree(revision_id) + except errors.NoSuchRevisionInTree: + yield self.repository.revision_tree(revision_id) + + def _get_file_revision(self, file_id, vf, tree_revision): + """Ensure that file_id, tree_revision is in vf to plan the merge.""" + + if getattr(self, '_repository', None) is None: + last_revision = tree_revision + parent_keys = [(file_id, t.get_file_revision(file_id)) for t in + self._iter_parent_trees()] + vf.add_lines((file_id, last_revision), parent_keys, + self.get_file_lines(file_id)) + repo = self.branch.repository + base_vf = repo.texts + else: + last_revision = self.get_file_revision(file_id) + base_vf = self._repository.texts + if base_vf not in vf.fallback_versionedfiles: + vf.fallback_versionedfiles.append(base_vf) + return last_revision + + def _check_retrieved(self, ie, f): + if not __debug__: + return + fp = osutils.fingerprint_file(f) + f.seek(0) + + if ie.text_size is not None: + if ie.text_size != fp['size']: + raise errors.BzrError( + "mismatched size for file %r in %r" % + (ie.file_id, self._store), + ["inventory expects %d bytes" % ie.text_size, + "file is actually %d bytes" % fp['size'], + "store is probably damaged/corrupt"]) + + if ie.text_sha1 != fp['sha1']: + raise errors.BzrError("wrong SHA-1 for file %r in %r" % + (ie.file_id, self._store), + ["inventory expects %s" % ie.text_sha1, + "file is actually %s" % fp['sha1'], + "store is probably damaged/corrupt"]) + + def path2id(self, path): + """Return the id for path in this tree.""" + raise NotImplementedError(self.path2id) + + def paths2ids(self, paths, trees=[], require_versioned=True): + """Return all the ids that can be reached by walking from paths. + + Each path is looked up in this tree and any extras provided in + trees, and this is repeated recursively: the children in an extra tree + of a directory that has been renamed under a provided path in this tree + are all returned, even if none exist under a provided path in this + tree, and vice versa. + + :param paths: An iterable of paths to start converting to ids from. + Alternatively, if paths is None, no ids should be calculated and None + will be returned. This is offered to make calling the api unconditional + for code that *might* take a list of files. + :param trees: Additional trees to consider. + :param require_versioned: If False, do not raise NotVersionedError if + an element of paths is not versioned in this tree and all of trees. + """ + return find_ids_across_trees(paths, [self] + list(trees), require_versioned) + + def iter_children(self, file_id): + """Iterate over the file ids of the children of an entry. + + :param file_id: File id of the entry + :return: Iterator over child file ids. + """ + raise NotImplementedError(self.iter_children) + + def lock_read(self): + """Lock this tree for multiple read only operations. + + :return: A bzrlib.lock.LogicalLockResult. + """ + pass + + def revision_tree(self, revision_id): + """Obtain a revision tree for the revision revision_id. + + The intention of this method is to allow access to possibly cached + tree data. Implementors of this method should raise NoSuchRevision if + the tree is not locally available, even if they could obtain the + tree via a repository or some other means. Callers are responsible + for finding the ultimate source for a revision tree. + + :param revision_id: The revision_id of the requested tree. + :return: A Tree. + :raises: NoSuchRevision if the tree cannot be obtained. + """ + raise errors.NoSuchRevisionInTree(self, revision_id) + + def unknowns(self): + """What files are present in this tree and unknown. + + :return: an iterator over the unknown files. + """ + return iter([]) + + def unlock(self): + pass + + def filter_unversioned_files(self, paths): + """Filter out paths that are versioned. + + :return: set of paths. + """ + raise NotImplementedError(self.filter_unversioned_files) + + def walkdirs(self, prefix=""): + """Walk the contents of this tree from path down. + + This yields all the data about the contents of a directory at a time. + After each directory has been yielded, if the caller has mutated the + list to exclude some directories, they are then not descended into. + + The data yielded is of the form: + ((directory-relpath, directory-path-from-root, directory-fileid), + [(relpath, basename, kind, lstat, path_from_tree_root, file_id, + versioned_kind), ...]), + - directory-relpath is the containing dirs relpath from prefix + - directory-path-from-root is the containing dirs path from / + - directory-fileid is the id of the directory if it is versioned. + - relpath is the relative path within the subtree being walked. + - basename is the basename + - kind is the kind of the file now. If unknonwn then the file is not + present within the tree - but it may be recorded as versioned. See + versioned_kind. + - lstat is the stat data *if* the file was statted. + - path_from_tree_root is the path from the root of the tree. + - file_id is the file_id if the entry is versioned. + - versioned_kind is the kind of the file as last recorded in the + versioning system. If 'unknown' the file is not versioned. + One of 'kind' and 'versioned_kind' must not be 'unknown'. + + :param prefix: Start walking from prefix within the tree rather than + at the root. This allows one to walk a subtree but get paths that are + relative to a tree rooted higher up. + :return: an iterator over the directory data. + """ + raise NotImplementedError(self.walkdirs) + + def supports_content_filtering(self): + return False + + def _content_filter_stack(self, path=None, file_id=None): + """The stack of content filters for a path if filtering is supported. + + Readers will be applied in first-to-last order. + Writers will be applied in last-to-first order. + Either the path or the file-id needs to be provided. + + :param path: path relative to the root of the tree + or None if unknown + :param file_id: file_id or None if unknown + :return: the list of filters - [] if there are none + """ + filter_pref_names = filters._get_registered_names() + if len(filter_pref_names) == 0: + return [] + if path is None: + path = self.id2path(file_id) + prefs = self.iter_search_rules([path], filter_pref_names).next() + stk = filters._get_filter_stack_for(prefs) + if 'filters' in debug.debug_flags: + trace.note(gettext("*** {0} content-filter: {1} => {2!r}").format(path,prefs,stk)) + return stk + + def _content_filter_stack_provider(self): + """A function that returns a stack of ContentFilters. + + The function takes a path (relative to the top of the tree) and a + file-id as parameters. + + :return: None if content filtering is not supported by this tree. + """ + if self.supports_content_filtering(): + return lambda path, file_id: \ + self._content_filter_stack(path, file_id) + else: + return None + + def iter_search_rules(self, path_names, pref_names=None, + _default_searcher=None): + """Find the preferences for filenames in a tree. + + :param path_names: an iterable of paths to find attributes for. + Paths are given relative to the root of the tree. + :param pref_names: the list of preferences to lookup - None for all + :param _default_searcher: private parameter to assist testing - don't use + :return: an iterator of tuple sequences, one per path-name. + See _RulesSearcher.get_items for details on the tuple sequence. + """ + if _default_searcher is None: + _default_searcher = rules._per_user_searcher + searcher = self._get_rules_searcher(_default_searcher) + if searcher is not None: + if pref_names is not None: + for path in path_names: + yield searcher.get_selected_items(path, pref_names) + else: + for path in path_names: + yield searcher.get_items(path) + + def _get_rules_searcher(self, default_searcher): + """Get the RulesSearcher for this tree given the default one.""" + searcher = default_searcher + return searcher + + +class InventoryTree(Tree): + """A tree that relies on an inventory for its metadata. + + Trees contain an `Inventory` object, and also know how to retrieve + file texts mentioned in the inventory, either from a working + directory or from a store. + + It is possible for trees to contain files that are not described + in their inventory or vice versa; for this use `filenames()`. + + Subclasses should set the _inventory attribute, which is considered + private to external API users. + """ + + def get_canonical_inventory_paths(self, paths): + """Like get_canonical_inventory_path() but works on multiple items. + + :param paths: A sequence of paths relative to the root of the tree. + :return: A list of paths, with each item the corresponding input path + adjusted to account for existing elements that match case + insensitively. + """ + return list(self._yield_canonical_inventory_paths(paths)) + + def get_canonical_inventory_path(self, path): + """Returns the first inventory item that case-insensitively matches path. + + If a path matches exactly, it is returned. If no path matches exactly + but more than one path matches case-insensitively, it is implementation + defined which is returned. + + If no path matches case-insensitively, the input path is returned, but + with as many path entries that do exist changed to their canonical + form. + + If you need to resolve many names from the same tree, you should + use get_canonical_inventory_paths() to avoid O(N) behaviour. + + :param path: A paths relative to the root of the tree. + :return: The input path adjusted to account for existing elements + that match case insensitively. + """ + return self._yield_canonical_inventory_paths([path]).next() + + def _yield_canonical_inventory_paths(self, paths): + for path in paths: + # First, if the path as specified exists exactly, just use it. + if self.path2id(path) is not None: + yield path + continue + # go walkin... + cur_id = self.get_root_id() + cur_path = '' + bit_iter = iter(path.split("/")) + for elt in bit_iter: + lelt = elt.lower() + new_path = None + for child in self.iter_children(cur_id): + try: + # XXX: it seem like if the child is known to be in the + # tree, we shouldn't need to go from its id back to + # its path -- mbp 2010-02-11 + # + # XXX: it seems like we could be more efficient + # by just directly looking up the original name and + # only then searching all children; also by not + # chopping paths so much. -- mbp 2010-02-11 + child_base = os.path.basename(self.id2path(child)) + if (child_base == elt): + # if we found an exact match, we can stop now; if + # we found an approximate match we need to keep + # searching because there might be an exact match + # later. + cur_id = child + new_path = osutils.pathjoin(cur_path, child_base) + break + elif child_base.lower() == lelt: + cur_id = child + new_path = osutils.pathjoin(cur_path, child_base) + except errors.NoSuchId: + # before a change is committed we can see this error... + continue + if new_path: + cur_path = new_path + else: + # got to the end of this directory and no entries matched. + # Return what matched so far, plus the rest as specified. + cur_path = osutils.pathjoin(cur_path, elt, *list(bit_iter)) + break + yield cur_path + # all done. + + @deprecated_method(deprecated_in((2, 5, 0))) + def _get_inventory(self): + return self._inventory + + inventory = property(_get_inventory, + doc="Inventory of this Tree") + + def _get_root_inventory(self): + return self._inventory + + root_inventory = property(_get_root_inventory, + doc="Root inventory of this tree") + + def _unpack_file_id(self, file_id): + """Find the inventory and inventory file id for a tree file id. + + :param file_id: The tree file id, as bytestring or tuple + :return: Inventory and inventory file id + """ + if isinstance(file_id, tuple): + if len(file_id) != 1: + raise ValueError("nested trees not yet supported: %r" % file_id) + file_id = file_id[0] + return self.root_inventory, file_id + + @needs_read_lock + def path2id(self, path): + """Return the id for path in this tree.""" + return self._path2inv_file_id(path)[1] + + def _path2inv_file_id(self, path): + """Lookup a inventory and inventory file id by path. + + :param path: Path to look up + :return: tuple with inventory and inventory file id + """ + # FIXME: Support nested trees + return self.root_inventory, self.root_inventory.path2id(path) + + def id2path(self, file_id): + """Return the path for a file id. + + :raises NoSuchId: + """ + inventory, file_id = self._unpack_file_id(file_id) + return inventory.id2path(file_id) + + def has_id(self, file_id): + inventory, file_id = self._unpack_file_id(file_id) + return inventory.has_id(file_id) + + def has_or_had_id(self, file_id): + inventory, file_id = self._unpack_file_id(file_id) + return inventory.has_id(file_id) + + def all_file_ids(self): + return set( + [entry.file_id for path, entry in self.iter_entries_by_dir()]) + + @deprecated_method(deprecated_in((2, 4, 0))) + def __iter__(self): + return iter(self.all_file_ids()) + + def filter_unversioned_files(self, paths): + """Filter out paths that are versioned. + + :return: set of paths. + """ + # NB: we specifically *don't* call self.has_filename, because for + # WorkingTrees that can indicate files that exist on disk but that + # are not versioned. + return set((p for p in paths if self.path2id(p) is None)) + + @needs_read_lock + def iter_entries_by_dir(self, specific_file_ids=None, yield_parents=False): + """Walk the tree in 'by_dir' order. + + This will yield each entry in the tree as a (path, entry) tuple. + The order that they are yielded is: + + See Tree.iter_entries_by_dir for details. + + :param yield_parents: If True, yield the parents from the root leading + down to specific_file_ids that have been requested. This has no + impact if specific_file_ids is None. + """ + if specific_file_ids is None: + inventory_file_ids = None + else: + inventory_file_ids = [] + for tree_file_id in specific_file_ids: + inventory, inv_file_id = self._unpack_file_id(tree_file_id) + if not inventory is self.root_inventory: # for now + raise AssertionError("%r != %r" % ( + inventory, self.root_inventory)) + inventory_file_ids.append(inv_file_id) + # FIXME: Handle nested trees + return self.root_inventory.iter_entries_by_dir( + specific_file_ids=inventory_file_ids, yield_parents=yield_parents) + + @needs_read_lock + def iter_child_entries(self, file_id, path=None): + inv, inv_file_id = self._unpack_file_id(file_id) + return inv[inv_file_id].children.itervalues() + + @deprecated_method(deprecated_in((2, 5, 0))) + def get_file_by_path(self, path): + return self.get_file(self.path2id(path), path) + + def iter_children(self, file_id, path=None): + """See Tree.iter_children.""" + entry = self.iter_entries_by_dir([file_id]).next()[1] + for child in getattr(entry, 'children', {}).itervalues(): + yield child.file_id + + +def find_ids_across_trees(filenames, trees, require_versioned=True): + """Find the ids corresponding to specified filenames. + + All matches in all trees will be used, and all children of matched + directories will be used. + + :param filenames: The filenames to find file_ids for (if None, returns + None) + :param trees: The trees to find file_ids within + :param require_versioned: if true, all specified filenames must occur in + at least one tree. + :return: a set of file ids for the specified filenames and their children. + """ + if not filenames: + return None + specified_path_ids = _find_ids_across_trees(filenames, trees, + require_versioned) + return _find_children_across_trees(specified_path_ids, trees) + + +def _find_ids_across_trees(filenames, trees, require_versioned): + """Find the ids corresponding to specified filenames. + + All matches in all trees will be used, but subdirectories are not scanned. + + :param filenames: The filenames to find file_ids for + :param trees: The trees to find file_ids within + :param require_versioned: if true, all specified filenames must occur in + at least one tree. + :return: a set of file ids for the specified filenames + """ + not_versioned = [] + interesting_ids = set() + for tree_path in filenames: + not_found = True + for tree in trees: + file_id = tree.path2id(tree_path) + if file_id is not None: + interesting_ids.add(file_id) + not_found = False + if not_found: + not_versioned.append(tree_path) + if len(not_versioned) > 0 and require_versioned: + raise errors.PathsNotVersionedError(not_versioned) + return interesting_ids + + +def _find_children_across_trees(specified_ids, trees): + """Return a set including specified ids and their children. + + All matches in all trees will be used. + + :param trees: The trees to find file_ids within + :return: a set containing all specified ids and their children + """ + interesting_ids = set(specified_ids) + pending = interesting_ids + # now handle children of interesting ids + # we loop so that we handle all children of each id in both trees + while len(pending) > 0: + new_pending = set() + for file_id in pending: + for tree in trees: + if not tree.has_or_had_id(file_id): + continue + for child_id in tree.iter_children(file_id): + if child_id not in interesting_ids: + new_pending.add(child_id) + interesting_ids.update(new_pending) + pending = new_pending + return interesting_ids + + +class InterTree(InterObject): + """This class represents operations taking place between two Trees. + + Its instances have methods like 'compare' and contain references to the + source and target trees these operations are to be carried out on. + + Clients of bzrlib should not need to use InterTree directly, rather they + should use the convenience methods on Tree such as 'Tree.compare()' which + will pass through to InterTree as appropriate. + """ + + # Formats that will be used to test this InterTree. If both are + # None, this InterTree will not be tested (e.g. because a complex + # setup is required) + _matching_from_tree_format = None + _matching_to_tree_format = None + + _optimisers = [] + + @classmethod + def is_compatible(kls, source, target): + # The default implementation is naive and uses the public API, so + # it works for all trees. + return True + + def _changes_from_entries(self, source_entry, target_entry, + source_path=None, target_path=None): + """Generate a iter_changes tuple between source_entry and target_entry. + + :param source_entry: An inventory entry from self.source, or None. + :param target_entry: An inventory entry from self.target, or None. + :param source_path: The path of source_entry, if known. If not known + it will be looked up. + :param target_path: The path of target_entry, if known. If not known + it will be looked up. + :return: A tuple, item 0 of which is an iter_changes result tuple, and + item 1 is True if there are any changes in the result tuple. + """ + if source_entry is None: + if target_entry is None: + return None + file_id = target_entry.file_id + else: + file_id = source_entry.file_id + if source_entry is not None: + source_versioned = True + source_name = source_entry.name + source_parent = source_entry.parent_id + if source_path is None: + source_path = self.source.id2path(file_id) + source_kind, source_executable, source_stat = \ + self.source._comparison_data(source_entry, source_path) + else: + source_versioned = False + source_name = None + source_parent = None + source_kind = None + source_executable = None + if target_entry is not None: + target_versioned = True + target_name = target_entry.name + target_parent = target_entry.parent_id + if target_path is None: + target_path = self.target.id2path(file_id) + target_kind, target_executable, target_stat = \ + self.target._comparison_data(target_entry, target_path) + else: + target_versioned = False + target_name = None + target_parent = None + target_kind = None + target_executable = None + versioned = (source_versioned, target_versioned) + kind = (source_kind, target_kind) + changed_content = False + if source_kind != target_kind: + changed_content = True + elif source_kind == 'file': + if not self.file_content_matches(file_id, file_id, source_path, + target_path, source_stat, target_stat): + changed_content = True + elif source_kind == 'symlink': + if (self.source.get_symlink_target(file_id) != + self.target.get_symlink_target(file_id)): + changed_content = True + elif source_kind == 'tree-reference': + if (self.source.get_reference_revision(file_id, source_path) + != self.target.get_reference_revision(file_id, target_path)): + changed_content = True + parent = (source_parent, target_parent) + name = (source_name, target_name) + executable = (source_executable, target_executable) + if (changed_content is not False or versioned[0] != versioned[1] + or parent[0] != parent[1] or name[0] != name[1] or + executable[0] != executable[1]): + changes = True + else: + changes = False + return (file_id, (source_path, target_path), changed_content, + versioned, parent, name, kind, executable), changes + + @needs_read_lock + def compare(self, want_unchanged=False, specific_files=None, + extra_trees=None, require_versioned=False, include_root=False, + want_unversioned=False): + """Return the changes from source to target. + + :return: A TreeDelta. + :param specific_files: An optional list of file paths to restrict the + comparison to. When mapping filenames to ids, all matches in all + trees (including optional extra_trees) are used, and all children of + matched directories are included. + :param want_unchanged: An optional boolean requesting the inclusion of + unchanged entries in the result. + :param extra_trees: An optional list of additional trees to use when + mapping the contents of specific_files (paths) to file_ids. + :param require_versioned: An optional boolean (defaults to False). When + supplied and True all the 'specific_files' must be versioned, or + a PathsNotVersionedError will be thrown. + :param want_unversioned: Scan for unversioned paths. + """ + trees = (self.source,) + if extra_trees is not None: + trees = trees + tuple(extra_trees) + # target is usually the newer tree: + specific_file_ids = self.target.paths2ids(specific_files, trees, + require_versioned=require_versioned) + if specific_files and not specific_file_ids: + # All files are unversioned, so just return an empty delta + # _compare_trees would think we want a complete delta + result = delta.TreeDelta() + fake_entry = inventory.InventoryFile('unused', 'unused', 'unused') + result.unversioned = [(path, None, + self.target._comparison_data(fake_entry, path)[0]) for path in + specific_files] + return result + return delta._compare_trees(self.source, self.target, want_unchanged, + specific_files, include_root, extra_trees=extra_trees, + require_versioned=require_versioned, + want_unversioned=want_unversioned) + + def iter_changes(self, include_unchanged=False, + specific_files=None, pb=None, extra_trees=[], + require_versioned=True, want_unversioned=False): + """Generate an iterator of changes between trees. + + A tuple is returned: + (file_id, (path_in_source, path_in_target), + changed_content, versioned, parent, name, kind, + executable) + + Changed_content is True if the file's content has changed. This + includes changes to its kind, and to a symlink's target. + + versioned, parent, name, kind, executable are tuples of (from, to). + If a file is missing in a tree, its kind is None. + + Iteration is done in parent-to-child order, relative to the target + tree. + + There is no guarantee that all paths are in sorted order: the + requirement to expand the search due to renames may result in children + that should be found early being found late in the search, after + lexically later results have been returned. + :param require_versioned: Raise errors.PathsNotVersionedError if a + path in the specific_files list is not versioned in one of + source, target or extra_trees. + :param specific_files: An optional list of file paths to restrict the + comparison to. When mapping filenames to ids, all matches in all + trees (including optional extra_trees) are used, and all children + of matched directories are included. The parents in the target tree + of the specific files up to and including the root of the tree are + always evaluated for changes too. + :param want_unversioned: Should unversioned files be returned in the + output. An unversioned file is defined as one with (False, False) + for the versioned pair. + """ + lookup_trees = [self.source] + if extra_trees: + lookup_trees.extend(extra_trees) + # The ids of items we need to examine to insure delta consistency. + precise_file_ids = set() + changed_file_ids = [] + if specific_files == []: + specific_file_ids = [] + else: + specific_file_ids = self.target.paths2ids(specific_files, + lookup_trees, require_versioned=require_versioned) + if specific_files is not None: + # reparented or added entries must have their parents included + # so that valid deltas can be created. The seen_parents set + # tracks the parents that we need to have. + # The seen_dirs set tracks directory entries we've yielded. + # After outputting version object in to_entries we set difference + # the two seen sets and start checking parents. + seen_parents = set() + seen_dirs = set() + if want_unversioned: + all_unversioned = sorted([(p.split('/'), p) for p in + self.target.extras() + if specific_files is None or + osutils.is_inside_any(specific_files, p)]) + all_unversioned = collections.deque(all_unversioned) + else: + all_unversioned = collections.deque() + to_paths = {} + from_entries_by_dir = list(self.source.iter_entries_by_dir( + specific_file_ids=specific_file_ids)) + from_data = dict((e.file_id, (p, e)) for p, e in from_entries_by_dir) + to_entries_by_dir = list(self.target.iter_entries_by_dir( + specific_file_ids=specific_file_ids)) + num_entries = len(from_entries_by_dir) + len(to_entries_by_dir) + entry_count = 0 + # the unversioned path lookup only occurs on real trees - where there + # can be extras. So the fake_entry is solely used to look up + # executable it values when execute is not supported. + fake_entry = inventory.InventoryFile('unused', 'unused', 'unused') + for target_path, target_entry in to_entries_by_dir: + while (all_unversioned and + all_unversioned[0][0] < target_path.split('/')): + unversioned_path = all_unversioned.popleft() + target_kind, target_executable, target_stat = \ + self.target._comparison_data(fake_entry, unversioned_path[1]) + yield (None, (None, unversioned_path[1]), True, (False, False), + (None, None), + (None, unversioned_path[0][-1]), + (None, target_kind), + (None, target_executable)) + source_path, source_entry = from_data.get(target_entry.file_id, + (None, None)) + result, changes = self._changes_from_entries(source_entry, + target_entry, source_path=source_path, target_path=target_path) + to_paths[result[0]] = result[1][1] + entry_count += 1 + if result[3][0]: + entry_count += 1 + if pb is not None: + pb.update('comparing files', entry_count, num_entries) + if changes or include_unchanged: + if specific_file_ids is not None: + new_parent_id = result[4][1] + precise_file_ids.add(new_parent_id) + changed_file_ids.append(result[0]) + yield result + # Ensure correct behaviour for reparented/added specific files. + if specific_files is not None: + # Record output dirs + if result[6][1] == 'directory': + seen_dirs.add(result[0]) + # Record parents of reparented/added entries. + versioned = result[3] + parents = result[4] + if not versioned[0] or parents[0] != parents[1]: + seen_parents.add(parents[1]) + while all_unversioned: + # yield any trailing unversioned paths + unversioned_path = all_unversioned.popleft() + to_kind, to_executable, to_stat = \ + self.target._comparison_data(fake_entry, unversioned_path[1]) + yield (None, (None, unversioned_path[1]), True, (False, False), + (None, None), + (None, unversioned_path[0][-1]), + (None, to_kind), + (None, to_executable)) + # Yield all remaining source paths + for path, from_entry in from_entries_by_dir: + file_id = from_entry.file_id + if file_id in to_paths: + # already returned + continue + if not self.target.has_id(file_id): + # common case - paths we have not emitted are not present in + # target. + to_path = None + else: + to_path = self.target.id2path(file_id) + entry_count += 1 + if pb is not None: + pb.update('comparing files', entry_count, num_entries) + versioned = (True, False) + parent = (from_entry.parent_id, None) + name = (from_entry.name, None) + from_kind, from_executable, stat_value = \ + self.source._comparison_data(from_entry, path) + kind = (from_kind, None) + executable = (from_executable, None) + changed_content = from_kind is not None + # the parent's path is necessarily known at this point. + changed_file_ids.append(file_id) + yield(file_id, (path, to_path), changed_content, versioned, parent, + name, kind, executable) + changed_file_ids = set(changed_file_ids) + if specific_file_ids is not None: + for result in self._handle_precise_ids(precise_file_ids, + changed_file_ids): + yield result + + def _get_entry(self, tree, file_id): + """Get an inventory entry from a tree, with missing entries as None. + + If the tree raises NotImplementedError on accessing .inventory, then + this is worked around using iter_entries_by_dir on just the file id + desired. + + :param tree: The tree to lookup the entry in. + :param file_id: The file_id to lookup. + """ + try: + inventory = tree.root_inventory + except NotImplementedError: + # No inventory available. + try: + iterator = tree.iter_entries_by_dir(specific_file_ids=[file_id]) + return iterator.next()[1] + except StopIteration: + return None + else: + try: + return inventory[file_id] + except errors.NoSuchId: + return None + + def _handle_precise_ids(self, precise_file_ids, changed_file_ids, + discarded_changes=None): + """Fill out a partial iter_changes to be consistent. + + :param precise_file_ids: The file ids of parents that were seen during + the iter_changes. + :param changed_file_ids: The file ids of already emitted items. + :param discarded_changes: An optional dict of precalculated + iter_changes items which the partial iter_changes had not output + but had calculated. + :return: A generator of iter_changes items to output. + """ + # process parents of things that had changed under the users + # requested paths to prevent incorrect paths or parent ids which + # aren't in the tree. + while precise_file_ids: + precise_file_ids.discard(None) + # Don't emit file_ids twice + precise_file_ids.difference_update(changed_file_ids) + if not precise_file_ids: + break + # If the there was something at a given output path in source, we + # have to include the entry from source in the delta, or we would + # be putting this entry into a used path. + paths = [] + for parent_id in precise_file_ids: + try: + paths.append(self.target.id2path(parent_id)) + except errors.NoSuchId: + # This id has been dragged in from the source by delta + # expansion and isn't present in target at all: we don't + # need to check for path collisions on it. + pass + for path in paths: + old_id = self.source.path2id(path) + precise_file_ids.add(old_id) + precise_file_ids.discard(None) + current_ids = precise_file_ids + precise_file_ids = set() + # We have to emit all of precise_file_ids that have been altered. + # We may have to output the children of some of those ids if any + # directories have stopped being directories. + for file_id in current_ids: + # Examine file_id + if discarded_changes: + result = discarded_changes.get(file_id) + old_entry = None + else: + result = None + if result is None: + old_entry = self._get_entry(self.source, file_id) + new_entry = self._get_entry(self.target, file_id) + result, changes = self._changes_from_entries( + old_entry, new_entry) + else: + changes = True + # Get this parents parent to examine. + new_parent_id = result[4][1] + precise_file_ids.add(new_parent_id) + if changes: + if (result[6][0] == 'directory' and + result[6][1] != 'directory'): + # This stopped being a directory, the old children have + # to be included. + if old_entry is None: + # Reusing a discarded change. + old_entry = self._get_entry(self.source, file_id) + precise_file_ids.update( + self.source.iter_children(file_id)) + changed_file_ids.add(result[0]) + yield result + + @needs_read_lock + def file_content_matches(self, source_file_id, target_file_id, + source_path=None, target_path=None, source_stat=None, target_stat=None): + """Check if two files are the same in the source and target trees. + + This only checks that the contents of the files are the same, + it does not touch anything else. + + :param source_file_id: File id of the file in the source tree + :param target_file_id: File id of the file in the target tree + :param source_path: Path of the file in the source tree + :param target_path: Path of the file in the target tree + :param source_stat: Optional stat value of the file in the source tree + :param target_stat: Optional stat value of the file in the target tree + :return: Boolean indicating whether the files have the same contents + """ + source_verifier_kind, source_verifier_data = self.source.get_file_verifier( + source_file_id, source_path, source_stat) + target_verifier_kind, target_verifier_data = self.target.get_file_verifier( + target_file_id, target_path, target_stat) + if source_verifier_kind == target_verifier_kind: + return (source_verifier_data == target_verifier_data) + # Fall back to SHA1 for now + if source_verifier_kind != "SHA1": + source_sha1 = self.source.get_file_sha1(source_file_id, + source_path, source_stat) + else: + source_sha1 = source_verifier_data + if target_verifier_kind != "SHA1": + target_sha1 = self.target.get_file_sha1(target_file_id, + target_path, target_stat) + else: + target_sha1 = target_verifier_data + return (source_sha1 == target_sha1) + +InterTree.register_optimiser(InterTree) + + +class MultiWalker(object): + """Walk multiple trees simultaneously, getting combined results.""" + + # Note: This could be written to not assume you can do out-of-order + # lookups. Instead any nodes that don't match in all trees could be + # marked as 'deferred', and then returned in the final cleanup loop. + # For now, I think it is "nicer" to return things as close to the + # "master_tree" order as we can. + + def __init__(self, master_tree, other_trees): + """Create a new MultiWalker. + + All trees being walked must implement "iter_entries_by_dir()", such + that they yield (path, object) tuples, where that object will have a + '.file_id' member, that can be used to check equality. + + :param master_tree: All trees will be 'slaved' to the master_tree such + that nodes in master_tree will be used as 'first-pass' sync points. + Any nodes that aren't in master_tree will be merged in a second + pass. + :param other_trees: A list of other trees to walk simultaneously. + """ + self._master_tree = master_tree + self._other_trees = other_trees + + # Keep track of any nodes that were properly processed just out of + # order, that way we don't return them at the end, we don't have to + # track *all* processed file_ids, just the out-of-order ones + self._out_of_order_processed = set() + + @staticmethod + def _step_one(iterator): + """Step an iter_entries_by_dir iterator. + + :return: (has_more, path, ie) + If has_more is False, path and ie will be None. + """ + try: + path, ie = iterator.next() + except StopIteration: + return False, None, None + else: + return True, path, ie + + @staticmethod + def _cmp_path_by_dirblock(path1, path2): + """Compare two paths based on what directory they are in. + + This generates a sort order, such that all children of a directory are + sorted together, and grandchildren are in the same order as the + children appear. But all grandchildren come after all children. + + :param path1: first path + :param path2: the second path + :return: negative number if ``path1`` comes first, + 0 if paths are equal + and a positive number if ``path2`` sorts first + """ + # Shortcut this special case + if path1 == path2: + return 0 + # This is stolen from _dirstate_helpers_py.py, only switching it to + # Unicode objects. Consider using encode_utf8() and then using the + # optimized versions, or maybe writing optimized unicode versions. + if not isinstance(path1, unicode): + raise TypeError("'path1' must be a unicode string, not %s: %r" + % (type(path1), path1)) + if not isinstance(path2, unicode): + raise TypeError("'path2' must be a unicode string, not %s: %r" + % (type(path2), path2)) + return cmp(MultiWalker._path_to_key(path1), + MultiWalker._path_to_key(path2)) + + @staticmethod + def _path_to_key(path): + dirname, basename = osutils.split(path) + return (dirname.split(u'/'), basename) + + def _lookup_by_file_id(self, extra_entries, other_tree, file_id): + """Lookup an inventory entry by file_id. + + This is called when an entry is missing in the normal order. + Generally this is because a file was either renamed, or it was + deleted/added. If the entry was found in the inventory and not in + extra_entries, it will be added to self._out_of_order_processed + + :param extra_entries: A dictionary of {file_id: (path, ie)}. This + should be filled with entries that were found before they were + used. If file_id is present, it will be removed from the + dictionary. + :param other_tree: The Tree to search, in case we didn't find the entry + yet. + :param file_id: The file_id to look for + :return: (path, ie) if found or (None, None) if not present. + """ + if file_id in extra_entries: + return extra_entries.pop(file_id) + # TODO: Is id2path better as the first call, or is + # inventory[file_id] better as a first check? + try: + cur_path = other_tree.id2path(file_id) + except errors.NoSuchId: + cur_path = None + if cur_path is None: + return (None, None) + else: + self._out_of_order_processed.add(file_id) + cur_ie = other_tree.root_inventory[file_id] + return (cur_path, cur_ie) + + def iter_all(self): + """Match up the values in the different trees.""" + for result in self._walk_master_tree(): + yield result + self._finish_others() + for result in self._walk_others(): + yield result + + def _walk_master_tree(self): + """First pass, walk all trees in lock-step. + + When we are done, all nodes in the master_tree will have been + processed. _other_walkers, _other_entries, and _others_extra will be + set on 'self' for future processing. + """ + # This iterator has the most "inlining" done, because it tends to touch + # every file in the tree, while the others only hit nodes that don't + # match. + master_iterator = self._master_tree.iter_entries_by_dir() + + other_walkers = [other.iter_entries_by_dir() + for other in self._other_trees] + other_entries = [self._step_one(walker) for walker in other_walkers] + # Track extra nodes in the other trees + others_extra = [{} for i in xrange(len(self._other_trees))] + + master_has_more = True + step_one = self._step_one + lookup_by_file_id = self._lookup_by_file_id + out_of_order_processed = self._out_of_order_processed + + while master_has_more: + (master_has_more, path, master_ie) = step_one(master_iterator) + if not master_has_more: + break + + file_id = master_ie.file_id + other_values = [] + other_values_append = other_values.append + next_other_entries = [] + next_other_entries_append = next_other_entries.append + for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries): + if not other_has_more: + other_values_append(lookup_by_file_id( + others_extra[idx], self._other_trees[idx], file_id)) + next_other_entries_append((False, None, None)) + elif file_id == other_ie.file_id: + # This is the critical code path, as most of the entries + # should match between most trees. + other_values_append((other_path, other_ie)) + next_other_entries_append(step_one(other_walkers[idx])) + else: + # This walker did not match, step it until it either + # matches, or we know we are past the current walker. + other_walker = other_walkers[idx] + other_extra = others_extra[idx] + while (other_has_more and + self._cmp_path_by_dirblock(other_path, path) < 0): + other_file_id = other_ie.file_id + if other_file_id not in out_of_order_processed: + other_extra[other_file_id] = (other_path, other_ie) + other_has_more, other_path, other_ie = \ + step_one(other_walker) + if other_has_more and other_ie.file_id == file_id: + # We ended up walking to this point, match and step + # again + other_values_append((other_path, other_ie)) + other_has_more, other_path, other_ie = \ + step_one(other_walker) + else: + # This record isn't in the normal order, see if it + # exists at all. + other_values_append(lookup_by_file_id( + other_extra, self._other_trees[idx], file_id)) + next_other_entries_append((other_has_more, other_path, + other_ie)) + other_entries = next_other_entries + + # We've matched all the walkers, yield this datapoint + yield path, file_id, master_ie, other_values + self._other_walkers = other_walkers + self._other_entries = other_entries + self._others_extra = others_extra + + def _finish_others(self): + """Finish walking the other iterators, so we get all entries.""" + for idx, info in enumerate(self._other_entries): + other_extra = self._others_extra[idx] + (other_has_more, other_path, other_ie) = info + while other_has_more: + other_file_id = other_ie.file_id + if other_file_id not in self._out_of_order_processed: + other_extra[other_file_id] = (other_path, other_ie) + other_has_more, other_path, other_ie = \ + self._step_one(self._other_walkers[idx]) + del self._other_entries + + def _walk_others(self): + """Finish up by walking all the 'deferred' nodes.""" + # TODO: One alternative would be to grab all possible unprocessed + # file_ids, and then sort by path, and then yield them. That + # might ensure better ordering, in case a caller strictly + # requires parents before children. + for idx, other_extra in enumerate(self._others_extra): + others = sorted(other_extra.itervalues(), + key=lambda x: self._path_to_key(x[0])) + for other_path, other_ie in others: + file_id = other_ie.file_id + # We don't need to check out_of_order_processed here, because + # the lookup_by_file_id will be removing anything processed + # from the extras cache + other_extra.pop(file_id) + other_values = [(None, None) for i in xrange(idx)] + other_values.append((other_path, other_ie)) + for alt_idx, alt_extra in enumerate(self._others_extra[idx+1:]): + alt_idx = alt_idx + idx + 1 + alt_extra = self._others_extra[alt_idx] + alt_tree = self._other_trees[alt_idx] + other_values.append(self._lookup_by_file_id( + alt_extra, alt_tree, file_id)) + yield other_path, file_id, None, other_values |