summaryrefslogtreecommitdiff
path: root/fastimport/processors/filter_processor.py
diff options
context:
space:
mode:
Diffstat (limited to 'fastimport/processors/filter_processor.py')
-rw-r--r--fastimport/processors/filter_processor.py298
1 files changed, 298 insertions, 0 deletions
diff --git a/fastimport/processors/filter_processor.py b/fastimport/processors/filter_processor.py
new file mode 100644
index 0000000..0c8506e
--- /dev/null
+++ b/fastimport/processors/filter_processor.py
@@ -0,0 +1,298 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Import processor that filters the input (and doesn't import)."""
+
+
+from bzrlib import osutils
+from bzrlib.trace import (
+ warning,
+ )
+from fastimport import (
+ commands,
+ helpers,
+ processor,
+ )
+
+
+class FilterProcessor(processor.ImportProcessor):
+ """An import processor that filters the input to include/exclude objects.
+
+ No changes to the current repository are made.
+
+ Here are the supported parameters:
+
+ * include_paths - a list of paths that commits must change in order to
+ be kept in the output stream
+
+ * exclude_paths - a list of paths that should not appear in the output
+ stream
+ """
+
+ known_params = [
+ 'include_paths',
+ 'exclude_paths',
+ ]
+
+ def pre_process(self):
+ self.includes = self.params.get('include_paths')
+ self.excludes = self.params.get('exclude_paths')
+ # What's the new root, if any
+ self.new_root = helpers.common_directory(self.includes)
+ # Buffer of blobs until we know we need them: mark -> cmd
+ self.blobs = {}
+ # These are the commits we've output so far
+ self.interesting_commits = set()
+ # Map of commit-id to list of parents
+ self.parents = {}
+
+ def pre_handler(self, cmd):
+ self.command = cmd
+ # Should this command be included in the output or not?
+ self.keep = False
+ # Blobs to dump into the output before dumping the command itself
+ self.referenced_blobs = []
+
+ def post_handler(self, cmd):
+ if not self.keep:
+ return
+ # print referenced blobs and the command
+ for blob_id in self.referenced_blobs:
+ self._print_command(self.blobs[blob_id])
+ self._print_command(self.command)
+
+ def progress_handler(self, cmd):
+ """Process a ProgressCommand."""
+ # These always pass through
+ self.keep = True
+
+ def blob_handler(self, cmd):
+ """Process a BlobCommand."""
+ # These never pass through directly. We buffer them and only
+ # output them if referenced by an interesting command.
+ self.blobs[cmd.id] = cmd
+ self.keep = False
+
+ def checkpoint_handler(self, cmd):
+ """Process a CheckpointCommand."""
+ # These always pass through
+ self.keep = True
+
+ def commit_handler(self, cmd):
+ """Process a CommitCommand."""
+ # These pass through if they meet the filtering conditions
+ interesting_filecmds = self._filter_filecommands(cmd.file_iter)
+ if interesting_filecmds:
+ # If all we have is a single deleteall, skip this commit
+ if len(interesting_filecmds) == 1 and isinstance(
+ interesting_filecmds[0], commands.FileDeleteAllCommand):
+ pass
+ else:
+ # Remember just the interesting file commands
+ self.keep = True
+ cmd.file_iter = iter(interesting_filecmds)
+
+ # Record the referenced blobs
+ for fc in interesting_filecmds:
+ if isinstance(fc, commands.FileModifyCommand):
+ if (fc.dataref is not None and
+ fc.kind != 'directory'):
+ self.referenced_blobs.append(fc.dataref)
+
+ # Update from and merges to refer to commits in the output
+ cmd.from_ = self._find_interesting_from(cmd.from_)
+ cmd.merges = self._find_interesting_merges(cmd.merges)
+ self.interesting_commits.add(cmd.id)
+
+ # Keep track of the parents
+ if cmd.from_ and cmd.merges:
+ parents = [cmd.from_] + cmd.merges
+ elif cmd.from_:
+ parents = [cmd.from_]
+ else:
+ parents = None
+ self.parents[":" + cmd.mark] = parents
+
+ def reset_handler(self, cmd):
+ """Process a ResetCommand."""
+ if cmd.from_ is None:
+ # We pass through resets that init a branch because we have to
+ # assume the branch might be interesting.
+ self.keep = True
+ else:
+ # Keep resets if they indirectly reference something we kept
+ cmd.from_ = self._find_interesting_from(cmd.from_)
+ self.keep = cmd.from_ is not None
+
+ def tag_handler(self, cmd):
+ """Process a TagCommand."""
+ # Keep tags if they indirectly reference something we kept
+ cmd.from_ = self._find_interesting_from(cmd.from_)
+ self.keep = cmd.from_ is not None
+
+ def feature_handler(self, cmd):
+ """Process a FeatureCommand."""
+ feature = cmd.feature_name
+ if feature not in commands.FEATURE_NAMES:
+ self.warning("feature %s is not supported - parsing may fail"
+ % (feature,))
+ # These always pass through
+ self.keep = True
+
+ def _print_command(self, cmd):
+ """Wrapper to avoid adding unnecessary blank lines."""
+ text = repr(cmd)
+ self.outf.write(text)
+ if not text.endswith("\n"):
+ self.outf.write("\n")
+
+ def _filter_filecommands(self, filecmd_iter):
+ """Return the filecommands filtered by includes & excludes.
+
+ :return: a list of FileCommand objects
+ """
+ if self.includes is None and self.excludes is None:
+ return list(filecmd_iter())
+
+ # Do the filtering, adjusting for the new_root
+ result = []
+ for fc in filecmd_iter():
+ if (isinstance(fc, commands.FileModifyCommand) or
+ isinstance(fc, commands.FileDeleteCommand)):
+ if self._path_to_be_kept(fc.path):
+ fc.path = self._adjust_for_new_root(fc.path)
+ else:
+ continue
+ elif isinstance(fc, commands.FileDeleteAllCommand):
+ pass
+ elif isinstance(fc, commands.FileRenameCommand):
+ fc = self._convert_rename(fc)
+ elif isinstance(fc, commands.FileCopyCommand):
+ fc = self._convert_copy(fc)
+ else:
+ warning("cannot handle FileCommands of class %s - ignoring",
+ fc.__class__)
+ continue
+ if fc is not None:
+ result.append(fc)
+ return result
+
+ def _path_to_be_kept(self, path):
+ """Does the given path pass the filtering criteria?"""
+ if self.excludes and (path in self.excludes
+ or osutils.is_inside_any(self.excludes, path)):
+ return False
+ if self.includes:
+ return (path in self.includes
+ or osutils.is_inside_any(self.includes, path))
+ return True
+
+ def _adjust_for_new_root(self, path):
+ """Adjust a path given the new root directory of the output."""
+ if self.new_root is None:
+ return path
+ elif path.startswith(self.new_root):
+ return path[len(self.new_root):]
+ else:
+ return path
+
+ def _find_interesting_parent(self, commit_ref):
+ while True:
+ if commit_ref in self.interesting_commits:
+ return commit_ref
+ parents = self.parents.get(commit_ref)
+ if not parents:
+ return None
+ commit_ref = parents[0]
+
+ def _find_interesting_from(self, commit_ref):
+ if commit_ref is None:
+ return None
+ return self._find_interesting_parent(commit_ref)
+
+ def _find_interesting_merges(self, commit_refs):
+ if commit_refs is None:
+ return None
+ merges = []
+ for commit_ref in commit_refs:
+ parent = self._find_interesting_parent(commit_ref)
+ if parent is not None:
+ merges.append(parent)
+ if merges:
+ return merges
+ else:
+ return None
+
+ def _convert_rename(self, fc):
+ """Convert a FileRenameCommand into a new FileCommand.
+
+ :return: None if the rename is being ignored, otherwise a
+ new FileCommand based on the whether the old and new paths
+ are inside or outside of the interesting locations.
+ """
+ old = fc.old_path
+ new = fc.new_path
+ keep_old = self._path_to_be_kept(old)
+ keep_new = self._path_to_be_kept(new)
+ if keep_old and keep_new:
+ fc.old_path = self._adjust_for_new_root(old)
+ fc.new_path = self._adjust_for_new_root(new)
+ return fc
+ elif keep_old:
+ # The file has been renamed to a non-interesting location.
+ # Delete it!
+ old = self._adjust_for_new_root(old)
+ return commands.FileDeleteCommand(old)
+ elif keep_new:
+ # The file has been renamed into an interesting location
+ # We really ought to add it but we don't currently buffer
+ # the contents of all previous files and probably never want
+ # to. Maybe fast-import-info needs to be extended to
+ # remember all renames and a config file can be passed
+ # into here ala fast-import?
+ warning("cannot turn rename of %s into an add of %s yet" %
+ (old, new))
+ return None
+
+ def _convert_copy(self, fc):
+ """Convert a FileCopyCommand into a new FileCommand.
+
+ :return: None if the copy is being ignored, otherwise a
+ new FileCommand based on the whether the source and destination
+ paths are inside or outside of the interesting locations.
+ """
+ src = fc.src_path
+ dest = fc.dest_path
+ keep_src = self._path_to_be_kept(src)
+ keep_dest = self._path_to_be_kept(dest)
+ if keep_src and keep_dest:
+ fc.src_path = self._adjust_for_new_root(src)
+ fc.dest_path = self._adjust_for_new_root(dest)
+ return fc
+ elif keep_src:
+ # The file has been copied to a non-interesting location.
+ # Ignore it!
+ return None
+ elif keep_dest:
+ # The file has been copied into an interesting location
+ # We really ought to add it but we don't currently buffer
+ # the contents of all previous files and probably never want
+ # to. Maybe fast-import-info needs to be extended to
+ # remember all copies and a config file can be passed
+ # into here ala fast-import?
+ warning("cannot turn copy of %s into an add of %s yet" %
+ (src, dest))
+ return None