diff options
Diffstat (limited to 'fastimport')
-rw-r--r-- | fastimport/helpers.py | 29 | ||||
-rw-r--r-- | fastimport/processors/__init__.py | 0 | ||||
-rw-r--r-- | fastimport/processors/filter_processor.py | 298 | ||||
-rw-r--r-- | fastimport/processors/info_processor.py | 281 | ||||
-rw-r--r-- | fastimport/processors/query_processor.py | 96 | ||||
-rw-r--r-- | fastimport/reftracker.py | 67 | ||||
-rw-r--r-- | fastimport/tests/__init__.py | 4 | ||||
-rw-r--r-- | fastimport/tests/test_filter_processor.py | 879 | ||||
-rw-r--r-- | fastimport/tests/test_head_tracking.py | 260 | ||||
-rw-r--r-- | fastimport/tests/test_helpers.py | 56 | ||||
-rw-r--r-- | fastimport/tests/test_parser.py | 284 |
11 files changed, 2254 insertions, 0 deletions
diff --git a/fastimport/helpers.py b/fastimport/helpers.py index 05cce6f..8e9a383 100644 --- a/fastimport/helpers.py +++ b/fastimport/helpers.py @@ -92,4 +92,33 @@ def binary_stream(stream): return stream +def common_directory(paths): + """Find the deepest common directory of a list of paths. + + :return: if no paths are provided, None is returned; + if there is no common directory, '' is returned; + otherwise the common directory with a trailing / is returned. + """ + from bzrlib import osutils + def get_dir_with_slash(path): + if path == '' or path.endswith('/'): + return path + else: + dirname, basename = osutils.split(path) + if dirname == '': + return dirname + else: + return dirname + '/' + + if not paths: + return None + elif len(paths) == 1: + return get_dir_with_slash(paths[0]) + else: + common = common_path(paths[0], paths[1]) + for path in paths[2:]: + common = common_path(common, path) + return get_dir_with_slash(common) + + diff --git a/fastimport/processors/__init__.py b/fastimport/processors/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/fastimport/processors/__init__.py diff --git a/fastimport/processors/filter_processor.py b/fastimport/processors/filter_processor.py new file mode 100644 index 0000000..0c8506e --- /dev/null +++ b/fastimport/processors/filter_processor.py @@ -0,0 +1,298 @@ +# Copyright (C) 2009 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Import processor that filters the input (and doesn't import).""" + + +from bzrlib import osutils +from bzrlib.trace import ( + warning, + ) +from fastimport import ( + commands, + helpers, + processor, + ) + + +class FilterProcessor(processor.ImportProcessor): + """An import processor that filters the input to include/exclude objects. + + No changes to the current repository are made. + + Here are the supported parameters: + + * include_paths - a list of paths that commits must change in order to + be kept in the output stream + + * exclude_paths - a list of paths that should not appear in the output + stream + """ + + known_params = [ + 'include_paths', + 'exclude_paths', + ] + + def pre_process(self): + self.includes = self.params.get('include_paths') + self.excludes = self.params.get('exclude_paths') + # What's the new root, if any + self.new_root = helpers.common_directory(self.includes) + # Buffer of blobs until we know we need them: mark -> cmd + self.blobs = {} + # These are the commits we've output so far + self.interesting_commits = set() + # Map of commit-id to list of parents + self.parents = {} + + def pre_handler(self, cmd): + self.command = cmd + # Should this command be included in the output or not? 
+ self.keep = False + # Blobs to dump into the output before dumping the command itself + self.referenced_blobs = [] + + def post_handler(self, cmd): + if not self.keep: + return + # print referenced blobs and the command + for blob_id in self.referenced_blobs: + self._print_command(self.blobs[blob_id]) + self._print_command(self.command) + + def progress_handler(self, cmd): + """Process a ProgressCommand.""" + # These always pass through + self.keep = True + + def blob_handler(self, cmd): + """Process a BlobCommand.""" + # These never pass through directly. We buffer them and only + # output them if referenced by an interesting command. + self.blobs[cmd.id] = cmd + self.keep = False + + def checkpoint_handler(self, cmd): + """Process a CheckpointCommand.""" + # These always pass through + self.keep = True + + def commit_handler(self, cmd): + """Process a CommitCommand.""" + # These pass through if they meet the filtering conditions + interesting_filecmds = self._filter_filecommands(cmd.file_iter) + if interesting_filecmds: + # If all we have is a single deleteall, skip this commit + if len(interesting_filecmds) == 1 and isinstance( + interesting_filecmds[0], commands.FileDeleteAllCommand): + pass + else: + # Remember just the interesting file commands + self.keep = True + cmd.file_iter = iter(interesting_filecmds) + + # Record the referenced blobs + for fc in interesting_filecmds: + if isinstance(fc, commands.FileModifyCommand): + if (fc.dataref is not None and + fc.kind != 'directory'): + self.referenced_blobs.append(fc.dataref) + + # Update from and merges to refer to commits in the output + cmd.from_ = self._find_interesting_from(cmd.from_) + cmd.merges = self._find_interesting_merges(cmd.merges) + self.interesting_commits.add(cmd.id) + + # Keep track of the parents + if cmd.from_ and cmd.merges: + parents = [cmd.from_] + cmd.merges + elif cmd.from_: + parents = [cmd.from_] + else: + parents = None + self.parents[":" + cmd.mark] = parents + + def 
reset_handler(self, cmd): + """Process a ResetCommand.""" + if cmd.from_ is None: + # We pass through resets that init a branch because we have to + # assume the branch might be interesting. + self.keep = True + else: + # Keep resets if they indirectly reference something we kept + cmd.from_ = self._find_interesting_from(cmd.from_) + self.keep = cmd.from_ is not None + + def tag_handler(self, cmd): + """Process a TagCommand.""" + # Keep tags if they indirectly reference something we kept + cmd.from_ = self._find_interesting_from(cmd.from_) + self.keep = cmd.from_ is not None + + def feature_handler(self, cmd): + """Process a FeatureCommand.""" + feature = cmd.feature_name + if feature not in commands.FEATURE_NAMES: + self.warning("feature %s is not supported - parsing may fail" + % (feature,)) + # These always pass through + self.keep = True + + def _print_command(self, cmd): + """Wrapper to avoid adding unnecessary blank lines.""" + text = repr(cmd) + self.outf.write(text) + if not text.endswith("\n"): + self.outf.write("\n") + + def _filter_filecommands(self, filecmd_iter): + """Return the filecommands filtered by includes & excludes. 
+ + :return: a list of FileCommand objects + """ + if self.includes is None and self.excludes is None: + return list(filecmd_iter()) + + # Do the filtering, adjusting for the new_root + result = [] + for fc in filecmd_iter(): + if (isinstance(fc, commands.FileModifyCommand) or + isinstance(fc, commands.FileDeleteCommand)): + if self._path_to_be_kept(fc.path): + fc.path = self._adjust_for_new_root(fc.path) + else: + continue + elif isinstance(fc, commands.FileDeleteAllCommand): + pass + elif isinstance(fc, commands.FileRenameCommand): + fc = self._convert_rename(fc) + elif isinstance(fc, commands.FileCopyCommand): + fc = self._convert_copy(fc) + else: + warning("cannot handle FileCommands of class %s - ignoring", + fc.__class__) + continue + if fc is not None: + result.append(fc) + return result + + def _path_to_be_kept(self, path): + """Does the given path pass the filtering criteria?""" + if self.excludes and (path in self.excludes + or osutils.is_inside_any(self.excludes, path)): + return False + if self.includes: + return (path in self.includes + or osutils.is_inside_any(self.includes, path)) + return True + + def _adjust_for_new_root(self, path): + """Adjust a path given the new root directory of the output.""" + if self.new_root is None: + return path + elif path.startswith(self.new_root): + return path[len(self.new_root):] + else: + return path + + def _find_interesting_parent(self, commit_ref): + while True: + if commit_ref in self.interesting_commits: + return commit_ref + parents = self.parents.get(commit_ref) + if not parents: + return None + commit_ref = parents[0] + + def _find_interesting_from(self, commit_ref): + if commit_ref is None: + return None + return self._find_interesting_parent(commit_ref) + + def _find_interesting_merges(self, commit_refs): + if commit_refs is None: + return None + merges = [] + for commit_ref in commit_refs: + parent = self._find_interesting_parent(commit_ref) + if parent is not None: + merges.append(parent) + if merges: + 
return merges + else: + return None + + def _convert_rename(self, fc): + """Convert a FileRenameCommand into a new FileCommand. + + :return: None if the rename is being ignored, otherwise a + new FileCommand based on the whether the old and new paths + are inside or outside of the interesting locations. + """ + old = fc.old_path + new = fc.new_path + keep_old = self._path_to_be_kept(old) + keep_new = self._path_to_be_kept(new) + if keep_old and keep_new: + fc.old_path = self._adjust_for_new_root(old) + fc.new_path = self._adjust_for_new_root(new) + return fc + elif keep_old: + # The file has been renamed to a non-interesting location. + # Delete it! + old = self._adjust_for_new_root(old) + return commands.FileDeleteCommand(old) + elif keep_new: + # The file has been renamed into an interesting location + # We really ought to add it but we don't currently buffer + # the contents of all previous files and probably never want + # to. Maybe fast-import-info needs to be extended to + # remember all renames and a config file can be passed + # into here ala fast-import? + warning("cannot turn rename of %s into an add of %s yet" % + (old, new)) + return None + + def _convert_copy(self, fc): + """Convert a FileCopyCommand into a new FileCommand. + + :return: None if the copy is being ignored, otherwise a + new FileCommand based on the whether the source and destination + paths are inside or outside of the interesting locations. + """ + src = fc.src_path + dest = fc.dest_path + keep_src = self._path_to_be_kept(src) + keep_dest = self._path_to_be_kept(dest) + if keep_src and keep_dest: + fc.src_path = self._adjust_for_new_root(src) + fc.dest_path = self._adjust_for_new_root(dest) + return fc + elif keep_src: + # The file has been copied to a non-interesting location. + # Ignore it! 
+ return None + elif keep_dest: + # The file has been copied into an interesting location + # We really ought to add it but we don't currently buffer + # the contents of all previous files and probably never want + # to. Maybe fast-import-info needs to be extended to + # remember all copies and a config file can be passed + # into here ala fast-import? + warning("cannot turn copy of %s into an add of %s yet" % + (src, dest)) + return None diff --git a/fastimport/processors/info_processor.py b/fastimport/processors/info_processor.py new file mode 100644 index 0000000..6d8c15d --- /dev/null +++ b/fastimport/processors/info_processor.py @@ -0,0 +1,281 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Import processor that dump stats about the input (and doesn't import).""" + + +from fastimport import ( + commands, + processor, + reftracker, + ) +from fastimport.helpers import ( + invert_dict, + invert_dictset, + ) + + +class InfoProcessor(processor.ImportProcessor): + """An import processor that dumps statistics about the input. + + No changes to the current repository are made. + + As well as providing useful information about an import + stream before importing it, this processor is useful for + benchmarking the speed at which data can be extracted from + the source. 
+ """ + + def __init__(self, params=None, verbose=0, outf=None): + processor.ImportProcessor.__init__(self, params, verbose, + outf=outf) + + def pre_process(self): + self.note("Collecting statistics ...") + # Init statistics + self.cmd_counts = {} + for cmd in commands.COMMAND_NAMES: + self.cmd_counts[cmd] = 0 + self.file_cmd_counts = {} + for fc in commands.FILE_COMMAND_NAMES: + self.file_cmd_counts[fc] = 0 + self.parent_counts = {} + self.max_parent_count = 0 + self.committers = set() + self.separate_authors_found = False + self.symlinks_found = False + self.executables_found = False + self.sha_blob_references = False + self.lightweight_tags = 0 + # Blob usage tracking + self.blobs = {} + for usage in ['new', 'used', 'unknown', 'unmarked']: + self.blobs[usage] = set() + self.blob_ref_counts = {} + # Head tracking + self.reftracker = reftracker.RefTracker() + # Stuff to cache: a map from mark to # of times that mark is merged + self.merges = {} + # Stuff to cache: these are maps from mark to sets + self.rename_old_paths = {} + self.copy_source_paths = {} + + def post_process(self): + # Dump statistics + cmd_names = commands.COMMAND_NAMES + fc_names = commands.FILE_COMMAND_NAMES + self._dump_stats_group("Command counts", + [(c, self.cmd_counts[c]) for c in cmd_names], str) + self._dump_stats_group("File command counts", + [(c, self.file_cmd_counts[c]) for c in fc_names], str) + + # Commit stats + if self.cmd_counts['commit']: + p_items = [] + for i in xrange(0, self.max_parent_count + 1): + if i in self.parent_counts: + count = self.parent_counts[i] + p_items.append(("parents-%d" % i, count)) + merges_count = len(self.merges.keys()) + p_items.append(('total revisions merged', merges_count)) + flags = { + 'separate authors found': self.separate_authors_found, + 'executables': self.executables_found, + 'symlinks': self.symlinks_found, + 'blobs referenced by SHA': self.sha_blob_references, + } + self._dump_stats_group("Parent counts", p_items, str) + 
self._dump_stats_group("Commit analysis", flags.iteritems(), _found) + heads = invert_dictset(self.reftracker.heads) + self._dump_stats_group("Head analysis", heads.iteritems(), None, + _iterable_as_config_list) + # note("\t%d\t%s" % (len(self.committers), 'unique committers')) + self._dump_stats_group("Merges", self.merges.iteritems(), None) + # We only show the rename old path and copy source paths when -vv + # (verbose=2) is specified. The output here for mysql's data can't + # be parsed currently so this bit of code needs more work anyhow .. + if self.verbose >= 2: + self._dump_stats_group("Rename old paths", + self.rename_old_paths.iteritems(), len, + _iterable_as_config_list) + self._dump_stats_group("Copy source paths", + self.copy_source_paths.iteritems(), len, + _iterable_as_config_list) + + # Blob stats + if self.cmd_counts['blob']: + # In verbose mode, don't list every blob used + if self.verbose: + del self.blobs['used'] + self._dump_stats_group("Blob usage tracking", + self.blobs.iteritems(), len, _iterable_as_config_list) + if self.blob_ref_counts: + blobs_by_count = invert_dict(self.blob_ref_counts) + blob_items = blobs_by_count.items() + blob_items.sort() + self._dump_stats_group("Blob reference counts", + blob_items, len, _iterable_as_config_list) + + # Other stats + if self.cmd_counts['reset']: + reset_stats = { + 'lightweight tags': self.lightweight_tags, + } + self._dump_stats_group("Reset analysis", reset_stats.iteritems()) + + def _dump_stats_group(self, title, items, normal_formatter=None, + verbose_formatter=None): + """Dump a statistics group. + + In verbose mode, do so as a config file so + that other processors can load the information if they want to. 
+ :param normal_formatter: the callable to apply to the value + before displaying it in normal mode + :param verbose_formatter: the callable to apply to the value + before displaying it in verbose mode + """ + if self.verbose: + self.outf.write("[%s]\n" % (title,)) + for name, value in items: + if verbose_formatter is not None: + value = verbose_formatter(value) + if type(name) == str: + name = name.replace(' ', '-') + self.outf.write("%s = %s\n" % (name, value)) + self.outf.write("\n") + else: + self.outf.write("%s:\n" % (title,)) + for name, value in items: + if normal_formatter is not None: + value = normal_formatter(value) + self.outf.write("\t%s\t%s\n" % (value, name)) + + def progress_handler(self, cmd): + """Process a ProgressCommand.""" + self.cmd_counts[cmd.name] += 1 + + def blob_handler(self, cmd): + """Process a BlobCommand.""" + self.cmd_counts[cmd.name] += 1 + if cmd.mark is None: + self.blobs['unmarked'].add(cmd.id) + else: + self.blobs['new'].add(cmd.id) + # Marks can be re-used so remove it from used if already there. + # Note: we definitely do NOT want to remove it from multi if + # it's already in that set. 
+ try: + self.blobs['used'].remove(cmd.id) + except KeyError: + pass + + def checkpoint_handler(self, cmd): + """Process a CheckpointCommand.""" + self.cmd_counts[cmd.name] += 1 + + def commit_handler(self, cmd): + """Process a CommitCommand.""" + self.cmd_counts[cmd.name] += 1 + self.committers.add(cmd.committer) + if cmd.author is not None: + self.separate_authors_found = True + for fc in cmd.file_iter(): + self.file_cmd_counts[fc.name] += 1 + if isinstance(fc, commands.FileModifyCommand): + if fc.is_executable: + self.executables_found = True + if fc.kind == commands.SYMLINK_KIND: + self.symlinks_found = True + if fc.dataref is not None: + if fc.dataref[0] == ':': + self._track_blob(fc.dataref) + else: + self.sha_blob_references = True + elif isinstance(fc, commands.FileRenameCommand): + self.rename_old_paths.setdefault(cmd.id, set()).add(fc.old_path) + elif isinstance(fc, commands.FileCopyCommand): + self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path) + + # Track the heads + parents = self.reftracker.track_heads(cmd) + + # Track the parent counts + parent_count = len(parents) + if self.parent_counts.has_key(parent_count): + self.parent_counts[parent_count] += 1 + else: + self.parent_counts[parent_count] = 1 + if parent_count > self.max_parent_count: + self.max_parent_count = parent_count + + # Remember the merges + if cmd.merges: + #self.merges.setdefault(cmd.ref, set()).update(cmd.merges) + for merge in cmd.merges: + if merge in self.merges: + self.merges[merge] += 1 + else: + self.merges[merge] = 1 + + def reset_handler(self, cmd): + """Process a ResetCommand.""" + self.cmd_counts[cmd.name] += 1 + if cmd.ref.startswith('refs/tags/'): + self.lightweight_tags += 1 + else: + if cmd.from_ is not None: + self.reftracker.track_heads_for_ref( + cmd.ref, cmd.from_) + + def tag_handler(self, cmd): + """Process a TagCommand.""" + self.cmd_counts[cmd.name] += 1 + + def feature_handler(self, cmd): + """Process a FeatureCommand.""" + 
self.cmd_counts[cmd.name] += 1 + feature = cmd.feature_name + if feature not in commands.FEATURE_NAMES: + self.warning("feature %s is not supported - parsing may fail" + % (feature,)) + + def _track_blob(self, mark): + if mark in self.blob_ref_counts: + self.blob_ref_counts[mark] += 1 + pass + elif mark in self.blobs['used']: + self.blob_ref_counts[mark] = 2 + self.blobs['used'].remove(mark) + elif mark in self.blobs['new']: + self.blobs['used'].add(mark) + self.blobs['new'].remove(mark) + else: + self.blobs['unknown'].add(mark) + +def _found(b): + """Format a found boolean as a string.""" + return ['no', 'found'][b] + +def _iterable_as_config_list(s): + """Format an iterable as a sequence of comma-separated strings. + + To match what ConfigObj expects, a single item list has a trailing comma. + """ + items = sorted(s) + if len(items) == 1: + return "%s," % (items[0],) + else: + return ", ".join(items) diff --git a/fastimport/processors/query_processor.py b/fastimport/processors/query_processor.py new file mode 100644 index 0000000..76250e5 --- /dev/null +++ b/fastimport/processors/query_processor.py @@ -0,0 +1,96 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Import processor that queries the input (and doesn't import).""" + + +from fastimport import ( + commands, + processor, + ) + + +class QueryProcessor(processor.ImportProcessor): + """An import processor that queries the input. + + No changes to the current repository are made. + """ + + known_params = commands.COMMAND_NAMES + commands.FILE_COMMAND_NAMES + \ + ['commit-mark'] + + def __init__(self, params=None, verbose=False): + processor.ImportProcessor.__init__(self, params, verbose) + self.parsed_params = {} + self.interesting_commit = None + self._finished = False + if params: + if 'commit-mark' in params: + self.interesting_commit = params['commit-mark'] + del params['commit-mark'] + for name, value in params.iteritems(): + if value == 1: + # All fields + fields = None + else: + fields = value.split(',') + self.parsed_params[name] = fields + + def pre_handler(self, cmd): + """Hook for logic before each handler starts.""" + if self._finished: + return + if self.interesting_commit and cmd.name == 'commit': + if cmd.mark == self.interesting_commit: + print cmd.to_string() + self._finished = True + return + if self.parsed_params.has_key(cmd.name): + fields = self.parsed_params[cmd.name] + str = cmd.dump_str(fields, self.parsed_params, self.verbose) + print "%s" % (str,) + + def progress_handler(self, cmd): + """Process a ProgressCommand.""" + pass + + def blob_handler(self, cmd): + """Process a BlobCommand.""" + pass + + def checkpoint_handler(self, cmd): + """Process a CheckpointCommand.""" + pass + + def commit_handler(self, cmd): + """Process a CommitCommand.""" + for fc in cmd.file_iter(): + pass + + def reset_handler(self, cmd): + """Process a ResetCommand.""" + pass + + def tag_handler(self, cmd): + """Process a TagCommand.""" + pass + + def 
feature_handler(self, cmd): + """Process a FeatureCommand.""" + feature = cmd.feature_name + if feature not in commands.FEATURE_NAMES: + self.warning("feature %s is not supported - parsing may fail" + % (feature,)) diff --git a/fastimport/reftracker.py b/fastimport/reftracker.py new file mode 100644 index 0000000..3862180 --- /dev/null +++ b/fastimport/reftracker.py @@ -0,0 +1,67 @@ +# Copyright (C) 2009 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +"""Tracker of refs.""" + + +class RefTracker(object): + + def __init__(self): + # Head tracking: last ref, last id per ref & map of commit ids to ref*s* + self.last_ref = None + self.last_ids = {} + self.heads = {} + + def dump_stats(self, note): + self._show_stats_for(self.last_ids, "last-ids", note=note) + self._show_stats_for(self.heads, "heads", note=note) + + def clear(self): + self.last_ids.clear() + self.heads.clear() + + def track_heads(self, cmd): + """Track the repository heads given a CommitCommand. 
+ + :param cmd: the CommitCommand + :return: the list of parents in terms of commit-ids + """ + # Get the true set of parents + if cmd.from_ is not None: + parents = [cmd.from_] + else: + last_id = self.last_ids.get(cmd.ref) + if last_id is not None: + parents = [last_id] + else: + parents = [] + parents.extend(cmd.merges) + + # Track the heads + self.track_heads_for_ref(cmd.ref, cmd.id, parents) + return parents + + def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None): + if parents is not None: + for parent in parents: + if parent in self.heads: + del self.heads[parent] + self.heads.setdefault(cmd_id, set()).add(cmd_ref) + self.last_ids[cmd_ref] = cmd_id + self.last_ref = cmd_ref + + diff --git a/fastimport/tests/__init__.py b/fastimport/tests/__init__.py index 2d80157..3a8e69f 100644 --- a/fastimport/tests/__init__.py +++ b/fastimport/tests/__init__.py @@ -26,6 +26,10 @@ def test_suite(): names = [ 'test_commands', 'test_errors', + 'test_filter_processor', + 'test_helpers', + 'test_head_tracking', + 'test_parser', ] module_names = ['fastimport.tests.' + name for name in names] result = unittest.TestSuite() diff --git a/fastimport/tests/test_filter_processor.py b/fastimport/tests/test_filter_processor.py new file mode 100644 index 0000000..af107d3 --- /dev/null +++ b/fastimport/tests/test_filter_processor.py @@ -0,0 +1,879 @@ +# Copyright (C) 2009 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Test FilterProcessor""" + +from cStringIO import StringIO + +from testtools import TestCase + +from fastimport import ( + parser, + ) + +from fastimport.processors import ( + filter_processor, + ) + + +# A sample input stream containing all (top level) import commands +_SAMPLE_ALL = \ +"""blob +mark :1 +data 4 +foo +commit refs/heads/master +mark :2 +committer Joe <joe@example.com> 1234567890 +1000 +data 14 +Initial import +M 644 :1 COPYING +checkpoint +progress first import done +reset refs/remote/origin/master +from :2 +tag v0.1 +from :2 +tagger Joe <joe@example.com> 1234567890 +1000 +data 12 +release v0.1 +""" + + +# A sample input stream creating the following tree: +# +# NEWS +# doc/README.txt +# doc/index.txt +_SAMPLE_WITH_DIR = \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 doc/README.txt +blob +mark :2 +data 17 +Life +is +good ... +commit refs/heads/master +mark :101 +committer a <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :2 NEWS +blob +mark :3 +data 19 +Welcome! 
+my friend +blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :101 +M 644 :3 doc/README.txt +M 644 :4 doc/index.txt +""" + + +class TestCaseWithFiltering(TestCase): + + def assertFiltering(self, input, params, expected): + outf = StringIO() + proc = filter_processor.FilterProcessor( + params=params) + proc.outf = outf + s = StringIO(input) + p = parser.ImportParser(s) + proc.process(p.iter_commands) + out = outf.getvalue() + self.assertEquals(expected, out) + + +class TestNoFiltering(TestCaseWithFiltering): + + def test_params_not_given(self): + self.assertFiltering(_SAMPLE_ALL, None, _SAMPLE_ALL) + + def test_params_are_none(self): + params = {'include_paths': None, 'exclude_paths': None} + self.assertFiltering(_SAMPLE_ALL, params, _SAMPLE_ALL) + + +class TestIncludePaths(TestCaseWithFiltering): + + def test_file_in_root(self): + # Things to note: + # * only referenced blobs are retained + # * from clause is dropped from the first command + params = {'include_paths': ['NEWS']} + self.assertFiltering(_SAMPLE_WITH_DIR, params, \ +"""blob +mark :2 +data 17 +Life +is +good ... +commit refs/heads/master +mark :101 +committer a <b@c> 1234798653 +0000 +data 8 +test +ing +M 644 :2 NEWS +""") + + def test_file_in_subdir(self): + # Additional things to note: + # * new root: path is now index.txt, not doc/index.txt + # * other files changed in matching commits are excluded + params = {'include_paths': ['doc/index.txt']} + self.assertFiltering(_SAMPLE_WITH_DIR, params, \ +"""blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +M 644 :4 index.txt +""") + + def test_file_with_changes(self): + # Additional things to note: + # * from updated to reference parents in the output + params = {'include_paths': ['doc/README.txt']} + self.assertFiltering(_SAMPLE_WITH_DIR, params, \ +"""blob +mark :1 +data 9 +Welcome! 
+commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 README.txt +blob +mark :3 +data 19 +Welcome! +my friend +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :3 README.txt +""") + + def test_subdir(self): + params = {'include_paths': ['doc/']} + self.assertFiltering(_SAMPLE_WITH_DIR, params, \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 README.txt +blob +mark :3 +data 19 +Welcome! +my friend +blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :3 README.txt +M 644 :4 index.txt +""") + + def test_multiple_files_in_subdir(self): + # The new root should be the subdrectory + params = {'include_paths': ['doc/README.txt', 'doc/index.txt']} + self.assertFiltering(_SAMPLE_WITH_DIR, params, \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 README.txt +blob +mark :3 +data 19 +Welcome! +my friend +blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :3 README.txt +M 644 :4 index.txt +""") + + +class TestExcludePaths(TestCaseWithFiltering): + + def test_file_in_root(self): + params = {'exclude_paths': ['NEWS']} + self.assertFiltering(_SAMPLE_WITH_DIR, params, \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 doc/README.txt +blob +mark :3 +data 19 +Welcome! 
+my friend +blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :3 doc/README.txt +M 644 :4 doc/index.txt +""") + + def test_file_in_subdir(self): + params = {'exclude_paths': ['doc/README.txt']} + self.assertFiltering(_SAMPLE_WITH_DIR, params, \ +"""blob +mark :2 +data 17 +Life +is +good ... +commit refs/heads/master +mark :101 +committer a <b@c> 1234798653 +0000 +data 8 +test +ing +M 644 :2 NEWS +blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :101 +M 644 :4 doc/index.txt +""") + + def test_subdir(self): + params = {'exclude_paths': ['doc/']} + self.assertFiltering(_SAMPLE_WITH_DIR, params, \ +"""blob +mark :2 +data 17 +Life +is +good ... +commit refs/heads/master +mark :101 +committer a <b@c> 1234798653 +0000 +data 8 +test +ing +M 644 :2 NEWS +""") + + def test_multple_files(self): + params = {'exclude_paths': ['doc/index.txt', 'NEWS']} + self.assertFiltering(_SAMPLE_WITH_DIR, params, \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 doc/README.txt +blob +mark :3 +data 19 +Welcome! +my friend +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :3 doc/README.txt +""") + + +class TestIncludeAndExcludePaths(TestCaseWithFiltering): + + def test_included_dir_and_excluded_file(self): + params = {'include_paths': ['doc/'], 'exclude_paths': ['doc/index.txt']} + self.assertFiltering(_SAMPLE_WITH_DIR, params, \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 README.txt +blob +mark :3 +data 19 +Welcome! 
+my friend +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :3 README.txt +""") + + +# A sample input stream creating the following tree: +# +# NEWS +# doc/README.txt +# doc/index.txt +# +# It then renames doc/README.txt => doc/README +_SAMPLE_WITH_RENAME_INSIDE = _SAMPLE_WITH_DIR + \ +"""commit refs/heads/master +mark :103 +committer d <b@c> 1234798653 +0000 +data 10 +move intro +from :102 +R doc/README.txt doc/README +""" + +# A sample input stream creating the following tree: +# +# NEWS +# doc/README.txt +# doc/index.txt +# +# It then renames doc/README.txt => README +_SAMPLE_WITH_RENAME_TO_OUTSIDE = _SAMPLE_WITH_DIR + \ +"""commit refs/heads/master +mark :103 +committer d <b@c> 1234798653 +0000 +data 10 +move intro +from :102 +R doc/README.txt README +""" + +# A sample input stream creating the following tree: +# +# NEWS +# doc/README.txt +# doc/index.txt +# +# It then renames NEWS => doc/NEWS +_SAMPLE_WITH_RENAME_TO_INSIDE = _SAMPLE_WITH_DIR + \ +"""commit refs/heads/master +mark :103 +committer d <b@c> 1234798653 +0000 +data 10 +move intro +from :102 +R NEWS doc/NEWS +""" + +class TestIncludePathsWithRenames(TestCaseWithFiltering): + + def test_rename_all_inside(self): + # These rename commands ought to be kept but adjusted for the new root + params = {'include_paths': ['doc/']} + self.assertFiltering(_SAMPLE_WITH_RENAME_INSIDE, params, \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 README.txt +blob +mark :3 +data 19 +Welcome! 
+my friend +blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :3 README.txt +M 644 :4 index.txt +commit refs/heads/master +mark :103 +committer d <b@c> 1234798653 +0000 +data 10 +move intro +from :102 +R README.txt README +""") + + def test_rename_to_outside(self): + # These rename commands become deletes + params = {'include_paths': ['doc/']} + self.assertFiltering(_SAMPLE_WITH_RENAME_TO_OUTSIDE, params, \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 README.txt +blob +mark :3 +data 19 +Welcome! +my friend +blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :3 README.txt +M 644 :4 index.txt +commit refs/heads/master +mark :103 +committer d <b@c> 1234798653 +0000 +data 10 +move intro +from :102 +D README.txt +""") + + def test_rename_to_inside(self): + # This ought to create a new file but doesn't yet + params = {'include_paths': ['doc/']} + self.assertFiltering(_SAMPLE_WITH_RENAME_TO_INSIDE, params, \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 README.txt +blob +mark :3 +data 19 +Welcome! 
+my friend +blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :3 README.txt +M 644 :4 index.txt +""") + + +# A sample input stream creating the following tree: +# +# NEWS +# doc/README.txt +# doc/index.txt +# +# It then copies doc/README.txt => doc/README +_SAMPLE_WITH_COPY_INSIDE = _SAMPLE_WITH_DIR + \ +"""commit refs/heads/master +mark :103 +committer d <b@c> 1234798653 +0000 +data 10 +move intro +from :102 +C doc/README.txt doc/README +""" + +# A sample input stream creating the following tree: +# +# NEWS +# doc/README.txt +# doc/index.txt +# +# It then copies doc/README.txt => README +_SAMPLE_WITH_COPY_TO_OUTSIDE = _SAMPLE_WITH_DIR + \ +"""commit refs/heads/master +mark :103 +committer d <b@c> 1234798653 +0000 +data 10 +move intro +from :102 +C doc/README.txt README +""" + +# A sample input stream creating the following tree: +# +# NEWS +# doc/README.txt +# doc/index.txt +# +# It then copies NEWS => doc/NEWS +_SAMPLE_WITH_COPY_TO_INSIDE = _SAMPLE_WITH_DIR + \ +"""commit refs/heads/master +mark :103 +committer d <b@c> 1234798653 +0000 +data 10 +move intro +from :102 +C NEWS doc/NEWS +""" + + +class TestIncludePathsWithCopies(TestCaseWithFiltering): + + def test_copy_all_inside(self): + # These copy commands ought to be kept but adjusted for the new root + params = {'include_paths': ['doc/']} + self.assertFiltering(_SAMPLE_WITH_COPY_INSIDE, params, \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 README.txt +blob +mark :3 +data 19 +Welcome! 
+my friend +blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :3 README.txt +M 644 :4 index.txt +commit refs/heads/master +mark :103 +committer d <b@c> 1234798653 +0000 +data 10 +move intro +from :102 +C README.txt README +""") + + def test_copy_to_outside(self): + # This can be ignored + params = {'include_paths': ['doc/']} + self.assertFiltering(_SAMPLE_WITH_COPY_TO_OUTSIDE, params, \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 README.txt +blob +mark :3 +data 19 +Welcome! +my friend +blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :3 README.txt +M 644 :4 index.txt +""") + + def test_copy_to_inside(self): + # This ought to create a new file but doesn't yet + params = {'include_paths': ['doc/']} + self.assertFiltering(_SAMPLE_WITH_COPY_TO_INSIDE, params, \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 README.txt +blob +mark :3 +data 19 +Welcome! +my friend +blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :3 README.txt +M 644 :4 index.txt +""") + + +# A sample input stream with deleteall's creating the following tree: +# +# NEWS +# doc/README.txt +# doc/index.txt +_SAMPLE_WITH_DELETEALL = \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +deleteall +M 644 :1 doc/README.txt +blob +mark :3 +data 19 +Welcome! 
+my friend +blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +deleteall +M 644 :3 doc/README.txt +M 644 :4 doc/index.txt +""" + + +class TestIncludePathsWithDeleteAll(TestCaseWithFiltering): + + def test_deleteall(self): + params = {'include_paths': ['doc/index.txt']} + self.assertFiltering(_SAMPLE_WITH_DELETEALL, params, \ +"""blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +deleteall +M 644 :4 index.txt +""") + + +_SAMPLE_WITH_TAGS = _SAMPLE_WITH_DIR + \ +"""tag v0.1 +from :100 +tagger d <b@c> 1234798653 +0000 +data 12 +release v0.1 +tag v0.2 +from :102 +tagger d <b@c> 1234798653 +0000 +data 12 +release v0.2 +""" + +class TestIncludePathsWithTags(TestCaseWithFiltering): + + def test_tag_retention(self): + # If a tag references a commit with a parent we kept, + # keep the tag but adjust 'from' accordingly. + # Otherwise, delete the tag command. + params = {'include_paths': ['NEWS']} + self.assertFiltering(_SAMPLE_WITH_TAGS, params, \ +"""blob +mark :2 +data 17 +Life +is +good ... +commit refs/heads/master +mark :101 +committer a <b@c> 1234798653 +0000 +data 8 +test +ing +M 644 :2 NEWS +tag v0.2 +from :101 +tagger d <b@c> 1234798653 +0000 +data 12 +release v0.2 +""") + + +_SAMPLE_WITH_RESETS = _SAMPLE_WITH_DIR + \ +"""reset refs/heads/foo +reset refs/heads/bar +from :102 +""" + +class TestIncludePathsWithResets(TestCaseWithFiltering): + + def test_reset_retention(self): + # Resets init'ing a branch (without a from) are passed through. + # If a reset references a commit with a parent we kept, + # keep the reset but adjust 'from' accordingly. + params = {'include_paths': ['NEWS']} + self.assertFiltering(_SAMPLE_WITH_RESETS, params, \ +"""blob +mark :2 +data 17 +Life +is +good ... 
+commit refs/heads/master +mark :101 +committer a <b@c> 1234798653 +0000 +data 8 +test +ing +M 644 :2 NEWS +reset refs/heads/foo +reset refs/heads/bar +from :101 +""") diff --git a/fastimport/tests/test_head_tracking.py b/fastimport/tests/test_head_tracking.py new file mode 100644 index 0000000..7a1ba64 --- /dev/null +++ b/fastimport/tests/test_head_tracking.py @@ -0,0 +1,260 @@ +# Copyright (C) 2009 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Test tracking of heads""" + +from cStringIO import StringIO + +from fastimport import ( + commands, + parser, + ) + +import testtools + +from fastimport.reftracker import ( + RefTracker, + ) + + +# A sample input stream that only adds files to a branch +_SAMPLE_MAINLINE = \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 doc/README.txt +blob +mark :2 +data 17 +Life +is +good ... +commit refs/heads/master +mark :101 +committer a <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :2 NEWS +blob +mark :3 +data 19 +Welcome! 
+my friend +blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :101 +M 644 :3 doc/README.txt +M 644 :4 doc/index.txt +""" + +# A sample input stream that adds files to two branches +_SAMPLE_TWO_HEADS = \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 doc/README.txt +blob +mark :2 +data 17 +Life +is +good ... +commit refs/heads/mybranch +mark :101 +committer a <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :2 NEWS +blob +mark :3 +data 19 +Welcome! +my friend +blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :3 doc/README.txt +M 644 :4 doc/index.txt +""" + +# A sample input stream that adds files to two branches and then merges them +_SAMPLE_TWO_BRANCHES_MERGED = \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 doc/README.txt +blob +mark :2 +data 17 +Life +is +good ... +commit refs/heads/mybranch +mark :101 +committer a <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :2 NEWS +blob +mark :3 +data 19 +Welcome! +my friend +blob +mark :4 +data 11 +== Docs == +commit refs/heads/master +mark :102 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +M 644 :3 doc/README.txt +M 644 :4 doc/index.txt +commit refs/heads/master +mark :103 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :102 +merge :101 +D doc/index.txt +""" + +# A sample input stream that contains a reset +_SAMPLE_RESET = \ +"""blob +mark :1 +data 9 +Welcome! 
+commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 doc/README.txt +reset refs/remotes/origin/master +from :100 +""" + +# A sample input stream that contains a reset and more commits +_SAMPLE_RESET_WITH_MORE_COMMITS = \ +"""blob +mark :1 +data 9 +Welcome! +commit refs/heads/master +mark :100 +committer a <b@c> 1234798653 +0000 +data 4 +test +M 644 :1 doc/README.txt +reset refs/remotes/origin/master +from :100 +commit refs/remotes/origin/master +mark :101 +committer d <b@c> 1234798653 +0000 +data 8 +test +ing +from :100 +D doc/README.txt +""" + +class TestHeadTracking(testtools.TestCase): + + def assertHeads(self, input, expected): + s = StringIO(input) + p = parser.ImportParser(s) + reftracker = RefTracker() + for cmd in p.iter_commands(): + if isinstance(cmd, commands.CommitCommand): + reftracker.track_heads(cmd) + # eat the file commands + list(cmd.file_iter()) + elif isinstance(cmd, commands.ResetCommand): + if cmd.from_ is not None: + reftracker.track_heads_for_ref(cmd.ref, cmd.from_) + self.assertEqual(reftracker.heads, expected) + + def test_mainline(self): + self.assertHeads(_SAMPLE_MAINLINE, { + ':102': set(['refs/heads/master']), + }) + + def test_two_heads(self): + self.assertHeads(_SAMPLE_TWO_HEADS, { + ':101': set(['refs/heads/mybranch']), + ':102': set(['refs/heads/master']), + }) + + def test_two_branches_merged(self): + self.assertHeads(_SAMPLE_TWO_BRANCHES_MERGED, { + ':103': set(['refs/heads/master']), + }) + + def test_reset(self): + self.assertHeads(_SAMPLE_RESET, { + ':100': set(['refs/heads/master', 'refs/remotes/origin/master']), + }) + + def test_reset_with_more_commits(self): + self.assertHeads(_SAMPLE_RESET_WITH_MORE_COMMITS, { + ':101': set(['refs/remotes/origin/master']), + }) diff --git a/fastimport/tests/test_helpers.py b/fastimport/tests/test_helpers.py new file mode 100644 index 0000000..639e436 --- /dev/null +++ b/fastimport/tests/test_helpers.py @@ -0,0 +1,56 @@ +# Copyright (C) 2009 
Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Test the helper functions.""" + +import testtools + +from fastimport import ( + helpers, + ) + + +class TestCommonDirectory(testtools.TestCase): + + def test_no_paths(self): + c = helpers.common_directory(None) + self.assertEqual(c, None) + c = helpers.common_directory([]) + self.assertEqual(c, None) + + def test_one_path(self): + c = helpers.common_directory(['foo']) + self.assertEqual(c, '') + c = helpers.common_directory(['foo/']) + self.assertEqual(c, 'foo/') + c = helpers.common_directory(['foo/bar']) + self.assertEqual(c, 'foo/') + + def test_two_paths(self): + c = helpers.common_directory(['foo', 'bar']) + self.assertEqual(c, '') + c = helpers.common_directory(['foo/', 'bar']) + self.assertEqual(c, '') + c = helpers.common_directory(['foo/', 'foo/bar']) + self.assertEqual(c, 'foo/') + c = helpers.common_directory(['foo/bar/x', 'foo/bar/y']) + self.assertEqual(c, 'foo/bar/') + c = helpers.common_directory(['foo/bar/aa_x', 'foo/bar/aa_y']) + self.assertEqual(c, 'foo/bar/') + + def test_lots_of_paths(self): + c = helpers.common_directory(['foo/bar/x', 'foo/bar/y', 'foo/bar/z']) + self.assertEqual(c, 'foo/bar/') diff --git a/fastimport/tests/test_parser.py b/fastimport/tests/test_parser.py new file mode 100644 index 0000000..267ec13 --- 
/dev/null +++ b/fastimport/tests/test_parser.py @@ -0,0 +1,284 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Test the Import parsing""" + +import StringIO + +import testtools + +from fastimport import ( + errors, + parser, + ) + + +class TestLineBasedParser(testtools.TestCase): + + def test_push_line(self): + s = StringIO.StringIO("foo\nbar\nbaz\n") + p = parser.LineBasedParser(s) + self.assertEqual('foo', p.next_line()) + self.assertEqual('bar', p.next_line()) + p.push_line('bar') + self.assertEqual('bar', p.next_line()) + self.assertEqual('baz', p.next_line()) + self.assertEqual(None, p.next_line()) + + def test_read_bytes(self): + s = StringIO.StringIO("foo\nbar\nbaz\n") + p = parser.LineBasedParser(s) + self.assertEqual('fo', p.read_bytes(2)) + self.assertEqual('o\nb', p.read_bytes(3)) + self.assertEqual('ar', p.next_line()) + # Test that the line buffer is ignored + p.push_line('bar') + self.assertEqual('baz', p.read_bytes(3)) + # Test missing bytes + self.assertRaises(errors.MissingBytes, p.read_bytes, 10) + + def test_read_until(self): + # TODO + return + s = StringIO.StringIO("foo\nbar\nbaz\nabc\ndef\nghi\n") + p = parser.LineBasedParser(s) + self.assertEqual('foo\nbar', p.read_until('baz')) + self.assertEqual('abc', p.next_line()) + # Test that the line 
buffer is ignored + p.push_line('abc') + self.assertEqual('def', p.read_until('ghi')) + # Test missing terminator + self.assertRaises(errors.MissingTerminator, p.read_until, '>>>') + + +# Sample text +_sample_import_text = """ +progress completed +# Test blob formats +blob +mark :1 +data 4 +aaaablob +data 5 +bbbbb +# Commit formats +commit refs/heads/master +mark :2 +committer bugs bunny <bugs@bunny.org> now +data 14 +initial import +M 644 inline README +data 18 +Welcome from bugs +commit refs/heads/master +committer <bugs@bunny.org> now +data 13 +second commit +from :2 +M 644 inline README +data 23 +Welcome from bugs, etc. +# Miscellaneous +checkpoint +progress completed +# Test a commit without sub-commands (bug #351717) +commit refs/heads/master +mark :3 +author <bugs@bunny.org> now +committer <bugs@bunny.org> now +data 20 +first commit, empty +# Test a commit with a heredoc-style (delimited_data) messsage (bug #400960) +commit refs/heads/master +mark :4 +author <bugs@bunny.org> now +committer <bugs@bunny.org> now +data <<EOF +Commit with heredoc-style message +EOF +# Test a "submodule"/tree-reference +commit refs/heads/master +mark :5 +author <bugs@bunny.org> now +committer <bugs@bunny.org> now +data 15 +submodule test +M 160000 rev-id tree-id +# Test features +feature whatever +feature foo=bar +# Test commit with properties +commit refs/heads/master +mark :6 +committer <bugs@bunny.org> now +data 18 +test of properties +property p1 +property p2 5 hohum +property p3 16 alpha +beta +gamma +property p4 8 whatever +# Test a commit with multiple authors +commit refs/heads/master +mark :7 +author Fluffy <fluffy@bunny.org> now +author Daffy <daffy@duck.org> now +author Donald <donald@duck.org> now +committer <bugs@bunny.org> now +data 17 +multi-author test +""" + + +class TestImportParser(testtools.TestCase): + + def test_iter_commands(self): + s = StringIO.StringIO(_sample_import_text) + p = parser.ImportParser(s) + result = [] + for cmd in p.iter_commands(): + 
result.append(cmd) + if cmd.name == 'commit': + for fc in cmd.file_iter(): + result.append(fc) + self.assertEqual(len(result), 17) + cmd1 = result.pop(0) + self.assertEqual('progress', cmd1.name) + self.assertEqual('completed', cmd1.message) + cmd2 = result.pop(0) + self.assertEqual('blob', cmd2.name) + self.assertEqual('1', cmd2.mark) + self.assertEqual(':1', cmd2.id) + self.assertEqual('aaaa', cmd2.data) + self.assertEqual(4, cmd2.lineno) + cmd3 = result.pop(0) + self.assertEqual('blob', cmd3.name) + self.assertEqual('@7', cmd3.id) + self.assertEqual(None, cmd3.mark) + self.assertEqual('bbbbb', cmd3.data) + self.assertEqual(7, cmd3.lineno) + cmd4 = result.pop(0) + self.assertEqual('commit', cmd4.name) + self.assertEqual('2', cmd4.mark) + self.assertEqual(':2', cmd4.id) + self.assertEqual('initial import', cmd4.message) + self.assertEqual('bugs bunny', cmd4.committer[0]) + self.assertEqual('bugs@bunny.org', cmd4.committer[1]) + # FIXME: check timestamp and timezone as well + self.assertEqual(None, cmd4.author) + self.assertEqual(11, cmd4.lineno) + self.assertEqual('refs/heads/master', cmd4.ref) + self.assertEqual(None, cmd4.from_) + self.assertEqual([], cmd4.merges) + file_cmd1 = result.pop(0) + self.assertEqual('filemodify', file_cmd1.name) + self.assertEqual('README', file_cmd1.path) + self.assertEqual('file', file_cmd1.kind) + self.assertEqual(False, file_cmd1.is_executable) + self.assertEqual('Welcome from bugs\n', file_cmd1.data) + cmd5 = result.pop(0) + self.assertEqual('commit', cmd5.name) + self.assertEqual(None, cmd5.mark) + self.assertEqual('@19', cmd5.id) + self.assertEqual('second commit', cmd5.message) + self.assertEqual('', cmd5.committer[0]) + self.assertEqual('bugs@bunny.org', cmd5.committer[1]) + # FIXME: check timestamp and timezone as well + self.assertEqual(None, cmd5.author) + self.assertEqual(19, cmd5.lineno) + self.assertEqual('refs/heads/master', cmd5.ref) + self.assertEqual(':2', cmd5.from_) + self.assertEqual([], cmd5.merges) + file_cmd2 
= result.pop(0) + self.assertEqual('filemodify', file_cmd2.name) + self.assertEqual('README', file_cmd2.path) + self.assertEqual('file', file_cmd2.kind) + self.assertEqual(False, file_cmd2.is_executable) + self.assertEqual('Welcome from bugs, etc.', file_cmd2.data) + cmd6 = result.pop(0) + self.assertEqual(cmd6.name, 'checkpoint') + cmd7 = result.pop(0) + self.assertEqual('progress', cmd7.name) + self.assertEqual('completed', cmd7.message) + cmd = result.pop(0) + self.assertEqual('commit', cmd.name) + self.assertEqual('3', cmd.mark) + self.assertEqual(None, cmd.from_) + cmd = result.pop(0) + self.assertEqual('commit', cmd.name) + self.assertEqual('4', cmd.mark) + self.assertEqual('Commit with heredoc-style message\n', cmd.message) + cmd = result.pop(0) + self.assertEqual('commit', cmd.name) + self.assertEqual('5', cmd.mark) + self.assertEqual('submodule test\n', cmd.message) + file_cmd1 = result.pop(0) + self.assertEqual('filemodify', file_cmd1.name) + self.assertEqual('tree-id', file_cmd1.path) + self.assertEqual('tree-reference', file_cmd1.kind) + self.assertEqual(False, file_cmd1.is_executable) + self.assertEqual("rev-id", file_cmd1.dataref) + cmd = result.pop(0) + self.assertEqual('feature', cmd.name) + self.assertEqual('whatever', cmd.feature_name) + self.assertEqual(None, cmd.value) + cmd = result.pop(0) + self.assertEqual('feature', cmd.name) + self.assertEqual('foo', cmd.feature_name) + self.assertEqual('bar', cmd.value) + cmd = result.pop(0) + self.assertEqual('commit', cmd.name) + self.assertEqual('6', cmd.mark) + self.assertEqual('test of properties', cmd.message) + self.assertEqual({ + 'p1': None, + 'p2': u'hohum', + 'p3': u'alpha\nbeta\ngamma', + 'p4': u'whatever', + }, cmd.properties) + cmd = result.pop(0) + self.assertEqual('commit', cmd.name) + self.assertEqual('7', cmd.mark) + self.assertEqual('multi-author test', cmd.message) + self.assertEqual('', cmd.committer[0]) + self.assertEqual('bugs@bunny.org', cmd.committer[1]) + 
self.assertEqual('Fluffy', cmd.author[0]) + self.assertEqual('fluffy@bunny.org', cmd.author[1]) + self.assertEqual('Daffy', cmd.more_authors[0][0]) + self.assertEqual('daffy@duck.org', cmd.more_authors[0][1]) + self.assertEqual('Donald', cmd.more_authors[1][0]) + self.assertEqual('donald@duck.org', cmd.more_authors[1][1]) + + +class TestStringParsing(testtools.TestCase): + + def test_unquote(self): + s = r'hello \"sweet\" wo\\r\tld' + self.assertEquals(r'hello "sweet" wo\r' + "\tld", + parser._unquote_c_string(s)) + + +class TestPathPairParsing(testtools.TestCase): + + def test_path_pair_simple(self): + p = parser.ImportParser("") + self.assertEqual(['foo', 'bar'], p._path_pair("foo bar")) + + def test_path_pair_spaces_in_first(self): + p = parser.ImportParser("") + self.assertEqual(['foo bar', 'baz'], + p._path_pair('"foo bar" baz')) |