summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJelmer Vernooij <jelmer@samba.org>2012-02-28 15:00:56 +0100
committerJelmer Vernooij <jelmer@samba.org>2012-02-28 15:00:56 +0100
commitf231d5373c39b3c85fed6c83a2f72acdce58e43d (patch)
tree924b950fc21a5b7337f2b3e07a2352fd273030ef
parent8105a9b226040ec7a9dd509bf21ca922615c49b2 (diff)
downloadbzr-fastimport-f231d5373c39b3c85fed6c83a2f72acdce58e43d.tar.gz
Reimport some modules removed from python-fastimport 0.9.2.
-rw-r--r--NEWS3
-rw-r--r--cache_manager.py6
-rw-r--r--cmds.py4
-rw-r--r--idmapfile.py64
-rw-r--r--processors/generic_processor.py2
-rw-r--r--processors/info_processor.py281
-rw-r--r--reftracker.py66
-rw-r--r--tests/test_head_tracking.py259
8 files changed, 679 insertions, 6 deletions
diff --git a/NEWS b/NEWS
index 3387c53..582762b 100644
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,9 @@ Compatibility
* Avoid using Tree.inventory directly, which is deprecated in bzr 2.6.
(Jelmer Vernooij)
+* Reimport some modules removed from python-fastimport 0.9.2.
+ (Jelmer Vernooij, #693507)
+
0.12 2012-02-09
Bug fixes
diff --git a/cache_manager.py b/cache_manager.py
index 9a0b651..e0f7f90 100644
--- a/cache_manager.py
+++ b/cache_manager.py
@@ -25,12 +25,12 @@ from bzrlib import lru_cache, trace
from bzrlib.plugins.fastimport import (
branch_mapper,
)
+from bzrlib.plugins.fastimport.reftracker import (
+ RefTracker,
+ )
from fastimport.helpers import (
single_plural,
)
-from fastimport.reftracker import (
- RefTracker,
- )
class _Cleanup(object):
diff --git a/cmds.py b/cmds.py
index d364b00..d90393a 100644
--- a/cmds.py
+++ b/cmds.py
@@ -307,7 +307,7 @@ class cmd_fast_import(Command):
def _generate_info(self, source):
from cStringIO import StringIO
from fastimport import parser
- from fastimport.processors import info_processor
+ from bzrlib.plugins.fastimport.processors import info_processor
stream = _get_source_stream(source)
output = StringIO()
try:
@@ -477,7 +477,7 @@ class cmd_fast_import_info(Command):
takes_options = ['verbose']
def run(self, source, verbose=False):
load_fastimport()
- from fastimport.processors import info_processor
+ from bzrlib.plugins.fastimport.processors import info_processor
return _run(source, info_processor.InfoProcessor, verbose=verbose)
diff --git a/idmapfile.py b/idmapfile.py
new file mode 100644
index 0000000..669dbce
--- /dev/null
+++ b/idmapfile.py
@@ -0,0 +1,64 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Routines for saving and loading the id-map file."""
+
+import os
+
+
+def save_id_map(filename, revision_ids):
+ """Save the mapping of commit ids to revision ids to a file.
+
+ Throws the usual exceptions if the file cannot be opened,
+ written to or closed.
+
+ :param filename: name of the file to save the data to
+ :param revision_ids: a dictionary of commit ids to revision ids.
+ """
+ f = open(filename, 'wb')
+ try:
+ for commit_id, rev_id in revision_ids.iteritems():
+ f.write("%s %s\n" % (commit_id, rev_id))
+ f.flush()
+ finally:
+ f.close()
+
+
+def load_id_map(filename):
+ """Load the mapping of commit ids to revision ids from a file.
+
+ If the file does not exist, an empty result is returned.
+ If the file does exist but cannot be opened, read or closed,
+ the normal exceptions are thrown.
+
+ NOTE: It is assumed that commit-ids do not have embedded spaces.
+
+ :param filename: name of the file to save the data to
+ :result: map, count where:
+ map = a dictionary of commit ids to revision ids;
+ count = the number of keys in map
+ """
+ result = {}
+ count = 0
+ if os.path.exists(filename):
+ f = open(filename)
+ try:
+ for line in f:
+ parts = line[:-1].split(' ', 1)
+ result[parts[0]] = parts[1]
+ count += 1
+ finally:
+ f.close()
+ return result, count
diff --git a/processors/generic_processor.py b/processors/generic_processor.py
index 4ab0ac3..80b0c5d 100644
--- a/processors/generic_processor.py
+++ b/processors/generic_processor.py
@@ -40,6 +40,7 @@ except ImportError:
from bzrlib.plugins.fastimport import (
branch_updater,
cache_manager,
+ idmapfile,
marks_file,
revision_store,
)
@@ -47,7 +48,6 @@ from fastimport import (
commands,
errors as plugin_errors,
helpers,
- idmapfile,
processor,
)
diff --git a/processors/info_processor.py b/processors/info_processor.py
new file mode 100644
index 0000000..bb162e7
--- /dev/null
+++ b/processors/info_processor.py
@@ -0,0 +1,281 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Import processor that dumps stats about the input (and doesn't import)."""
+
+from bzrlib.plugins.fastimport import (
+ reftracker,
+ )
+from fastimport import (
+ commands,
+ processor,
+ )
+from fastimport.helpers import (
+ invert_dict,
+ invert_dictset,
+ )
+import stat
+
+
+class InfoProcessor(processor.ImportProcessor):
+ """An import processor that dumps statistics about the input.
+
+ No changes to the current repository are made.
+
+ As well as providing useful information about an import
+ stream before importing it, this processor is useful for
+ benchmarking the speed at which data can be extracted from
+ the source.
+ """
+
+ def __init__(self, params=None, verbose=0, outf=None):
+ processor.ImportProcessor.__init__(self, params, verbose,
+ outf=outf)
+
+ def pre_process(self):
+ # Init statistics
+ self.cmd_counts = {}
+ for cmd in commands.COMMAND_NAMES:
+ self.cmd_counts[cmd] = 0
+ self.file_cmd_counts = {}
+ for fc in commands.FILE_COMMAND_NAMES:
+ self.file_cmd_counts[fc] = 0
+ self.parent_counts = {}
+ self.max_parent_count = 0
+ self.committers = set()
+ self.separate_authors_found = False
+ self.symlinks_found = False
+ self.executables_found = False
+ self.sha_blob_references = False
+ self.lightweight_tags = 0
+ # Blob usage tracking
+ self.blobs = {}
+ for usage in ['new', 'used', 'unknown', 'unmarked']:
+ self.blobs[usage] = set()
+ self.blob_ref_counts = {}
+ # Head tracking
+ self.reftracker = reftracker.RefTracker()
+ # Stuff to cache: a map from mark to # of times that mark is merged
+ self.merges = {}
+ # Stuff to cache: these are maps from mark to sets
+ self.rename_old_paths = {}
+ self.copy_source_paths = {}
+
+ def post_process(self):
+ # Dump statistics
+ cmd_names = commands.COMMAND_NAMES
+ fc_names = commands.FILE_COMMAND_NAMES
+ self._dump_stats_group("Command counts",
+ [(c, self.cmd_counts[c]) for c in cmd_names], str)
+ self._dump_stats_group("File command counts",
+ [(c, self.file_cmd_counts[c]) for c in fc_names], str)
+
+ # Commit stats
+ if self.cmd_counts['commit']:
+ p_items = []
+ for i in xrange(0, self.max_parent_count + 1):
+ if i in self.parent_counts:
+ count = self.parent_counts[i]
+ p_items.append(("parents-%d" % i, count))
+ merges_count = len(self.merges.keys())
+ p_items.append(('total revisions merged', merges_count))
+ flags = {
+ 'separate authors found': self.separate_authors_found,
+ 'executables': self.executables_found,
+ 'symlinks': self.symlinks_found,
+ 'blobs referenced by SHA': self.sha_blob_references,
+ }
+ self._dump_stats_group("Parent counts", p_items, str)
+ self._dump_stats_group("Commit analysis", flags.iteritems(), _found)
+ heads = invert_dictset(self.reftracker.heads)
+ self._dump_stats_group("Head analysis", heads.iteritems(), None,
+ _iterable_as_config_list)
+ # note("\t%d\t%s" % (len(self.committers), 'unique committers'))
+ self._dump_stats_group("Merges", self.merges.iteritems(), None)
+ # We only show the rename old path and copy source paths when -vv
+ # (verbose=2) is specified. The output here for mysql's data can't
+ # be parsed currently so this bit of code needs more work anyhow ..
+ if self.verbose >= 2:
+ self._dump_stats_group("Rename old paths",
+ self.rename_old_paths.iteritems(), len,
+ _iterable_as_config_list)
+ self._dump_stats_group("Copy source paths",
+ self.copy_source_paths.iteritems(), len,
+ _iterable_as_config_list)
+
+ # Blob stats
+ if self.cmd_counts['blob']:
+ # In verbose mode, don't list every blob used
+ if self.verbose:
+ del self.blobs['used']
+ self._dump_stats_group("Blob usage tracking",
+ self.blobs.iteritems(), len, _iterable_as_config_list)
+ if self.blob_ref_counts:
+ blobs_by_count = invert_dict(self.blob_ref_counts)
+ blob_items = blobs_by_count.items()
+ blob_items.sort()
+ self._dump_stats_group("Blob reference counts",
+ blob_items, len, _iterable_as_config_list)
+
+ # Other stats
+ if self.cmd_counts['reset']:
+ reset_stats = {
+ 'lightweight tags': self.lightweight_tags,
+ }
+ self._dump_stats_group("Reset analysis", reset_stats.iteritems())
+
+ def _dump_stats_group(self, title, items, normal_formatter=None,
+ verbose_formatter=None):
+ """Dump a statistics group.
+
+ In verbose mode, do so as a config file so
+ that other processors can load the information if they want to.
+ :param normal_formatter: the callable to apply to the value
+ before displaying it in normal mode
+ :param verbose_formatter: the callable to apply to the value
+ before displaying it in verbose mode
+ """
+ if self.verbose:
+ self.outf.write("[%s]\n" % (title,))
+ for name, value in items:
+ if verbose_formatter is not None:
+ value = verbose_formatter(value)
+ if type(name) == str:
+ name = name.replace(' ', '-')
+ self.outf.write("%s = %s\n" % (name, value))
+ self.outf.write("\n")
+ else:
+ self.outf.write("%s:\n" % (title,))
+ for name, value in items:
+ if normal_formatter is not None:
+ value = normal_formatter(value)
+ self.outf.write("\t%s\t%s\n" % (value, name))
+
+ def progress_handler(self, cmd):
+ """Process a ProgressCommand."""
+ self.cmd_counts[cmd.name] += 1
+
+ def blob_handler(self, cmd):
+ """Process a BlobCommand."""
+ self.cmd_counts[cmd.name] += 1
+ if cmd.mark is None:
+ self.blobs['unmarked'].add(cmd.id)
+ else:
+ self.blobs['new'].add(cmd.id)
+ # Marks can be re-used so remove it from used if already there.
+ # Note: we definitely do NOT want to remove it from multi if
+ # it's already in that set.
+ try:
+ self.blobs['used'].remove(cmd.id)
+ except KeyError:
+ pass
+
+ def checkpoint_handler(self, cmd):
+ """Process a CheckpointCommand."""
+ self.cmd_counts[cmd.name] += 1
+
+ def commit_handler(self, cmd):
+ """Process a CommitCommand."""
+ self.cmd_counts[cmd.name] += 1
+ self.committers.add(cmd.committer)
+ if cmd.author is not None:
+ self.separate_authors_found = True
+ for fc in cmd.iter_files():
+ self.file_cmd_counts[fc.name] += 1
+ if isinstance(fc, commands.FileModifyCommand):
+ if fc.mode & 0111:
+ self.executables_found = True
+ if stat.S_ISLNK(fc.mode):
+ self.symlinks_found = True
+ if fc.dataref is not None:
+ if fc.dataref[0] == ':':
+ self._track_blob(fc.dataref)
+ else:
+ self.sha_blob_references = True
+ elif isinstance(fc, commands.FileRenameCommand):
+ self.rename_old_paths.setdefault(cmd.id, set()).add(fc.old_path)
+ elif isinstance(fc, commands.FileCopyCommand):
+ self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path)
+
+ # Track the heads
+ parents = self.reftracker.track_heads(cmd)
+
+ # Track the parent counts
+ parent_count = len(parents)
+ if self.parent_counts.has_key(parent_count):
+ self.parent_counts[parent_count] += 1
+ else:
+ self.parent_counts[parent_count] = 1
+ if parent_count > self.max_parent_count:
+ self.max_parent_count = parent_count
+
+ # Remember the merges
+ if cmd.merges:
+ #self.merges.setdefault(cmd.ref, set()).update(cmd.merges)
+ for merge in cmd.merges:
+ if merge in self.merges:
+ self.merges[merge] += 1
+ else:
+ self.merges[merge] = 1
+
+ def reset_handler(self, cmd):
+ """Process a ResetCommand."""
+ self.cmd_counts[cmd.name] += 1
+ if cmd.ref.startswith('refs/tags/'):
+ self.lightweight_tags += 1
+ else:
+ if cmd.from_ is not None:
+ self.reftracker.track_heads_for_ref(
+ cmd.ref, cmd.from_)
+
+ def tag_handler(self, cmd):
+ """Process a TagCommand."""
+ self.cmd_counts[cmd.name] += 1
+
+ def feature_handler(self, cmd):
+ """Process a FeatureCommand."""
+ self.cmd_counts[cmd.name] += 1
+ feature = cmd.feature_name
+ if feature not in commands.FEATURE_NAMES:
+ self.warning("feature %s is not supported - parsing may fail"
+ % (feature,))
+
+ def _track_blob(self, mark):
+ if mark in self.blob_ref_counts:
+ self.blob_ref_counts[mark] += 1
+ pass
+ elif mark in self.blobs['used']:
+ self.blob_ref_counts[mark] = 2
+ self.blobs['used'].remove(mark)
+ elif mark in self.blobs['new']:
+ self.blobs['used'].add(mark)
+ self.blobs['new'].remove(mark)
+ else:
+ self.blobs['unknown'].add(mark)
+
+def _found(b):
+ """Format a found boolean as a string."""
+ return ['no', 'found'][b]
+
+def _iterable_as_config_list(s):
+ """Format an iterable as a sequence of comma-separated strings.
+
+ To match what ConfigObj expects, a single item list has a trailing comma.
+ """
+ items = sorted(s)
+ if len(items) == 1:
+ return "%s," % (items[0],)
+ else:
+ return ", ".join(items)
diff --git a/reftracker.py b/reftracker.py
new file mode 100644
index 0000000..44136c7
--- /dev/null
+++ b/reftracker.py
@@ -0,0 +1,66 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+"""Tracker of refs."""
+
+
+class RefTracker(object):
+
+ def __init__(self):
+ # Head tracking: last ref, last id per ref & map of commit ids to ref*s*
+ self.last_ref = None
+ self.last_ids = {}
+ self.heads = {}
+
+ def dump_stats(self, note):
+ self._show_stats_for(self.last_ids, "last-ids", note=note)
+ self._show_stats_for(self.heads, "heads", note=note)
+
+ def clear(self):
+ self.last_ids.clear()
+ self.heads.clear()
+
+ def track_heads(self, cmd):
+ """Track the repository heads given a CommitCommand.
+
+ :param cmd: the CommitCommand
+ :return: the list of parents in terms of commit-ids
+ """
+ # Get the true set of parents
+ if cmd.from_ is not None:
+ parents = [cmd.from_]
+ else:
+ last_id = self.last_ids.get(cmd.ref)
+ if last_id is not None:
+ parents = [last_id]
+ else:
+ parents = []
+ parents.extend(cmd.merges)
+
+ # Track the heads
+ self.track_heads_for_ref(cmd.ref, cmd.id, parents)
+ return parents
+
+ def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
+ if parents is not None:
+ for parent in parents:
+ if parent in self.heads:
+ del self.heads[parent]
+ self.heads.setdefault(cmd_id, set()).add(cmd_ref)
+ self.last_ids[cmd_ref] = cmd_id
+ self.last_ref = cmd_ref
+
+
diff --git a/tests/test_head_tracking.py b/tests/test_head_tracking.py
new file mode 100644
index 0000000..19f6c68
--- /dev/null
+++ b/tests/test_head_tracking.py
@@ -0,0 +1,259 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Test tracking of heads"""
+
+from cStringIO import StringIO
+
+from fastimport import (
+ commands,
+ parser,
+ )
+
+import testtools
+
+from bzrlib.plugins.fastimport.reftracker import (
+ RefTracker,
+ )
+
+
+# A sample input stream that only adds files to a branch
+_SAMPLE_MAINLINE = \
+"""blob
+mark :1
+data 9
+Welcome!
+commit refs/heads/master
+mark :100
+committer a <b@c> 1234798653 +0000
+data 4
+test
+M 644 :1 doc/README.txt
+blob
+mark :2
+data 17
+Life
+is
+good ...
+commit refs/heads/master
+mark :101
+committer a <b@c> 1234798653 +0000
+data 8
+test
+ing
+from :100
+M 644 :2 NEWS
+blob
+mark :3
+data 19
+Welcome!
+my friend
+blob
+mark :4
+data 11
+== Docs ==
+commit refs/heads/master
+mark :102
+committer d <b@c> 1234798653 +0000
+data 8
+test
+ing
+from :101
+M 644 :3 doc/README.txt
+M 644 :4 doc/index.txt
+"""
+
+# A sample input stream that adds files to two branches
+_SAMPLE_TWO_HEADS = \
+"""blob
+mark :1
+data 9
+Welcome!
+commit refs/heads/master
+mark :100
+committer a <b@c> 1234798653 +0000
+data 4
+test
+M 644 :1 doc/README.txt
+blob
+mark :2
+data 17
+Life
+is
+good ...
+commit refs/heads/mybranch
+mark :101
+committer a <b@c> 1234798653 +0000
+data 8
+test
+ing
+from :100
+M 644 :2 NEWS
+blob
+mark :3
+data 19
+Welcome!
+my friend
+blob
+mark :4
+data 11
+== Docs ==
+commit refs/heads/master
+mark :102
+committer d <b@c> 1234798653 +0000
+data 8
+test
+ing
+from :100
+M 644 :3 doc/README.txt
+M 644 :4 doc/index.txt
+"""
+
+# A sample input stream that adds files to two branches
+_SAMPLE_TWO_BRANCHES_MERGED = \
+"""blob
+mark :1
+data 9
+Welcome!
+commit refs/heads/master
+mark :100
+committer a <b@c> 1234798653 +0000
+data 4
+test
+M 644 :1 doc/README.txt
+blob
+mark :2
+data 17
+Life
+is
+good ...
+commit refs/heads/mybranch
+mark :101
+committer a <b@c> 1234798653 +0000
+data 8
+test
+ing
+from :100
+M 644 :2 NEWS
+blob
+mark :3
+data 19
+Welcome!
+my friend
+blob
+mark :4
+data 11
+== Docs ==
+commit refs/heads/master
+mark :102
+committer d <b@c> 1234798653 +0000
+data 8
+test
+ing
+from :100
+M 644 :3 doc/README.txt
+M 644 :4 doc/index.txt
+commit refs/heads/master
+mark :103
+committer d <b@c> 1234798653 +0000
+data 8
+test
+ing
+from :102
+merge :101
+D doc/index.txt
+"""
+
+# A sample input stream that contains a reset
+_SAMPLE_RESET = \
+"""blob
+mark :1
+data 9
+Welcome!
+commit refs/heads/master
+mark :100
+committer a <b@c> 1234798653 +0000
+data 4
+test
+M 644 :1 doc/README.txt
+reset refs/remotes/origin/master
+from :100
+"""
+
+# A sample input stream that contains a reset and more commits
+_SAMPLE_RESET_WITH_MORE_COMMITS = \
+"""blob
+mark :1
+data 9
+Welcome!
+commit refs/heads/master
+mark :100
+committer a <b@c> 1234798653 +0000
+data 4
+test
+M 644 :1 doc/README.txt
+reset refs/remotes/origin/master
+from :100
+commit refs/remotes/origin/master
+mark :101
+committer d <b@c> 1234798653 +0000
+data 8
+test
+ing
+from :100
+D doc/README.txt
+"""
+
+class TestHeadTracking(testtools.TestCase):
+
+ def assertHeads(self, input, expected):
+ s = StringIO(input)
+ p = parser.ImportParser(s)
+ reftracker = RefTracker()
+ for cmd in p.iter_commands():
+ if isinstance(cmd, commands.CommitCommand):
+ reftracker.track_heads(cmd)
+ # eat the file commands
+ list(cmd.iter_files())
+ elif isinstance(cmd, commands.ResetCommand):
+ if cmd.from_ is not None:
+ reftracker.track_heads_for_ref(cmd.ref, cmd.from_)
+ self.assertEqual(reftracker.heads, expected)
+
+ def test_mainline(self):
+ self.assertHeads(_SAMPLE_MAINLINE, {
+ ':102': set(['refs/heads/master']),
+ })
+
+ def test_two_heads(self):
+ self.assertHeads(_SAMPLE_TWO_HEADS, {
+ ':101': set(['refs/heads/mybranch']),
+ ':102': set(['refs/heads/master']),
+ })
+
+ def test_two_branches_merged(self):
+ self.assertHeads(_SAMPLE_TWO_BRANCHES_MERGED, {
+ ':103': set(['refs/heads/master']),
+ })
+
+ def test_reset(self):
+ self.assertHeads(_SAMPLE_RESET, {
+ ':100': set(['refs/heads/master', 'refs/remotes/origin/master']),
+ })
+
+ def test_reset_with_more_commits(self):
+ self.assertHeads(_SAMPLE_RESET_WITH_MORE_COMMITS, {
+ ':101': set(['refs/remotes/origin/master']),
+ })