diff options
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | branch_mapper.py | 61 | ||||
-rw-r--r-- | branch_updater.py | 6 | ||||
-rw-r--r-- | bzr_commit_handler.py | 21 | ||||
-rw-r--r-- | cache_manager.py | 34 | ||||
-rw-r--r-- | revision_store.py | 15 | ||||
-rwxr-xr-x | setup.py | 1 | ||||
-rw-r--r-- | tests/__init__.py | 21 | ||||
-rw-r--r-- | tests/test_branch_mapper.py | 42 | ||||
-rw-r--r-- | tests/test_revision_store.py | 147 |
10 files changed, 262 insertions, 89 deletions
@@ -56,6 +56,9 @@ Improvements * Large repositories now compress better thanks to a change in how file-ids are assigned. (Ian Clatworthy, John Arbash Meinel) +* Memory usage is improved by flushing blobs to a disk cache + when appropriate. (John Arbash Meinel) + * If a fast-import source ends in ".gz", it is assumed to be in gzip format and the stream is implicitly uncompressed. This means fast-import dump files generated by fast-export-from-xxx diff --git a/branch_mapper.py b/branch_mapper.py index 3bfc39b..acc37c9 100644 --- a/branch_mapper.py +++ b/branch_mapper.py @@ -14,50 +14,45 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -"""An object that maps bzr branch names <-> git ref names.""" +"""An object that maps git ref names to bzr branch names. Note that it is not +used to map git ref names to bzr tag names.""" + + +import re class BranchMapper(object): + _GIT_TRUNK_RE = re.compile('(?:git-)*trunk') - def git_to_bzr(self, ref_names): - """Get the mapping from git reference names to Bazaar branch names. - - :return: a dictionary with git reference names as keys and - the Bazaar branch names as values. + def git_to_bzr(self, ref_name): + """Map a git reference name to a Bazaar branch name. """ - bazaar_names = {} - for ref_name in sorted(ref_names): - parts = ref_name.split('/') - if parts[0] == 'refs': + parts = ref_name.split('/') + if parts[0] == 'refs': + parts.pop(0) + category = parts.pop(0) + if category == 'heads': + git_name = '/'.join(parts) + bazaar_name = self._git_to_bzr_name(git_name) + else: + if category == 'remotes' and parts[0] == 'origin': parts.pop(0) - category = parts.pop(0) - if category == 'heads': - git_name = '/'.join(parts) - bazaar_name = self._git_to_bzr_name(git_name) - else: - if category == 'remotes' and parts[0] == 'origin': - parts.pop(0) - git_name = '/'.join(parts) - if category.endswith('s'): - category = category[:-1] - name_no_ext = self._git_to_bzr_name(git_name) - bazaar_name = "%s.%s" % (name_no_ext, category) - bazaar_names[ref_name] = bazaar_name - return bazaar_names + git_name = '/'.join(parts) + if category.endswith('s'): + category = category[:-1] + name_no_ext = self._git_to_bzr_name(git_name) + bazaar_name = "%s.%s" % (name_no_ext, category) + return bazaar_name def _git_to_bzr_name(self, git_name): + # Make a simple name more bzr-like, by mapping git 'master' to bzr 'trunk'. + # To avoid collision, map git 'trunk' to bzr 'git-trunk'. Likewise + # 'git-trunk' to 'git-git-trunk' and so on, such that the mapping is + # one-to-one in both directions. if git_name == 'master': bazaar_name = 'trunk' - elif git_name.endswith('trunk'): + elif self._GIT_TRUNK_RE.match(git_name): bazaar_name = 'git-%s' % (git_name,) else: bazaar_name = git_name return bazaar_name - - def bzr_to_git(self, branch_names): - """Get the mapping from Bazaar branch names to git reference names. - - :return: a dictionary with Bazaar branch names as keys and - the git reference names as values. - """ - raise NotImplementedError(self.bzr_to_git) diff --git a/branch_updater.py b/branch_updater.py index 004f279..c19cfef 100644 --- a/branch_updater.py +++ b/branch_updater.py @@ -21,7 +21,6 @@ from operator import itemgetter from bzrlib import bzrdir, errors, osutils, transport from bzrlib.trace import error, note -import branch_mapper import helpers @@ -40,7 +39,6 @@ class BranchUpdater(object): self.heads_by_ref = heads_by_ref self.last_ref = last_ref self.tags = tags - self.name_mapper = branch_mapper.BranchMapper() self._branch_format = \ helpers.best_format_for_objects_in_a_repository(repo) @@ -84,7 +82,9 @@ class BranchUpdater(object): # Convert the reference names into Bazaar speak. If we haven't # already put the 'trunk' first, do it now. - git_to_bzr_map = self.name_mapper.git_to_bzr(ref_names) + git_to_bzr_map = {} + for ref_name in ref_names: + git_to_bzr_map[ref_name] = self.cache_mgr.branch_mapper.git_to_bzr(ref_name) if ref_names and self.branch is None: trunk = self.select_trunk(ref_names) git_bzr_items = [(trunk, git_to_bzr_map[trunk])] diff --git a/bzr_commit_handler.py b/bzr_commit_handler.py index 74c6b45..3226179 100644 --- a/bzr_commit_handler.py +++ b/bzr_commit_handler.py @@ -222,6 +222,9 @@ class GenericCommitHandler(processor.CommitHandler): def build_revision(self): rev_props = self._legal_revision_properties(self.command.properties) + if 'branch-nick' not in rev_props: + rev_props['branch-nick'] = self.cache_mgr.branch_mapper.git_to_bzr( + self.branch_ref) self._save_author_info(rev_props) committer = self.command.committer who = self._format_name_email(committer[0], committer[1]) @@ -652,16 +655,22 @@ class InventoryDeltaCommitHandler(GenericCommitHandler): def _get_proposed_inventory(self, delta): if len(self.parents): - new_inv = self.basis_inventory._get_mutable_inventory() + # new_inv = self.basis_inventory._get_mutable_inventory() + # Note that this will create unreferenced chk pages if we end up + # deleting entries, because this 'test' inventory won't end up + # used. However, it is cheaper than having to create a full copy of + # the inventory for every commit. + new_inv = self.basis_inventory.create_by_apply_delta(delta, + 'not-a-valid-revision-id:') else: new_inv = inventory.Inventory(revision_id=self.revision_id) # This is set in the delta so remove it to prevent a duplicate del new_inv[inventory.ROOT_ID] - try: - new_inv.apply_delta(delta) - except errors.InconsistentDelta: - self.mutter("INCONSISTENT DELTA IS:\n%s" % "\n".join([str(de) for de in delta])) - raise + try: + new_inv.apply_delta(delta) + except errors.InconsistentDelta: + self.mutter("INCONSISTENT DELTA IS:\n%s" % "\n".join([str(de) for de in delta])) + raise return new_inv def _add_entry(self, entry): diff --git a/cache_manager.py b/cache_manager.py index 6c9600f..464403f 100644 --- a/cache_manager.py +++ b/cache_manager.py @@ -24,7 +24,35 @@ import time import weakref from bzrlib import lru_cache, trace -from bzrlib.plugins.fastimport import helpers +from bzrlib.plugins.fastimport import branch_mapper, helpers + + +class _Cleanup(object): + """This class makes sure we clean up when CacheManager goes away. + + We use a helper class to ensure that we are never in a refcycle. + """ + + def __init__(self, disk_blobs): + self.disk_blobs = disk_blobs + self.tempdir = None + self.small_blobs = None + + def __del__(self): + self.finalize() + + def finalize(self): + if self.disk_blobs is not None: + for info in self.disk_blobs.itervalues(): + if info[-1] is not None: + os.unlink(info[-1]) + self.disk_blobs = None + if self.small_blobs is not None: + self.small_blobs.close() + self.small_blobs = None + if self.tempdir is not None: + shutils.rmtree(self.tempdir) + class _Cleanup(object): @@ -113,6 +141,10 @@ class CacheManager(object): # info not in file - possible when no blobs used pass + # BranchMapper has no state (for now?), but we keep it around rather + # than reinstantiate on every usage + self.branch_mapper = branch_mapper.BranchMapper() + def dump_stats(self, note=trace.note): """Dump some statistics about what we cached.""" # TODO: add in inventory stastistics diff --git a/revision_store.py b/revision_store.py index d0ea181..399dabe 100644 --- a/revision_store.py +++ b/revision_store.py @@ -1,4 +1,4 @@ -# Copyright (C) 2008 Canonical Ltd +# Copyright (C) 2008, 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -48,10 +48,16 @@ class _TreeShim(object): if file_id in self._new_info_by_id: new_path = self._new_info_by_id[file_id][0] if new_path is None: - raise errors.NoSuchId() + raise errors.NoSuchId(self, file_id) + return new_path return self._basis_inv.id2path(file_id) def path2id(self, path): + # CommitBuilder currently only requires access to the root id. We don't + # build a map of renamed files, etc. One possibility if we ever *do* + # need more than just root, is to defer to basis_inv.path2id() and then + # check if the file_id is in our _new_info_by_id dict. And in that + # case, return _new_info_by_id[file_id][0] if path != '': raise NotImplementedError(_TreeShim.path2id) # TODO: Handle root renames? @@ -80,7 +86,6 @@ class _TreeShim(object): def get_reference_revision(self, file_id, path=None): raise NotImplementedError(_TreeShim.get_reference_revision) - def _delta_to_iter_changes(self): """Convert the inv_delta into an iter_changes repr.""" # iter_changes is: @@ -95,6 +100,9 @@ class _TreeShim(object): # ) basis_inv = self._basis_inv for old_path, new_path, file_id, ie in self._inv_delta: + # Perf: Would this be faster if we did 'if file_id in basis_inv'? + # Since the *very* common case is that the file already exists, it + # probably is better to optimize for that try: old_ie = basis_inv[file_id] except errors.NoSuchId: @@ -134,6 +142,7 @@ class _TreeShim(object): content_modified = (ie.text_sha1 != old_ie.text_sha1 or ie.text_size != old_ie.text_size) # TODO: ie.kind != old_ie.kind + # TODO: symlinks changing targets, content_modified? change = (file_id, (old_path, new_path), content_modified, @@ -17,6 +17,7 @@ if __name__ == '__main__': url="https://launchpad.net/bzr-fastimport", scripts=[], packages=['bzrlib.plugins.fastimport', + 'bzrlib.plugins.fastimport.exporters', 'bzrlib.plugins.fastimport.processors', 'bzrlib.plugins.fastimport.tests', ], diff --git a/tests/__init__.py b/tests/__init__.py index 711b605..cda5705 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -21,15 +21,16 @@ from bzrlib.tests.TestUtil import TestLoader, TestSuite def test_suite(): - module_names = [ - 'bzrlib.plugins.fastimport.tests.test_branch_mapper', - 'bzrlib.plugins.fastimport.tests.test_commands', - 'bzrlib.plugins.fastimport.tests.test_errors', - 'bzrlib.plugins.fastimport.tests.test_filter_processor', - 'bzrlib.plugins.fastimport.tests.test_generic_processor', - 'bzrlib.plugins.fastimport.tests.test_head_tracking', - 'bzrlib.plugins.fastimport.tests.test_helpers', - 'bzrlib.plugins.fastimport.tests.test_parser', - ] + module_names = [__name__ + '.' + x for x in [ + 'test_branch_mapper', + 'test_commands', + 'test_errors', + 'test_filter_processor', + 'test_generic_processor', + 'test_head_tracking', + 'test_helpers', + 'test_parser', + 'test_revision_store', + ]] loader = TestLoader() return loader.loadTestsFromModuleNames(module_names) diff --git a/tests/test_branch_mapper.py b/tests/test_branch_mapper.py index 0a50eec..00450c9 100644 --- a/tests/test_branch_mapper.py +++ b/tests/test_branch_mapper.py @@ -27,62 +27,38 @@ class TestBranchMapper(tests.TestCase): def test_git_to_bzr(self): m = branch_mapper.BranchMapper() - git_refs = [ - 'refs/heads/master', - 'refs/heads/foo', - 'refs/tags/master', - 'refs/tags/foo', - 'refs/remotes/origin/master', - 'refs/remotes/origin/foo', - ] - git_to_bzr_map = m.git_to_bzr(git_refs) - self.assertEqual(git_to_bzr_map, { + for git, bzr in { 'refs/heads/master': 'trunk', 'refs/heads/foo': 'foo', 'refs/tags/master': 'trunk.tag', 'refs/tags/foo': 'foo.tag', 'refs/remotes/origin/master': 'trunk.remote', 'refs/remotes/origin/foo': 'foo.remote', - }) + }.items(): + self.assertEqual(m.git_to_bzr(git), bzr) def test_git_to_bzr_with_slashes(self): m = branch_mapper.BranchMapper() - git_refs = [ - 'refs/heads/master/slave', - 'refs/heads/foo/bar', - 'refs/tags/master/slave', - 'refs/tags/foo/bar', - 'refs/remotes/origin/master/slave', - 'refs/remotes/origin/foo/bar', - ] - git_to_bzr_map = m.git_to_bzr(git_refs) - self.assertEqual(git_to_bzr_map, { + for git, bzr in { 'refs/heads/master/slave': 'master/slave', 'refs/heads/foo/bar': 'foo/bar', 'refs/tags/master/slave': 'master/slave.tag', 'refs/tags/foo/bar': 'foo/bar.tag', 'refs/remotes/origin/master/slave': 'master/slave.remote', 'refs/remotes/origin/foo/bar': 'foo/bar.remote', - }) + }.items(): + self.assertEqual(m.git_to_bzr(git), bzr) def test_git_to_bzr_for_trunk(self): # As 'master' in git is mapped to trunk in bzr, we need to handle # 'trunk' in git in a sensible way. m = branch_mapper.BranchMapper() - git_refs = [ - 'refs/heads/trunk', - 'refs/tags/trunk', - 'refs/remotes/origin/trunk', - 'refs/heads/git-trunk', - 'refs/tags/git-trunk', - 'refs/remotes/origin/git-trunk', - ] - git_to_bzr_map = m.git_to_bzr(git_refs) - self.assertEqual(git_to_bzr_map, { + for git, bzr in { 'refs/heads/trunk': 'git-trunk', 'refs/tags/trunk': 'git-trunk.tag', 'refs/remotes/origin/trunk': 'git-trunk.remote', 'refs/heads/git-trunk': 'git-git-trunk', 'refs/tags/git-trunk': 'git-git-trunk.tag', 'refs/remotes/origin/git-trunk':'git-git-trunk.remote', - }) + }.items(): + self.assertEqual(m.git_to_bzr(git), bzr) diff --git a/tests/test_revision_store.py b/tests/test_revision_store.py new file mode 100644 index 0000000..d850c95 --- /dev/null +++ b/tests/test_revision_store.py @@ -0,0 +1,147 @@ +# Copyright (C) 2008, 2009 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +"""Direct tests of the revision_store classes.""" + +from bzrlib import ( + branch, + errors, + inventory, + osutils, + tests, + ) + +from bzrlib.plugins.fastimport import ( + revision_store, + ) + + +class Test_TreeShim(tests.TestCase): + + def invAddEntry(self, inv, path, file_id=None): + if path.endswith('/'): + path = path[:-1] + kind = 'directory' + else: + kind = 'file' + parent_path, basename = osutils.split(path) + parent_id = inv.path2id(parent_path) + inv.add(inventory.make_entry(kind, basename, parent_id, file_id)) + + def make_trivial_basis_inv(self): + basis_inv = inventory.Inventory('TREE_ROOT') + self.invAddEntry(basis_inv, 'foo', 'foo-id') + self.invAddEntry(basis_inv, 'bar/', 'bar-id') + self.invAddEntry(basis_inv, 'bar/baz', 'baz-id') + return basis_inv + + def test_id2path_no_delta(self): + basis_inv = self.make_trivial_basis_inv() + shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, + inv_delta=[], content_provider=None) + self.assertEqual('', shim.id2path('TREE_ROOT')) + self.assertEqual('foo', shim.id2path('foo-id')) + self.assertEqual('bar', shim.id2path('bar-id')) + self.assertEqual('bar/baz', shim.id2path('baz-id')) + self.assertRaises(errors.NoSuchId, shim.id2path, 'qux-id') + + def test_id2path_with_delta(self): + basis_inv = self.make_trivial_basis_inv() + foo_entry = inventory.make_entry('file', 'foo2', 'TREE_ROOT', 'foo-id') + inv_delta = [('foo', 'foo2', 'foo-id', foo_entry), + ('bar/baz', None, 'baz-id', None), + ] + + shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, + inv_delta=inv_delta, + content_provider=None) + self.assertEqual('', shim.id2path('TREE_ROOT')) + self.assertEqual('foo2', shim.id2path('foo-id')) + self.assertEqual('bar', shim.id2path('bar-id')) + self.assertRaises(errors.NoSuchId, shim.id2path, 'baz-id') + + def test_path2id(self): + basis_inv = self.make_trivial_basis_inv() + shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, + inv_delta=[], content_provider=None) + self.assertEqual('TREE_ROOT', shim.path2id('')) + # We don't want to ever give a wrong value, so for now we just raise + # NotImplementedError + self.assertRaises(NotImplementedError, shim.path2id, 'bar') + + def test_get_file_with_stat_content_in_stream(self): + basis_inv = self.make_trivial_basis_inv() + + def content_provider(file_id): + return 'content of\n' + file_id + '\n' + + shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, + inv_delta=[], + content_provider=content_provider) + f_obj, stat_val = shim.get_file_with_stat('baz-id') + self.assertIs(None, stat_val) + self.assertEqualDiff('content of\nbaz-id\n', f_obj.read()) + + # TODO: Test when the content isn't in the stream, and we fall back to the + # repository that was passed in + + def test_get_symlink_target(self): + basis_inv = self.make_trivial_basis_inv() + ie = inventory.make_entry('symlink', 'link', 'TREE_ROOT', 'link-id') + ie.symlink_target = u'link-target' + basis_inv.add(ie) + shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, + inv_delta=[], content_provider=None) + self.assertEqual(u'link-target', shim.get_symlink_target('link-id')) + + def test_get_symlink_target_from_delta(self): + basis_inv = self.make_trivial_basis_inv() + ie = inventory.make_entry('symlink', 'link', 'TREE_ROOT', 'link-id') + ie.symlink_target = u'link-target' + inv_delta = [(None, 'link', 'link-id', ie)] + shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, + inv_delta=inv_delta, + content_provider=None) + self.assertEqual(u'link-target', shim.get_symlink_target('link-id')) + + def test__delta_to_iter_changes(self): + basis_inv = self.make_trivial_basis_inv() + foo_entry = inventory.make_entry('file', 'foo2', 'bar-id', 'foo-id') + link_entry = inventory.make_entry('symlink', 'link', 'TREE_ROOT', + 'link-id') + link_entry.symlink_target = u'link-target' + inv_delta = [('foo', 'bar/foo2', 'foo-id', foo_entry), + ('bar/baz', None, 'baz-id', None), + (None, 'link', 'link-id', link_entry), + ] + shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, + inv_delta=inv_delta, + content_provider=None) + changes = list(shim._delta_to_iter_changes()) + expected = [('foo-id', ('foo', 'bar/foo2'), False, (True, True), + ('TREE_ROOT', 'bar-id'), ('foo', 'foo2'), + ('file', 'file'), (False, False)), + ('baz-id', ('bar/baz', None), True, (True, False), + ('bar-id', None), ('baz', None), + ('file', None), (False, None)), + ('link-id', (None, 'link'), True, (False, True), + (None, 'TREE_ROOT'), (None, 'link'), + (None, 'symlink'), (None, False)), + ] + # from pprint import pformat + # self.assertEqualDiff(pformat(expected), pformat(changes)) + self.assertEqual(expected, changes) + |