summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--NEWS3
-rw-r--r--branch_mapper.py61
-rw-r--r--branch_updater.py6
-rw-r--r--bzr_commit_handler.py21
-rw-r--r--cache_manager.py34
-rw-r--r--revision_store.py15
-rwxr-xr-xsetup.py1
-rw-r--r--tests/__init__.py21
-rw-r--r--tests/test_branch_mapper.py42
-rw-r--r--tests/test_revision_store.py147
10 files changed, 262 insertions, 89 deletions
diff --git a/NEWS b/NEWS
index 33caab0..6ba3dd7 100644
--- a/NEWS
+++ b/NEWS
@@ -56,6 +56,9 @@ Improvements
* Large repositories now compress better thanks to a change in
how file-ids are assigned. (Ian Clatworthy, John Arbash Meinel)
+* Memory usage is improved by flushing blobs to a disk cache
+ when appropriate. (John Arbash Meinel)
+
* If a fast-import source ends in ".gz", it is assumed to be in
gzip format and the stream is implicitly uncompressed. This
means fast-import dump files generated by fast-export-from-xxx
diff --git a/branch_mapper.py b/branch_mapper.py
index 3bfc39b..acc37c9 100644
--- a/branch_mapper.py
+++ b/branch_mapper.py
@@ -14,50 +14,45 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""An object that maps bzr branch names <-> git ref names."""
+"""An object that maps git ref names to bzr branch names. Note that it is not
+used to map git ref names to bzr tag names."""
+
+
+import re
class BranchMapper(object):
+ _GIT_TRUNK_RE = re.compile('(?:git-)*trunk')
- def git_to_bzr(self, ref_names):
- """Get the mapping from git reference names to Bazaar branch names.
-
- :return: a dictionary with git reference names as keys and
- the Bazaar branch names as values.
+ def git_to_bzr(self, ref_name):
+ """Map a git reference name to a Bazaar branch name.
"""
- bazaar_names = {}
- for ref_name in sorted(ref_names):
- parts = ref_name.split('/')
- if parts[0] == 'refs':
+ parts = ref_name.split('/')
+ if parts[0] == 'refs':
+ parts.pop(0)
+ category = parts.pop(0)
+ if category == 'heads':
+ git_name = '/'.join(parts)
+ bazaar_name = self._git_to_bzr_name(git_name)
+ else:
+ if category == 'remotes' and parts[0] == 'origin':
parts.pop(0)
- category = parts.pop(0)
- if category == 'heads':
- git_name = '/'.join(parts)
- bazaar_name = self._git_to_bzr_name(git_name)
- else:
- if category == 'remotes' and parts[0] == 'origin':
- parts.pop(0)
- git_name = '/'.join(parts)
- if category.endswith('s'):
- category = category[:-1]
- name_no_ext = self._git_to_bzr_name(git_name)
- bazaar_name = "%s.%s" % (name_no_ext, category)
- bazaar_names[ref_name] = bazaar_name
- return bazaar_names
+ git_name = '/'.join(parts)
+ if category.endswith('s'):
+ category = category[:-1]
+ name_no_ext = self._git_to_bzr_name(git_name)
+ bazaar_name = "%s.%s" % (name_no_ext, category)
+ return bazaar_name
def _git_to_bzr_name(self, git_name):
+ # Make a simple name more bzr-like, by mapping git 'master' to bzr 'trunk'.
+ # To avoid collision, map git 'trunk' to bzr 'git-trunk'. Likewise
+ # 'git-trunk' to 'git-git-trunk' and so on, such that the mapping is
+ # one-to-one in both directions.
if git_name == 'master':
bazaar_name = 'trunk'
- elif git_name.endswith('trunk'):
+ elif self._GIT_TRUNK_RE.match(git_name):
bazaar_name = 'git-%s' % (git_name,)
else:
bazaar_name = git_name
return bazaar_name
-
- def bzr_to_git(self, branch_names):
- """Get the mapping from Bazaar branch names to git reference names.
-
- :return: a dictionary with Bazaar branch names as keys and
- the git reference names as values.
- """
- raise NotImplementedError(self.bzr_to_git)
diff --git a/branch_updater.py b/branch_updater.py
index 004f279..c19cfef 100644
--- a/branch_updater.py
+++ b/branch_updater.py
@@ -21,7 +21,6 @@ from operator import itemgetter
from bzrlib import bzrdir, errors, osutils, transport
from bzrlib.trace import error, note
-import branch_mapper
import helpers
@@ -40,7 +39,6 @@ class BranchUpdater(object):
self.heads_by_ref = heads_by_ref
self.last_ref = last_ref
self.tags = tags
- self.name_mapper = branch_mapper.BranchMapper()
self._branch_format = \
helpers.best_format_for_objects_in_a_repository(repo)
@@ -84,7 +82,9 @@ class BranchUpdater(object):
# Convert the reference names into Bazaar speak. If we haven't
# already put the 'trunk' first, do it now.
- git_to_bzr_map = self.name_mapper.git_to_bzr(ref_names)
+ git_to_bzr_map = {}
+ for ref_name in ref_names:
+ git_to_bzr_map[ref_name] = self.cache_mgr.branch_mapper.git_to_bzr(ref_name)
if ref_names and self.branch is None:
trunk = self.select_trunk(ref_names)
git_bzr_items = [(trunk, git_to_bzr_map[trunk])]
diff --git a/bzr_commit_handler.py b/bzr_commit_handler.py
index 74c6b45..3226179 100644
--- a/bzr_commit_handler.py
+++ b/bzr_commit_handler.py
@@ -222,6 +222,9 @@ class GenericCommitHandler(processor.CommitHandler):
def build_revision(self):
rev_props = self._legal_revision_properties(self.command.properties)
+ if 'branch-nick' not in rev_props:
+ rev_props['branch-nick'] = self.cache_mgr.branch_mapper.git_to_bzr(
+ self.branch_ref)
self._save_author_info(rev_props)
committer = self.command.committer
who = self._format_name_email(committer[0], committer[1])
@@ -652,16 +655,22 @@ class InventoryDeltaCommitHandler(GenericCommitHandler):
def _get_proposed_inventory(self, delta):
if len(self.parents):
- new_inv = self.basis_inventory._get_mutable_inventory()
+ # new_inv = self.basis_inventory._get_mutable_inventory()
+ # Note that this will create unreferenced chk pages if we end up
+ # deleting entries, because this 'test' inventory won't end up
+ # used. However, it is cheaper than having to create a full copy of
+ # the inventory for every commit.
+ new_inv = self.basis_inventory.create_by_apply_delta(delta,
+ 'not-a-valid-revision-id:')
else:
new_inv = inventory.Inventory(revision_id=self.revision_id)
# This is set in the delta so remove it to prevent a duplicate
del new_inv[inventory.ROOT_ID]
- try:
- new_inv.apply_delta(delta)
- except errors.InconsistentDelta:
- self.mutter("INCONSISTENT DELTA IS:\n%s" % "\n".join([str(de) for de in delta]))
- raise
+ try:
+ new_inv.apply_delta(delta)
+ except errors.InconsistentDelta:
+ self.mutter("INCONSISTENT DELTA IS:\n%s" % "\n".join([str(de) for de in delta]))
+ raise
return new_inv
def _add_entry(self, entry):
diff --git a/cache_manager.py b/cache_manager.py
index 6c9600f..464403f 100644
--- a/cache_manager.py
+++ b/cache_manager.py
@@ -24,7 +24,35 @@ import time
import weakref
from bzrlib import lru_cache, trace
-from bzrlib.plugins.fastimport import helpers
+from bzrlib.plugins.fastimport import branch_mapper, helpers
+
+
+class _Cleanup(object):
+ """This class makes sure we clean up when CacheManager goes away.
+
+ We use a helper class to ensure that we are never in a refcycle.
+ """
+
+ def __init__(self, disk_blobs):
+ self.disk_blobs = disk_blobs
+ self.tempdir = None
+ self.small_blobs = None
+
+ def __del__(self):
+ self.finalize()
+
+ def finalize(self):
+ if self.disk_blobs is not None:
+ for info in self.disk_blobs.itervalues():
+ if info[-1] is not None:
+ os.unlink(info[-1])
+ self.disk_blobs = None
+ if self.small_blobs is not None:
+ self.small_blobs.close()
+ self.small_blobs = None
+ if self.tempdir is not None:
+ shutils.rmtree(self.tempdir)
+
class _Cleanup(object):
@@ -113,6 +141,10 @@ class CacheManager(object):
# info not in file - possible when no blobs used
pass
+ # BranchMapper has no state (for now?), but we keep it around rather
+ # than reinstantiate on every usage
+ self.branch_mapper = branch_mapper.BranchMapper()
+
def dump_stats(self, note=trace.note):
"""Dump some statistics about what we cached."""
# TODO: add in inventory stastistics
diff --git a/revision_store.py b/revision_store.py
index d0ea181..399dabe 100644
--- a/revision_store.py
+++ b/revision_store.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2008 Canonical Ltd
+# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -48,10 +48,16 @@ class _TreeShim(object):
if file_id in self._new_info_by_id:
new_path = self._new_info_by_id[file_id][0]
if new_path is None:
- raise errors.NoSuchId()
+ raise errors.NoSuchId(self, file_id)
+ return new_path
return self._basis_inv.id2path(file_id)
def path2id(self, path):
+ # CommitBuilder currently only requires access to the root id. We don't
+ # build a map of renamed files, etc. One possibility if we ever *do*
+ # need more than just root, is to defer to basis_inv.path2id() and then
+ # check if the file_id is in our _new_info_by_id dict. And in that
+ # case, return _new_info_by_id[file_id][0]
if path != '':
raise NotImplementedError(_TreeShim.path2id)
# TODO: Handle root renames?
@@ -80,7 +86,6 @@ class _TreeShim(object):
def get_reference_revision(self, file_id, path=None):
raise NotImplementedError(_TreeShim.get_reference_revision)
-
def _delta_to_iter_changes(self):
"""Convert the inv_delta into an iter_changes repr."""
# iter_changes is:
@@ -95,6 +100,9 @@ class _TreeShim(object):
# )
basis_inv = self._basis_inv
for old_path, new_path, file_id, ie in self._inv_delta:
+ # Perf: Would this be faster if we did 'if file_id in basis_inv'?
+ # Since the *very* common case is that the file already exists, it
+ # probably is better to optimize for that
try:
old_ie = basis_inv[file_id]
except errors.NoSuchId:
@@ -134,6 +142,7 @@ class _TreeShim(object):
content_modified = (ie.text_sha1 != old_ie.text_sha1
or ie.text_size != old_ie.text_size)
# TODO: ie.kind != old_ie.kind
+ # TODO: symlinks changing targets, content_modified?
change = (file_id,
(old_path, new_path),
content_modified,
diff --git a/setup.py b/setup.py
index f4d5d0c..d360793 100755
--- a/setup.py
+++ b/setup.py
@@ -17,6 +17,7 @@ if __name__ == '__main__':
url="https://launchpad.net/bzr-fastimport",
scripts=[],
packages=['bzrlib.plugins.fastimport',
+ 'bzrlib.plugins.fastimport.exporters',
'bzrlib.plugins.fastimport.processors',
'bzrlib.plugins.fastimport.tests',
],
diff --git a/tests/__init__.py b/tests/__init__.py
index 711b605..cda5705 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -21,15 +21,16 @@ from bzrlib.tests.TestUtil import TestLoader, TestSuite
def test_suite():
- module_names = [
- 'bzrlib.plugins.fastimport.tests.test_branch_mapper',
- 'bzrlib.plugins.fastimport.tests.test_commands',
- 'bzrlib.plugins.fastimport.tests.test_errors',
- 'bzrlib.plugins.fastimport.tests.test_filter_processor',
- 'bzrlib.plugins.fastimport.tests.test_generic_processor',
- 'bzrlib.plugins.fastimport.tests.test_head_tracking',
- 'bzrlib.plugins.fastimport.tests.test_helpers',
- 'bzrlib.plugins.fastimport.tests.test_parser',
- ]
+ module_names = [__name__ + '.' + x for x in [
+ 'test_branch_mapper',
+ 'test_commands',
+ 'test_errors',
+ 'test_filter_processor',
+ 'test_generic_processor',
+ 'test_head_tracking',
+ 'test_helpers',
+ 'test_parser',
+ 'test_revision_store',
+ ]]
loader = TestLoader()
return loader.loadTestsFromModuleNames(module_names)
diff --git a/tests/test_branch_mapper.py b/tests/test_branch_mapper.py
index 0a50eec..00450c9 100644
--- a/tests/test_branch_mapper.py
+++ b/tests/test_branch_mapper.py
@@ -27,62 +27,38 @@ class TestBranchMapper(tests.TestCase):
def test_git_to_bzr(self):
m = branch_mapper.BranchMapper()
- git_refs = [
- 'refs/heads/master',
- 'refs/heads/foo',
- 'refs/tags/master',
- 'refs/tags/foo',
- 'refs/remotes/origin/master',
- 'refs/remotes/origin/foo',
- ]
- git_to_bzr_map = m.git_to_bzr(git_refs)
- self.assertEqual(git_to_bzr_map, {
+ for git, bzr in {
'refs/heads/master': 'trunk',
'refs/heads/foo': 'foo',
'refs/tags/master': 'trunk.tag',
'refs/tags/foo': 'foo.tag',
'refs/remotes/origin/master': 'trunk.remote',
'refs/remotes/origin/foo': 'foo.remote',
- })
+ }.items():
+ self.assertEqual(m.git_to_bzr(git), bzr)
def test_git_to_bzr_with_slashes(self):
m = branch_mapper.BranchMapper()
- git_refs = [
- 'refs/heads/master/slave',
- 'refs/heads/foo/bar',
- 'refs/tags/master/slave',
- 'refs/tags/foo/bar',
- 'refs/remotes/origin/master/slave',
- 'refs/remotes/origin/foo/bar',
- ]
- git_to_bzr_map = m.git_to_bzr(git_refs)
- self.assertEqual(git_to_bzr_map, {
+ for git, bzr in {
'refs/heads/master/slave': 'master/slave',
'refs/heads/foo/bar': 'foo/bar',
'refs/tags/master/slave': 'master/slave.tag',
'refs/tags/foo/bar': 'foo/bar.tag',
'refs/remotes/origin/master/slave': 'master/slave.remote',
'refs/remotes/origin/foo/bar': 'foo/bar.remote',
- })
+ }.items():
+ self.assertEqual(m.git_to_bzr(git), bzr)
def test_git_to_bzr_for_trunk(self):
# As 'master' in git is mapped to trunk in bzr, we need to handle
# 'trunk' in git in a sensible way.
m = branch_mapper.BranchMapper()
- git_refs = [
- 'refs/heads/trunk',
- 'refs/tags/trunk',
- 'refs/remotes/origin/trunk',
- 'refs/heads/git-trunk',
- 'refs/tags/git-trunk',
- 'refs/remotes/origin/git-trunk',
- ]
- git_to_bzr_map = m.git_to_bzr(git_refs)
- self.assertEqual(git_to_bzr_map, {
+ for git, bzr in {
'refs/heads/trunk': 'git-trunk',
'refs/tags/trunk': 'git-trunk.tag',
'refs/remotes/origin/trunk': 'git-trunk.remote',
'refs/heads/git-trunk': 'git-git-trunk',
'refs/tags/git-trunk': 'git-git-trunk.tag',
'refs/remotes/origin/git-trunk':'git-git-trunk.remote',
- })
+ }.items():
+ self.assertEqual(m.git_to_bzr(git), bzr)
diff --git a/tests/test_revision_store.py b/tests/test_revision_store.py
new file mode 100644
index 0000000..d850c95
--- /dev/null
+++ b/tests/test_revision_store.py
@@ -0,0 +1,147 @@
+# Copyright (C) 2008, 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Direct tests of the revision_store classes."""
+
+from bzrlib import (
+ branch,
+ errors,
+ inventory,
+ osutils,
+ tests,
+ )
+
+from bzrlib.plugins.fastimport import (
+ revision_store,
+ )
+
+
+class Test_TreeShim(tests.TestCase):
+
+ def invAddEntry(self, inv, path, file_id=None):
+ if path.endswith('/'):
+ path = path[:-1]
+ kind = 'directory'
+ else:
+ kind = 'file'
+ parent_path, basename = osutils.split(path)
+ parent_id = inv.path2id(parent_path)
+ inv.add(inventory.make_entry(kind, basename, parent_id, file_id))
+
+ def make_trivial_basis_inv(self):
+ basis_inv = inventory.Inventory('TREE_ROOT')
+ self.invAddEntry(basis_inv, 'foo', 'foo-id')
+ self.invAddEntry(basis_inv, 'bar/', 'bar-id')
+ self.invAddEntry(basis_inv, 'bar/baz', 'baz-id')
+ return basis_inv
+
+ def test_id2path_no_delta(self):
+ basis_inv = self.make_trivial_basis_inv()
+ shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv,
+ inv_delta=[], content_provider=None)
+ self.assertEqual('', shim.id2path('TREE_ROOT'))
+ self.assertEqual('foo', shim.id2path('foo-id'))
+ self.assertEqual('bar', shim.id2path('bar-id'))
+ self.assertEqual('bar/baz', shim.id2path('baz-id'))
+ self.assertRaises(errors.NoSuchId, shim.id2path, 'qux-id')
+
+ def test_id2path_with_delta(self):
+ basis_inv = self.make_trivial_basis_inv()
+ foo_entry = inventory.make_entry('file', 'foo2', 'TREE_ROOT', 'foo-id')
+ inv_delta = [('foo', 'foo2', 'foo-id', foo_entry),
+ ('bar/baz', None, 'baz-id', None),
+ ]
+
+ shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv,
+ inv_delta=inv_delta,
+ content_provider=None)
+ self.assertEqual('', shim.id2path('TREE_ROOT'))
+ self.assertEqual('foo2', shim.id2path('foo-id'))
+ self.assertEqual('bar', shim.id2path('bar-id'))
+ self.assertRaises(errors.NoSuchId, shim.id2path, 'baz-id')
+
+ def test_path2id(self):
+ basis_inv = self.make_trivial_basis_inv()
+ shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv,
+ inv_delta=[], content_provider=None)
+ self.assertEqual('TREE_ROOT', shim.path2id(''))
+ # We don't want to ever give a wrong value, so for now we just raise
+ # NotImplementedError
+ self.assertRaises(NotImplementedError, shim.path2id, 'bar')
+
+ def test_get_file_with_stat_content_in_stream(self):
+ basis_inv = self.make_trivial_basis_inv()
+
+ def content_provider(file_id):
+ return 'content of\n' + file_id + '\n'
+
+ shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv,
+ inv_delta=[],
+ content_provider=content_provider)
+ f_obj, stat_val = shim.get_file_with_stat('baz-id')
+ self.assertIs(None, stat_val)
+ self.assertEqualDiff('content of\nbaz-id\n', f_obj.read())
+
+ # TODO: Test when the content isn't in the stream, and we fall back to the
+ # repository that was passed in
+
+ def test_get_symlink_target(self):
+ basis_inv = self.make_trivial_basis_inv()
+ ie = inventory.make_entry('symlink', 'link', 'TREE_ROOT', 'link-id')
+ ie.symlink_target = u'link-target'
+ basis_inv.add(ie)
+ shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv,
+ inv_delta=[], content_provider=None)
+ self.assertEqual(u'link-target', shim.get_symlink_target('link-id'))
+
+ def test_get_symlink_target_from_delta(self):
+ basis_inv = self.make_trivial_basis_inv()
+ ie = inventory.make_entry('symlink', 'link', 'TREE_ROOT', 'link-id')
+ ie.symlink_target = u'link-target'
+ inv_delta = [(None, 'link', 'link-id', ie)]
+ shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv,
+ inv_delta=inv_delta,
+ content_provider=None)
+ self.assertEqual(u'link-target', shim.get_symlink_target('link-id'))
+
+ def test__delta_to_iter_changes(self):
+ basis_inv = self.make_trivial_basis_inv()
+ foo_entry = inventory.make_entry('file', 'foo2', 'bar-id', 'foo-id')
+ link_entry = inventory.make_entry('symlink', 'link', 'TREE_ROOT',
+ 'link-id')
+ link_entry.symlink_target = u'link-target'
+ inv_delta = [('foo', 'bar/foo2', 'foo-id', foo_entry),
+ ('bar/baz', None, 'baz-id', None),
+ (None, 'link', 'link-id', link_entry),
+ ]
+ shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv,
+ inv_delta=inv_delta,
+ content_provider=None)
+ changes = list(shim._delta_to_iter_changes())
+ expected = [('foo-id', ('foo', 'bar/foo2'), False, (True, True),
+ ('TREE_ROOT', 'bar-id'), ('foo', 'foo2'),
+ ('file', 'file'), (False, False)),
+ ('baz-id', ('bar/baz', None), True, (True, False),
+ ('bar-id', None), ('baz', None),
+ ('file', None), (False, None)),
+ ('link-id', (None, 'link'), True, (False, True),
+ (None, 'TREE_ROOT'), (None, 'link'),
+ (None, 'symlink'), (None, False)),
+ ]
+ # from pprint import pformat
+ # self.assertEqualDiff(pformat(expected), pformat(changes))
+ self.assertEqual(expected, changes)
+