From 02fe09a21d1548b901fede6f90434c6455cc675b Mon Sep 17 00:00:00 2001 From: Oleksandr Usov Date: Wed, 12 Oct 2011 11:24:29 +0100 Subject: Add function to rewrite refnames & tests for it --- exporter.py | 37 ++++++++++++++++++++++++++++++++++++- tests/test_exporter.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/exporter.py b/exporter.py index 78b5dba..e67a621 100644 --- a/exporter.py +++ b/exporter.py @@ -46,7 +46,7 @@ # set new_git_branch to the previously used name) from email.Utils import parseaddr -import sys, time +import sys, time, re import bzrlib.branch import bzrlib.revision @@ -111,7 +111,42 @@ def check_ref_format(refname): return False return True +def sanitize_ref_name_for_git(name_dict, refname): + """Rewrite refname so that it will be accepted by git-fast-import. + For the detailed rules see check_ref_format. + By rewriting the refname we are breaking uniqueness guarantees provided by bzr + so we have to manually + verify that resulting ref names are unique. + + :param name_dict: additional dictionary used to enforce uniqueness of resulting refname's + :param refname: refname to rewrite + :return: new refname + """ + newRefname = re.sub( + # '/.' in refname or startswith '.' + r"/\.|^\." + # '..' in refname + r"|\.\." + # ord(c) < 040 + r"|[" + "".join([chr(x) for x in range(040)]) + r"]" + # c in '\177 ~^:?*[' + r"|[\177 ~^:?*[]" + # last char in "/." 
+ r"|[/.]$" + # endswith '.lock' + r"|.lock$" + # "@{" in refname + r"|@{" + # "\\" in refname + r"|\\", + "_", refname) + idx = name_dict.get(newRefname, 1) + name_dict[newRefname] = idx + 1 + if idx != 1: + # append index to the resulting refname if it's not unique + newRefname += "_" + str(idx) + return newRefname class BzrFastExporter(object): diff --git a/tests/test_exporter.py b/tests/test_exporter.py index c2a8442..957945f 100644 --- a/tests/test_exporter.py +++ b/tests/test_exporter.py @@ -24,6 +24,7 @@ from bzrlib import tests from bzrlib.plugins.fastimport.exporter import ( _get_output_stream, check_ref_format, + sanitize_ref_name_for_git ) from bzrlib.plugins.fastimport.tests import ( @@ -79,11 +80,55 @@ class CheckRefFormatTests(tests.TestCase): def test_invalid(self): self.assertFalse(check_ref_format('foo')) + self.assertFalse(check_ref_format('foo/.bar')) self.assertFalse(check_ref_format('heads/foo/')) + self.assertFalse(check_ref_format('heads/foo.')) self.assertFalse(check_ref_format('./foo')) self.assertFalse(check_ref_format('.refs/foo')) self.assertFalse(check_ref_format('heads/foo..bar')) self.assertFalse(check_ref_format('heads/foo?bar')) self.assertFalse(check_ref_format('heads/foo.lock')) self.assertFalse(check_ref_format('heads/v@{ation')) + self.assertFalse(check_ref_format('heads/foo\\bar')) self.assertFalse(check_ref_format('heads/foo\bar')) + self.assertFalse(check_ref_format('heads/foo bar')) + self.assertFalse(check_ref_format('heads/foo\020bar')) + self.assertFalse(check_ref_format('heads/foo\177bar')) + +class CheckRefnameRewriting(tests.TestCase): + """Tests for sanitize_ref_name_for_git function""" + + def test_passthrough_valid(self): + self.assertEqual(sanitize_ref_name_for_git(dict(), 'heads/foo'), 'heads/foo') + self.assertEqual(sanitize_ref_name_for_git(dict(), 'foo/bar/baz'), 'foo/bar/baz') + self.assertEqual(sanitize_ref_name_for_git(dict(), 'refs///heads/foo'), 'refs///heads/foo') + 
self.assertEqual(sanitize_ref_name_for_git(dict(), 'foo./bar'), 'foo./bar') + self.assertEqual(sanitize_ref_name_for_git(dict(), 'heads/foo@bar'), 'heads/foo@bar') + self.assertEqual(sanitize_ref_name_for_git(dict(), 'heads/fix.lock.error'), 'heads/fix.lock.error') + + def test_rewrite_to_unique_names(self): + self.assertEqual(sanitize_ref_name_for_git(dict(), 'heads/foo/'), 'heads/foo_') + # check that with persistent dictionary we generate unique names on each invocation + q = dict() + self.assertNotEqual( + sanitize_ref_name_for_git(q, 'heads/foo/'), + sanitize_ref_name_for_git(q, 'heads/foo/')) + self.assertNotEqual( + sanitize_ref_name_for_git(q, 'heads/foo/'), + sanitize_ref_name_for_git(q, 'heads/foo/')) + + def test_rewrite_invalid(self): + self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'foo./bar'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo/'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo.'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), './foo'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), '.refs/foo'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo..bar'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo?bar'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo.lock'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/v@{ation'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo\bar'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo\\bar'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo bar'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo\020bar'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo\177bar'))) -- cgit v1.2.1 From 
503d62deb715d7a14d8c12073c22180f087f828b Mon Sep 17 00:00:00 2001 From: Oleksandr Usov Date: Wed, 12 Oct 2011 12:20:53 +0100 Subject: Rewrite tag names when exporting plain stream --- cmds.py | 12 ++++++++++-- exporter.py | 20 ++++++++++++++++---- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/cmds.py b/cmds.py index 537e0f0..362113e 100644 --- a/cmds.py +++ b/cmds.py @@ -578,6 +578,10 @@ class cmd_fast_export(Command): future once the feature names and definitions are formally agreed to by the broader fast-import developer community. + Git has much stricter naming rules for tags and fast-export --plain + will skip tags which can't be imported into git. If you want to rename + these tags use --rewrite-tag-names. + :Examples: To produce data destined for import into Bazaar:: @@ -624,12 +628,15 @@ class cmd_fast_export(Command): Option('plain', help="Exclude metadata to maximise interoperability." ), + Option('rewrite-tag-names', + help="Rewrite invalid tag names in plain mode." 
+ ), ] encoding_type = 'exact' def run(self, source, destination=None, verbose=False, git_branch="master", checkpoint=10000, marks=None, import_marks=None, export_marks=None, revision=None, - plain=True): + plain=True, rewrite_tag_names=False): load_fastimport() from bzrlib.plugins.fastimport import exporter @@ -639,7 +646,8 @@ class cmd_fast_export(Command): destination=destination, git_branch=git_branch, checkpoint=checkpoint, import_marks_file=import_marks, export_marks_file=export_marks, - revision=revision, verbose=verbose, plain_format=plain) + revision=revision, verbose=verbose, plain_format=plain, + rewrite_tags=rewrite_tag_names) return exporter.run() diff --git a/exporter.py b/exporter.py index e67a621..fd1de00 100644 --- a/exporter.py +++ b/exporter.py @@ -152,13 +152,17 @@ class BzrFastExporter(object): def __init__(self, source, destination, git_branch=None, checkpoint=-1, import_marks_file=None, export_marks_file=None, revision=None, - verbose=False, plain_format=False): + verbose=False, plain_format=False, rewrite_tags=False): """Export branch data in fast import format. :param plain_format: if True, 'classic' fast-import format is used without any extended features; if False, the generated data is richer and includes information like multiple authors, revision properties, etc. + + :param rewrite_tags: if True tag names will be rewritten to be + git-compatible. Otherwise tags which aren't valid for git will + be skipped. 
""" self.source = source self.outf = _get_output_stream(destination) @@ -169,6 +173,8 @@ class BzrFastExporter(object): self.revision = revision self.excluded_revisions = set() self.plain_format = plain_format + self.rewrite_tags = rewrite_tags + self.rewrite_dict = dict() self._multi_author_api_available = hasattr(bzrlib.revision.Revision, 'get_apparent_authors') self.properties_to_exclude = ['authors', 'author'] @@ -594,9 +600,15 @@ class BzrFastExporter(object): else: git_ref = 'refs/tags/%s' % tag.encode("utf-8") if self.plain_format and not check_ref_format(git_ref): - self.warning('not creating tag %r as its name would not be ' - 'valid in git.', git_ref) - continue + if self.rewrite_tags: + new_ref = sanitize_ref_name_for_git(self.rewrite_dict, git_ref) + self.warning('tag %r is exported as %r to be valid in git.', + git_ref, new_ref) + git_ref = new_ref + else: + self.warning('not creating tag %r as its name would not be ' + 'valid in git.', git_ref) + continue self.print_cmd(commands.ResetCommand(git_ref, ":" + str(mark))) def _next_tmp_branch_name(self): -- cgit v1.2.1 From f43bc2eb6afdf5f4ca21cd9b5f914588693d518b Mon Sep 17 00:00:00 2001 From: Oleksandr Usov Date: Mon, 17 Oct 2011 11:25:55 +0100 Subject: Implement comments from patch review: - style fixes - add integration test for --rewrite-tag-names - removed rewrite_dict as we can't really guarantee uniqueness of tag names. --- exporter.py | 18 ++++++----------- tests/test_commands.py | 19 ++++++++++++++++++ tests/test_exporter.py | 54 ++++++++++++++++++++------------------------------ 3 files changed, 47 insertions(+), 44 deletions(-) diff --git a/exporter.py b/exporter.py index fd1de00..cad3241 100644 --- a/exporter.py +++ b/exporter.py @@ -111,7 +111,8 @@ def check_ref_format(refname): return False return True -def sanitize_ref_name_for_git(name_dict, refname): + +def sanitize_ref_name_for_git(refname): """Rewrite refname so that it will be accepted by git-fast-import. 
For the detailed rules see check_ref_format. @@ -119,11 +120,10 @@ def sanitize_ref_name_for_git(name_dict, refname): so we have to manually verify that resulting ref names are unique. - :param name_dict: additional dictionary used to enforce uniqueness of resulting refname's :param refname: refname to rewrite :return: new refname """ - newRefname = re.sub( + new_refname = re.sub( # '/.' in refname or startswith '.' r"/\.|^\." # '..' in refname @@ -141,12 +141,7 @@ def sanitize_ref_name_for_git(name_dict, refname): # "\\" in refname r"|\\", "_", refname) - idx = name_dict.get(newRefname, 1) - name_dict[newRefname] = idx + 1 - if idx != 1: - # append index to the resulting refname if it's not unique - newRefname += "_" + str(idx) - return newRefname + return new_refname class BzrFastExporter(object): @@ -174,7 +169,6 @@ class BzrFastExporter(object): self.excluded_revisions = set() self.plain_format = plain_format self.rewrite_tags = rewrite_tags - self.rewrite_dict = dict() self._multi_author_api_available = hasattr(bzrlib.revision.Revision, 'get_apparent_authors') self.properties_to_exclude = ['authors', 'author'] @@ -601,11 +595,11 @@ class BzrFastExporter(object): git_ref = 'refs/tags/%s' % tag.encode("utf-8") if self.plain_format and not check_ref_format(git_ref): if self.rewrite_tags: - new_ref = sanitize_ref_name_for_git(self.rewrite_dict, git_ref) + new_ref = sanitize_ref_name_for_git(git_ref) self.warning('tag %r is exported as %r to be valid in git.', git_ref, new_ref) git_ref = new_ref - else: + else: self.warning('not creating tag %r as its name would not be ' 'valid in git.', git_ref) continue diff --git a/tests/test_commands.py b/tests/test_commands.py index 282cfc3..5729660 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -80,6 +80,25 @@ class TestFastExport(ExternalBase): except AttributeError: # bzr < 2.4 self.failUnlessExists("br.fi") + def test_tag_rewriting(self): + tree = self.make_branch_and_tree("br") + 
tree.commit("pointless") + self.assertTrue(tree.branch.supports_tags()) + rev_id = tree.branch.dotted_revno_to_revision_id((1,)) + tree.branch.tags.set_tag("goodTag", rev_id) + tree.branch.tags.set_tag("bad Tag", rev_id) + + # first check --no-rewrite-tag-names + data = self.run_bzr("fast-export --plain --no-rewrite-tag-names br")[0] + self.assertNotEqual(-1, data.find("reset refs/tags/goodTag")) + self.assertEqual(data.find("reset refs/tags/"), data.rfind("reset refs/tags/")) + + # and now with --rewrite-tag-names + data = self.run_bzr("fast-export --plain --rewrite-tag-names br")[0] + self.assertNotEqual(-1, data.find("reset refs/tags/goodTag")) + # "bad Tag" should be exported as bad_Tag + self.assertNotEqual(-1, data.find("reset refs/tags/bad_Tag")) + simple_fast_import_stream = """commit refs/heads/master mark :1 diff --git a/tests/test_exporter.py b/tests/test_exporter.py index 957945f..f1c9530 100644 --- a/tests/test_exporter.py +++ b/tests/test_exporter.py @@ -95,40 +95,30 @@ class CheckRefFormatTests(tests.TestCase): self.assertFalse(check_ref_format('heads/foo\020bar')) self.assertFalse(check_ref_format('heads/foo\177bar')) + class CheckRefnameRewriting(tests.TestCase): """Tests for sanitize_ref_name_for_git function""" def test_passthrough_valid(self): - self.assertEqual(sanitize_ref_name_for_git(dict(), 'heads/foo'), 'heads/foo') - self.assertEqual(sanitize_ref_name_for_git(dict(), 'foo/bar/baz'), 'foo/bar/baz') - self.assertEqual(sanitize_ref_name_for_git(dict(), 'refs///heads/foo'), 'refs///heads/foo') - self.assertEqual(sanitize_ref_name_for_git(dict(), 'foo./bar'), 'foo./bar') - self.assertEqual(sanitize_ref_name_for_git(dict(), 'heads/foo@bar'), 'heads/foo@bar') - self.assertEqual(sanitize_ref_name_for_git(dict(), 'heads/fix.lock.error'), 'heads/fix.lock.error') - - def test_rewrite_to_unique_names(self): - self.assertEqual(sanitize_ref_name_for_git(dict(), 'heads/foo/'), 'heads/foo_') - # check that with persistent dictionary we generate unique 
names on each invocation - q = dict() - self.assertNotEqual( - sanitize_ref_name_for_git(q, 'heads/foo/'), - sanitize_ref_name_for_git(q, 'heads/foo/')) - self.assertNotEqual( - sanitize_ref_name_for_git(q, 'heads/foo/'), - sanitize_ref_name_for_git(q, 'heads/foo/')) - + self.assertEqual(sanitize_ref_name_for_git('heads/foo'), 'heads/foo') + self.assertEqual(sanitize_ref_name_for_git('foo/bar/baz'), 'foo/bar/baz') + self.assertEqual(sanitize_ref_name_for_git('refs///heads/foo'), 'refs///heads/foo') + self.assertEqual(sanitize_ref_name_for_git('foo./bar'), 'foo./bar') + self.assertEqual(sanitize_ref_name_for_git('heads/foo@bar'), 'heads/foo@bar') + self.assertEqual(sanitize_ref_name_for_git('heads/fix.lock.error'), 'heads/fix.lock.error') + def test_rewrite_invalid(self): - self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'foo./bar'))) - self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo/'))) - self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo.'))) - self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), './foo'))) - self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), '.refs/foo'))) - self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo..bar'))) - self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo?bar'))) - self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo.lock'))) - self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/v@{ation'))) - self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo\bar'))) - self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo\\bar'))) - self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo bar'))) - self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 'heads/foo\020bar'))) - self.assertTrue(check_ref_format(sanitize_ref_name_for_git(dict(), 
'heads/foo\177bar'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git('foo./bar'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo/'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo.'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git('./foo'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git('.refs/foo'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo..bar'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo?bar'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo.lock'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/v@{ation'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\bar'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\\bar'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo bar'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\020bar'))) + self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\177bar'))) -- cgit v1.2.1