summaryrefslogtreecommitdiff
path: root/lorry
diff options
context:
space:
mode:
author Ben Hutchings <ben.hutchings@codethink.co.uk> 2020-08-04 21:26:28 +0100
committer Ben Hutchings <ben.hutchings@codethink.co.uk> 2020-08-05 18:06:36 +0100
commit 9d8c35bfa8a717c656a78c636c5a4600e5837ae7 (patch)
tree 3c19de4abf241615e4dd80b068cdfd9fd9278670 /lorry
parent a934314cbdc56e30c77f554cd21772e5fbd306c3 (diff)
download lorry-9d8c35bfa8a717c656a78c636c5a4600e5837ae7.tar.gz
lorry: Prune unreachable commits from hg-fast-export marks file
By default, hg-fast-export refuses to export a Mercurial repository if it has unnamed heads. lorry passes the --force option, which overrides this, since we still want to convert the named heads, but doing this sets us up for failure later. If the unnamed heads also have no tags pointing to them, the corresponding git commits may be deleted by 'git gc'. However, they will still be listed in the 'marks' file used to record state for incremental conversions, so a later run of hg-fast-export may assume they exist and refer to them by hash. Before running hg-fast-export, delete any lines in its marks file that refer to unreachable or non-existent commits. Closes #7.
Diffstat (limited to 'lorry')
-rwxr-xr-x lorry 53
1 files changed, 52 insertions, 1 deletions
diff --git a/lorry b/lorry
index 955b46b..c2019d4 100755
--- a/lorry
+++ b/lorry
@@ -31,6 +31,8 @@ import email.message
import email.utils
import ftplib
import re
+import subprocess
+import tempfile
import yaml
@@ -621,7 +623,13 @@ class Lorry(cliapp.Application):
if not os.path.exists(gitdir):
self.needs_aggressive = True
self.run_program(['git', 'init', '--bare', gitdir])
-
+
+ # Since there are marks files in existing deployments that
+ # have broken references, fix up the marks file before rather
+ # than after running hg-fast-export
+ self.prune_unreachable_marks(gitdir,
+ os.path.join(gitdir, 'hg2git-marks'))
+
self.progress('.. fast-exporting into git')
self.run_program(['hg-fast-export', '-r', '../hg', '--quiet', '--force'],
cwd=gitdir)
@@ -718,6 +726,49 @@ class Lorry(cliapp.Application):
if self.settings['verbose']:
self.output.write('%s\n' % msg)
+    def prune_unreachable_marks(self, gitdir, marks_name):
+        '''Remove marks that refer to commits git can no longer reach.
+
+        Each line of the marks file pairs a mark with a git commit ID.
+        Lines whose commit ID is not listed by 'git rev-list --all'
+        are dropped; lines that cannot be parsed are kept, with a
+        warning.
+
+        gitdir is the git repository to check against.  marks_name is
+        the path of the marks file; it is rewritten through a
+        temporary file created in the same directory and renamed over
+        it.
+
+        Does nothing if the marks file does not exist.  If
+        'git rev-list' fails, a warning is issued and the marks file
+        is left untouched.
+        '''
+
+        if not os.path.exists(marks_name):
+            return
+
+        # Find reachable commits
+        reachable = set()
+        with subprocess.Popen(['git', 'rev-list', '--all'],
+                              cwd=gitdir, stdin=subprocess.DEVNULL,
+                              stdout=subprocess.PIPE,
+                              universal_newlines=True) as rev_list_proc:
+            for line in rev_list_proc.stdout:
+                reachable.add(line.rstrip('\n'))
+
+        # Leaving the 'with' block waits for git to exit.  If it
+        # failed, the set is empty or incomplete, and pruning against
+        # it would discard marks for commits that still exist; err on
+        # the side of caution and keep the marks file as it is.
+        if rev_list_proc.returncode != 0:
+            msg = ('%s: not pruning; git rev-list exited with status %d'
+                   % (marks_name, rev_list_proc.returncode))
+            logging.warning(msg)
+            self.output.write('%s\n' % msg)
+            return
+
+        # Filter marks file to temporary file, created alongside the
+        # marks file so the final rename stays within one directory
+        mark_re = re.compile(r':(\S+) ([0-9a-f]{40,})\n')
+        marks_temp_fd, marks_temp_name = \
+            tempfile.mkstemp(dir=os.path.dirname(marks_name))
+        try:
+            with open(marks_temp_fd, 'w') as marks_out, \
+                 open(marks_name, 'r') as marks_in:
+                for line in marks_in:
+                    match = mark_re.match(line)
+                    if not match:
+                        msg = ('%s: failed to parse line "%s"'
+                               % (marks_name, line.rstrip('\n')))
+                        logging.warning(msg)
+                        self.output.write('%s\n' % msg)
+                        # We don't know whether it should be kept; err
+                        # on the side of caution
+                        marks_out.write(line)
+                    elif match.group(2) in reachable:
+                        marks_out.write(line)
+                    else:
+                        self.progress('%s: pruning unreachable commit %s'
+                                      % (marks_name, match.group(2)))
+
+            # On success, replace marks file with temporary file
+            os.rename(marks_temp_name, marks_name)
+        except BaseException:
+            # On failure (including interruption), delete the
+            # temporary file and let the error propagate
+            os.unlink(marks_temp_name)
+            raise
+
if __name__ == '__main__':
Lorry(version=__version__).run()