diff options
author | Ben Hutchings <ben.hutchings@codethink.co.uk> | 2020-08-04 21:26:28 +0100 |
---|---|---|
committer | Ben Hutchings <ben.hutchings@codethink.co.uk> | 2020-08-05 18:06:36 +0100 |
commit | 9d8c35bfa8a717c656a78c636c5a4600e5837ae7 (patch) | |
tree | 3c19de4abf241615e4dd80b068cdfd9fd9278670 /lorry | |
parent | a934314cbdc56e30c77f554cd21772e5fbd306c3 (diff) | |
download | lorry-9d8c35bfa8a717c656a78c636c5a4600e5837ae7.tar.gz |
lorry: Prune unreachable commits from hg-fast-export marks file
By default, hg-fast-export refuses to export a Mercurial repository if
it has unnamed heads. lorry passes the --force option to override this
check, since we still want to convert the named heads, but doing so
sets us up for failure later.
If the unnamed heads also have no tags pointing to them, the
corresponding git commits may be deleted by 'git gc'. However, they
will still be listed in the 'marks' file used to record state for
incremental conversions, so a later run of hg-fast-export may assume
they exist and refer to them by hash.
Before running hg-fast-export, delete any lines in its marks file that
refer to unreachable or non-existent commits.
Closes #7.
Diffstat (limited to 'lorry')
-rwxr-xr-x | lorry | 53 |
1 files changed, 52 insertions, 1 deletions
@@ -31,6 +31,8 @@ import email.message
 import email.utils
 import ftplib
 import re
+import subprocess
+import tempfile
 import yaml
@@ -621,7 +623,13 @@ class Lorry(cliapp.Application):
         if not os.path.exists(gitdir):
             self.needs_aggressive = True
             self.run_program(['git', 'init', '--bare', gitdir])
-
+
+        # Since there are marks files in existing deployments that
+        # have broken references, fix up the marks file before rather
+        # than after running hg-fast-export
+        self.prune_unreachable_marks(gitdir,
+                                     os.path.join(gitdir, 'hg2git-marks'))
+
         self.progress('.. fast-exporting into git')
         self.run_program(['hg-fast-export', '-r', '../hg', '--quiet', '--force'],
                          cwd=gitdir)
@@ -718,6 +726,49 @@ class Lorry(cliapp.Application):
         if self.settings['verbose']:
             self.output.write('%s\n' % msg)
+    def prune_unreachable_marks(self, gitdir, marks_name):
+        if not os.path.exists(marks_name):
+            return
+
+        # Find reachable commits
+        reachable = set()
+        with subprocess.Popen(['git', 'rev-list', '--all'],
+                              cwd=gitdir, stdin=subprocess.DEVNULL,
+                              stdout=subprocess.PIPE,
+                              universal_newlines=True) as rev_list_proc:
+            for line in rev_list_proc.stdout:
+                reachable.add(line.rstrip('\n'))
+
+        # Filter marks file to temporary file
+        mark_re = re.compile(r':(\S+) ([0-9a-f]{40,})\n')
+        marks_temp_fd, marks_temp_name = \
+            tempfile.mkstemp(dir=os.path.dirname(marks_name))
+        try:
+            with open(marks_temp_fd, 'w') as marks_out, \
+                 open(marks_name, 'r') as marks_in:
+                for line in marks_in:
+                    match = mark_re.match(line)
+                    if not match:
+                        msg = ('%s: failed to parse line "%s"'
+                               % (marks_name, line.rstrip('\n')))
+                        logging.warning(msg)
+                        self.output.write('%s\n' % msg)
+                        # We don't know whether it should be kept; err
+                        # on the side of caution
+                        marks_out.write(line)
+                    elif match.group(2) in reachable:
+                        marks_out.write(line)
+                    else:
+                        self.progress('%s: pruning unreachable commit %s'
+                                      % (marks_name, match.group(2)))
+
+            # On success, replace marks file with temporary file
+            os.rename(marks_temp_name, marks_name)
+        except:
+            # On failure, delete temporary file
+            os.unlink(marks_temp_name)
+            raise
+
 if __name__ == '__main__':
     Lorry(version=__version__).run()