From ae7a1aa61e899be6bb90510897046d27c8d6e126 Mon Sep 17 00:00:00 2001 From: Jonathan Maw Date: Tue, 5 Oct 2021 18:45:36 +0100 Subject: Gitify raw files, storing them in an LFS repo Now, if a lorry has type 'raw-file', it'll be stored as a large file in a 'raw-file-mirrors' repo (or whatever else 'raw-file-repo' is set to) --- README.md | 13 ++++++ lorry | 68 ++++++++++++++++++++++++++-- lorry.raw-file-importer | 118 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 195 insertions(+), 4 deletions(-) create mode 100755 lorry.raw-file-importer diff --git a/README.md b/README.md index a3e4905..d990196 100644 --- a/README.md +++ b/README.md @@ -282,6 +282,19 @@ will be tagged as 'bkai00mp.ttf') } } +### Raw File + +Lorry can store raw files in a git LFS repository, which may allow your git +server to serve those files via its repository browser. + +All raw files are stored in a single repository under subpaths generated from the leading components of the lorry's name. + + { + "pub/gnu/ccrtp": { + "type": "raw-file", + "url": "ftp://ftp.gnu.org/pub/gnu/ccrtp/ccrtp-2.1.2.tar.gz" + } + } Tips ---- diff --git a/lorry b/lorry index 0e047db..e539aba 100755 --- a/lorry +++ b/lorry @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (C) 2011-2020 Codethink Limited +# Copyright (C) 2011-2021 Codethink Limited # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -237,6 +237,9 @@ class Lorry(cliapp.Application): self.settings.boolean(['check-certificates'], 'validate SSL/TLS server certificates', default=True) + self.settings.string(['raw-file-repo'], + 'Repository to store raw file mirrors in', + default='raw-file-mirrors') def process_args(self, args): status = 0 @@ -343,6 +346,7 @@ class Lorry(cliapp.Application): 'git': self.mirror_git, 'hg': self.gitify_hg, 'svn': self.gitify_svn, + 'raw-file': self.gitify_raw_file, 'tarball': functools.partial(self.gitify_archive, 'tar'), 'zip': functools.partial(self.gitify_archive, 'zip'), 'gzip': functools.partial(self.gitify_archive, 'gzip') @@ -350,7 +354,11 @@ class Lorry(cliapp.Application): vcstype = spec['type'] if vcstype not in table: raise cliapp.AppException('Unknown VCS type %s' % vcstype) - dirname = self.dirname(name) + if vcstype == 'raw-file': + # raw files all get added to a single repository. + dirname = self.dirname(self.settings['raw-file-repo']) + else: + dirname = self.dirname(name) if not os.path.exists(dirname): os.mkdir(dirname) @@ -402,10 +410,15 @@ class Lorry(cliapp.Application): if not self.settings['pull-only']: if len(self.settings['mirror-base-url-push']) > 0: + if vcstype == 'raw-file': + repo_name = self.settings['raw-file-repo'] + else: + repo_name = name + if 'refspecs' in spec: - self.push_to_mirror_server(name, active_repo, spec['refspecs']) + self.push_to_mirror_server(repo_name, active_repo, spec['refspecs']) else: - self.push_to_mirror_server(name, active_repo) + self.push_to_mirror_server(repo_name, active_repo) def migrate_oldstyle_repos(self, dirname): # Migrate old-style active repository @@ -792,6 +805,53 @@ class Lorry(cliapp.Application): *plugin_options], cwd=gitdir) + def gitify_raw_file(self, project_name, dirname, gitdir, spec): + raw_file_branch = 'master' + raw_file_refspecs = 'refs/heads/{branch}:refs/heads/{branch}'.format(branch=raw_file_branch) + # Fetch the file + url = spec['url'] + url_path = urllib.parse.urlparse(url)[2] + basename = os.path.basename(url_path) + file_dest = os.path.join(dirname, basename) + self.progress('.. checking if we need to fetch %s' % basename) + if file_missing_or_empty(file_dest): + self.progress('.. attempting to fetch.') + try: + with open(file_dest, 'wb') as raw_file: + urlfile = urllib.request.urlopen(spec['url']) + raw_file.write(urlfile.read()) + try: + # HTTP dates use (one of) the email date formats + url_date = email.utils.mktime_tz( + email.utils.parsedate_tz( + urlfile.info()['Last-Modified'])) + except (KeyError, ValueError, TypeError): + url_date = None + urlfile.close() + if url_date: + os.utime(file_dest, (url_date, url_date)) + except Exception: + if os.path.exists(file_dest): + os.unlink(file_dest) + raise + else: + self.progress('.. no need to run, nothing to do') + return + + self.ensure_gitdir(gitdir) + # Ensure the repo is up-to-date + pullurl = "%s/%s.git" % (self.settings['mirror-base-url-push'], self.settings['raw-file-repo']) + try: + self.run_program(['git', 'fetch', pullurl, raw_file_refspecs], cwd=gitdir) + except: + # TODO: Be more specific about which exceptions are fine + pass + + # Ensure the repo supports git LFS + self.run_program(['git', 'lfs', 'install'], cwd=gitdir) + + self.run_program(["%s.raw-file-importer" % lorry_path, file_dest, project_name], cwd=gitdir) + def gitify_archive(self, archive_type, project_name, dirname, gitdir, spec): assert archive_type in ['zip', 'gzip', 'tar'] diff --git a/lorry.raw-file-importer b/lorry.raw-file-importer new file mode 100755 index 0000000..1203e20 --- /dev/null +++ b/lorry.raw-file-importer @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 + +## Copyright 2021 Codethink Limited + +# raw file archive using git-lfs frontend for git-fast-import + +import hashlib +import os +import shutil +import subprocess +import sys +import time + +branch_name = 'master' +branch_ref = 'refs/heads/%s' % branch_name +committer_name = 'Lorry Raw File Importer' +committer_email = 'lorry-raw-file-importer@lorry' + +def commit_lfs_gitattributes(fast_import): + commit_time = int(time.time()) + commit = ( + 'commit {ref}\n' + 'committer {committer_name} <{committer_email}> {commit_time} +0000\n' + 'data < {commit_time} +0000\n' + 'data < 3: + print('usage:', sys.argv[0], '', '[]') + sys.exit(1) + + raw_file = sys.argv[1] + if len(sys.argv) < 3: + relpath = None + else: + relpath = sys.argv[2] + + last_commit = get_last_commit() + with subprocess.Popen('git fast-import --quiet', shell=True, + stdin=subprocess.PIPE) as import_proc: + if not last_commit: + commit_lfs_gitattributes(import_proc.stdin) + + commit_lfs_file(raw_file, relpath, last_commit, import_proc.stdin) + import_proc.stdin.close() + if import_proc.wait() != 0: + sys.exit(1) + + +main() -- cgit v1.2.1