summary refs log tree commit diff
path: root/lorry.raw-file-importer
diff options
context:
space:
mode:
Diffstat (limited to 'lorry.raw-file-importer')
-rwxr-xr-x lorry.raw-file-importer 121
1 files changed, 121 insertions, 0 deletions
diff --git a/lorry.raw-file-importer b/lorry.raw-file-importer
new file mode 100755
index 0000000..bca46b5
--- /dev/null
+++ b/lorry.raw-file-importer
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+
+## Copyright 2021 Codethink Limited
+
+# Frontend for git-fast-import that archives raw files using git-lfs
+
+import hashlib
+import os
+import shutil
+import subprocess
+import sys
+import time
+
+branch_name = 'master'  # branch that receives every import commit
+branch_ref = 'refs/heads/%s' % branch_name  # full ref name used in the fast-import stream and by git rev-parse
+committer_name = 'Lorry Raw File Importer'  # committer identity written into each generated commit
+committer_email = 'lorry-raw-file-importer@lorry'  # email half of that committer identity
+
+def commit_lfs_gitattributes(fast_import):
+ commit_time = int(time.time())
+ commit = (
+ 'commit {ref}\n'
+ 'committer {committer_name} <{committer_email}> {commit_time} +0000\n'
+ 'data <<EOM\n'
+ 'Ensure LFS is configured\n'
+ 'EOM\n'
+ 'M 100644 inline .gitattributes\n'
+ 'data <<EOM\n'
+ '* filter=lfs diff=lfs merge=lfs -text\n'
+ '.gitattributes filter diff merge text=auto\n'
+ 'EOM\n'
+ '\n'
+ ).format(
+ ref=branch_ref,
+ committer_name=committer_name, committer_email=committer_email, commit_time=commit_time,
+ )
+ fast_import.write(commit.encode('utf-8'))
+
+
+def commit_lfs_file(raw_file, relative_path, last_commit, fast_import):
+ # git-lfs-pointer of the file
+ ret = subprocess.run(['git', 'lfs', 'pointer', '--file', raw_file], capture_output=True, check=True)
+ pointer_digest = ret.stdout
+ datasize = len(pointer_digest)
+
+ # sha256sum of the file
+ # slightly wasteful because git-lfs-pointer also generates a sha256sum
+ with open(raw_file, 'rb') as f:
+ shasum = hashlib.sha256(f.read()).hexdigest()
+
+ # Add the file to the repo
+ out_dir = os.path.join('lfs', 'objects', shasum[0:2], shasum[2:4])
+ os.makedirs(out_dir, exist_ok=True)
+ shutil.copyfile(raw_file, os.path.join(out_dir, shasum))
+
+ # Commit the data to master
+ commit_time = int(time.time())
+ basename = os.path.basename(raw_file)
+ if relative_path:
+ path = '{}/{}'.format(relative_path, basename)
+ else:
+ path = basename
+
+ fromline = 'from {}\n'.format(last_commit) if last_commit else ''
+
+ commit = (
+ 'commit {ref}\n'
+ 'committer {committer_name} <{committer_email}> {commit_time} +0000\n'
+ 'data <<EOM\n'
+ 'import {basename}\n'
+ 'EOM\n'
+ '{fromline}'
+ 'M 100644 inline {path}\n'
+ 'data {datasize}\n'
+ '{data}\n'
+ '\n'
+ ).format(
+ ref=branch_ref,
+ committer_name=committer_name, committer_email=committer_email, commit_time=commit_time,
+ basename=basename,
+ fromline=fromline,
+ path=path,
+ datasize=datasize, data=pointer_digest.decode('utf-8'))
+ fast_import.write(commit.encode('utf-8'))
+
+
+def get_last_commit():
+ # show the full hash of the latest commit
+ out = subprocess.run(['git', 'rev-parse', branch_ref], capture_output=True, text=True)
+ if out.returncode != 0:
+ return None
+
+ # Will be same string as input when no commits exist yet
+ out = out.stdout.strip()
+ return None if out == branch_ref else out
+
+
+def main():
+ if len(sys.argv) < 2 or len(sys.argv) > 3:
+ print('usage:', sys.argv[0], '<file>', '[<relative path>]')
+ sys.exit(1)
+
+ raw_file = sys.argv[1]
+ if len(sys.argv) < 3:
+ relpath = None
+ else:
+ relpath = sys.argv[2]
+
+ last_commit = get_last_commit()
+ with subprocess.Popen('git fast-import --quiet', shell=True,
+ stdin=subprocess.PIPE) as import_proc:
+ if not last_commit:
+ commit_lfs_gitattributes(import_proc.stdin)
+
+ commit_lfs_file(raw_file, relpath, last_commit, import_proc.stdin)
+ import_proc.stdin.close()
+ if import_proc.wait() != 0:
+ sys.exit(1)
+
+
+main()