1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
#!/usr/bin/env python3
## Copyright 2021 Codethink Limited
# raw file archive using git-lfs frontend for git-fast-import
import hashlib
import os
import shutil
import subprocess
import sys
import time
# Branch that receives every imported file, and its fully qualified ref name.
branch_name = 'master'
branch_ref = 'refs/heads/{}'.format(branch_name)

# Identity recorded on the "committer" line of each generated commit.
committer_name = 'Lorry Raw File Importer'
committer_email = 'lorry-raw-file-importer@lorry'
def commit_lfs_gitattributes(fast_import):
    """Write the bootstrap commit that adds an LFS ``.gitattributes`` file.

    The emitted fast-import ``commit`` command targets ``branch_ref`` and
    creates ``.gitattributes`` with two lines: the first applies the ``lfs``
    filter/diff/merge drivers to every path, the second sets different
    attributes for ``.gitattributes`` itself (presumably so that the
    attributes file is not itself turned into an LFS pointer).

    Args:
        fast_import: Writable binary stream; receives the UTF-8 encoded
            command text in a single ``write`` call.
    """
    template = (
        'commit {ref}\n'
        'committer {committer_name} <{committer_email}> {commit_time} +0000\n'
        'data <<EOM\n'
        'Ensure LFS is configured\n'
        'EOM\n'
        'M 100644 inline .gitattributes\n'
        'data <<EOM\n'
        '* filter=lfs diff=lfs merge=lfs -text\n'
        '.gitattributes filter diff merge text=auto\n'
        'EOM\n'
        '\n'
    )
    fields = {
        'ref': branch_ref,
        'committer_name': committer_name,
        'committer_email': committer_email,
        # Commits are stamped with the current wall-clock time, in seconds.
        'commit_time': int(time.time()),
    }
    fast_import.write(template.format(**fields).encode('utf-8'))
def commit_lfs_file(raw_file, relative_path, last_commit, fast_import):
    """Store *raw_file* as a Git LFS object and commit its pointer.

    Steps, in order:

    1. ``git lfs pointer --file`` generates the pointer text for the file.
    2. The file is copied to ``lfs/objects/<aa>/<bb>/<sha256>`` (relative to
       the current working directory), where ``<aa>``/``<bb>`` are the first
       two pairs of hex digits of the file's SHA-256.
    3. A fast-import ``commit`` command that adds the pointer at the target
       path on ``branch_ref`` is written to *fast_import*.

    Args:
        raw_file: Path of the file to import.
        relative_path: Optional directory inside the repository in which to
            place the file; a falsy value puts it at the top level.  Leading
            and trailing slashes are ignored.
        last_commit: Commit id to name as parent in a ``from`` line, or a
            falsy value to emit no ``from`` line.
        fast_import: Writable binary stream; receives the command in a single
            ``write`` call.

    Raises:
        subprocess.CalledProcessError: ``git lfs pointer`` exited non-zero.
        OSError: *raw_file* could not be read, or the object copy failed.
    """
    # git-lfs-pointer of the file; these bytes become the committed blob.
    pointer = subprocess.run(
        ['git', 'lfs', 'pointer', '--file', raw_file],
        capture_output=True, check=True).stdout

    # sha256sum of the file
    # slightly wasteful because git-lfs-pointer also generates a sha256sum.
    # Hash in fixed-size chunks: LFS content is expected to be large, so the
    # whole file must not be pulled into memory at once.
    digest = hashlib.sha256()
    with open(raw_file, 'rb') as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            digest.update(chunk)
    shasum = digest.hexdigest()

    # Add the file to the repo
    out_dir = os.path.join('lfs', 'objects', shasum[0:2], shasum[2:4])
    os.makedirs(out_dir, exist_ok=True)
    shutil.copyfile(raw_file, os.path.join(out_dir, shasum))

    # Commit the data to master
    commit_time = int(time.time())
    basename = os.path.basename(raw_file)

    # fast-import rejects paths that start or end with '/' or contain an
    # empty component, so 'dir/' must not turn into 'dir//name'.
    directory = relative_path.strip('/') if relative_path else ''
    path = '{}/{}'.format(directory, basename) if directory else basename

    fromline = 'from {}\n'.format(last_commit) if last_commit else ''

    # The message embeds the file name, so send it with an exact byte count
    # instead of a '<<EOM' delimiter that the name could accidentally contain.
    message = 'import {}\n'.format(basename).encode('utf-8')

    header = (
        'commit {ref}\n'
        'committer {committer_name} <{committer_email}> {commit_time} +0000\n'
        'data {msgsize}\n'
    ).format(
        ref=branch_ref,
        committer_name=committer_name, committer_email=committer_email,
        commit_time=commit_time,
        msgsize=len(message),
    ).encode('utf-8')

    change = (
        '{fromline}'
        'M 100644 inline {path}\n'
        'data {datasize}\n'
    ).format(
        fromline=fromline,
        path=path,
        datasize=len(pointer),
    ).encode('utf-8')

    # The pointer is written as the raw bytes git-lfs produced, so the
    # declared size always matches; the two trailing newlines terminate the
    # data section and the commit respectively.
    fast_import.write(b''.join([header, message, change, pointer, b'\n\n']))
def get_last_commit():
    """Return the commit id that ``branch_ref`` resolves to, or ``None``.

    ``None`` is returned when ``git rev-parse`` exits non-zero, or when it
    merely echoes the ref name back instead of printing a hash.
    """
    # show the full hash of the latest commit
    result = subprocess.run(
        ['git', 'rev-parse', branch_ref], capture_output=True, text=True)
    if result.returncode == 0:
        resolved = result.stdout.strip()
        # Will be same string as input when no commits exist yet
        if resolved != branch_ref:
            return resolved
    return None
def main():
    """Command-line entry point: import one raw file into the archive.

    Usage: ``<script> <file> [<relative path>]``.

    Looks up the current tip of ``branch_ref``, starts ``git fast-import``
    and feeds it the commit for the file.  When the branch has no commit
    yet, a commit adding the LFS ``.gitattributes`` is sent first.

    Exits with status 1 on a usage error or when ``git fast-import`` returns
    a non-zero status.
    """
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print('usage:', sys.argv[0], '<file>', '[<relative path>]')
        sys.exit(1)

    raw_file = sys.argv[1]
    relpath = sys.argv[2] if len(sys.argv) == 3 else None

    last_commit = get_last_commit()

    # Pass an argv list (no shell=True): the command is fixed, so a shell
    # adds nothing, and this matches the other subprocess calls in the file.
    with subprocess.Popen(['git', 'fast-import', '--quiet'],
                          stdin=subprocess.PIPE) as import_proc:
        if not last_commit:
            commit_lfs_gitattributes(import_proc.stdin)
        commit_lfs_file(raw_file, relpath, last_commit, import_proc.stdin)
        # Closing stdin signals end of input so fast-import can finish.
        import_proc.stdin.close()
        if import_proc.wait() != 0:
            sys.exit(1)
# Run the importer only when executed as a script, so that importing this
# module does not start a git fast-import as a side effect.
if __name__ == '__main__':
    main()
|