summaryrefslogtreecommitdiff
path: root/lorry.raw-file-importer
blob: caaa8c96338b6b171c8b64d5679f6a3207b74443 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python3

## Copyright 2021 Codethink Limited

# Frontend for git-fast-import that archives a raw file using git-lfs

import hashlib
import os
import shutil
import subprocess
import sys
import time

# Branch that receives the generated commits, and its fully qualified ref.
branch_name = 'master'
branch_ref = 'refs/heads/{}'.format(branch_name)

# Identity written on the "committer" line of every generated commit.
committer_name = 'Lorry Raw File Importer'
committer_email = 'lorry-raw-file-importer@lorry'

def commit_lfs_gitattributes(fast_import):
    """Write a commit that adds the LFS ``.gitattributes`` file.

    The commit targets ``branch_ref``, is stamped with the current time
    (``+0000`` offset) and carries no ``from`` line.

    Args:
        fast_import: binary writable stream receiving the fast-import
            commands (``main`` passes the stdin of ``git fast-import``).
    """
    now = int(time.time())
    lines = [
        'commit {}'.format(branch_ref),
        'committer {} <{}> {} +0000'.format(
            committer_name, committer_email, now),
        # Commit message, in delimited "data" form.
        'data <<EOM',
        'Ensure LFS is configured',
        'EOM',
        # Inline contents of .gitattributes, also in delimited form.
        'M 100644 inline .gitattributes',
        'data <<EOM',
        '* filter=lfs diff=lfs merge=lfs -text',
        '.gitattributes filter diff merge text=auto',
        'EOM',
        # Empty entry yields the blank line that ends the command.
        '',
    ]
    stream = ''.join(line + '\n' for line in lines)
    fast_import.write(stream.encode('utf-8'))


def _sha256_of_file(path, chunk_size=1024 * 1024):
    """Return the hex SHA-256 digest of the file at *path*.

    The file is read in blocks of *chunk_size* bytes so that large files
    are never held in memory in full.
    """
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def commit_lfs_file(raw_file, relative_path, last_commit, fast_import):
    """Store *raw_file* as an LFS object and commit its pointer.

    The file is copied to ``lfs/objects/<aa>/<bb>/<sha256>`` relative to
    the current working directory, and a commit on ``branch_ref`` that adds
    the output of ``git lfs pointer`` as the file's contents is written to
    *fast_import*.

    Args:
        raw_file: path of the file to import.
        relative_path: directory prefix for the committed path; when falsy
            the file is committed under its basename only.
        last_commit: commit to name on the ``from`` line; when falsy no
            ``from`` line is emitted.
        fast_import: binary writable stream receiving the fast-import
            commands (``main`` passes the stdin of ``git fast-import``).

    Raises:
        subprocess.CalledProcessError: if ``git lfs pointer`` exits
            non-zero.
        OSError: if the file cannot be read or the object cannot be copied.
    """
    # git-lfs-pointer of the file
    ret = subprocess.run(
        ['git', 'lfs', 'pointer', '--file', raw_file],
        stdout=subprocess.PIPE,
        check=True,
    )
    pointer_digest = ret.stdout
    # Byte length, as needed by the exact-byte-count "data" command below.
    datasize = len(pointer_digest)

    # sha256sum of the file
    # slightly wasteful because git-lfs-pointer also generates a sha256sum;
    # hashed in chunks so a large file is not loaded into memory at once.
    shasum = _sha256_of_file(raw_file)

    # Add the file to the repo
    out_dir = os.path.join('lfs', 'objects', shasum[0:2], shasum[2:4])
    os.makedirs(out_dir, exist_ok=True)
    shutil.copyfile(raw_file, os.path.join(out_dir, shasum))

    # Commit the data to master
    commit_time = int(time.time())
    basename = os.path.basename(raw_file)
    if relative_path:
        path = '{}/{}'.format(relative_path, basename)
    else:
        path = basename

    fromline = 'from {}\n'.format(last_commit) if last_commit else ''

    commit = (
        'commit {ref}\n'
        'committer {committer_name} <{committer_email}> {commit_time} +0000\n'
        'data <<EOM\n'
        'import {basename}\n'
        'EOM\n'
        '{fromline}'
        'M 100644 inline {path}\n'
        'data {datasize}\n'
        '{data}\n'
        '\n'
    ).format(
        ref=branch_ref,
        committer_name=committer_name, committer_email=committer_email, commit_time=commit_time,
        basename=basename,
        fromline=fromline,
        path=path,
        datasize=datasize, data=pointer_digest.decode('utf-8'))
    fast_import.write(commit.encode('utf-8'))


def get_last_commit():
    """Return the hash ``git rev-parse`` reports for ``branch_ref``.

    Returns:
        The stripped output of ``git rev-parse``, or ``None`` when the
        command exits non-zero or merely echoes the ref name back.
    """
    # show the full hash of the latest commit
    result = subprocess.run(
        ['git', 'rev-parse', branch_ref],
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    if result.returncode == 0:
        commit = result.stdout.strip()
        # Will be same string as input when no commits exist yet
        if commit != branch_ref:
            return commit
    return None


def main():
    """Import one raw file through ``git fast-import``.

    Command line: ``<file> [<relative path>]``.  Any other number of
    arguments prints a usage message and exits with status 1.

    When ``get_last_commit`` finds no existing commit, the LFS
    ``.gitattributes`` commit is written first.  The process exits with
    status 1 if ``git fast-import`` returns a non-zero status.
    """
    if not 2 <= len(sys.argv) <= 3:
        print('usage:', sys.argv[0], '<file>', '[<relative path>]')
        sys.exit(1)

    raw_file = sys.argv[1]
    relpath = sys.argv[2] if len(sys.argv) == 3 else None

    last_commit = get_last_commit()
    # The command is fixed, so pass an argv list and run git directly
    # rather than going through a shell.
    with subprocess.Popen(['git', 'fast-import', '--quiet'],
                          stdin=subprocess.PIPE) as import_proc:
        if not last_commit:
            commit_lfs_gitattributes(import_proc.stdin)

        commit_lfs_file(raw_file, relpath, last_commit, import_proc.stdin)
        # Close stdin before waiting so fast-import sees end of input.
        import_proc.stdin.close()
        if import_proc.wait() != 0:
            sys.exit(1)


# Run only when executed as a script, not when the module is imported.
if __name__ == '__main__':
    main()