#!/usr/bin/env python3
## Note: Modified for Baserock lorry.
## zip archive frontend for git-fast-import
##
## For example:
##
## mkdir project; cd project; git init
## python3 import-zips.py *.zip
## git log --stat master
import calendar
import os.path
import struct
import subprocess
import sys
import time
from zipfile import ZipFile
# Branch that every imported archive is committed to, and its full ref
# name as written in the fast-import 'commit' and 'from' commands.
branch_name = 'master'
branch_ref = 'refs/heads/%s' % branch_name
# Identity written on the 'committer' and 'tagger' lines of the stream.
committer_name = 'Lorry Zip Importer'
committer_email = 'lorry-zip-importer@lorry'
# File header 'extra' field tags (the 16-bit tag that starts each field)
EXT_TAG_UNIX0 = 0x000d # PKWARE Unix, aka Unix type 0
EXT_TAG_TIME = 0x5455 # Extended Timestamp
# Bit in the first (flags) byte of an Extended Timestamp field
EXT_TIME_FLAG_MTIME = 1 # mtime present (and first)
EXT_TAG_UNIX1 = 0x5855 # Info-ZIP Unix type 1
# Iterate over fields within a file header 'extra' block
def zip_extra_fields(extra):
    """Yield ``(tag, payload)`` for each field in a zip 'extra' block.

    Every field starts with a 4-byte little-endian header (16-bit tag,
    16-bit payload size) followed by the payload bytes.  Iteration stops
    silently at the first field whose header or payload would run past
    the end of *extra*.
    """
    header_size = struct.calcsize('<HH')
    total = len(extra)
    offset = 0
    while offset + header_size <= total:
        tag, size = struct.unpack_from('<HH', extra, offset)
        start = offset + header_size
        end = start + size
        if end > total:
            # Truncated payload: ignore it and anything after it.
            break
        yield tag, extra[start:end]
        offset = end
# Make our best guess at the mtime of a zip file entry
def zip_info_mtime(info):
    """Return our best guess at a zip entry's mtime as a Unix timestamp.

    A Unix-format modification time found in the entry's 'extra' block
    takes precedence; the first usable field wins.  Otherwise the
    ``date_time`` from the main header is converted, treating it as UTC.
    """
    for tag, data in zip_extra_fields(info.extra):
        if tag == EXT_TAG_UNIX0 or tag == EXT_TAG_UNIX1:
            layout = '<4xL'  # AcTime (skipped), ModTime
        elif (tag == EXT_TAG_TIME and len(data) >= 1
                and data[0] & EXT_TIME_FLAG_MTIME):
            # First byte indicates which timestamps follow
            layout = '<xL'  # Flags (skipped), ModTime
        else:
            continue

        needed = struct.calcsize(layout)
        if len(data) < needed:
            # Field is too short to hold an mtime; try the next one.
            continue
        (mtime,) = struct.unpack(layout, data[:needed])
        return mtime

    # Timestamps in the main header are in local time, but the time
    # zone offset is unspecified.  We choose to interpret them as UTC.
    return calendar.timegm(info.date_time + (0, 0, 0))
def export(zipfile, fast_import):
    """Write one zip archive to *fast_import* as a git fast-import stream.

    The stream consists of one blob per file entry, a commit on
    ``branch_ref`` that replaces the whole tree ('deleteall') with the
    archive's files, and a tag named after the archive's basename
    (extension removed) pointing at that branch.

    zipfile     -- path of the zip archive to import
    fast_import -- writable binary stream, e.g. git fast-import's stdin

    The leading directory path shared by every file in the archive is
    stripped from the committed paths.  The commit and tag are dated
    with the newest entry mtime found (0 if the archive has no files).
    """
    def printlines(lines):
        # Commands are written as UTF-8 text, one per line.
        for line in lines:
            fast_import.write(line.encode('utf-8') + b"\n")

    commit_time = 0
    next_mark = 1
    common_prefix = None
    mark = dict()

    # The context manager closes the archive even if reading an entry or
    # writing to fast_import fails part-way through.
    with ZipFile(zipfile, 'r') as archive:
        for name in archive.namelist():
            # Names ending in '/' are skipped (directory entries carry
            # no file content to import).
            if name.endswith('/'):
                continue
            info = archive.getinfo(name)

            commit_time = max(commit_time, zip_info_mtime(info))
            if common_prefix is None:
                # First file: start with its whole directory path.
                common_prefix = name[:name.rfind('/') + 1]
            else:
                # Drop trailing path components until the prefix also
                # matches this file.
                while not name.startswith(common_prefix):
                    last_slash = common_prefix[:-1].rfind('/') + 1
                    common_prefix = common_prefix[:last_slash]

            mark[name] = ':' + str(next_mark)
            next_mark += 1

            # Announce the length of the bytes actually written rather
            # than the size claimed by the archive header, so the stream
            # stays well-formed even if the two ever disagree.
            contents = archive.read(name)
            printlines(('blob', 'mark ' + mark[name],
                        'data ' + str(len(contents))))
            fast_import.write(contents + b"\n")

    committer = '%s <%s> %d +0000' % (committer_name, committer_email,
                                      commit_time)

    zipfile_basename = os.path.basename(zipfile)
    printlines(('commit ' + branch_ref, 'committer ' + committer,
                'data <<EOM', 'Imported from ' + zipfile_basename + '.',
                'EOM', '', 'deleteall'))

    for name in mark:
        printlines(('M 100644 ' + mark[name] + ' ' +
                    name[len(common_prefix):],))

    zipname, _ = os.path.splitext(zipfile_basename)

    printlines(('', 'tag ' + zipname,
                'from ' + branch_ref, 'tagger ' + committer,
                'data <<EOM', 'Package ' + zipfile, 'EOM', ''))
def main():
    """Import every zip archive named on the command line into git.

    All archives are fed, in command-line order, to a single
    ``git fast-import --quiet`` process started in the current
    directory.  Exits with status 1 when no archive is given or when
    git fast-import returns a non-zero status.
    """
    if len(sys.argv) < 2:
        # Usage errors belong on stderr since we exit with a failure.
        print('usage:', sys.argv[0], '<zipfile>...', file=sys.stderr)
        sys.exit(1)

    # An argument list avoids spawning a shell for a fixed command.
    with subprocess.Popen(['git', 'fast-import', '--quiet'],
                          stdin=subprocess.PIPE) as import_proc:
        for zipfile in sys.argv[1:]:
            export(zipfile, import_proc.stdin)
        # Closing stdin marks the end of the stream so that fast-import
        # can finish and report its exit status.
        import_proc.stdin.close()
        if import_proc.wait() != 0:
            sys.exit(1)
# Run the importer only when executed as a script, so that importing
# this module has no side effects.
if __name__ == '__main__':
    main()