diff options
author | Ben Hutchings <ben.hutchings@codethink.co.uk> | 2020-07-10 22:37:47 +0100 |
---|---|---|
committer | Ben Hutchings <ben.hutchings@codethink.co.uk> | 2020-07-15 00:35:34 +0100 |
commit | 47ac5b7452f5b6d112ac0bcda6250697ee2f6fa9 (patch) | |
tree | c2c4b5a407e50ae9f8e5dfa4c4e50048809d8f55 /lorry.zip-importer | |
parent | fea0ced99f95a593a89b52ea218884ad8bad2b8d (diff) | |
download | lorry-47ac5b7452f5b6d112ac0bcda6250697ee2f6fa9.tar.gz |
lorry.zip-importer: Use Unix-format mtimes if available
The Zip file format is extensible, and there are several extensions to
file metadata that provide a Unix-format mtime. These allow us to
avoid the local time ambiguity of the main header timestamps (which are
in local time with an unspecified time zone offset) altogether.
Diffstat (limited to 'lorry.zip-importer')
-rwxr-xr-x | lorry.zip-importer | 50 |
1 file changed, 44 insertions, 6 deletions
diff --git a/lorry.zip-importer b/lorry.zip-importer index 3b1ddd6..dfb73eb 100755 --- a/lorry.zip-importer +++ b/lorry.zip-importer @@ -12,6 +12,7 @@ import calendar import os.path +import struct import subprocess import sys import time @@ -24,12 +25,52 @@ committer_name = 'Lorry Zip Importer' committer_email = 'lorry-zip-importer@lorry' +# File header 'extra' field tags +EXT_TAG_UNIX0 = 0x000d # PKWARE Unix, aka Unix type 0 +EXT_TAG_TIME = 0x5455 # Extended Timestamp +EXT_TIME_FLAG_MTIME = 1 # mtime present (and first) +EXT_TAG_UNIX1 = 0x5855 # Info-ZIP Unix type 1 + + +# Iterate over fields within a file header 'extra' block +def zip_extra_fields(extra): + pos = 0 + while len(extra) >= pos + 4: + tag, size = struct.unpack('<HH', extra[pos : pos + 4]) + pos += 4 + if len(extra) < pos + size: + return + yield tag, extra[pos : pos + size] + pos += size + + +# Make our best guess at the mtime of a zip file entry +def zip_info_mtime(info): + # Look for Unix-format mtime in the 'extra' block + for tag, data in zip_extra_fields(info.extra): + format = None + if tag in [EXT_TAG_UNIX0, EXT_TAG_UNIX1]: + format = '<4xL' # AcTime, ModTime + elif tag == EXT_TAG_TIME: + # First byte indicates which timestamps follow + if len(data) >= 1 and data[0] & EXT_TIME_FLAG_MTIME: + format = '<xL' # Flags, ModTime + if format: + min_len = struct.calcsize(format) + if len(data) >= min_len: + return struct.unpack(format, data[:min_len])[0] + + # Timestamps in the main header are in local time, but the time + # zone offset is unspecified. We choose to interpret them as UTC. 
+ return calendar.timegm(info.date_time + (0, 0, 0)) + + def export(zipfile, fast_import): def printlines(list): for str in list: fast_import.write(str.encode('utf-8') + b"\n") - commit_time = (1970, 1, 1, 0, 0, 0) + commit_time = 0 next_mark = 1 common_prefix = None mark = dict() @@ -40,8 +81,7 @@ def export(zipfile, fast_import): continue info = zip.getinfo(name) - if commit_time < info.date_time: - commit_time = info.date_time + commit_time = max(commit_time, zip_info_mtime(info)) if common_prefix == None: common_prefix = name[:name.rfind('/') + 1] else: @@ -56,10 +96,8 @@ def export(zipfile, fast_import): 'data ' + str(info.file_size))) fast_import.write(zip.read(name) + b"\n") - # Zip file timestamps are in local time, but the time zone offset - # is unspecified. We choose to interpret them as UTC. committer = committer_name + ' <' + committer_email + '> %d +0000' % \ - calendar.timegm(commit_time + (0, 0, 0)) + commit_time zipfile_basename = os.path.basename(zipfile) printlines(('commit ' + branch_ref, 'committer ' + committer, \ |