summary | refs | log | tree | commit | diff
path: root/lorry.zip-importer
diff options
context:
space:
mode:
author: Ben Hutchings <ben.hutchings@codethink.co.uk> 2020-07-10 22:37:47 +0100
committer: Ben Hutchings <ben.hutchings@codethink.co.uk> 2020-07-15 00:35:34 +0100
commit: 47ac5b7452f5b6d112ac0bcda6250697ee2f6fa9 (patch)
tree: c2c4b5a407e50ae9f8e5dfa4c4e50048809d8f55 /lorry.zip-importer
parent: fea0ced99f95a593a89b52ea218884ad8bad2b8d (diff)
download: lorry-47ac5b7452f5b6d112ac0bcda6250697ee2f6fa9.tar.gz
lorry.zip-importer: Use Unix-format mtimes if available
The Zip file format is extensible, and there are several extensions to file metadata that provide a Unix-format mtime. These allow us to avoid the local time ambiguity altogether.
Diffstat (limited to 'lorry.zip-importer')
-rwxr-xr-x lorry.zip-importer 50
1 files changed, 44 insertions, 6 deletions
diff --git a/lorry.zip-importer b/lorry.zip-importer
index 3b1ddd6..dfb73eb 100755
--- a/lorry.zip-importer
+++ b/lorry.zip-importer
@@ -12,6 +12,7 @@
import calendar
import os.path
+import struct
import subprocess
import sys
import time
@@ -24,12 +25,52 @@ committer_name = 'Lorry Zip Importer'
committer_email = 'lorry-zip-importer@lorry'
+# File header 'extra' field tags
+EXT_TAG_UNIX0 = 0x000d # PKWARE Unix, aka Unix type 0
+EXT_TAG_TIME = 0x5455 # Extended Timestamp
+EXT_TIME_FLAG_MTIME = 1 # mtime present (and first)
+EXT_TAG_UNIX1 = 0x5855 # Info-ZIP Unix type 1
+
+
+# Iterate over fields within a file header 'extra' block
def zip_extra_fields(extra):
    """Yield ``(tag, payload)`` for each field in a zip 'extra' block.

    Every field starts with a 4-byte little-endian header: a 16-bit tag
    followed by a 16-bit payload size.  Iteration ends quietly when
    fewer than four bytes remain, or when a field's declared size runs
    past the end of ``extra`` (that truncated field is not yielded).
    """
    header_len = 4
    total = len(extra)
    offset = 0
    while offset + header_len <= total:
        tag, size = struct.unpack_from('<HH', extra, offset)
        payload_start = offset + header_len
        payload_end = payload_start + size
        if payload_end > total:
            # Declared size overruns the block; stop rather than yield
            # a short payload.
            break
        yield tag, extra[payload_start:payload_end]
        offset = payload_end
+
+
+# Make our best guess at the mtime of a zip file entry
def zip_info_mtime(info):
    """Return the best available mtime for a zip entry, as Unix seconds.

    The entry's 'extra' block is scanned in order and the first usable
    Unix-format modification time is returned.  If none is found, the
    main header's ``date_time`` is converted instead.

    ``info`` must provide ``extra`` (the raw extra block) and
    ``date_time`` (a 6-field tuple: year, month, day, hour, minute,
    second).
    """
    for tag, payload in zip_extra_fields(info.extra):
        if tag == EXT_TAG_UNIX0 or tag == EXT_TAG_UNIX1:
            # Skip the 4-byte access time; the modification time follows.
            layout = '<4xL'
        elif (tag == EXT_TAG_TIME and len(payload) >= 1
              and payload[0] & EXT_TIME_FLAG_MTIME):
            # The leading flags byte says mtime is present, and it
            # comes first.
            layout = '<xL'
        else:
            continue

        needed = struct.calcsize(layout)
        if len(payload) < needed:
            # Field is too short to hold the timestamp; keep looking.
            continue
        (mtime,) = struct.unpack(layout, payload[:needed])
        return mtime

    # Timestamps in the main header are in local time, but the time
    # zone offset is unspecified.  We choose to interpret them as UTC.
    return calendar.timegm(info.date_time + (0, 0, 0))
+
+
def export(zipfile, fast_import):
def printlines(list):
for str in list:
fast_import.write(str.encode('utf-8') + b"\n")
- commit_time = (1970, 1, 1, 0, 0, 0)
+ commit_time = 0
next_mark = 1
common_prefix = None
mark = dict()
@@ -40,8 +81,7 @@ def export(zipfile, fast_import):
continue
info = zip.getinfo(name)
- if commit_time < info.date_time:
- commit_time = info.date_time
+ commit_time = max(commit_time, zip_info_mtime(info))
if common_prefix == None:
common_prefix = name[:name.rfind('/') + 1]
else:
@@ -56,10 +96,8 @@ def export(zipfile, fast_import):
'data ' + str(info.file_size)))
fast_import.write(zip.read(name) + b"\n")
- # Zip file timestamps are in local time, but the time zone offset
- # is unspecified. We choose to interpret them as UTC.
committer = committer_name + ' <' + committer_email + '> %d +0000' % \
- calendar.timegm(commit_time + (0, 0, 0))
+ commit_time
zipfile_basename = os.path.basename(zipfile)
printlines(('commit ' + branch_ref, 'committer ' + committer, \