summary | refs | log | tree | commit | diff
path: root/lorry
diff options
context:
space:
mode:
author: Ben Hutchings <ben.hutchings@codethink.co.uk> 2020-07-10 21:48:38 +0100
committer: Ben Hutchings <ben.hutchings@codethink.co.uk> 2020-07-15 00:35:34 +0100
commit: 076e4a523167f550b56da9d540fcee7fcbcc290b (patch)
tree: 4971fa25943fe489aae1729020d91dda4f2a231d /lorry
parent: 9662091853cbb3e7c9d9962afce53e94aa238924 (diff)
download: lorry-076e4a523167f550b56da9d540fcee7fcbcc290b.tar.gz
lorry: Copy modification time from server to downloaded files
lorry.gzip-importer uses the given file's modification time (mtime) as the commit time, but this is currently unrelated to the mtime on the server and so is unreproducible. Copy the Last-Modified field of the urllib response header, if present, to the mtime (and atime) of a file after downloading it. For FTP downloads, this response field is synthesised by our FTP handler. Closes #4.
Diffstat (limited to 'lorry')
-rwxr-xr-x lorry 10
1 files changed, 10 insertions, 0 deletions
diff --git a/lorry b/lorry
index 4f7f574..eba2ef0 100755
--- a/lorry
+++ b/lorry
@@ -28,6 +28,7 @@ import traceback
import functools
import stat
import email.message
+import email.utils
import ftplib
import re
@@ -639,7 +640,16 @@ class Lorry(cliapp.Application):
with open(archive_dest, 'wb') as archive_file:
urlfile = urllib.request.urlopen(spec['url'])
archive_file.write(urlfile.read())
+ try:
+ # HTTP dates use (one of) the email date formats
+ url_date = email.utils.mktime_tz(
+ email.utils.parsedate_tz(
+ urlfile.info()['Last-Modified']))
+ except (KeyError, ValueError, TypeError):
+ url_date = None
urlfile.close()
+ if url_date:
+ os.utime(archive_dest, (url_date, url_date))
except Exception:
if os.path.exists(archive_dest):
os.unlink(archive_dest)